http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10792
Revision: 10792
Author:   yurik
Date:     2012-12-13 01:08:33 +0000 (Thu, 13 Dec 2012)

Log Message:
-----------
Reworked CaseChecker for a number of new possibilities (redirects, etc.).
Minor compile fixes.
Modified Paths:
--------------
    trunk/pywikipedia/casechecker.py
    trunk/pywikipedia/pageimport.py
    trunk/pywikipedia/standardize_notes.py
    trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/casechecker.py
===================================================================
--- trunk/pywikipedia/casechecker.py	2012-12-12 15:58:41 UTC (rev 10791)
+++ trunk/pywikipedia/casechecker.py	2012-12-13 01:08:33 UTC (rev 10792)
@@ -3,25 +3,24 @@
 """
 Script to enumerate all pages on the wiki and find all titles with
 mixed Latin and Cyrillic alphabets.
 """
+#
+# (C) Pywikipedia bot team, 2006-2012
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
 #
 # Permutations code was taken from
 # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/190465
 #
-
 def xuniqueCombinations(items, n):
     if n==0: yield []
     else:
         for i in xrange(len(items)):
             for cc in xuniqueCombinations(items[i+1:], n-1):
                 yield [items[i]] + cc
-
 # End of permutation code
 #
-# (C) Pywikipedia bot team, 2006-2012
-#
-# Distributed under the terms of the MIT license.
-#
-__version__ = '$Id$'
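For context, the recycled helper yields every n-element combination of the
input, in order; the checker later uses it to try each subset of ambiguous
words as Cyrillic. A quick sketch of its behaviour:

    >>> list(xuniqueCombinations(['a', 'b', 'c'], 2))
    [['a', 'b'], ['a', 'c'], ['b', 'c']]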
 #
 # Windows Console colors
@@ -75,6 +74,14 @@
         'en': u'mixed case rename',
         'ru': u'[[ВП:КЛ]]',
     }
+    msgDeleteRedirect = {
+        'en': u'This redirect contains identical looking Cyrillic and Latin letters in its title',
+        'ru': u'[[ВП:КЛ]] Перенаправление содержит смесь кириллицы и латиницы в названии',
+    }
+    textDeleteRedirect = {
+        'en': u'{{db-r3|bot=CaseChecker}}\n\nThis redirect used to point to %s',
+        'ru': u'{{Db-redirtypo|[[ВП:КЛ]] Перенаправление на %s содержало смесь кириллицы и латиницы в названии}}',
+    }
     msgLinkReplacement = {
         'en': u'Case Replacements',
         'ar': u'استبدالات الحالة',
@@ -105,7 +112,7 @@
         u'^[' + romanNumChars + ']+[' + localLowerLtr + ']+$')
     whitelists = {
-        'ru': u'User:Rubinbot/Whitelist',
+        'ru': u'ВП:КЛ/Проверенные',
     }
     latLtr = u'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
@@ -114,19 +121,25 @@
     latClrFnt = u'<font color=brown>'
     suffixClr = u'</font>'
-    wordBreaker = re.compile(u'[ _-/|#[]()]')
+    wordBreaker = re.compile(u'[ _-/|#[]():]')
+    stripChars = u' \t,'
     titles = True
     links = False
-    aplimit = 500
+    aplimit = None
     apfrom = u''
     title = None
     replace = False
-    stopAfter = 0
+    stopAfter = -1
     wikilog = None
     wikilogfile = 'wikilog.txt'
+    failedTitles = 'failedTitles.txt'
+    nosuggestions = 'nosuggestions.txt'
+    doFailed = False
+    titleList = None
     autonomous = False
     namespaces = []
+    filterredir = 'nonredirects'

     def __init__(self):
@@ -145,6 +158,10 @@
                 self.titles = False
             elif arg == '-replace':
                 self.replace = True
+            elif arg == '-redir':
+                self.filterredir = 'all'
+            elif arg == '-redironly':
+                self.filterredir = 'redirects'
             elif arg.startswith('-limit:'):
                 self.stopAfter = int(arg[7:])
             elif arg == '-autonomous' or arg == '-a':
@@ -153,26 +170,42 @@
                 self.namespaces.append( int(arg[4:]) )
             elif arg.startswith('-wikilog:'):
                 self.wikilogfile = arg[9:]
+            elif arg.startswith('-failedlog:'):
+                self.failedTitles = arg[11:]
+            elif arg == '-failed':
+                self.doFailed = True
             else:
                 pywikibot.output(u'Unknown argument %s.' % arg)
                 pywikibot.showHelp()
+                pywikibot.stopme()
                 sys.exit()
-        if self.namespaces == []:
+        if self.namespaces == [] and not self.doFailed:
             if self.apfrom == u'':
                 # 0 should be after templates ns
                 self.namespaces = [14, 10, 12, 0]
             else:
                 self.namespaces = [0]
-        self.params = { 'action'        : 'query',
-                        'generator'     : 'allpages',
-                        'gaplimit'      : self.aplimit,
-                        'gapfilterredir': 'nonredirects'}
+        if self.aplimit is None:
+            self.aplimit = 200 if self.links else 'max'
+        if not self.doFailed:
+            self.queryParams = { 'action'        : 'query',
+                                 'generator'     : 'allpages',
+                                 'gaplimit'      : self.aplimit,
+                                 'gapfilterredir': self.filterredir}
+        else:
+            self.queryParams = { 'action' : 'query' }
+            if self.apfrom != u'': pywikibot.output(u'Argument "-from" is ignored with "-failed"')
+
+        propParam = 'info'
         if self.links:
-            self.params['prop'] = 'links|categories'
+            propParam += '|links|categories'
+            self.queryParams['pllimit'] = 'max'
+            self.queryParams['cllimit'] = 'max'

+        self.queryParams['prop'] = propParam
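For illustration (hypothetical flag combination, values taken from the
defaults above): with link checking enabled and -redir given, the assembled
request would be

    queryParams = {
        'action'        : 'query',
        'generator'     : 'allpages',
        'gaplimit'      : 200,              # aplimit default once links are checked
        'gapfilterredir': 'all',            # from -redir
        'pllimit'       : 'max',
        'cllimit'       : 'max',
        'prop'          : 'info|links|categories',
    }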
         self.site = pywikibot.getSite()
@@ -183,27 +216,32 @@
         if not os.path.isabs(self.wikilogfile):
             self.wikilogfile = pywikibot.config.datafilepath(self.wikilogfile)
-        try:
-            self.wikilog = codecs.open(self.wikilogfile, 'a', 'utf-8')
-        except IOError:
-            self.wikilog = codecs.open(self.wikilogfile, 'w', 'utf-8')
+        self.wikilog = self.OpenLogFile(self.wikilogfile)
+        if not os.path.isabs(self.failedTitles):
+            self.failedTitles = pywikibot.config.datafilepath(self.failedTitles)
+
+        if self.doFailed:
+            with codecs.open(self.failedTitles, 'r', 'utf-8') as f:
+                self.titleList = [self.Page(t) for t in f]
+            self.failedTitles += '.failed'
+
         self.lclToLatDict = dict([(ord(self.localSuspects[i]), self.latinSuspects[i])
-                                  for i in range(len(self.localSuspects))])
+                                  for i in xrange(len(self.localSuspects))])
         self.latToLclDict = dict([(ord(self.latinSuspects[i]), self.localSuspects[i])
-                                  for i in range(len(self.localSuspects))])
+                                  for i in xrange(len(self.localSuspects))])
         if self.localKeyboard is not None:
             self.lclToLatKeybDict = dict(
                 [(ord(self.localKeyboard[i]), self.latinKeyboard[i])
-                 for i in range(len(self.localKeyboard))])
+                 for i in xrange(len(self.localKeyboard))])
             self.latToLclKeybDict = dict(
                 [(ord(self.latinKeyboard[i]), self.localKeyboard[i])
-                 for i in range(len(self.localKeyboard))])
+                 for i in xrange(len(self.localKeyboard))])
         else:
             self.lclToLatKeybDict = {}
             self.latToLclKeybDict = {}
@@ -216,6 +254,10 @@
                                  self.localLtr))
         # Get whitelist
+        self.knownWords = set()
+        self.seenUnresolvedLinks = set()
+
+        # TODO: handle "continue"
         if self.site.lang in self.whitelists:
             wlpage = self.whitelists[self.site.lang]
             pywikibot.output(u'Loading whitelist from %s' % wlpage)
@@ -225,227 +267,255 @@
                 'titles'       : wlpage,
                 'redirects'    : '',
                 'indexpageids' : '',
+                'pllimit'      : 'max',
             }
-            data = query.GetData(wlparams, pywikibot.getSite(self.site.lang),
-                                 encodeTitle=False)
+            data = query.GetData(wlparams)
             if len(data['query']['pageids']) == 1:
                 pageid = data['query']['pageids'][0]
                 links = data['query']['pages'][pageid]['links']
-                self.knownWords = set( [n['title'] for n in links] )
+
+                allWords = [nn for n in links for nn in self.FindBadWords(n['title'])]
+
+                self.knownWords = set(allWords)
+#                kw = set()
+#                for w in allWords:
+#                    if len(self.ProcessTitle(w)[1]) > 0:
+#                        kw.add(w)
+#                self.knownWords = kw
+
             else:
-                raise "The number of pageids is not 1"
-            pywikibot.output(u'Loaded whitelist with %i items'
-                             % len(self.knownWords))
+                raise ValueError(u'The number of pageids is not 1')
+
+            pywikibot.output(u'Loaded whitelist with %i items' % len(self.knownWords))
             if pywikibot.verbose and len(self.knownWords) > 0:
-                pywikibot.output(
-                    u'Whitelist: [[%s]]' % u']], [['.join(self.knownWords))
+                pywikibot.output(u'Whitelist: %s' % u', '.join([self.MakeLink(i, False) for i in self.knownWords]))
         else:
             pywikibot.output(u'Whitelist is not known for language %s'
                              % self.site.lang)
-            self.knownWords = set()
+    def RunQuery(self, params):
+        while True:
+            # Get data
+            data = query.GetData(params)
+
+            # Process received data
+            yield data
+
+            # Clear any continuations first
+            if 'clcontinue' in params: del params['clcontinue']
+            if 'plcontinue' in params: del params['plcontinue']
+
+            if 'query-continue' not in data:
+                if 'gapcontinue' in params: del params['gapcontinue']
+                break
+
+            qc = data['query-continue']
+            # First continue properties only, once done, continue with allpages
+            if 'categories' in qc or 'links' in qc:
+                if 'categories' in qc: params.update(qc['categories'])
+                if 'links' in qc: params.update(qc['links'])
+            elif 'allpages' in qc:
+                params.update(qc['allpages'])
+            else:
+                raise ValueError(u'Unexpected query-continue values: %s' % qc)
+            continue
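An illustrative sketch of the continuation handling above (the response
shape is hypothetical, following the old query-continue protocol):

    data = {
        'query': {'pages': {}},  # page block omitted
        'query-continue': {
            'links':    {'plcontinue': u'12345|0|Foo'},
            'allpages': {'gapcontinue': u'Bar'},
        },
    }

RunQuery folds 'plcontinue' (and 'clcontinue') back into the request and
queries again; 'gapcontinue' is applied only once no property continuation
remains, so the generator never advances to the next block of pages while
links or categories of the current block are still incomplete.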
     def Run(self):
         try:
-            count = 0
-            lastLetter = ''
-            for namespace in self.namespaces:
-                self.params['gapnamespace'] = namespace
-                title = None
+            self.lastLetter = ''

-                while True:
-                    # Get data
-                    self.params['gapfrom'] = self.apfrom
-                    data = query.GetData(self.params, self.site)
-                    if 'query-continue' in data:
-                        self.params.update(data['query-continue']['allpages'])
-                    else:
-                        self.apfrom = None
+            if not self.doFailed:
+                for namespace in self.namespaces:
+                    self.currentTitle = None
+                    self.queryParams['gapnamespace'] = namespace
+                    self.queryParams['gapfrom'] = self.apfrom
+                    for data in self.RunQuery(self.queryParams):
+                        self.ProcessDataBlock(data)
+            else:
+                self.currentTitle = None
+                batchSize = 10
+                for batchStart in xrange(0, len(self.titleList), batchSize):
+                    self.queryParams['titles'] = self.titleList[batchStart:batchStart+batchSize]
+                    for data in self.RunQuery(self.queryParams):
+                        self.ProcessDataBlock(data)
-                    # Process received data
-                    if 'query' in data and 'pages' in data['query']:
-                        firstItem = True
-                        for pageID, page in data['query']['pages'].iteritems():
-                            printed = False
-                            title = page['title']
-                            if firstItem:
-                                if lastLetter != title[0]:
-                                    try:
-                                        print 'Processing %s' % title
-                                    except:
-                                        print 'Processing unprintable title'
-                                    lastLetter = title[0]
-                                firstItem = False
-                            if self.titles:
-                                err = self.ProcessTitle(title)
-                                if err:
-                                    changed = False
-                                    if self.replace:
-                                        newTitle = self.PickTarget(False,
-                                                                   title,
-                                                                   title,
-                                                                   err[1])
-                                        if newTitle:
-                                            editSummary = pywikibot.translate(
-                                                self.site, self.msgRename)
-                                            src = pywikibot.Page(self.site,
-                                                                 title)
-                                            if page['ns'] == 14:
-                                                import category
-                                                dst = pywikibot.Page(self.site,
-                                                                     newTitle)
-                                                bot = category.CategoryMoveRobot(
-                                                    src.title(withNamespace=False),
-                                                    dst.title(withNamespace=False),
-                                                    self.autonomous,
-                                                    editSummary, True)
-                                                bot.run()
-                                            else:
-                                                src.move(newTitle, editSummary)
-                                            changed = True
+            print "*" * 29, "Done"
-                                    if not changed:
-                                        self.WikiLog(u"* " + err[0])
-                                        printed = True
+        except:
+            pywikibot.output(u'Exception at Title = %s, Next = %s' % (self.currentTitle, self.apfrom))
+            try:
+                import traceback
+                pywikibot.output(traceback.format_exc())
+            except:
+                pywikibot.output(u'Unable to print exception info')
+            raise
-                            if self.links:
-                                allLinks = None
-                                if 'links' in page:
-                                    allLinks = page['links']
-                                if 'categories' in page:
-                                    if allLinks:
-                                        allLinks = allLinks + page['categories']
-                                    else:
-                                        allLinks = page['categories']
+    def ProcessDataBlock(self, data):
+        if 'query' not in data or 'pages' not in data['query']:
+            return
-                                if allLinks:
-                                    pageObj = None
-                                    pageTxt = None
-                                    msg = []
+        firstItem = True
+        for pageID, page in data['query']['pages'].iteritems():
+
+            printed = False
+            title = page['title']
+            self.currentTitle = title
-                                    for l in allLinks:
-                                        ltxt = l['title']
-                                        err = self.ProcessTitle(ltxt)
-                                        if err:
-                                            newTitle = None
-                                            if self.replace:
-                                                newTitle = self.PickTarget(
-                                                    True, title, ltxt, err[1])
-                                            if newTitle:
-                                                if pageObj is None:
-                                                    pageObj = pywikibot.Page(
-                                                        self.site, title)
-                                                    pageTxt = pageObj.get()
-                                                msg.append(
-                                                    u'[[%s]] => [[%s]]'
-                                                    % (ltxt, newTitle))
-#                                                pageTxt = pageTxt.replace(ltxt, newTitle)
-#                                                pageTxt = pageTxt.replace(ltxt[0].lower() + ltxt[1:], newTitle[0].lower() + newTitle[1:])
-#                                                pageTxt = pageTxt.replace(ltxt.replace(u' ', '_'), newTitle)
+            if 'missing' in page:
+                continue
-                                                frmParts = self.wordBreaker.split(
-                                                    ltxt)
-                                                toParts = self.wordBreaker.split(
-                                                    newTitle)
-                                                if len(frmParts) != len(
-                                                        toParts):
-                                                    raise ValueError(
-                                                        u'Splitting parts do not match counts')
-                                                for i in range(0,
-                                                               len(frmParts)):
-                                                    if len(frmParts[i]) != len(
-                                                            toParts[i]):
-                                                        raise ValueError(
-                                                            u'Splitting parts do not match word length')
-                                                    if len(frmParts[i]) > 0:
-                                                        pageTxt = pageTxt.replace(
-                                                            frmParts[i], toParts[i])
-                                                        pageTxt = pageTxt.replace(
-                                                            frmParts[i][0].lower() + frmParts[i][1:],
-                                                            toParts[i][0].lower() + toParts[i][1:])
+            if firstItem:
+                if self.lastLetter != title[0]:
+                    pywikibot.ui.output('Processing %s\n' % title)
+                    self.lastLetter = title[0]
+                firstItem = False
-                                            if not newTitle:
-                                                if not printed:
-                                                    self.WikiLog(
-                                                        u"* [[:%s]]: link to %s" % (title, err[0]))
-                                                    printed = True
-                                                else:
-                                                    self.WikiLog(
-                                                        u"** link to %s"
-                                                        % err[0])
+            if self.titles:
+                err = self.ProcessTitle(title)
+                if err:
+                    changed = False
+                    if self.replace:
+                        if len(err[1]) == 1:
+                            newTitle = err[1][0]
+                            # choice = pywikibot.inputChoice(u'Move %s to %s?' % (title, newTitle), ['Yes', 'No'], ['y', 'n'])
+                            editSummary = pywikibot.translate(self.site, self.msgRename)
+                            dst = self.Page(newTitle)
+                            if 'redirect' in page:
+                                src = self.Page(title)
+                                redir = src.getRedirectTarget()
+                                redirTitle = redir.title(asLink = True, textlink = True)
-                                    if pageObj is not None:
-                                        coloredMsg = u', '.join(
-                                            [self.ColorCodeWord(m)
-                                             for m in msg])
-                                        if pageObj.get() == pageTxt:
-                                            self.WikiLog(
-                                                u"* Error: Text replacement failed in [[:%s]] (%s)" % (title, coloredMsg))
-                                        else:
-                                            pywikibot.output(
-                                                u'Case Replacements: %s'
-                                                % u', '.join(msg))
-                                            try:
-                                                pageObj.put(
-                                                    pageTxt,
-                                                    u'%s: %s'
-                                                    % (pywikibot.translate(
-                                                        self.site,
-                                                        self.msgLinkReplacement),
-                                                       u', '.join(msg)))
-                                            except KeyboardInterrupt:
-                                                raise
-                                            except:
-                                                self.WikiLog(
-                                                    u"* Error: Could not save updated page [[:%s]] (%s)" % (title, coloredMsg))
+                                if not dst.exists():
+                                    src.move(newTitle, editSummary, movesubpages=True)
+                                    changed = True
+                                replErrors = False
+                                for p in src.getReferences(follow_redirects = False):
+                                    if p.namespace() == 2:
+                                        continue
+                                    oldText = p.get(get_redirect = True)
+                                    newText = self.ReplaceLink(oldText, title, newTitle)
+                                    if not self.PutNewPage(p, newText, [self.MakeMoveSummary(title, newTitle)]):
+                                        replErrors = True
+                                if not replErrors:
+                                    editSummary = pywikibot.translate(self.site, self.msgDeleteRedirect)
+                                    newText = pywikibot.translate(self.site, self.textDeleteRedirect) % redirTitle
+                                    src.put(newText, editSummary, minorEdit=False)
+                                    changed = True
-                            count += 1
-                            if self.stopAfter > 0 and count == self.stopAfter:
-                                raise "Stopping because we are done"
+                            elif not dst.exists():
+                                src = self.Page(title)
+                                if page['ns'] == 14:
+                                    import category
+                                    dst = self.Page(newTitle)
+                                    bot = category.CategoryMoveRobot(
+                                        src.title(withNamespace=False),
+                                        dst.title(withNamespace=False),
+                                        self.autonomous,
+                                        editSummary + u' ' + self.MakeMoveSummary(title, newTitle),
+                                        True)
+                                    bot.run()
+                                else:
+                                    src.move(newTitle, editSummary, movesubpages=True)
+                                changed = True
-                    if self.apfrom is None:
-                        break
+                    if not changed:
+                        if len(err[1]) > 0:
+                            self.AppendLineToLog(self.failedTitles, title)
+                        else:
+                            self.AddNoSuggestionTitle(title)
-                self.apfrom = u''    # Restart apfrom for other namespaces
+                        self.WikiLog(u"* " + err[0])
+                        printed = True
- print "*" * 29, "Done" + if self.links: + allLinks = None + if 'links' in page: + allLinks = page['links'] + if 'categories' in page: + if allLinks: + allLinks = allLinks + page['categories'] + else: + allLinks = page['categories']
-        except:
-            if self.apfrom is not None:
-                pywikibot.output(u'Exception at Title = %s, Next = %s' % (title, self.apfrom))
-            raise
+            if allLinks:
+                pageObj = None
+                pageTxt = None
+                msg = []
+                foundSuggestions = False
+                for l in allLinks:
+                    ltxt = l['title']
+                    err = self.ProcessTitle(ltxt)
+                    if err:
+                        if len(err[1]) > 0:
+                            foundSuggestions = True
+                        elif self.AddNoSuggestionTitle(ltxt):
+                            continue
+
+                        newTitle = None
+                        if self.replace:
+                            newTitle = self.PickTarget(title, ltxt, err[1])
+                            if newTitle:
+                                if pageObj is None:
+                                    pageObj = self.Page(title)
+                                    pageTxt = pageObj.get()
+
+                                msg.append(self.MakeMoveSummary(ltxt, newTitle))
+
+                                pageTxt = self.ReplaceLink(pageTxt, ltxt, newTitle)
+
+                        if not newTitle:
+                            if not printed:
+                                self.WikiLog(u"* %s: link to %s" % (self.MakeLink(title, False), err[0]))
+                                printed = True
+                            else:
+                                self.WikiLog(u"** link to %s" % err[0])
+
+                if pageObj is not None:
+                    if self.PutNewPage(pageObj, pageTxt, msg):
+                        # done, no need to log anything
+                        foundSuggestions = False
+
+                if foundSuggestions:
+                    self.AppendLineToLog(self.failedTitles, title)
+
+            if self.stopAfter > 0:
+                self.stopAfter -= 1
+                if self.stopAfter == 0:
+                    raise ValueError(u'Stopping because we are done')
+
     def WikiLog(self, text):
         pywikibot.output(text)
         self.wikilog.write(text + u'\n')
         self.wikilog.flush()
-    def ProcessTitle(self, title):
-
-        found = False
+    def FindBadWords(self, title):
         for m in self.badWordPtrn.finditer(title):
+            yield title[m.span()[0] : m.span()[1]]
-            badWord = title[m.span()[0] : m.span()[1]]
-            if badWord in self.knownWords:
-                continue
+    def ProcessTitle(self, title):
-            # Allow any roman numerals with local suffixes
-            if self.romanNumSfxPtrn.match(badWord) is not None:
-                continue
+        badWords = list(self.FindBadWords(title))
+
+        if len(badWords) > 0:
+            # Allow known words, allow any roman numerals with local suffixes
+            badWords = set([i for i in badWords if i not in self.knownWords and self.romanNumSfxPtrn.match(i) is not None])
+
+        if len(badWords) == 0 or self.Page(title).isImage():
+            return None
+
+        count = 0
+        ambigBadWords = set()
+        ambigBadWordsCount = 0
+        mapLcl = {}
+        mapLat = {}
-            if not found:
-                # lazy-initialization of the local variables
-                possibleWords = []
-                tempWords = []
-                count = 0
-                duplWordCount = 0
-                ambigBadWords = set()
-                ambigBadWordsCount = 0
-                mapLcl = {}
-                mapLat = {}
-                found = True
+        for badWord in badWords:
             # See if it would make sense to treat the whole word as either
             # Cyrillic or Latin
@@ -457,7 +527,7 @@
                 else:
                     if mightBeLcl and l not in self.latinSuspects:
                         mightBeLcl = False
-                    if l not in self.latLtr: raise "Assert failed"
+                    if l not in self.latLtr: raise ValueError(u'Assert failed')
             # Some words are well known and frequently mixed-typed
             if mightBeLcl and mightBeLat:
@@ -474,11 +544,28 @@
                     ambigBadWords.add(badWord)
                    # Cannot do len(ambigBadWords) because they might be duplicates
                     ambigBadWordsCount += 1
+            if not mightBeLcl and not mightBeLat:
+                # try to match one of the knownWords
+                bwLen = len(badWord)
+                kw = [w for w in self.knownWords if len(w) == bwLen]
+                for p in xrange(bwLen):
+                    if len(kw) == 0:
+                        break
+                    c = badWord[p]
+                    co = ord(c)
+                    if co in self.latToLclDict:
+                        c2 = self.latToLclDict[co]
+                    elif co in self.lclToLatDict:
+                        c2 = self.lclToLatDict[co]
+                    else:
+                        c2 = None
+                    kw = [w for w in kw if p < len(w) and (w[p] == c or (c2 is not None and w[p] == c2))]
+                if len(kw) > 1:
+                    pywikibot.output(u"Word '%s' could be treated as more than one known words" % badWord)
+                elif len(kw) == 1:
+                    mapLcl[badWord] = kw[0]
             count += 1
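A runnable rendering of the matching idea above (the whitelist entry, the
word, and the two suspect maps are all invented here; the real tables come
from localSuspects/latinSuspects):

    latToLcl = {ord(u'x'): u'х'}           # Latin x -> Cyrillic х
    lclToLat = {ord(u'х'): u'x'}           # Cyrillic х -> Latin x
    knownWords = set([u'Яndex'])
    badWord = u'Яndeх'                     # final letter typed as Cyrillic
    kw = [w for w in knownWords if len(w) == len(badWord)]
    for p in xrange(len(badWord)):
        c = badWord[p]
        c2 = latToLcl.get(ord(c)) or lclToLat.get(ord(c))
        kw = [w for w in kw if w[p] == c or (c2 is not None and w[p] == c2)]
    print kw[0]                            # Яndex - a unique candidate

Since u'Я' has no Latin lookalike and u'n', u'd', u'e' have no Cyrillic
ones, neither an all-Cyrillic nor an all-Latin reading applies, yet the
word still resolves to exactly one whitelisted spelling.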
-        if not found:
-            return None
-
         infoText = self.MakeLink(title)
         possibleAlternatives = []
@@ -506,7 +593,7 @@
             # combinations from the bad words list, and convert just the
             # picked words to Cyrillic, whereas making all other words as
             # Latin characters.
-            for itemCntToPick in range(0, len(ambigBadWords)+1):
+            for itemCntToPick in xrange(0, len(ambigBadWords)+1):
                 title2 = title
                 for uc in xuniqueCombinations(list(ambigBadWords),
                                               itemCntToPick):
@@ -526,70 +613,63 @@
         return (infoText, possibleAlternatives)
-    def PickTarget(self, isLink, title, original, candidates):
+    def PickTarget(self, title, original, candidates):
+        if len(candidates) == 0: return None
-        if isLink:
-            if len(candidates) == 1:
-                return candidates[0]
+        if len(candidates) == 1:
+            return candidates[0]
-            pagesDontExist = []
-            pagesRedir = {}
-            pagesExist = []
+        pagesDontExist = []
+        pagesRedir = {}
+        pagesExist = []
-            for newTitle in candidates:
-                dst = pywikibot.Page(self.site, newTitle)
-                if not dst.exists():
-                    pagesDontExist.append(newTitle)
-                elif dst.isRedirectPage():
-                    pagesRedir[newTitle] = dst.getRedirectTarget().title()
-                else:
-                    pagesExist.append(newTitle)
+        for newTitle in candidates:
+            dst = self.Page(newTitle)
+            if not dst.exists():
+                pagesDontExist.append(newTitle)
+            elif dst.isRedirectPage():
+                pagesRedir[newTitle] = dst.getRedirectTarget().title()
+            else:
+                pagesExist.append(newTitle)
-            if len(pagesExist) == 1:
-                return pagesExist[0]
-            elif len(pagesExist) == 0 and len(pagesRedir) > 0:
-                if len(pagesRedir) == 1:
-                    return pagesRedir.keys()[0]
-                t = None
-                for k,v in pagesRedir.iteritems():
-                    if not t:
-                        t = v  # first item
-                    elif t != v:
-                        break
-                else:
-                    # all redirects point to the same target
-                    # pick the first one, doesn't matter what it is
-                    return pagesRedir.keys()[0]
+        if len(pagesExist) == 1:
+            return pagesExist[0]
+        elif len(pagesExist) == 0 and len(pagesRedir) > 0:
+            if len(pagesRedir) == 1:
+                return pagesRedir.keys()[0]
+            t = None
+            for k, v in pagesRedir.iteritems():
+                if not t:
+                    t = v  # first item
+                elif t != v:
+                    break
+            else:
+                # all redirects point to the same target
+                # pick the first one, doesn't matter what it is
+                return pagesRedir.keys()[0]
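An aside on the for/else retained above (illustrative values): the else
branch runs only when the loop finishes without hitting break, i.e. when
every redirect shares one target:

    targets = {u'R1': u'T', u'R2': u'T'}
    t = None
    for k, v in targets.iteritems():
        if not t:
            t = v
        elif t != v:
            break                  # mixed targets: else is skipped
    else:
        print u'all point to', t   # reached only without a break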
-            if not self.autonomous:
-                pywikibot.output(u'Could not auto-decide for page [[%s]]. Which link should be chosen?' % title)
-                pywikibot.output(u'Original title: ', newline=False)
-                self.ColorCodeWord(original + "\n", True)
-                count = 1
-                for t in candidates:
-                    if t in pagesDontExist: msg = u'missing'
-                    elif t in pagesRedir: msg = u'Redirect to ' + pagesRedir[t]
-                    else: msg = u'page exists'
-                    self.ColorCodeWord(u'  %d: %s (%s)\n'
-                                       % (count, t, msg), True)
-                    count += 1
+        if not self.autonomous:
+            pywikibot.output(u'Could not auto-decide for page %s. Which link should be chosen?' % self.MakeLink(title, False))
+            pywikibot.output(u'Original title: ', newline=False)
+            self.ColorCodeWord(original + "\n", True)
+            count = 1
+            for t in candidates:
+                if t in pagesDontExist: msg = u'missing'
+                elif t in pagesRedir: msg = u'Redirect to ' + pagesRedir[t]
+                else: msg = u'page exists'
+                self.ColorCodeWord(u'  %d: %s (%s)\n' % (count, t, msg), True)
+                count += 1
-                answers = [str(i) for i in range(0, count)]
-                choice = int(pywikibot.inputChoice(
-                    u'Which link to choose? (0 to skip)',
-                    answers, [a[0] for a in answers]))
-                if choice > 0:
-                    return candidates[choice-1]
+            answers = [str(i) for i in xrange(0, count)]
+            choice = int(pywikibot.inputChoice(
+                u'Which link to choose? (0 to skip)',
+                answers, [a[0] for a in answers]))
+            if choice > 0:
+                return candidates[choice-1]
-        else:
-            if len(candidates) == 1:
-                newTitle = candidates[0]
-                dst = pywikibot.Page(self.site, newTitle)
-                if not dst.exists():
-                    # choice = pywikibot.inputChoice(u'Move %s to %s?' % (title, newTitle), ['Yes', 'No'], ['y', 'n'])
-                    return newTitle
         return None
     def ColorCodeWord(self, word, toScreen = False):
@@ -620,10 +700,94 @@
             if toScreen: SetColor(FOREGROUND_WHITE)
             else: return res + self.suffixClr + u"</b>"
-    def MakeLink(self, title):
-        return u"[[:%s|««« %s »»»]]" % (title, self.ColorCodeWord(title))
+    def AddNoSuggestionTitle(self, title):
+        if title in self.seenUnresolvedLinks:
+            return True
+        self.seenUnresolvedLinks.add(title)
+        params = {
+            'action'  : 'query',
+            'list'    : 'backlinks',
+            'bltitle' : title,
+            'bllimit' : '50',
+        }
+        data = query.GetData(params)
+        cl = 0
+        redirs = 0
+        if 'backlinks' in data['query']:
+            bl = data['query']['backlinks']
+            cl = len(bl)
+            redirs = len([i for i in bl if 'redirect' in i])
+
+        if cl > 0 and 'query-continue' in data:
+            count = '50+'
+        else:
+            count = str(cl if cl > 0 else 'no backlinks')
+
+        self.AppendLineToLog(self.nosuggestions, u'* %s (%s%s)' %
+                             (self.MakeLink(title), count, u', %d redirects' % redirs if redirs > 0 else u''))
+        return False
+
+    def PutNewPage(self, pageObj, pageTxt, msg):
+        title = pageObj.title(asLink = True, textlink = True)
+        coloredMsg = u', '.join([self.ColorCodeWord(m) for m in msg])
+        if pageObj.get(get_redirect = True) == pageTxt:
+            self.WikiLog(u"* Error: Text replacement failed in %s (%s)" % (self.MakeLink(title, False), coloredMsg))
+        else:
+            pywikibot.output(u'Case Replacements: %s' % u', '.join(msg))
+            try:
+                pageObj.put(
+                    pageTxt,
+                    u'%s: %s'
+                    % (pywikibot.translate(
+                        self.site,
+                        self.msgLinkReplacement),
+                       u', '.join(msg)))
+                return True
+            except KeyboardInterrupt:
+                raise
+            except:
+                self.WikiLog(u"* Error: Could not save updated page %s (%s)" % (self.MakeLink(title, False), coloredMsg))
+        return False
+
+    def MakeMoveSummary(self, fromTitle, toTitle):
+        return u'[[%s]]→[[%s]]' % (fromTitle, toTitle)
+
+    def MakeLink(self, title, colorcode=True):
+        prf = u'' if self.Page(title).namespace() == 0 else u':'
+        cc = u'|««« %s »»»' % self.ColorCodeWord(title) if colorcode else u''
+        return u"[[%s%s%s]]" % (prf, title, cc)
+
+    def OpenLogFile(self, filename):
+        try:
+            return codecs.open(filename, 'a', 'utf-8')
+        except IOError:
+            return codecs.open(filename, 'w', 'utf-8')
+
+    def AppendLineToLog(self, filename, text):
+        with self.OpenLogFile(filename) as f:
+            f.write(text + u'\n')
+
+    def Page(self, title):
+        return pywikibot.Page(self.site, title)
+
+    def ReplaceLink(self, text, oldtxt, newtxt):
+
+        frmParts = [str.strip(self.stripChars) for str in self.wordBreaker.split(oldtxt)]
+        toParts = [str.strip(self.stripChars) for str in self.wordBreaker.split(newtxt)]
+
+        if len(frmParts) != len(toParts):
+            raise ValueError(u'Splitting parts do not match counts')
+        for i in xrange(0, len(frmParts)):
+            if len(frmParts[i]) != len(toParts[i]):
+                raise ValueError(u'Splitting parts do not match word length')
+            if len(frmParts[i]) > 0:
+                text = text.replace(frmParts[i][0].lower() + frmParts[i][1:], toParts[i][0].lower() + toParts[i][1:])
+                text = text.replace(frmParts[i][0].upper() + frmParts[i][1:], toParts[i][0].upper() + toParts[i][1:])
+
+        return text
+
 if __name__ == "__main__":
     try:
         bot = CaseChecker()
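A simplified sketch of what ReplaceLink does with a corrected title (the
sample strings are invented, and a plain space split stands in for
wordBreaker):

    oldtxt = u'Парк Retrо'   # last word ends in Cyrillic 'о'
    newtxt = u'Парк Retro'
    text = u'См. [[парк Retrо]] и Парк Retrо.'
    for frm, to in zip(oldtxt.split(u' '), newtxt.split(u' ')):
        if len(frm) != len(to):
            raise ValueError(u'Splitting parts do not match word length')
        if frm:
            text = text.replace(frm[0].lower() + frm[1:], to[0].lower() + to[1:])
            text = text.replace(frm[0].upper() + frm[1:], to[0].upper() + to[1:])
    print text   # prints: См. [[парк Retro]] и Парк Retro.

Replacing each word in both lower-first and upper-first form is what lets
one move fix links that appear mid-sentence as well as at sentence start.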
Modified: trunk/pywikipedia/pageimport.py
===================================================================
--- trunk/pywikipedia/pageimport.py	2012-12-12 15:58:41 UTC (rev 10791)
+++ trunk/pywikipedia/pageimport.py	2012-12-13 01:08:33 UTC (rev 10792)
@@ -32,7 +32,7 @@
 #
 import urllib
-import wikipedia as pywikibot,
+import wikipedia as pywikibot
 import login, config

 class Importer(pywikibot.Page):
Modified: trunk/pywikipedia/standardize_notes.py
===================================================================
--- trunk/pywikipedia/standardize_notes.py	2012-12-12 15:58:41 UTC (rev 10791)
+++ trunk/pywikipedia/standardize_notes.py	2012-12-13 01:08:33 UTC (rev 10792)
@@ -1126,8 +1126,7 @@
         return
     if (len(commandline_replacements) == 2):
         replacements[commandline_replacements[0]] = commandline_replacements[1]
-        editSummary = pywikibot.translate(pywikibot.getSite(), msg)
-            % ' (-' + commandline_replacements[0] + ' +' + commandline_replacements[1] + ')'
+        editSummary = pywikibot.translate(pywikibot.getSite(), msg) % ' (-' + commandline_replacements[0] + ' +' + commandline_replacements[1] + ')'
     else:
         change = ''
         default_summary_message = pywikibot.translate(pywikibot.getSite(), msg) % change
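The removed two-liner was one of the "minor compile fixes": the % operand
continued onto a new line without enclosing parentheses, which is a
SyntaxError. Note that the joined line still groups as
(translate(...) % ' (-') + ..., since % and + share precedence and
associate left to right; with a stand-in message (hypothetical value)
ending in %s, this produces the intended summary:

    >>> msg = u'robot: automated text replacement%s'
    >>> msg % ' (-' + 'old' + ' +' + 'new' + ')'
    u'robot: automated text replacement (-old +new)'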
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2012-12-12 15:58:41 UTC (rev 10791)
+++ trunk/pywikipedia/wikipedia.py	2012-12-13 01:08:33 UTC (rev 10792)
@@ -1635,7 +1635,7 @@
     """Yield all pages that link to the page by API

     If you need a full list of referring pages, use this:
-        pages = [page for page in s.getReferences()]
+        pages = list(s.getReferences())
     Parameters:
     * follow_redirects - if True, also returns pages that link to a
       redirect pointing to the page.