Pywikipedia-svn March 2013

pywikipedia-svn@lists.wikimedia.org

8 participants
171 discussions

SVN: [11161] trunk/pywikipedia/interwiki.py
by xqt＠svn.wikimedia.org 02 Mar '13

02 Mar '13

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11161 Revision: 11161 Author: xqt Date: 2013-03-02 14:46:09 +0000 (Sat, 02 Mar 2013) Log Message: ----------- some PEP8 changes Modified Paths: -------------- trunk/pywikipedia/interwiki.py Modified: trunk/pywikipedia/interwiki.py =================================================================== --- trunk/pywikipedia/interwiki.py 2013-03-02 13:59:34 UTC (rev 11160) +++ trunk/pywikipedia/interwiki.py 2013-03-02 14:46:09 UTC (rev 11161) @@ -343,7 +343,10 @@ __version__ = '$Id$' # -import sys, copy, re, os +import sys +import copy +import re +import os import time import codecs import socket @@ -360,102 +363,96 @@ '&pagegenerators_help;': pagegenerators.parameterHelp } + class SaveError(pywikibot.Error): """ An attempt to save a page with changed interwiki has failed. """ + class LinkMustBeRemoved(SaveError): """ An interwiki link has to be removed, but this can't be done because of user preferences or because the user chose not to change the page. """ + class GiveUpOnPage(pywikibot.Error): """ The user chose not to work on this page and its linked pages any more. """ + # Subpage templates. 
Must be in lower case, # whereas subpage itself must be case sensitive moved_links = { - 'ar' : ([u'documentation', - u'template documentation', - u'شرح', - u'توثيق'], u'/doc'), - 'bn' : (u'documentation', u'/doc'), - 'ca' : (u'ús de la plantilla', u'/ús'), - 'cs' : (u'dokumentace', u'/doc'), - 'da' : (u'dokumentation', u'/doc'), - 'de' : (u'dokumentation', u'/Meta'), + 'ar': ([u'documentation', u'template documentation', u'شرح', u'توثيق'], + u'/doc'), + 'bn': (u'documentation', u'/doc'), + 'ca': (u'ús de la plantilla', u'/ús'), + 'cs': (u'dokumentace', u'/doc'), + 'da': (u'dokumentation', u'/doc'), + 'de': (u'dokumentation', u'/Meta'), 'dsb': ([u'dokumentacija', u'doc'], u'/Dokumentacija'), - 'en' : ([u'documentation', - u'template documentation', - u'template doc', - u'doc', - u'documentation, template'], u'/doc'), - 'es' : ([u'documentación', u'documentación de plantilla'], u'/doc'), - 'eu' : (u'txantiloi dokumentazioa', u'/dok'), - 'fa' : ([u'documentation', - u'template documentation', - u'template doc', - u'doc', - u'توضیحات', - u'زیرصفحه توضیحات'], u'/doc'), + 'en': ([u'documentation', u'template documentation', u'template doc', + u'doc', u'documentation, template'], u'/doc'), + 'es': ([u'documentación', u'documentación de plantilla'], u'/doc'), + 'eu': (u'txantiloi dokumentazioa', u'/dok'), + 'fa': ([u'documentation', u'template documentation', u'template doc', + u'doc', u'توضیحات', u'زیرصفحه توضیحات'], u'/doc'), # fi: no idea how to handle this type of subpage at :Metasivu: - 'fi' : (u'mallineohje', None), - 'fr' : ([u'/documentation', u'documentation', u'doc_modèle', - u'documentation modèle', u'documentation modèle compliqué', - u'documentation modèle en sous-page', - u'documentation modèle compliqué en sous-page', - u'documentation modèle utilisant les parserfunctions en sous-page', + 'fi': (u'mallineohje', None), + 'fr': ([u'/documentation', u'documentation', u'doc_modèle', + u'documentation modèle', u'documentation modèle compliqué', + u'documentation 
modèle en sous-page', + u'documentation modèle compliqué en sous-page', + u'documentation modèle utilisant les parserfunctions en sous-page', ], - u'/Documentation'), + u'/Documentation'), 'hsb': ([u'dokumentacija', u'doc'], u'/Dokumentacija'), - 'hu' : (u'sablondokumentáció', u'/doc'), - 'id' : (u'template doc', u'/doc'), + 'hu': (u'sablondokumentáció', u'/doc'), + 'id': (u'template doc', u'/doc'), 'ilo': (u'documentation', u'/doc'), - 'ja' : (u'documentation', u'/doc'), - 'ka' : (u'თარგის ინფო', u'/ინფო'), - 'ko' : (u'documentation', u'/설명문서'), - 'ms' : (u'documentation', u'/doc'), - 'no' : (u'dokumentasjon', u'/dok'), - 'nn' : (u'dokumentasjon', u'/dok'), - 'pl' : (u'dokumentacja', u'/opis'), - 'pt' : ([u'documentação', u'/doc'], u'/doc'), - 'ro' : (u'documentaţie', u'/doc'), - 'ru' : (u'doc', u'/doc'), + 'ja': (u'documentation', u'/doc'), + 'ka': (u'თარგის ინფო', u'/ინფო'), + 'ko': (u'documentation', u'/설명문서'), + 'ms': (u'documentation', u'/doc'), + 'no': (u'dokumentasjon', u'/dok'), + 'nn': (u'dokumentasjon', u'/dok'), + 'pl': (u'dokumentacja', u'/opis'), + 'pt': ([u'documentação', u'/doc'], u'/doc'), + 'ro': (u'documentaţie', u'/doc'), + 'ru': (u'doc', u'/doc'), 'simple': ([u'documentation', u'template documentation', u'template doc', u'doc', u'documentation, template'], u'/doc'), - 'sk' : (u'dokumentácia', u'/Dokumentácia'), - 'sv' : (u'dokumentation', u'/dok'), - 'uk' : ([u'документація', - u'doc', - u'documentation'], u'/Документація'), - 'vi' : (u'documentation', u'/doc'), - 'zh' : ([u'documentation', u'doc'], u'/doc'), + 'sk': (u'dokumentácia', u'/Dokumentácia'), + 'sv': (u'dokumentation', u'/dok'), + 'uk': ([u'документація', u'doc', u'documentation'], u'/Документація'), + 'vi': (u'documentation', u'/doc'), + 'zh': ([u'documentation', u'doc'], u'/doc'), } # A list of template names in different languages. # Pages which contain these shouldn't be changed. 
ignoreTemplates = { '_default': [u'delete'], - 'ar' : [u'قيد الاستخدام'], - 'cs' : [u'Pracuje_se'], - 'de' : [u'inuse', 'in use', u'in bearbeitung', u'inbearbeitung', - u'löschen', u'sla', - u'löschantrag', u'löschantragstext', - u'falschschreibung', - u'obsolete schreibung', 'veraltete schreibweise'], - 'en' : [u'inuse', u'softredirect'], - 'fa' : [u'در دست ویرایش ۲', u'حذف سریع'], + 'ar': [u'قيد الاستخدام'], + 'cs': [u'Pracuje_se'], + 'de': [u'inuse', 'in use', u'in bearbeitung', u'inbearbeitung', + u'löschen', u'sla', + u'löschantrag', u'löschantragstext', + u'falschschreibung', + u'obsolete schreibung', 'veraltete schreibweise'], + 'en': [u'inuse', u'softredirect'], + 'fa': [u'در دست ویرایش ۲', u'حذف سریع'], 'pdc': [u'lösche'], - 'zh' : [u'inuse'], + 'zh': [u'inuse'], } + class Global(object): """ Container class for global settings. @@ -497,9 +494,9 @@ contentsondisk = config.interwiki_contents_on_disk lacklanguage = None minlinks = 0 - quiet = False + quiet = False restoreAll = False - async = False + async = False summary = u'' repository = False @@ -512,9 +509,12 @@ elif arg.startswith('-hintfile'): hintfilename = arg[10:] if (hintfilename is None) or (hintfilename == ''): - hintfilename = pywikibot.input(u'Please enter the hint filename:') + hintfilename = pywikibot.input( + u'Please enter the hint filename:') f = codecs.open(hintfilename, 'r', config.textfile_encoding) - R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)') # hint or title ends either before | or before ]] + + # hint or title ends either before | or before ]] + R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)') for pageTitle in R.findall(f.read()): self.hints.append(pageTitle) f.close() @@ -574,7 +574,7 @@ elif arg.startswith('-neverlink:'): self.neverlink += arg[11:].split(",") elif arg.startswith('-ignore:'): - self.ignore += [pywikibot.Page(None,p) for p in arg[8:].split(",")] + self.ignore += [pywikibot.Page(None, p) for p in arg[8:].split(",")] elif arg.startswith('-ignorefile:'): ignorefile = arg[12:] 
ignorePageGen = pagegenerators.TextfilePageGenerator(ignorefile) @@ -604,7 +604,8 @@ self.async = True elif arg.startswith('-summary'): if len(arg) == 8: - self.summary = pywikibot.input(u'What summary do you want to use?') + self.summary = pywikibot.input( + u'What summary do you want to use?') else: self.summary = arg[9:] elif arg.startswith('-lack:'): @@ -618,6 +619,7 @@ return False return True + class StoredPage(pywikibot.Page): """ Store the Page contents on disk to avoid sucking too much @@ -634,20 +636,20 @@ SPstore = None # attributes created by pywikibot.Page.__init__ - SPcopy = [ '_editrestriction', - '_site', - '_namespace', - '_section', - '_title', - 'editRestriction', - 'moveRestriction', - '_permalink', - '_userName', - '_ipedit', - '_editTime', - '_startTime', - '_revisionId', - '_deletedRevs' ] + SPcopy = ['_editrestriction', + '_site', + '_namespace', + '_section', + '_title', + 'editRestriction', + 'moveRestriction', + '_permalink', + '_userName', + '_ipedit', + '_editTime', + '_startTime', + '_revisionId', + '_deletedRevs'] def SPdeleteStore(): if StoredPage.SPpath: @@ -685,6 +687,7 @@ _contents = property(SPgetContents, SPsetContents, SPdelContents) + class PageTree(object): """ Structure to manipulate a set of pages. @@ -763,6 +766,7 @@ for page in plist: yield page + class Subject(object): """ Class to follow the progress of a single 'subject' (i.e. a page with @@ -813,9 +817,8 @@ done <- NL(pending) U done return done - - Subject objects only operate on pages that should have been preloaded before. - In fact, at any time: + Subject objects only operate on pages that should have been preloaded + before. In fact, at any time: * todo contains new Pages that have not been loaded yet * done contains Pages that have been loaded, and that have been treated. * If batch preloadings are successful, Page._get() is never called from @@ -848,7 +851,7 @@ # As we haven't yet found a page that links to the origin page, we # start with an empty list for it. 
if originPage: - self.foundIn = {self.originPage:[]} + self.foundIn = {self.originPage: []} else: self.foundIn = {} # This is a list of all pages that are currently scheduled for @@ -876,7 +879,6 @@ for page in tree.filter(site): if page.exists() and page.isDisambig(): return page - return None def getFoundNonDisambig(self, site): """ @@ -887,10 +889,9 @@ """ for tree in [self.done, self.pending]: for page in tree.filter(site): - if page.exists() and not page.isDisambig() \ - and not page.isRedirectPage() and not page.isCategoryRedirect(): + if page.exists() and not page.isDisambig() and \ + not page.isRedirectPage() and not page.isCategoryRedirect(): return page - return None def getFoundInCorrectNamespace(self, site): """ @@ -901,25 +902,36 @@ """ for tree in [self.done, self.pending, self.todo]: for page in tree.filter(site): - # -hintsonly: before we have an origin page, any namespace will do. - if self.originPage and page.namespace() == self.originPage.namespace(): - if page.exists() and not page.isRedirectPage() and not page.isCategoryRedirect(): + # -hintsonly: before we have an origin page, any namespace will + # do. 
+ if self.originPage and \ + page.namespace() == self.originPage.namespace(): + if page.exists() and not \ + page.isRedirectPage() and not page.isCategoryRedirect(): return page - return None - def translate(self, hints = None, keephintedsites = False): + def translate(self, hints=None, keephintedsites=False): """Add the given translation hints to the todo list""" if globalvar.same and self.originPage: if hints: - pages = titletranslate.translate(self.originPage, hints = hints + ['all:'], - auto = globalvar.auto, removebrackets = globalvar.hintnobracket) + pages = titletranslate.translate( + self.originPage, + hints=hints + ['all:'], + auto=globalvar.auto, + removebrackets=globalvar.hintnobracket) else: - pages = titletranslate.translate(self.originPage, hints = ['all:'], - auto = globalvar.auto, removebrackets = globalvar.hintnobracket) + pages = titletranslate.translate( + self.originPage, + hints=['all:'], + auto=globalvar.auto, + removebrackets=globalvar.hintnobracket) else: - pages = titletranslate.translate(self.originPage, hints=hints, - auto=globalvar.auto, removebrackets=globalvar.hintnobracket, - site=pywikibot.getSite()) + pages = titletranslate.translate( + self.originPage, + hints=hints, + auto=globalvar.auto, + removebrackets=globalvar.hintnobracket, + site=pywikibot.getSite()) for page in pages: if globalvar.contentsondisk: page = StoredPage(page) @@ -946,7 +958,8 @@ # Bug-check: Isn't there any work still in progress? We can't work on # different sites at a time! if len(self.pending) > 0: - raise 'BUG: Can\'t start to work on %s; still working on %s' % (site, self.pending) + raise "BUG: Can't start to work on %s; still working on %s" \ + % (site, self.pending) # Prepare a list of suitable pages result = [] for page in self.todo.filter(site): @@ -958,7 +971,7 @@ # If there are any, return them. Otherwise, nothing is in progress. 
return result - def makeForcedStop(self,counter): + def makeForcedStop(self, counter): """ Ends work on the page before the normal end. """ @@ -1018,26 +1031,33 @@ if linkedPage in self.foundIn: # We have seen this page before, don't ask again. return False - elif self.originPage and self.originPage.namespace() != linkedPage.namespace(): + elif self.originPage and \ + self.originPage.namespace() != linkedPage.namespace(): # Allow for a mapping between different namespaces - crossFrom = self.originPage.site.family.crossnamespace.get(self.originPage.namespace(), {}) - crossTo = crossFrom.get(self.originPage.site.language(), crossFrom.get('_default', {})) - nsmatch = crossTo.get(linkedPage.site.language(), crossTo.get('_default', [])) + crossFrom = self.originPage.site.family.crossnamespace.get( + self.originPage.namespace(), {}) + crossTo = crossFrom.get(self.originPage.site.language(), + crossFrom.get('_default', {})) + nsmatch = crossTo.get(linkedPage.site.language(), + crossTo.get('_default', [])) if linkedPage.namespace() in nsmatch: return False if globalvar.autonomous: - pywikibot.output(u"NOTE: Ignoring link from page %s in namespace %i to page %s in namespace %i." - % (linkingPage, linkingPage.namespace(), - linkedPage, linkedPage.namespace())) + pywikibot.output( +u"NOTE: Ignoring link from page %s in namespace %i to page %s in namespace %i." + % (linkingPage, linkingPage.namespace(), linkedPage, + linkedPage.namespace())) # Fill up foundIn, so that we will not write this notice self.foundIn[linkedPage] = [linkingPage] return True else: preferredPage = self.getFoundInCorrectNamespace(linkedPage.site) if preferredPage: - pywikibot.output(u"NOTE: Ignoring link from page %s in namespace %i to page %s in namespace %i because page %s in the correct namespace has already been found." 
- % (linkingPage, linkingPage.namespace(), linkedPage, - linkedPage.namespace(), preferredPage)) + pywikibot.output( +u"NOTE: Ignoring link from page %s in namespace %i to page %s in namespace %i " +u"because page %s in the correct namespace has already been found." + % (linkingPage, linkingPage.namespace(), linkedPage, + linkedPage.namespace(), preferredPage)) return True else: choice = pywikibot.inputChoice( @@ -1052,13 +1072,17 @@ if choice == 'g': self.makeForcedStop(counter) elif choice == 'a': - newHint = pywikibot.input(u'Give the alternative for language %s, not using a language code:' - % linkedPage.site.language()) + newHint = pywikibot.input( + u'Give the alternative for language %s, not ' + u'using a language code:' + % linkedPage.site.language()) if newHint: - alternativePage = pywikibot.Page(linkedPage.site, newHint) + alternativePage = pywikibot.Page( + linkedPage.site, newHint) if alternativePage: # add the page that was entered by the user - self.addIfNew(alternativePage, counter, None) + self.addIfNew(alternativePage, counter, + None) else: pywikibot.output( u"NOTE: ignoring %s and its interwiki links" @@ -1070,14 +1094,18 @@ return False def wiktionaryMismatch(self, page): - if self.originPage and globalvar.same=='wiktionary': + if self.originPage and globalvar.same == 'wiktionary': if page.title().lower() != self.originPage.title().lower(): - pywikibot.output(u"NOTE: Ignoring %s for %s in wiktionary mode" % (page, self.originPage)) - return True - elif page.title() != self.originPage.title() and self.originPage.site.nocapitalize and page.site.nocapitalize: - pywikibot.output(u"NOTE: Ignoring %s for %s in wiktionary mode because both languages are uncapitalized." 
+ pywikibot.output(u"NOTE: Ignoring %s for %s in wiktionary mode" % (page, self.originPage)) return True + elif page.title() != self.originPage.title() and \ + self.originPage.site.nocapitalize and page.site.nocapitalize: + pywikibot.output( + u"NOTE: Ignoring %s for %s in wiktionary mode because both " + u"languages are uncapitalized." + % (page, self.originPage)) + return True return False def disambigMismatch(self, page, counter): @@ -1095,15 +1123,17 @@ chosen to use instead of the given page. """ if not self.originPage: - return (False, None) # any page matches until we have an origin page + return (False, None) # any page matches til we have an origin page if globalvar.autonomous: if self.originPage.isDisambig() and not page.isDisambig(): - pywikibot.output(u"NOTE: Ignoring link from disambiguation page %s to non-disambiguation %s" - % (self.originPage, page)) + pywikibot.output( + u"NOTE: Ignoring link from disambiguation page %s to " + u"non-disambiguation %s" % (self.originPage, page)) return (True, None) elif not self.originPage.isDisambig() and page.isDisambig(): - pywikibot.output(u"NOTE: Ignoring link from non-disambiguation page %s to disambiguation %s" - % (self.originPage, page)) + pywikibot.output( + u"NOTE: Ignoring link from non-disambiguation page %s to " + u"disambiguation %s" % (self.originPage, page)) return (True, None) else: choice = 'y' @@ -1111,32 +1141,39 @@ disambig = self.getFoundDisambig(page.site) if disambig: pywikibot.output( - u"NOTE: Ignoring non-disambiguation page %s for %s because disambiguation page %s has already been found." + u"NOTE: Ignoring non-disambiguation page %s for %s " + u"because disambiguation page %s has already been " + u"found." % (page, self.originPage, disambig)) return (True, None) else: choice = pywikibot.inputChoice( - u'WARNING: %s is a disambiguation page, but %s doesn\'t seem to be one. Follow it anyway?' + u"WARNING: %s is a disambiguation page, but %s doesn't " + u"seem to be one. Follow it anyway?" 
% (self.originPage, page), ['Yes', 'No', 'Add an alternative', 'Give up'], ['y', 'n', 'a', 'g']) elif not self.originPage.isDisambig() and page.isDisambig(): nondisambig = self.getFoundNonDisambig(page.site) if nondisambig: - pywikibot.output(u"NOTE: Ignoring disambiguation page %s for %s because non-disambiguation page %s has already been found." - % (page, self.originPage, nondisambig)) + pywikibot.output( + u"NOTE: Ignoring disambiguation page %s for %s because " + u"non-disambiguation page %s has already been found." + % (page, self.originPage, nondisambig)) return (True, None) else: choice = pywikibot.inputChoice( - u'WARNING: %s doesn\'t seem to be a disambiguation page, but %s is one. Follow it anyway?' + u'WARNING: %s doesn\'t seem to be a disambiguation ' + u'page, but %s is one. Follow it anyway?' % (self.originPage, page), ['Yes', 'No', 'Add an alternative', 'Give up'], ['y', 'n', 'a', 'g']) if choice == 'n': return (True, None) elif choice == 'a': - newHint = pywikibot.input(u'Give the alternative for language %s, not using a language code:' - % page.site.language()) + newHint = pywikibot.input( + u'Give the alternative for language %s, not using a ' + u'language code:' % page.site.language()) alternativePage = pywikibot.Page(page.site, newHint) return (True, alternativePage) elif choice == 'g': @@ -1171,7 +1208,8 @@ return if (self.untranslated or globalvar.askhints) and not self.hintsAsked \ and self.originPage and self.originPage.exists() \ - and not self.originPage.isRedirectPage() and not self.originPage.isCategoryRedirect(): + and not self.originPage.isRedirectPage() and \ + not self.originPage.isCategoryRedirect(): # Only once! self.hintsAsked = True if globalvar.untranslated: @@ -1181,17 +1219,24 @@ pywikibot.output(self.originPage.get()[:t]) # loop while True: - newhint = pywikibot.input(u'Give a hint (? to see pagetext):') + newhint = pywikibot.input( + u'Give a hint (? 
to see pagetext):') if newhint == '?': t += globalvar.showtextlinkadd pywikibot.output(self.originPage.get()[:t]) elif newhint and not ':' in newhint: - pywikibot.output(u'Please enter a hint in the format language:pagename or type nothing if you do not have a hint.') + pywikibot.output( + u'Please enter a hint in the format ' + u'language:pagename or type nothing if you do not ' + u'have a hint.') elif not newhint: break else: - pages = titletranslate.translate(self.originPage, hints=[newhint], - auto = globalvar.auto, removebrackets=globalvar.hintnobracket) + pages = titletranslate.translate( + self.originPage, + hints=[newhint], + auto=globalvar.auto, + removebrackets=globalvar.hintnobracket) for page in pages: self.addIfNew(page, counter, None) if globalvar.hintsareright: @@ -1204,14 +1249,14 @@ In other words, all the pages in self.pending have already been preloaded. - The only argument is an instance - of a counter class, that has methods minus() and plus() to keep - counts of the total work todo. + The only argument is an instance of a counter class, that has methods + minus() and plus() to keep counts of the total work todo. 
+ """ # Loop over all the pages that should have been taken care of for page in self.pending: - if page.title == None: ### seems a DataPage - page.get() ### get it's title (and content) + if page.title == None: ### seems a DataPage + page.get() ### get it's title (and content) # Mark the page as done self.done.add(page) @@ -1220,9 +1265,11 @@ dictName, year = page.autoFormat() if dictName is not None: if self.originPage: - pywikibot.output(u'WARNING: %s:%s relates to %s:%s, which is an auto entry %s(%s)' - % (self.originPage.site.language(), self.originPage, - page.site.language(), page, dictName, year)) + pywikibot.output( + u'WARNING: %s:%s relates to %s:%s, which is an ' + u'auto entry %s(%s)' + % (self.originPage.site.language(), self.originPage, + page.site.language(), page, dictName, year)) # Abort processing if the bot is running in autonomous mode. if globalvar.autonomous: @@ -1240,12 +1287,13 @@ pywikibot.output(u"NOTE: %s does not exist. Skipping." % page) if page == self.originPage: - # The page we are working on is the page that does not exist. - # No use in doing any work on it in that case. + # The page we are working on is the page that does not + # exist. No use in doing any work on it in that case. 
for site, count in self.todo.siteCounts(): counter.minus(site, count) self.todo = PageTree() - # In some rare cases it might be we already did check some 'automatic' links + # In some rare cases it might be we already did check some + # 'automatic' links self.done = PageTree() continue @@ -1270,7 +1318,8 @@ pywikibot.output(u"NOTE: %s is %sredirect to %s" % (page, redir, redirectTargetPage)) if self.originPage is None or page == self.originPage: - # the 1st existig page becomes the origin page, if none was supplied + # the 1st existig page becomes the origin page, if none was + # supplied if globalvar.initialredirect: if globalvar.contentsondisk: redirectTargetPage = StoredPage(redirectTargetPage) @@ -1281,8 +1330,8 @@ self.todo.add(redirectTargetPage) counter.plus(redirectTargetPage.site) else: - # This is a redirect page to the origin. We don't need to - # follow the redirection. + # This is a redirect page to the origin. We don't need + # to follow the redirection. # In this case we can also stop all hints! for site, count in self.todo.siteCounts(): counter.minus(site, count) @@ -1326,7 +1375,8 @@ # Page exists, isnt a redirect, and is a plain link (no section) if self.originPage is None: - # the 1st existig page becomes the origin page, if none was supplied + # the 1st existig page becomes the origin page, if none was + # supplied self.originPage = page try: iw = page.interwiki() @@ -1348,7 +1398,8 @@ duplicate = None for p in self.done.filter(page.site): - if p != page and p.exists() and not p.isRedirectPage() and not p.isCategoryRedirect(): + if p != page and p.exists() and \ + not p.isRedirectPage() and not p.isCategoryRedirect(): duplicate = p break @@ -1358,7 +1409,8 @@ # Ignore the interwiki links. 
iw = () if globalvar.lacklanguage: - if globalvar.lacklanguage in [link.site.language() for link in iw]: + if globalvar.lacklanguage in [link.site.language() + for link in iw]: iw = () self.workonme = False if len(iw) < globalvar.minlinks: @@ -1366,8 +1418,9 @@ self.workonme = False elif globalvar.autonomous and duplicate and not skip: - pywikibot.output(u"Stopping work on %s because duplicate pages"\ - " %s and %s are found" % (self.originPage, duplicate, page)) + pywikibot.output(u"Stopping work on %s because duplicate pages" + " %s and %s are found" + % (self.originPage, duplicate, page)) self.makeForcedStop(counter) try: f = codecs.open(

1 0

SVN: [11160] trunk/pywikipedia/catimages.py
by drtrigon＠svn.wikimedia.org 02 Mar '13

02 Mar '13

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11160 Revision: 11160 Author: drtrigon Date: 2013-03-02 13:59:34 +0000 (Sat, 02 Mar 2013) Log Message: ----------- new feature; + XCF support (like commons through ImageMagick) Modified Paths: -------------- trunk/pywikipedia/catimages.py Modified: trunk/pywikipedia/catimages.py =================================================================== --- trunk/pywikipedia/catimages.py 2013-03-02 13:57:27 UTC (rev 11159) +++ trunk/pywikipedia/catimages.py 2013-03-02 13:59:34 UTC (rev 11160) @@ -83,7 +83,8 @@ # modules needing compilation are imported later on request: # (see https://jira.toolserver.org/browse/TS-1452) # e.g. opencv, jseg, slic, pydmtx, zbar, (pyml or equivalent) -# binaries: exiftool, pdftotext/pdfimages (poppler), ffprobe (ffmpeg), (ocropus) +# binaries: exiftool, pdftotext/pdfimages (poppler), ffprobe (ffmpeg), +# convert/identify (ImageMagick), (ocropus) # TODO: # (pdfminer not used anymore/at the moment...) # python-djvulibre or python-djvu for djvu support @@ -894,6 +895,11 @@ # result = {} # djvu: python-djvulibre or python-djvu for djvu support # http://pypi.python.org/pypi/python-djvulibre/0.3.9 + #elif self.image_fileext == u'.xcf' + # # use ImageMagick instead of PIL to get these info ... 
+ # data = Popen("identify -verbose info: %s" % self.image_path, + # shell=True, stderr=PIPE).stderr.read() + # print data else: pywikibot.output(u'WARNING: unknown (generic) file type [_detect_Properties_PIL]') return @@ -1669,7 +1675,8 @@ mat = np.dot((cameraMatrix), mat) # linalg.inv(cameraMatrix) #_cameraMatrix, rotMatrix, transVect, rotMatrixX, rotMatrixY, rotMatrixZ, eulerAngles = cv2.decomposeProjectionMatrix(rmat) #mat = np.dot(rotMatrix, np.eye(3)) - matD2raw, matD2norm, matnorm = self._util_getD2coords( mat, cameraMatrix, distCoeffs ) + #matD2raw, matD2norm, matnorm = self._util_getD2coords( mat, cameraMatrix, distCoeffs ) + matD2raw, matD2norm, matnorm = self._util_getD2coords( mat, np.eye(3), distCoeffs ) for i in range(3): imagePoints, D2norm, norm = matD2raw[:,:,:,i], 40*matD2norm[:,i], matnorm[:,i] D2norm = D2norm/linalg.norm(D2norm)*40 @@ -1682,10 +1689,10 @@ pywikibot.output(u'result for calibrated camera:\n rot=%s\n perp=%s\n perp2D=%s' % (rot.transpose()[0], perp[:,2], ortho)) pywikibot.output(u'nice would be to do the same for uncalibrated/default cam settings') -# still beta/experimental thus suppress value output for the moment -# self._info['Chessboard'][0]['Rotation'] = tuple(rot.transpose()[0]) -# self._info['Chessboard'][0]['Perp_Dir'] = tuple(perp[:,2]) -# self._info['Chessboard'][0]['Perp_Dir_2D'] = tuple(ortho) + # still in testing phase; some of the values might have big errors + self._info['Chessboard'][0]['Rotation'] = tuple(rot.transpose()[0]) + self._info['Chessboard'][0]['Perp_Dir'] = tuple(perp[:,2]) + self._info['Chessboard'][0]['Perp_Dir_2D'] = tuple(ortho) return @@ -2760,7 +2767,7 @@ # http://cairographics.org/pyrsvg/ # http://stackoverflow.com/questions/9166400/convert-rgba-png-to-rgb-with-pil self.image_size = (None, None) - if self.image_fileext == u'.svg': + if self.image_fileext == u'.svg': try: svg = rsvg.Handle(self.image_path) img = cairo.ImageSurface(cairo.FORMAT_ARGB32, svg.props.width, svg.props.height) @@ -2780,6 
+2787,24 @@ self.image_path_JPEG = self.image_path except SystemError: self.image_path_JPEG = self.image_path + elif self.image_fileext == u'.xcf': + # Very few programs other than GIMP read XCF files. This is by design + # from the GIMP developers, the format is not really documented or + # supported as a general-purpose file format. + # Commons uses ImageMagick, thus we have EXACTLY THE SAME support! + # (can also be a drawback, e.g. when the library is buggy...) + proc = Popen("convert %s %s" % (self.image_path, self.image_path_JPEG), + shell=True, stderr=PIPE)#.stderr.read() + proc.wait() + if proc.returncode == 127: + raise ImportError("convert (ImageMagick) not found!") + elif proc.returncode: + self.image_path_JPEG = self.image_path + + #data = Popen("identify -verbose info: %s" % self.image_path, + # shell=True, stderr=PIPE).stderr.read() + #print data + self.image_size = Image.open(self.image_path_JPEG).size else: try: im = Image.open(self.image_path) # might be png, gif etc, for instance

1 0

SVN: [11159] trunk/pywikipedia/userinterfaces/terminal_interface_base.py
by xqt＠svn.wikimedia.org 02 Mar '13

02 Mar '13

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11159 Revision: 11159 Author: xqt Date: 2013-03-02 13:57:27 +0000 (Sat, 02 Mar 2013) Log Message: ----------- some docs from rewrite Modified Paths: -------------- trunk/pywikipedia/userinterfaces/terminal_interface_base.py Modified: trunk/pywikipedia/userinterfaces/terminal_interface_base.py =================================================================== --- trunk/pywikipedia/userinterfaces/terminal_interface_base.py 2013-03-02 13:56:43 UTC (rev 11158) +++ trunk/pywikipedia/userinterfaces/terminal_interface_base.py 2013-03-02 13:57:27 UTC (rev 11159) @@ -121,11 +121,13 @@ def input(self, question, password = False): """ + Ask the user a question and return the answer. + Works like raw_input(), but returns a unicode string instead of ASCII. Unlike raw_input, this function automatically adds a space after the question. - + """ # sound the terminal bell to notify the user @@ -142,6 +144,9 @@ return text def inputChoice(self, question, options, hotkeys, default=None): + """ + Ask the user a question with a predefined list of acceptable answers. + """ options = options[:] # we don't want to edit the passed parameter for i in range(len(options)): option = options[i] @@ -164,18 +169,19 @@ answer = self.input(prompt) if answer.lower() in hotkeys or answer.upper() in hotkeys: return answer - elif default and answer=='': # empty string entered + elif default and answer=='': # empty string entered return default def editText(self, text, jumpIndex=None, highlight=None): - """ + """Return the text as edited by the user. 
+ Uses a Tkinter edit box because we don't have a console editor Parameters: * text - a Unicode string * jumpIndex - an integer: position at which to put the caret * highlight - a substring; each occurence will be highlighted - + """ try: import gui @@ -186,6 +192,7 @@ return editor.edit(text, jumpIndex=jumpIndex, highlight=highlight) def askForCaptcha(self, url): + """Show the user a CAPTCHA image and return the answer.""" try: import webbrowser wikipedia.output(u'Opening CAPTCHA in your web browser...')

1 0

SVN: [11158] branches/rewrite/pywikibot/userinterfaces/terminal_interface.py
by xqt＠svn.wikimedia.org 02 Mar '13

02 Mar '13

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11158 Revision: 11158 Author: xqt Date: 2013-03-02 13:56:43 +0000 (Sat, 02 Mar 2013) Log Message: ----------- update from trunk Modified Paths: -------------- branches/rewrite/pywikibot/userinterfaces/terminal_interface.py Modified: branches/rewrite/pywikibot/userinterfaces/terminal_interface.py =================================================================== --- branches/rewrite/pywikibot/userinterfaces/terminal_interface.py 2013-03-02 13:46:19 UTC (rev 11157) +++ branches/rewrite/pywikibot/userinterfaces/terminal_interface.py 2013-03-02 13:56:43 UTC (rev 11158) @@ -166,6 +166,7 @@ Unlike raw_input, this function automatically adds a space after the question. + """ # sound the terminal bell to notify the user @@ -211,12 +212,10 @@ caseHotkey = hotkey if m: pos = m.start() - options[i] = '%s[%s]%s' % (option[:pos], caseHotkey, option[pos+1:]) + options[i] = '%s[%s]%s' % (option[:pos], caseHotkey, + option[pos+1:]) else: options[i] = '%s [%s]' % (option, caseHotkey) - - answer = '' - # loop until the user entered a valid choice while True: prompt = '%s (%s)' % (question, ', '.join(options)) @@ -224,13 +223,11 @@ # it's okay to enter input with the lock, RLock is reentrant. answer = self.input(prompt) if answer.lower() in hotkeys or answer.upper() in hotkeys: - break - elif default and answer=='': # empty string entered - answer = default - break - return answer + return answer + elif default and answer=='': # empty string entered + return default - def editText(self, text, jumpIndex = None, highlight = None): + def editText(self, text, jumpIndex=None, highlight=None): """Return the text as edited by the user. Uses a Tkinter edit box because we don't have a console editor

1 0

SVN: [11157] branches/rewrite/scripts/basic.py
by xqt＠svn.wikimedia.org 02 Mar '13

02 Mar '13

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11157 Revision: 11157 Author: xqt Date: 2013-03-02 13:46:19 +0000 (Sat, 02 Mar 2013) Log Message: ----------- some PEP8 changes Modified Paths: -------------- branches/rewrite/scripts/basic.py Modified: branches/rewrite/scripts/basic.py =================================================================== --- branches/rewrite/scripts/basic.py 2013-03-02 12:57:54 UTC (rev 11156) +++ branches/rewrite/scripts/basic.py 2013-03-02 13:46:19 UTC (rev 11157) @@ -33,6 +33,7 @@ '&params;': pagegenerators.parameterHelp } + class BasicBot: # Edit summary message that should be used is placed on /i18n subdirectory. # The file containing these messages should have the same name as the caller @@ -103,7 +104,7 @@ % page.title()) # show what was changed pywikibot.showDiff(page.get(), text) - pywikibot.output(u'Comment: %s' %comment) + pywikibot.output(u'Comment: %s' % comment) if not self.dry: choice = pywikibot.inputChoice( u'Do you want to accept these changes?',

1 0

SVN: [11156] trunk/pywikipedia/BeautifulSoup.py
by xqt＠svn.wikimedia.org 02 Mar '13

02 Mar '13

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11156 Revision: 11156 Author: xqt Date: 2013-03-02 12:57:54 +0000 (Sat, 02 Mar 2013) Log Message: ----------- some PEP8 changes Modified Paths: -------------- trunk/pywikipedia/BeautifulSoup.py Modified: trunk/pywikipedia/BeautifulSoup.py =================================================================== --- trunk/pywikipedia/BeautifulSoup.py 2013-03-02 10:39:02 UTC (rev 11155) +++ trunk/pywikipedia/BeautifulSoup.py 2013-03-02 12:57:54 UTC (rev 11156) @@ -89,9 +89,9 @@ import re import sgmllib try: - from htmlentitydefs import name2codepoint + from htmlentitydefs import name2codepoint except ImportError: - name2codepoint = {} + name2codepoint = {} try: set except NameError: @@ -103,12 +103,13 @@ DEFAULT_OUTPUT_ENCODING = "utf-8" + def _match_css_class(str): """Build a RE to match the given CSS class.""" return re.compile(r"(^|.*\s)%s($|\s)" % str) + # First, the classes that represent markup elements. - class PageElement(object): """Contains the navigational information for some part of the page (either a tag or a piece of text)""" @@ -128,8 +129,8 @@ def replaceWith(self, replaceWith): oldParent = self.parent myIndex = self.parent.index(self) - if hasattr(replaceWith, "parent")\ - and replaceWith.parent is self.parent: + if hasattr(replaceWith, "parent") and \ + replaceWith.parent is self.parent: # We're replacing this element with one of its siblings. 
index = replaceWith.parent.index(replaceWith) if index and index < myIndex: @@ -186,11 +187,11 @@ return lastChild def insert(self, position, newChild): - if isinstance(newChild, basestring) \ - and not isinstance(newChild, NavigableString): + if isinstance(newChild, basestring) and not \ + isinstance(newChild, NavigableString): newChild = NavigableString(newChild) - position = min(position, len(self.contents)) + position = min(position, len(self.contents)) if hasattr(newChild, 'parent') and newChild.parent is not None: # We're 'inserting' an element that's already one # of this object's children. @@ -227,7 +228,7 @@ while not parentsNextSibling: parentsNextSibling = parent.nextSibling parent = parent.parent - if not parent: # This is the last element in the document. + if not parent: # This is the last element in the document. break if parentsNextSibling: newChildsLastElement.next = parentsNextSibling @@ -272,8 +273,9 @@ criteria and appear after this Tag in the document.""" return self._findAll(name, attrs, text, limit, self.nextSiblingGenerator, **kwargs) - fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): """Returns the first item that matches the given criteria and appears before this Tag in the document.""" @@ -284,8 +286,8 @@ """Returns all items that match the given criteria and appear before this Tag in the document.""" return self._findAll(name, attrs, text, limit, self.previousGenerator, - **kwargs) - fetchPrevious = findAllPrevious # Compatibility with pre-3.x + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): """Returns the closest sibling to this Tag that matches the @@ -299,7 +301,7 @@ criteria and appear before this Tag in the document.""" return self._findAll(name, attrs, text, limit, 
self.previousSiblingGenerator, **kwargs) - fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x def findParent(self, name=None, attrs={}, **kwargs): """Returns the closest parent of this Tag that matches the given @@ -318,8 +320,9 @@ return self._findAll(name, attrs, None, limit, self.parentGenerator, **kwargs) - fetchParents = findParents # Compatibility with pre-3.x + fetchParents = findParents # Compatibility with pre-3.x + #These methods do the real heavy lifting. def _findOne(self, method, name, attrs, text, **kwargs): @@ -415,11 +418,12 @@ s = unicode(s) else: if encoding: - s = self.toEncoding(str(s), encoding) + s = self.toEncoding(str(s), encoding) else: s = unicode(s) return s + class NavigableString(unicode, PageElement): def __new__(cls, value): @@ -444,7 +448,8 @@ if attr == 'string': return self else: - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + raise AttributeError("'%s' object has no attribute '%s'" + % (self.__class__.__name__, attr)) def __unicode__(self): return str(self).decode(DEFAULT_OUTPUT_ENCODING) @@ -455,11 +460,13 @@ else: return self + class CData(NavigableString): def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding) + class ProcessingInstruction(NavigableString): def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): output = self @@ -467,14 +474,17 @@ output = self.substituteEncoding(output, encoding) return "<?%s?>" % self.toEncoding(output, encoding) + class Comment(NavigableString): def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): return "" % NavigableString.__str__(self, encoding) + class Declaration(NavigableString): def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): return "<!%s>" % NavigableString.__str__(self, encoding) + class Tag(PageElement): """Represents a found HTML tag with its attributes and contents.""" 
@@ -482,15 +492,15 @@ def _invert(h): "Cheap function to invert a hash." i = {} - for k,v in h.items(): + for k, v in h.items(): i[v] = k return i - XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", - "quot" : '"', - "amp" : "&", - "lt" : "<", - "gt" : ">" } + XML_ENTITIES_TO_SPECIAL_CHARS = {"apos": "'", + "quot": '"', + "amp": "&", + "lt": "<", + "gt": ">"} XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) @@ -549,8 +559,8 @@ self.attrs = map(convert, self.attrs) def getString(self): - if (len(self.contents) == 1 - and isinstance(self.contents[0], NavigableString)): + if (len(self.contents) == 1 and isinstance(self.contents[0], + NavigableString)): return self.contents[0] def setString(self, string): @@ -592,7 +602,7 @@ raise ValueError("Tag.index: element not in tag") def has_key(self, key): - return self._getAttrMap().has_key(key) + return key in self._getAttrMap() def __getitem__(self, key): """tag[key] returns the value of the 'key' attribute for the tag, @@ -636,7 +646,7 @@ #We don't break because bad HTML can define the same #attribute multiple times. self._getAttrMap() - if self.attrMap.has_key(key): + if key in self.attrMap: del self.attrMap[key] def __call__(self, *args, **kwargs): @@ -651,7 +661,8 @@ return self.find(tag[:-3]) elif tag.find('__') != 0: return self.find(tag) - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + raise AttributeError("'%s' object has no attribute '%s'" + % (self.__class__, tag)) def __eq__(self, other): """Returns true iff this tag has the same name, the same attributes, @@ -661,7 +672,9 @@ same attributes in a different order. 
Should this be fixed?""" if other is self: return True - if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or \ + not hasattr(other, 'contents') or self.name != other.name or \ + self.attrs != other.attrs or len(self) != len(other): return False for i in range(0, len(self.contents)): if self.contents[i] != other.contents[i]: @@ -734,7 +747,8 @@ # value might also contain angle brackets, or # ampersands that aren't part of entities. We need # to escape those to XML entities too. - val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, + val) attrs.append(fmt % (self.toEncoding(key, encoding), self.toEncoding(val, encoding))) @@ -798,7 +812,7 @@ prettyPrint=False, indentLevel=0): """Renders the contents of this tag as a string in the given encoding. 
If encoding is None, returns a Unicode string..""" - s=[] + s = [] for c in self: text = None if isinstance(c, NavigableString): @@ -912,13 +926,13 @@ if isinstance(markupName, Tag): markup = markupName markupAttrs = markup - callFunctionWithTagData = callable(self.name) \ - and not isinstance(markupName, Tag) + callFunctionWithTagData = callable(self.name) and \ + not isinstance(markupName, Tag) if (not self.name) \ - or callFunctionWithTagData \ - or (markup and self._matches(markup, self.name)) \ - or (not markup and self._matches(markupName, self.name)): + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): if callFunctionWithTagData: match = self.name(markupName, markupAttrs) else: @@ -926,11 +940,11 @@ markupAttrMap = None for attr, matchAgainst in self.attrs.items(): if not markupAttrMap: - if hasattr(markupAttrs, 'get'): + if hasattr(markupAttrs, 'get'): markupAttrMap = markupAttrs - else: + else: markupAttrMap = {} - for k,v in markupAttrs: + for k, v in markupAttrs: markupAttrMap[k] = v attrValue = markupAttrMap.get(attr) if not self._matches(attrValue, matchAgainst): @@ -948,11 +962,10 @@ found = None # If given a list of items, scan it for a text element that # matches. - if hasattr(markup, "__iter__") \ - and not isinstance(markup, Tag): + if hasattr(markup, "__iter__") and not isinstance(markup, Tag): for element in markup: - if isinstance(element, NavigableString) \ - and self.search(element): + if isinstance(element, NavigableString) and \ + self.search(element): found = element break # If it's a Tag, make sure its name or attributes match. @@ -961,13 +974,13 @@ if not self.text: found = self.searchTag(markup) # If it's text, make sure the text matches. 
- elif isinstance(markup, NavigableString) or \ - isinstance(markup, basestring): + elif isinstance(markup, NavigableString) or isinstance(markup, + basestring): if self._matches(markup, self.text): found = markup else: - raise Exception, "I don't know how to match against a %s" \ - % markup.__class__ + raise Exception("I don't know how to match against a %s" + % markup.__class__) return found def _matches(self, markup, matchAgainst): @@ -988,10 +1001,10 @@ if hasattr(matchAgainst, 'match'): # It's a regexp object. result = markup and matchAgainst.search(markup) - elif hasattr(matchAgainst, '__iter__'): # list-like + elif hasattr(matchAgainst, '__iter__'): # list-like result = markup in matchAgainst elif hasattr(matchAgainst, 'items'): - result = markup.has_key(matchAgainst) + result = matchAgainst in markup elif matchAgainst and isinstance(markup, basestring): if isinstance(markup, unicode): matchAgainst = unicode(matchAgainst) @@ -1002,6 +1015,7 @@ result = matchAgainst == markup return result + class ResultSet(list): """A ResultSet is just a list that keeps track of the SoupStrainer that created it.""" @@ -1009,6 +1023,7 @@ list.__init__([]) self.source = source + # Now, some helper functions. def buildTagMap(default, *args): @@ -1019,9 +1034,9 @@ for portion in args: if hasattr(portion, 'items'): #It's a map. Merge it. - for k,v in portion.items(): + for k, v in portion.items(): built[k] = v - elif hasattr(portion, '__iter__'): # is a list + elif hasattr(portion, '__iter__'): # is a list #It's a list. Map each item to the default. for k in portion: built[k] = default @@ -1030,6 +1045,7 @@ built[portion] = default return built + # Now, the parser classes. class BeautifulStoneSoup(Tag, SGMLParser): @@ -1074,7 +1090,7 @@ # can be replaced with a single space. A text node that contains # fancy Unicode spaces (usually non-breaking) should be left # alone. 
- STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, } def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, markupMassage=True, smartQuotesTo=XML_ENTITIES, @@ -1151,7 +1167,7 @@ n = int(name) except ValueError: return - if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + if not 0 <= n <= 127: # ASCII ends at 127, not 255 return return self.convert_codepoint(n) @@ -1162,9 +1178,10 @@ if not hasattr(self, 'originalEncoding'): self.originalEncoding = None else: - dammit = UnicodeDammit\ - (markup, [self.fromEncoding, inDocumentEncoding], - smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + dammit = UnicodeDammit(markup, + [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, + isHTML=isHTML) markup = dammit.unicode self.originalEncoding = dammit.originalEncoding self.declaredHTMLEncoding = dammit.declaredHTMLEncoding @@ -1194,7 +1211,7 @@ #print "__getattr__ called on %s.%s" % (self.__class__, methodName) if methodName.startswith('start_') or methodName.startswith('end_') \ - or methodName.startswith('do_'): + or methodName.startswith('do_'): return SGMLParser.__getattr__(self, methodName) elif not methodName.startswith('__'): return Tag.__getattr__(self, methodName) @@ -1204,8 +1221,8 @@ def isSelfClosingTag(self, name): """Returns true iff the given string is the name of a self-closing tag according to this parser.""" - return self.SELF_CLOSING_TAGS.has_key(name) \ - or self.instanceSelfClosingTags.has_key(name) + return name in self.SELF_CLOSING_TAGS or \ + name in self.instanceSelfClosingTags def reset(self): Tag.__init__(self, self, self.ROOT_TAG_NAME) @@ -1244,8 +1261,8 @@ currentData = ' ' self.currentData = [] if self.parseOnlyThese and len(self.tagStack) <= 1 and \ - (not self.parseOnlyThese.text or \ - not self.parseOnlyThese.search(currentData)): + (not self.parseOnlyThese.text or not + 
self.parseOnlyThese.search(currentData)): return o = containerClass(currentData) o.setup(self.currentTag, self.previous) @@ -1254,7 +1271,6 @@ self.previous = o self.currentTag.contents.append(o) - def _popToTag(self, name, inclusivePop=True): """Pops the tag stack up to and including the most recent instance of the given tag. If inclusivePop is false, pops the tag @@ -1296,8 +1312,8 @@ """ nestingResetTriggers = self.NESTABLE_TAGS.get(name) - isNestable = nestingResetTriggers != None - isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + isNestable = nestingResetTriggers is not None + isResetNesting = name in self.RESET_NESTING_TAGS popTo = None inclusive = True for i in range(len(self.tagStack)-1, 0, -1): @@ -1310,7 +1326,7 @@ if (nestingResetTriggers is not None and p.name in nestingResetTriggers) \ or (nestingResetTriggers is None and isResetNesting - and self.RESET_NESTING_TAGS.has_key(p.name)): + and p.name in self.RESET_NESTING_TAGS): #If we encounter one of the nesting reset triggers #peculiar to this tag, or we encounter another tag @@ -1337,7 +1353,8 @@ self._smartPop(name) if self.parseOnlyThese and len(self.tagStack) <= 1 \ - and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + and (self.parseOnlyThese.text or + not self.parseOnlyThese.searchTag(name, attrs)): return tag = Tag(self, name, attrs, self.currentTag, self.previous) @@ -1411,7 +1428,7 @@ data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) if not data and self.convertHTMLEntities and \ - not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): # TODO: We've got a problem here. We're told this is # an entity reference, but it's not an XML entity # reference or an HTML entity reference. 
Nonetheless, @@ -1448,12 +1465,12 @@ declaration as a CData object.""" j = None if self.rawdata[i:i+9] == '<![CDATA[': - k = self.rawdata.find(']]>', i) - if k == -1: - k = len(self.rawdata) - data = self.rawdata[i+9:k] - j = k+3 - self._toStringSubclass(data, CData) + k = self.rawdata.find(']]>', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k + 3 + self._toStringSubclass(data, CData) else: try: j = SGMLParser.parse_declaration(self, i) @@ -1463,6 +1480,7 @@ j = i + len(toHandle) return j + class BeautifulSoup(BeautifulStoneSoup): """This parser knows the following facts about HTML: @@ -1512,18 +1530,18 @@ BeautifulStoneSoup before writing your own subclass.""" def __init__(self, *args, **kwargs): - if not kwargs.has_key('smartQuotesTo'): + if not 'smartQuotesTo' in kwargs: kwargs['smartQuotesTo'] = self.HTML_ENTITIES kwargs['isHTML'] = True BeautifulStoneSoup.__init__(self, *args, **kwargs) SELF_CLOSING_TAGS = buildTagMap(None, - ('br' , 'hr', 'input', 'img', 'meta', + ('br', 'hr', 'input', 'img', 'meta', 'spacer', 'link', 'frame', 'base', 'col')) PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) - QUOTE_TAGS = {'script' : None, 'textarea' : None} + QUOTE_TAGS = {'script': None, 'textarea': None} #According to the HTML standard, each of these inline tags can #contain another tag of the same type. Furthermore, it's common @@ -1537,21 +1555,21 @@ NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') #Lists can contain other lists, but there are restrictions. - NESTABLE_LIST_TAGS = { 'ol' : [], - 'ul' : [], - 'li' : ['ul', 'ol'], - 'dl' : [], - 'dd' : ['dl'], - 'dt' : ['dl'] } + NESTABLE_LIST_TAGS = {'ol': [], + 'ul': [], + 'li': ['ul', 'ol'], + 'dl': [], + 'dd': ['dl'], + 'dt': ['dl']} #Tables can contain other tables, but there are restrictions. 
- NESTABLE_TABLE_TAGS = {'table' : [], - 'tr' : ['table', 'tbody', 'tfoot', 'thead'], - 'td' : ['tr'], - 'th' : ['tr'], - 'thead' : ['table'], - 'tbody' : ['table'], - 'tfoot' : ['table'], + NESTABLE_TABLE_TAGS = {'table': [], + 'tr': ['table', 'tbody', 'tfoot', 'thead'], + 'td': ['tr'], + 'th': ['tr'], + 'thead': ['table'], + 'tbody': ['table'], + 'tfoot': ['table'], } NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') @@ -1587,11 +1605,11 @@ contentType = value contentTypeIndex = i - if httpEquiv and contentType: # It's an interesting meta tag. + if httpEquiv and contentType: # It's an interesting meta tag. match = self.CHARSET_RE.search(contentType) if match: if (self.declaredHTMLEncoding is not None or - self.originalEncoding == self.fromEncoding): + self.originalEncoding == self.fromEncoding): # An HTML encoding was sniffed while converting # the document to Unicode, or an HTML encoding was # sniffed during a previous pass through the @@ -1616,9 +1634,11 @@ if tag and tagNeedsEncodingSubstitution: tag.containsSubstitutions = True + class StopParsing(Exception): pass + class ICantBelieveItsBeautifulSoup(BeautifulSoup): """The BeautifulSoup class is oriented towards skipping over @@ -1644,10 +1664,10 @@ it's valid HTML and BeautifulSoup screwed up by assuming it wouldn't be.""" - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ - ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', - 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', - 'big') + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = ( + 'em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', 'cite', + 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', 'big' + ) I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) @@ -1655,6 +1675,7 @@ I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + class MinimalSoup(BeautifulSoup): """The MinimalSoup class is for parsing HTML that contains pathologically bad markup. 
It makes no assumptions about tag @@ -1668,6 +1689,7 @@ RESET_NESTING_TAGS = buildTagMap('noscript') NESTABLE_TAGS = {} + class BeautifulSOAP(BeautifulStoneSoup): """This class will push a tag with only a single string child into the tag's parent as an attribute. The attribute's name is the tag @@ -1695,10 +1717,11 @@ parent._getAttrMap() if (isinstance(tag, Tag) and len(tag.contents) == 1 and isinstance(tag.contents[0], NavigableString) and - not parent.attrMap.has_key(tag.name)): + not tag.name in parent.attrMap): parent[tag.name] = tag.contents[0] BeautifulStoneSoup.popTag(self) + #Enterprise class names! It has come to our attention that some people #think the names of the Beautiful Soup parser classes are too silly #and "unprofessional" for use in enterprise screen-scraping. We feel @@ -1749,6 +1772,7 @@ except ImportError: pass + class UnicodeDammit: """A class for detecting the encoding of a *ML document and converting it to a Unicode string. If the source encoding is @@ -1759,14 +1783,14 @@ # meta tags to the corresponding Python codec names. It only covers # values that aren't in Python's aliases and can't be determined # by the heuristics in find_codec. 
- CHARSET_ALIASES = { "macintosh" : "mac-roman", - "x-sjis" : "shift-jis" } + CHARSET_ALIASES = {"macintosh": "mac-roman", + "x-sjis": "shift-jis"} def __init__(self, markup, overrideEncodings=[], smartQuotesTo='xml', isHTML=False): self.declaredHTMLEncoding = None self.markup, documentEncoding, sniffedEncoding = \ - self._detectEncoding(markup, isHTML) + self._detectEncoding(markup, isHTML) self.smartQuotesTo = smartQuotesTo self.triedEncodings = [] if markup == '' or isinstance(markup, unicode): @@ -1819,9 +1843,8 @@ if self.smartQuotesTo and proposed.lower() in("windows-1252", "iso-8859-1", "iso-8859-2"): - markup = re.compile("([\x80-\x9f])").sub \ - (lambda(x): self._subMSChar(x.group(1)), - markup) + markup = re.compile("([\x80-\x9f])").sub( + lambda(x): self._subMSChar(x.group(1)), markup) try: # print "Trying to convert document to %s" % proposed @@ -1841,11 +1864,11 @@ # strip Byte Order Mark (if present) if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ - and (data[2:4] != '\x00\x00'): + and (data[2:4] != '\x00\x00'): encoding = 'utf-16be' data = data[2:] - elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ - and (data[2:4] != '\x00\x00'): + elif (len(data) >= 4) and \ + (data[:2] == '\xff\xfe') and (data[2:4] != '\x00\x00'): encoding = 'utf-16le' data = data[2:] elif data[:3] == '\xef\xbb\xbf': @@ -1871,8 +1894,8 @@ # UTF-16BE sniffed_xml_encoding = 'utf-16be' xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ - and (xml_data[2:4] != '\x00\x00'): + elif (len(xml_data) >= 4) and \ + (xml_data[:2] == '\xfe\xff') and (xml_data[2:4] != '\x00\x00'): # UTF-16BE with BOM sniffed_xml_encoding = 'utf-16be' xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') @@ -1881,7 +1904,7 @@ sniffed_xml_encoding = 'utf-16le' xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ - (xml_data[2:4] != '\x00\x00'): + (xml_data[2:4] != 
'\x00\x00'): # UTF-16LE with BOM sniffed_xml_encoding = 'utf-16le' xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') @@ -1927,7 +1950,6 @@ xml_encoding = sniffed_xml_encoding return xml_data, xml_encoding, sniffed_xml_encoding - def find_codec(self, charset): return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ or (charset and self._codec(charset.replace("-", ""))) \ @@ -1945,63 +1967,70 @@ return codec EBCDIC_TO_ASCII_MAP = None + def _ebcdic_to_ascii(self, s): c = self.__class__ if not c.EBCDIC_TO_ASCII_MAP: - emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, - 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, - 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, - 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, - 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, - 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, - 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, - 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, - 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, - 201,202,106,107,108,109,110,111,112,113,114,203,204,205, - 206,207,208,209,126,115,116,117,118,119,120,121,122,210, - 211,212,213,214,215,216,217,218,219,220,221,222,223,224, - 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, - 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, - 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, - 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, - 250,251,252,253,254,255) + emap = (0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, + 29, 30, 31, 128, 129, 130, 131, 132, 10, 23, 27, 136, 137, + 138, 139, 140, 5, 6, 7, 144, 145, 22, 147, 148, 149, 150, 4, + 152, 153, 154, 155, 20, 21, 158, 26, 32, 160, 161, 162, 163, + 164, 165, 166, 167, 168, 91, 46, 60, 40, 43, 33, 38, 169, + 170, 171, 172, 173, 174, 175, 176, 177, 93, 36, 42, 41, 59, + 94, 45, 47, 178, 179, 180, 181, 182, 183, 
184, 185, 124, 44, + 37, 95, 62, 63, 186, 187, 188, 189, 190, 191, 192, 193, 194, + 96, 58, 35, 64, 39, 61, 34, 195, 97, 98, 99, 100, 101, 102, + 103, 104, 105, 196, 197, 198, 199, 200, 201, 202, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 203, 204, 205, 206, 207, + 208, 209, 126, 115, 116, 117, 118, 119, 120, 121, 122, 210, + 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, + 223, 224, 225, 226, 227, 228, 229, 230, 231, 123, 65, 66, + 67, 68, 69, 70, 71, 72, 73, 232, 233, 234, 235, 236, 237, + 125, 74, 75, 76, 77, 78, 79, 80, 81, 82, 238, 239, 240, 241, + 242, 243, 92, 159, 83, 84, 85, 86, 87, 88, 89, 90, 244, 245, + 246, 247, 248, 249, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + 250, 251, 252, 253, 254, 255) import string - c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ - ''.join(map(chr, range(256))), ''.join(map(chr, emap))) + c.EBCDIC_TO_ASCII_MAP = string.maketrans(''.join(map(chr, + range(256))), + ''.join(map(chr, emap))) return s.translate(c.EBCDIC_TO_ASCII_MAP) - MS_CHARS = { '\x80' : ('euro', '20AC'), - '\x81' : ' ', - '\x82' : ('sbquo', '201A'), - '\x83' : ('fnof', '192'), - '\x84' : ('bdquo', '201E'), - '\x85' : ('hellip', '2026'), - '\x86' : ('dagger', '2020'), - '\x87' : ('Dagger', '2021'), - '\x88' : ('circ', '2C6'), - '\x89' : ('permil', '2030'), - '\x8A' : ('Scaron', '160'), - '\x8B' : ('lsaquo', '2039'), - '\x8C' : ('OElig', '152'), - '\x8D' : '?', - '\x8E' : ('#x17D', '17D'), - '\x8F' : '?', - '\x90' : '?', - '\x91' : ('lsquo', '2018'), - '\x92' : ('rsquo', '2019'), - '\x93' : ('ldquo', '201C'), - '\x94' : ('rdquo', '201D'), - '\x95' : ('bull', '2022'), - '\x96' : ('ndash', '2013'), - '\x97' : ('mdash', '2014'), - '\x98' : ('tilde', '2DC'), - '\x99' : ('trade', '2122'), - '\x9a' : ('scaron', '161'), - '\x9b' : ('rsaquo', '203A'), - '\x9c' : ('oelig', '153'), - '\x9d' : '?', - '\x9e' : ('#x17E', '17E'), - '\x9f' : ('Yuml', ''),} + MS_CHARS = { + '\x80': ('euro', '20AC'), + '\x81': ' ', + '\x82': ('sbquo', '201A'), + '\x83': ('fnof', 
'192'), + '\x84': ('bdquo', '201E'), + '\x85': ('hellip', '2026'), + '\x86': ('dagger', '2020'), + '\x87': ('Dagger', '2021'), + '\x88': ('circ', '2C6'), + '\x89': ('permil', '2030'), + '\x8A': ('Scaron', '160'), + '\x8B': ('lsaquo', '2039'), + '\x8C': ('OElig', '152'), + '\x8D': '?', + '\x8E': ('#x17D', '17D'), + '\x8F': '?', + '\x90': '?', + '\x91': ('lsquo', '2018'), + '\x92': ('rsquo', '2019'), + '\x93': ('ldquo', '201C'), + '\x94': ('rdquo', '201D'), + '\x95': ('bull', '2022'), + '\x96': ('ndash', '2013'), + '\x97': ('mdash', '2014'), + '\x98': ('tilde', '2DC'), + '\x99': ('trade', '2122'), + '\x9a': ('scaron', '161'), + '\x9b': ('rsaquo', '203A'), + '\x9c': ('oelig', '153'), + '\x9d': '?', + '\x9e': ('#x17E', '17E'), + '\x9f': ('Yuml', ''), + } #######################################################################

1 0

SVN: [11155] trunk/pywikipedia/basic.py
by xqt＠svn.wikimedia.org 02 Mar '13

02 Mar '13

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11155 Revision: 11155 Author: xqt Date: 2013-03-02 10:39:02 +0000 (Sat, 02 Mar 2013) Log Message: ----------- some PEP8 changes Modified Paths: -------------- trunk/pywikipedia/basic.py Modified: trunk/pywikipedia/basic.py =================================================================== --- trunk/pywikipedia/basic.py 2013-03-02 10:37:00 UTC (rev 11154) +++ trunk/pywikipedia/basic.py 2013-03-02 10:39:02 UTC (rev 11155) @@ -36,6 +36,7 @@ '&params;': pagegenerators.parameterHelp } + class BasicBot: # Edit summary message that should be used is placed on /i18n subdirectory. # The file containing these messages should have the same name as the caller @@ -108,7 +109,7 @@ % page.title()) # show what was changed pywikibot.showDiff(page.get(), text) - pywikibot.output(u'Comment: %s' %comment) + pywikibot.output(u'Comment: %s' % comment) choice = pywikibot.inputChoice( u'Do you want to accept these changes?', ['Yes', 'No'], ['y', 'N'], 'N') @@ -131,6 +132,7 @@ return True return False + class AutoBasicBot(BasicBot): # Intended for usage e.g. as cronjob without prompting the user. @@ -142,23 +144,31 @@ ## @since 10326 # @remarks needed by various bots def save(self, page, text, comment=None, **kwargs): - pywikibot.output(u'\03{lightblue}Writing to wiki on %s...\03{default}' % page.title(asLink=True)) + pywikibot.output(u'\03{lightblue}Writing to wiki on %s...\03{default}' + % page.title(asLink=True)) comment_output = comment or pywikibot.action - pywikibot.output(u'\03{lightblue}Comment: %s\03{default}' % comment_output) + pywikibot.output(u'\03{lightblue}Comment: %s\03{default}' + % comment_output) #pywikibot.showDiff(page.get(), text) - for i in range(3): # try max. 
3 times + for i in range(3): try: # Save the page page.put(text, comment=comment, **kwargs) except pywikibot.LockedPage: - pywikibot.output(u"\03{lightblue}Page %s is locked; skipping.\03{default}" % page.title(asLink=True)) + pywikibot.output( + u"\03{lightblue}Page %s is locked; skipping.\03{default}" + % page.title(asLink=True)) except pywikibot.EditConflict: - pywikibot.output(u'\03{lightblue}Skipping %s because of edit conflict\03{default}' % (page.title())) + pywikibot.output( + u'\03{lightblue}Skipping %s because of edit ' + u'conflict\03{default}' % (page.title())) except pywikibot.SpamfilterError, error: - pywikibot.output(u'\03{lightblue}Cannot change %s because of spam blacklist entry %s\03{default}' % (page.title(), error.url)) + pywikibot.output( + u'\03{lightblue}Cannot change %s because of spam blacklist ' + u'entry %s\03{default}' % (page.title(), error.url)) else: return True return False @@ -167,18 +177,22 @@ # @remarks needed by various bots def append(self, page, text, comment=None, section=None, **kwargs): if section: - pywikibot.output(u'\03{lightblue}Appending to wiki on %s in section %s...\03{default}' % (page.title(asLink=True), section)) - - for i in range(3): # try max. 
3 times + pywikibot.output( + u'\03{lightblue}Appending to wiki on %s in section ' + u'%s...\03{default}' % (page.title(asLink=True), section)) + for i in range(3): try: # Append to page section - page.append(text, comment=comment, section=section, **kwargs) + page.append(text, comment=comment, section=section, + **kwargs) except pywikibot.PageNotSaved, error: - pywikibot.output(u'\03{lightblue}Cannot change %s because of %s\03{default}' % (page.title(), error)) + pywikibot.output( + u'\03{lightblue}Cannot change %s because of ' + u'%s\03{default}' % (page.title(), error)) else: return True else: - content = self.load( page ) # 'None' if not existing page + content = self.load(page) # 'None' if not existing page if not content: # (create new page) content = u'' @@ -197,7 +211,7 @@ Returns a list of dict with the templates parameters found. """ - self._content = self.load(page) # 'None' if not existing page + self._content = self.load(page) # 'None' if not existing page templates = [] if not self._content: @@ -208,7 +222,7 @@ param_default = {} param_default.update(default) param_default.update(tmpl[1]) - templates.append( param_default ) + templates.append(param_default) return templates ## @since 10326 @@ -226,8 +240,10 @@ Returns a list of jobs. This list may be empty. 
""" - try: actual = page.getVersionHistory(revCount=1)[0] - except: pass + try: + actual = page.getVersionHistory(revCount=1)[0] + except: + pass secure = False for item in queue_security[0]: @@ -235,20 +251,21 @@ secure = secure and (actual[3] == queue_security[1]) - if not secure: return [] + if not secure: + return [] data = self._REGEX_eol.split(page.get()) if reset: pywikibot.output(u'\03{lightblue}Job queue reset...\03{default}') - pywikibot.setAction(u'reset job queue') - page.put(u'', minorEdit = True) + page.put(u'', minorEdit=True) queue = [] for line in data: - queue.append( line[1:].strip() ) + queue.append(line[1:].strip()) return queue + def main(): # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages

1 0

SVN: [11154] trunk/pywikipedia/add_text.py
by xqt＠svn.wikimedia.org 02 Mar '13

02 Mar '13

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11154 Revision: 11154 Author: xqt Date: 2013-03-02 10:37:00 +0000 (Sat, 02 Mar 2013) Log Message: ----------- Import order Modified Paths: -------------- trunk/pywikipedia/add_text.py Modified: trunk/pywikipedia/add_text.py =================================================================== --- trunk/pywikipedia/add_text.py 2013-03-02 10:27:00 UTC (rev 11153) +++ trunk/pywikipedia/add_text.py 2013-03-02 10:37:00 UTC (rev 11154) @@ -73,11 +73,13 @@ __version__ = '$Id$' # -import re, pagegenerators, urllib2, urllib +import re, urllib2, urllib +import webbrowser +import codecs import wikipedia as pywikibot from pywikibot import i18n -import codecs, config -import webbrowser +import pagegenerators +import config # This is required for the text that is shown when you run this script # with the parameter -help.

1 0

SVN: [11153] trunk/pywikipedia/add_text.py
by xqt＠svn.wikimedia.org 02 Mar '13

02 Mar '13

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11153 Revision: 11153 Author: xqt Date: 2013-03-02 10:27:00 +0000 (Sat, 02 Mar 2013) Log Message: ----------- some PEP8 changes Modified Paths: -------------- trunk/pywikipedia/add_text.py Modified: trunk/pywikipedia/add_text.py =================================================================== --- trunk/pywikipedia/add_text.py 2013-03-02 09:14:47 UTC (rev 11152) +++ trunk/pywikipedia/add_text.py 2013-03-02 10:27:00 UTC (rev 11153) @@ -66,7 +66,7 @@ # # (C) Filnik, 2007-2010 -# (C) Pywikipedia bot team, 2007-2010 +# (C) Pywikipedia bot team, 2007-2013 # # Distributed under the terms of the MIT license. # @@ -82,17 +82,23 @@ # This is required for the text that is shown when you run this script # with the parameter -help. docuReplacements = { - '&params;': pagegenerators.parameterHelp, + '&params;': pagegenerators.parameterHelp, } nn_iw_msg = u'' + class NoEnoughData(pywikibot.Error): """ Error class for when the user doesn't specified all the data needed """ + class NothingFound(pywikibot.Error): - """ An exception indicating that a regex has return [] instead of results.""" + """ + An exception indicating that a regex has return [] instead of results. + """ + + # Useful for the untagged function def pageText(url): """ Function to load HTML text of a URL """ @@ -104,36 +110,43 @@ response.close() # When you load to many users, urllib2 can give this error. except urllib2.HTTPError: - pywikibot.output(u"Server error. Pausing for 10 seconds... " + time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime()) ) + pywikibot.output(u"Server error. Pausing for 10 seconds... 
" + + time.strftime("%d %b %Y %H:%M:%S (UTC)", + time.gmtime())) response.close() time.sleep(10) return pageText(url) return text -def untaggedGenerator(untaggedProject, limit = 500): +def untaggedGenerator(untaggedProject, limit=500): """ Function to get the pages returned by this tool: - http://toolserver.org/~daniel/WikiSense/UntaggedImages.php """ + http://toolserver.org/~daniel/WikiSense/UntaggedImages.php + + """ lang = untaggedProject.split('.', 1)[0] project = '.' + untaggedProject.split('.', 1)[1] + URL = 'http://toolserver.org/~daniel/WikiSense/UntaggedImages.php?' if lang == 'commons': - link = 'http://toolserver.org/~daniel/WikiSense/UntaggedImages.php?wikifam=commons.…' + link = '%swikifam=commons.wikimedia.org&since=-100d&until=&img_user_text=&order=img_timestamp&max=%d&order=img_timestamp&format=html' \ + % (URL, limit) else: - link = 'http://toolserver.org/~daniel/WikiSense/UntaggedImages.php?wikilang=' + lang + '&wikifam=' + project + '&order=img_timestamp&max=' + str(limit) + '&ofs=0&max=' + str(limit) + link = '%swikilang=%s&wikifam=%s&order=img_timestamp&max=%d&ofs=0&max=%d' \ + % (URL, lang, project, limit, limit) text = pageText(link) - #print text - regexp = r"""<td valign='top' title='Name'><a href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>""" + regexp = r"<td valign='top' title='Name'><a href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>" results = re.findall(regexp, text) if results == []: print link raise NothingFound( -'Nothing found! Try to use the tool by yourself to be sure that it works!') + 'Nothing found! 
Try to use the tool by yourself to be sure that it ' + 'works!') else: for result in results: yield pywikibot.Page(pywikibot.getSite(), result) -def add_text(page = None, addText = None, summary = None, regexSkip = None, - regexSkipUrl = None, always = False, up = False, putText = True, - oldTextGiven = None, reorderEnabled = True, create=False): +def add_text(page=None, addText=None, summary=None, regexSkip=None, + regexSkipUrl=None, always=False, up=False, putText=True, + oldTextGiven=None, reorderEnabled=True, create=False): if not addText: raise NoEnoughData('You have to specify what text you want to add!') if not summary: @@ -174,12 +187,11 @@ errorCount = 0 site = pywikibot.getSite() - # /wiki/ is not always the right path in non-wiki projects pathWiki = site.family.nicepath(site.lang) if putText: pywikibot.output(u'Loading %s...' % page.title()) - if oldTextGiven == None: + if oldTextGiven is None: try: text = page.get() except pywikibot.NoPage: @@ -189,29 +201,29 @@ text = u'' else: pywikibot.output(u"%s doesn't exist, skip!" % page.title()) - return (False, False, always) # continue + return (False, False, always) except pywikibot.IsRedirectPage: pywikibot.output(u"%s is a redirect, skip!" % page.title()) - return (False, False, always) # continue + return (False, False, always) else: text = oldTextGiven # Understand if the bot has to skip the page or not # In this way you can use both -except and -excepturl - if regexSkipUrl != None: + if regexSkipUrl is not None: url = '%s%s' % (pathWiki, page.urlname()) result = re.findall(regexSkipUrl, site.getUrl(url)) if result != []: pywikibot.output( u'''Exception! regex (or word) used with -exceptUrl is in the page. Skip! Match was: %s''' % result) - return (False, False, always) # continue - if regexSkip != None: + return (False, False, always) + if regexSkip is not None: result = re.findall(regexSkip, text) if result != []: pywikibot.output( u'''Exception! regex (or word) used with -except is in the page. Skip! 
Match was: %s''' % result) - return (False, False, always) # continue + return (False, False, always) # If not up, text put below if not up: newtext = text @@ -229,8 +241,9 @@ # nn got a message between the categories and the iw's # and they want to keep it there, first remove it hasCommentLine = False - if (site.language()==u'nn'): - regex = re.compile('()') + if (site.language() == u'nn'): + regex = re.compile( + '()') found = regex.findall(newtext) if found: hasCommentLine = True @@ -240,31 +253,31 @@ newtext += u"\n%s" % addText # Reputting the categories newtext = pywikibot.replaceCategoryLinks(newtext, - categoriesInside, site, True) + categoriesInside, site, + True) #Put the nn iw message back - if site.language()==u'nn' and (interwikiInside or hasCommentLine): + if site.language() == u'nn' and (interwikiInside or hasCommentLine): newtext = newtext + u'\r\n\r\n' + nn_iw_msg # Dealing the stars' issue allstars = [] starstext = pywikibot.removeDisabledParts(text) for star in starsList: - regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, - re.I) + regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' + % star, re.I) found = regex.findall(starstext) if found != []: newtext = regex.sub('', newtext) allstars += found if allstars != []: - newtext = newtext.strip()+'\r\n\r\n' + newtext = newtext.strip() + '\r\n\r\n' allstars.sort() for element in allstars: newtext += '%s\r\n' % element.strip() # Adding the interwiki - newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside, site) + newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside, + site) else: - # Adding the text newtext += u"\n%s" % addText - # If instead the text must be added above... 
else: newtext = addText + '\n' + text if putText and text != newtext: @@ -279,7 +292,8 @@ if not always: choice = pywikibot.inputChoice( u'Do you want to accept these changes?', - ['Yes', 'No', 'All', 'open in Browser'], ['y', 'N', 'a', 'b'], 'N') + ['Yes', 'No', 'All', 'open in Browser'], + ['y', 'n', 'a', 'b'], 'n') if choice == 'a': always = True elif choice == 'n': @@ -326,14 +340,19 @@ else: return (text, newtext, always) + def main(): # If none, the var is setted only for check purpose. - summary = None; addText = None; regexSkip = None; regexSkipUrl = None; - generator = None; always = False - textfile=None - talkPage=False + summary = None + addText = None + regexSkip = None + regexSkipUrl = None + generator = None + always = False + textfile = None + talkPage = False reorderEnabled = True - namespaces=[] + namespaces = [] # Load a lot of default generators genFactory = pagegenerators.GeneratorFactory() # Put the text above or below the text? @@ -360,8 +379,7 @@ if len(arg) == 5: generator = [pywikibot.Page( pywikibot.getSite(), - pywikibot.input(u'What page do you want to use?') - )] + pywikibot.input(u'What page do you want to use?'))] else: generator = [pywikibot.Page(pywikibot.getSite(), arg[6:])] elif arg.startswith('-excepturl'): @@ -397,7 +415,6 @@ f.close() if not generator: generator = genFactory.getCombinedGenerator() - # Check if there are the minimal settings if not generator: raise NoEnoughData( 'You have to specify the generator you want to use for the script!') @@ -406,14 +423,14 @@ site = pywikibot.getSite() for namespace in site.namespaces(): index = site.getNamespaceIndex(namespace) - if index%2==1 and index>0: + if index % 2 == 1 and index > 0: namespaces += [index] generator = pagegenerators.NamespaceFilterPageGenerator( generator, namespaces) - # Main Loop for page in generator: (text, newtext, always) = add_text(page, addText, summary, regexSkip, - regexSkipUrl, always, up, True, reorderEnabled=reorderEnabled, + regexSkipUrl, always, 
up, True, + reorderEnabled=reorderEnabled, create=talkPage) if __name__ == "__main__":

1 0

SVN: [11152] trunk/pywikipedia/followlive.py
by xqt＠svn.wikimedia.org 02 Mar '13

02 Mar '13

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11152 Revision: 11152 Author: xqt Date: 2013-03-02 09:14:47 +0000 (Sat, 02 Mar 2013) Log Message: ----------- some PEP8 changes Modified Paths: -------------- trunk/pywikipedia/followlive.py Modified: trunk/pywikipedia/followlive.py =================================================================== --- trunk/pywikipedia/followlive.py 2013-03-02 08:45:43 UTC (rev 11151) +++ trunk/pywikipedia/followlive.py 2013-03-02 09:14:47 UTC (rev 11152) @@ -8,10 +8,16 @@ There must be A LOT of bugs ! Use with caution and verify what it is doing ! """ - +# +# (C) Pywikipedia team, 2005-2013 +# +# Distributed under the terms of the MIT license. +# __version__='$Id$' -import sys, datetime, time, traceback +import sys +import datetime, time +import traceback import wikipedia as pywikibot import editarticle @@ -34,278 +40,278 @@ # templates contains a message and its position templates = { 'ar': { - '{{شطب}}' :{ - 'msg' : 'وسم مساعد بالبوت: هذا المقال ينبغي حذفه', + '{{شطب}}': { + 'msg': 'وسم مساعد بالبوت: هذا المقال ينبغي حذفه', 'pos': 'top'}, - '{{تنظيف}}' :{ - 'msg' : 'وسم مساعد بالبوت: هذا المقال يحتاج للتنظيف', + '{{تنظيف}}': { + 'msg': 'وسم مساعد بالبوت: هذا المقال يحتاج للتنظيف', 'pos': 'top'}, - '{{بذرة}}':{ - 'msg' : 'وسم مساعد بالبوت: هذا المقال بذرة', + '{{بذرة}}': { + 'msg': 'وسم مساعد بالبوت: هذا المقال بذرة', 'pos': 'bottom'}, - '{{ويكي}}' :{ - 'msg' : 'وسم مساعد بالبوت: هذا المقال يحتاج إلى التنسيق بضيغة الويكي حسب [[ويكيبيديا:دليل الأسلوب|دليل الأسلوب]]', - 'pos' : 'top'}, + '{{ويكي}}': { + 'msg': 'وسم مساعد بالبوت: هذا المقال يحتاج إلى التنسيق بضيغة الويكي حسب [[ويكيبيديا:دليل الأسلوب|دليل الأسلوب]]', + 'pos': 'top'}, }, 'en': { - '{{db-reason}}' :{ - 'msg' : 'Robot-assisted tagging: this article should be deleted', + '{{db-reason}}': { + 'msg': 'Robot-assisted tagging: this article should be deleted', 'pos': 'top'}, - '{{cleanup}}' :{ - 'msg' : 'Robot-assisted tagging: this article need cleanup', + 
'{{cleanup}}': { + 'msg': 'Robot-assisted tagging: this article need cleanup', 'pos': 'top'}, - '{{stub}}':{ - 'msg' : 'Robot-assisted tagging: this article is a stub', + '{{stub}}': { + 'msg': 'Robot-assisted tagging: this article is a stub', 'pos': 'bottom'}, - '{{uncategorized}}' :{ - 'msg' : 'Robot-assisted tagging: This article needs to be [[Wikipedia:Categorization|categorized]]', - 'pos' : 'top'}, + '{{uncategorized}}': { + 'msg': 'Robot-assisted tagging: This article needs to be [[Wikipedia:Categorization|categorized]]', + 'pos': 'top'}, - '{{notability}}':{ - 'msg' : 'Robot-assisted tagging: the [[Wikipedia:Notability|notability]] of this article is unclear.', + '{{notability}}': { + 'msg': 'Robot-assisted tagging: the [[Wikipedia:Notability|notability]] of this article is unclear.', 'pos': 'top'}, - '{{not verified}}':{ - 'msg' : 'Robot-assisted tagging: this article needs to be checked for factuality.', + '{{not verified}}': { + 'msg': 'Robot-assisted tagging: this article needs to be checked for factuality.', 'pos': 'top'}, - '{{copyedit}}':{ - 'msg' : 'Robot-assisted tagging: the writing of this article needs to be [[Wikipedia:How to copy-edit|copyedited]] and improved.', - 'pos' : 'top'}, + '{{copyedit}}': { + 'msg': 'Robot-assisted tagging: the writing of this article needs to be [[Wikipedia:How to copy-edit|copyedited]] and improved.', + 'pos': 'top'}, - '{{unreferenced}}':{ - 'msg' : 'Robot-assisted tagging: this article needs [[Wikipedia:Citing sources|references]] so it can be verified.', + '{{unreferenced}}': { + 'msg': 'Robot-assisted tagging: this article needs [[Wikipedia:Citing sources|references]] so it can be verified.', 'pos': 'bottom'}, - '{{wikify}}' :{ - 'msg' : 'Robot-assisted tagging: this article needs to be wikified per the [[Wikipedia:Manual of Style|Manual of Style]]', - 'pos' : 'top'}, + '{{wikify}}': { + 'msg': 'Robot-assisted tagging: this article needs to be wikified per the [[Wikipedia:Manual of Style|Manual of Style]]', + 
'pos': 'top'}, }, - 'ia':{ - '{{Eliminar}}' :{ - 'msg' : 'Assistite per robot: iste articulo debe esser eliminate', + 'ia': { + '{{Eliminar}}': { + 'msg': 'Assistite per robot: iste articulo debe esser eliminate', 'pos': 'top'}, - '{{Revision}}' :{ - 'msg' : 'Assistite per robot: iste articulo require revision', + '{{Revision}}': { + 'msg': 'Assistite per robot: iste articulo require revision', 'pos': 'top'}, - '{{Stub}}' :{ - 'msg' : 'Assistite per robot: iste articulo es in stato embryonic', + '{{Stub}}': { + 'msg': 'Assistite per robot: iste articulo es in stato embryonic', 'pos': 'bottom'}, }, - 'fr':{ + 'fr': { u'{{suppression}}': { - 'msg' : u'à l\'aide du robot: cet article devrait être supprimé', + 'msg': u'à l\'aide du robot: cet article devrait être supprimé', 'pos': 'top'}, - u'{{à vérifier}}' : { + u'{{à vérifier}}': { 'msg': u'à l\'aide du robot: cet article est à vérifier', 'pos': 'top'}, - u'{{ébauche}}' : { + u'{{ébauche}}': { 'msg': u'à l\'aide du robot: cet article est une ébauche', 'pos': 'top'}, }, - 'he':{ + 'he': { u'{{מחק}}': { - 'msg' : u'יש למחוק ערך זה', + 'msg': u'יש למחוק ערך זה', 'pos': 'top' }, u'{{לשכתב}}': { - 'msg' : u'ערך זה דורש שכתוב', + 'msg': u'ערך זה דורש שכתוב', 'pos': 'top' }, u'{{קצרמר}}': { - 'msg' : u'ערך זה הוא קצרמר', + 'msg': u'ערך זה הוא קצרמר', 'pos': 'bottom' }, - u'{{הבהרת חשיבות}}':{ - 'msg' : u'חשיבותו של ערך זה אינה ברורה.', + u'{{הבהרת חשיבות}}': { + 'msg': u'חשיבותו של ערך זה אינה ברורה.', 'pos': 'top' }, u'{{עריכה}}': { - 'msg' : u'ערך זה דורש עריכה', + 'msg': u'ערך זה דורש עריכה', 'pos': 'top'}, }, - 'ia':{ + 'ia': { u'{{Eliminar}}': { - 'msg' : u'Iste articulo debe esser eliminate', + 'msg': u'Iste articulo debe esser eliminate', 'pos': 'top' }, u'{{Revision}}': { - 'msg' : u'Iste articulo require revision', + 'msg': u'Iste articulo require revision', 'pos': 'top' }, u'{{Stub}}': { - 'msg' : u'Iste articulo es in stato embryonic', + 'msg': u'Iste articulo es in stato embryonic', 'pos': 'bottom' }, }, - 
'nl':{ + 'nl': { u'{{weg}}': { - 'msg' : '{weg}', - 'pos' : 'top' + 'msg': '{weg}', + 'pos': 'top' }, u'{{nuweg}}': { - 'msg' : '{nuweg}', - 'pos' : 'top' + 'msg': '{nuweg}', + 'pos': 'top' }, u'{{wiu}}': { - 'msg' : '{wiu}', - 'pos' : 'top' + 'msg': '{wiu}', + 'pos': 'top' }, u'{{beg}}': { - 'msg' : '{beg}', - 'pos' : 'bottom' + 'msg': '{beg}', + 'pos': 'bottom' }, u'{{wikify}}': { - 'msg' : '{wikify}', - 'pos' : 'top' + 'msg': '{wikify}', + 'pos': 'top' }, u'{{wb}}': { - 'msg' : '{wb}', - 'pos' : 'top' + 'msg': '{wb}', + 'pos': 'top' }, }, - 'pl':{ + 'pl': { u'{{ek}}': { - 'msg' : u'[[Kategoria:Ekspresowe kasowanko|ek]]', - 'pos':'top' + 'msg': u'[[Kategoria:Ekspresowe kasowanko|ek]]', + 'pos': 'top' }, - u'{{dopracować}}' : { - 'msg' : u'Dopracować', - 'pos':'top' + u'{{dopracować}}': { + 'msg': u'Dopracować', + 'pos': 'top' }, - u'{{linki}}' : { - 'msg' : u'Linki wewnętrzne do dodania', - 'pos':'top' + u'{{linki}}': { + 'msg': u'Linki wewnętrzne do dodania', + 'pos': 'top' }, - u'{{źródła}}' : { - 'msg' : u'W artykule brakuje źródeł', - 'pos':'top' + u'{{źródła}}': { + 'msg': u'W artykule brakuje źródeł', + 'pos': 'top' }, - u'{{stub}}' : { - 'msg' : u'stub (zalążek)', - 'pos':'bottom' + u'{{stub}}': { + 'msg': u'stub (zalążek)', + 'pos': 'bottom' }, }, 'pt': { - u'{{wikificar}}' : { + u'{{wikificar}}': { 'msg': 'Assistida por bot: {{wikificar}}', - 'pos':'top'}, + 'pos': 'top'}, - u'{{reciclar}}' : { + u'{{reciclar}}': { 'msg': 'Assistida por bot: {{reciclar}}', - 'pos':'top'}, + 'pos': 'top'}, - u'{{lixo|~~~~}}' : { + u'{{lixo|~~~~}}': { 'msg': 'Assistida por bot: {{lixo}}', - 'pos':'top'}, + 'pos': 'top'}, - u'{{revisão}}' : { + u'{{revisão}}': { 'msg': 'Assistida por bot: {{revisão}}', - 'pos':'top'}, + 'pos': 'top'}, - u'{{impróprio}}' : { + u'{{impróprio}}': { 'msg': 'Assistida por bot: {{impróprio}}', - 'pos':'top'}, + 'pos': 'top'}, - u'{{apagar vaidade}}' : { + u'{{apagar vaidade}}': { 'msg': 'Assistida por bot: {{apagar vaidade}}', - 'pos':'top'}, + 
'pos': 'top'}, }, 'sv': { - u'{{radera}}' :{ - 'msg' : u'Robotkoll: Artikeln bör raderas', + u'{{radera}}': { + 'msg': u'Robotkoll: Artikeln bör raderas', 'pos': 'top'}, - u'{{städa}}' :{ - 'msg' : u'Robotkoll: Artikeln bör städas', + u'{{städa}}': { + 'msg': u'Robotkoll: Artikeln bör städas', 'pos': 'top'}, - u'{{stub}}':{ - 'msg' : u'Robotkoll: Artikeln är en stubbe', + u'{{stub}}': { + 'msg': u'Robotkoll: Artikeln är en stubbe', 'pos': 'bottom'}, - u'{{subst:relevanskontroll}}':{ - 'msg' : u'Robotkoll: Artikeln bör kollas mot [[WP:REL|Wikipedias relevanskriterier]].', + u'{{subst:relevanskontroll}}': { + 'msg': u'Robotkoll: Artikeln bör kollas mot [[WP:REL|Wikipedias relevanskriterier]].', 'pos': 'top'}, - u'{{verifieras}}':{ - 'msg' : u'Robotkoll: Artikeln bör verifieras', + u'{{verifieras}}': { + 'msg': u'Robotkoll: Artikeln bör verifieras', 'pos': 'top'}, - u'{{språkvård}}':{ - 'msg' : u'Robotkoll: Artikeln bör språkvårdas', - 'pos' : 'top'}, + u'{{språkvård}}': { + 'msg': u'Robotkoll: Artikeln bör språkvårdas', + 'pos': 'top'}, - u'{{Källor}}':{ - 'msg' : u'Robotkoll: Artikeln behöver källor', + u'{{Källor}}': { + 'msg': u'Robotkoll: Artikeln behöver källor', 'pos': 'bottom'}, - u'{{wikify}}' :{ - 'msg' : u'Robotkoll: Artikeln behöver wikifieras', - 'pos' : 'top'}, + u'{{wikify}}': { + 'msg': u'Robotkoll: Artikeln behöver wikifieras', + 'pos': 'top'}, }, 'zh': { - u'{{Delete}}' :{ - 'msg' : u'機器人掛上模板: 本文應被刪除。', + u'{{Delete}}': { + 'msg': u'機器人掛上模板: 本文應被刪除。', 'pos': 'top'}, - u'{{subst:Cleanup/auto}}' :{ - 'msg' : u'機器人掛上模板: 本文需清理', + u'{{subst:Cleanup/auto}}': { + 'msg': u'機器人掛上模板: 本文需清理', 'pos': 'top'}, - u'{{subst:Uncategorized/auto}}' :{ - 'msg' : u'機器人掛上模板: 本頁需要適當的頁面分類', - 'pos' : u'bottom'}, + u'{{subst:Uncategorized/auto}}': { + 'msg': u'機器人掛上模板: 本頁需要適當的頁面分類', + 'pos': u'bottom'}, - u'{{subst:Notability/auto}}':{ - 'msg' : u'機器人掛上模板: 本條目主題未突顯其知名度或顯著性', + u'{{subst:Notability/auto}}': { + 'msg': u'機器人掛上模板: 本條目主題未突顯其知名度或顯著性', 'pos': 'top'}, - 
u'{{subst:refimprove/auto}}':{ - 'msg' : u'機器人掛上模板: 本條目参考文献不足', + u'{{subst:refimprove/auto}}': { + 'msg': u'機器人掛上模板: 本條目参考文献不足', 'pos': 'top'}, - u'{{copyedit}}':{ - 'msg' : u'機器人掛上模板: 本條目或段落需要校對', - 'pos' : 'top'}, + u'{{copyedit}}': { + 'msg': u'機器人掛上模板: 本條目或段落需要校對', + 'pos': 'top'}, - u'{{subst:Unreferenced/auto}}':{ - 'msg' : u'機器人掛上模板: 本條目沒有列出任何參考或來源', + u'{{subst:Unreferenced/auto}}': { + 'msg': u'機器人掛上模板: 本條目沒有列出任何參考或來源', 'pos': 'top'}, - u'{{subst:wikify/auto}}' :{ - 'msg' : u'機器人掛上模板: 本條目需要維基化', - 'pos' : 'top'}, + u'{{subst:wikify/auto}}': { + 'msg': u'機器人掛上模板: 本條目需要維基化', + 'pos': 'top'}, - u'{{subst:Notchinese/auto}}':{ - 'msg' : u'機器人掛上模板: 本条目没有翻译', - 'pos' : 'top'}, + u'{{subst:Notchinese/auto}}': { + 'msg': u'機器人掛上模板: 本条目没有翻译', + 'pos': 'top'}, - u'{{subst:Substub/auto}}' :{ - 'msg' : u'機器人掛上模板: 小小作品', - 'pos' : 'top'}, + u'{{subst:Substub/auto}}': { + 'msg': u'機器人掛上模板: 小小作品', + 'pos': 'top'}, - u'{{stub}}':{ - 'msg' : u'機器人掛上模板: 本文是小作品', + u'{{stub}}': { + 'msg': u'機器人掛上模板: 本文是小作品', 'pos': 'bottom'}, - u'{{notchinesetitle}}':{ - 'msg' : u'機器人掛上模板: 本条目名称需要翻译成中文', + u'{{notchinesetitle}}': { + 'msg': u'機器人掛上模板: 本条目名称需要翻译成中文', 'pos': 'top'}, - u'{{subst:Translating/auto}}':{ - 'msg' : u'機器人掛上模板: 本条目没有翻译完成', + u'{{subst:Translating/auto}}': { + 'msg': u'機器人掛上模板: 本条目没有翻译完成', 'pos': 'top'}, - u'{{fansite}}':{ - 'msg' : u'機器人掛上模板: 本条目內容類似愛好者網站', + u'{{fansite}}': { + 'msg': u'機器人掛上模板: 本条目內容類似愛好者網站', 'pos': 'top'}, }, @@ -325,48 +331,50 @@ # do nothing if this is in it done = { - 'ar':(u'{{شطب}}', u'{{حذف}}', u'{{خرق}}'), - 'en':('{{VfD}}', '{{AfD}}', '{{AfD1}}', '{{cleanup}}', '{{nonsense}}', - '{{deletedpage}}', '{{db-reason}}', '{{notability}}', - '{{not verified}}', '{{unreferenced}}', '{{db-empty}}', - '{{db-nocontext}}', '{{db-foreign}}', '{{db-notenglish}}', - '{{db-nocontent}}', '{{db-blankcsd}}', '{{db-transwiki}}', - '{{db-attack}}', '{{db-band}}', '{{db-club}}', '{{db-bio}}', - '{{db-bio-notenglish}}', '{{db-inc}}', '{{db-bio-photo}}', - 
'{{db-catempty}}', '{{db-c2}}', '{{db-catfd}}', '{{badname}}', - '{{db-pagemove}}', '{{db-nonsense}}', '{{db-spam}}', '{{db-copyvio}}', - '{{db-test}}', '{{db-vandalism}}', '{{db-repost}}', '{{db-banned}}', - '{{db-histmerge}}', '{{db-move}}', '{{db-g6}}', '{{db-afd}}', - '{{db-disambig}}', '{{db-authora}}', '{{db-author}}', - '{{db-blanked}}', '{{csd:g7}}', '{{db-talk}}', '{{db-botnomain}}', - '{{db-redundantimage}}', '{{db-noimage}}', '{{db-noncom}}', - '{{db-ccnoncom}}', '{{db-unksource}}', '{{db-norat}}', - '{{db-badfairuse}}', '{{duplicate}}', '{{db-meta}}', - '{{db-emptyportal}}', '{{db-redirnone}}', '{{db-rediruser}}', - '{{db-redirtypo}}', '{{csd-c3}}', '{{cc-by-nc-sa}}', '{{cc-nd-nc}}', - '{{cc-nc}}', '{{cc-by-nc-2.0}}', '{{cc-by-nc-sa-2.0}}', - '{{cc-by-nd-nc-2.0}}', '{{cc-by-2.0-nc-nd}}', '{{cc-by-nc-nd-2.0}}', - '{{db-contact}}', '{{db-i2}}', '{{db-i1}}', '{{communityuseonly}}', - '{{db-disparage}}', '{{db-web}}', '{{db-userreq}}', '{{db-nouser}}', - '{{db-u3}}', '{{db-unfree}}'), - 'fr':(u'{{suppression}}', u'{{à vérifier}}', u'{{ébauche}}'), - 'ia':(u'{{Eliminar}}', u'{{Revision}}', u'{{Stub}}'), - 'he':(u'{{מחק}}', u'{{פירושונים}}', u'{{הצבעת מחיקה}}'), - 'nl':('{{nuweg}}', '{{weg}}', '{{wb}}', '{{wiu}}', '{{nocat}}'), - 'pl':('{{ek}}', u'{{dopracować}}', '{{linki}}', u'{{źródła}}', u'{{stub}}'), - 'pt':('{{wikificar}}', '{{reciclar}}', '{{lixo}}', u'{{revisão}}', - u'{{impróprio}}', u'{{apagar vaidade}}'), - 'sv':(u'{{radera', u'{{Radera', u'{{städa}}', u'{{stub}}', - u'{{verifieras}}', u'{{språkvård}}', u'{{Källor', u'{{källor', - u'{{wikify}}', u'{{Ickewiki}}', u'{{ickewiki}}', u'{{Wikify}}'), - 'zh':(u'{{VfD}}', u'{{AfD}}', u'{{unreferenced}}', u'{{db-reason}}', - u'{{cleanup}}', u'{{stub}}', u'{{uncategorized}}', u'{{notability}}', - u'{{copyedit}}', u'{{unreferenced}}', u'{{wikify}}', - u'{{Translating}}',u'{{copyvio}}',u'{{Notchinese}}'), - } + 'ar': (u'{{شطب}}', u'{{حذف}}', u'{{خرق}}'), + 'en': ('{{VfD}}', '{{AfD}}', '{{AfD1}}', 
'{{cleanup}}', '{{nonsense}}', + '{{deletedpage}}', '{{db-reason}}', '{{notability}}', + '{{not verified}}', '{{unreferenced}}', '{{db-empty}}', + '{{db-nocontext}}', '{{db-foreign}}', '{{db-notenglish}}', + '{{db-nocontent}}', '{{db-blankcsd}}', '{{db-transwiki}}', + '{{db-attack}}', '{{db-band}}', '{{db-club}}', '{{db-bio}}', + '{{db-bio-notenglish}}', '{{db-inc}}', '{{db-bio-photo}}', + '{{db-catempty}}', '{{db-c2}}', '{{db-catfd}}', '{{badname}}', + '{{db-pagemove}}', '{{db-nonsense}}', '{{db-spam}}', + '{{db-copyvio}}', '{{db-test}}', '{{db-vandalism}}', '{{db-repost}}', + '{{db-banned}}', '{{db-histmerge}}', '{{db-move}}', '{{db-g6}}', + '{{db-afd}}', '{{db-disambig}}', '{{db-authora}}', '{{db-author}}', + '{{db-blanked}}', '{{csd:g7}}', '{{db-talk}}', '{{db-botnomain}}', + '{{db-redundantimage}}', '{{db-noimage}}', '{{db-noncom}}', + '{{db-ccnoncom}}', '{{db-unksource}}', '{{db-norat}}', + '{{db-badfairuse}}', '{{duplicate}}', '{{db-meta}}', + '{{db-emptyportal}}', '{{db-redirnone}}', '{{db-rediruser}}', + '{{db-redirtypo}}', '{{csd-c3}}', '{{cc-by-nc-sa}}', '{{cc-nd-nc}}', + '{{cc-nc}}', '{{cc-by-nc-2.0}}', '{{cc-by-nc-sa-2.0}}', + '{{cc-by-nd-nc-2.0}}', '{{cc-by-2.0-nc-nd}}', '{{cc-by-nc-nd-2.0}}', + '{{db-contact}}', '{{db-i2}}', '{{db-i1}}', '{{communityuseonly}}', + '{{db-disparage}}', '{{db-web}}', '{{db-userreq}}', '{{db-nouser}}', + '{{db-u3}}', '{{db-unfree}}'), + 'fr': (u'{{suppression}}', u'{{à vérifier}}', u'{{ébauche}}'), + 'ia': (u'{{Eliminar}}', u'{{Revision}}', u'{{Stub}}'), + 'he': (u'{{מחק}}', u'{{פירושונים}}', u'{{הצבעת מחיקה}}'), + 'nl': ('{{nuweg}}', '{{weg}}', '{{wb}}', '{{wiu}}', '{{nocat}}'), + 'pl': ('{{ek}}', u'{{dopracować}}', '{{linki}}', u'{{źródła}}', + u'{{stub}}'), + 'pt': ('{{wikificar}}', '{{reciclar}}', '{{lixo}}', u'{{revisão}}', + u'{{impróprio}}', u'{{apagar vaidade}}'), + 'sv': (u'{{radera', u'{{Radera', u'{{städa}}', u'{{stub}}', + u'{{verifieras}}', u'{{språkvård}}', u'{{Källor', u'{{källor', + u'{{wikify}}', 
u'{{Ickewiki}}', u'{{ickewiki}}', u'{{Wikify}}'), + 'zh': (u'{{VfD}}', u'{{AfD}}', u'{{unreferenced}}', u'{{db-reason}}', + u'{{cleanup}}', u'{{stub}}', u'{{uncategorized}}', u'{{notability}}', + u'{{copyedit}}', u'{{unreferenced}}', u'{{wikify}}', + u'{{Translating}}', u'{{copyvio}}', u'{{Notchinese}}'), +} # TODO: merge 'done' with 'templates' above + class PageHandler: # Initialization stuff def __init__(self, page, date, length, loggedIn, user, comment): @@ -445,14 +453,14 @@ if answer[0] == 'u': # Answer entered as an utf8 string try: - choices=answer[1:].split(',') + choices = answer[1:].split(',') except ValueError: # User entered wrong value pywikibot.output(u'ERROR: "%s" is not valid' % answer) continue else: try: - choices=answer.split(',') + choices = answer.split(',') except ValueError: # User entered wrong value pywikibot.output(u'ERROR: "%s" is not valid' % answer) @@ -460,11 +468,11 @@ #test input for choice in choices: try: - x=int(choice) + x = int(choice) except ValueError: break else: - answered=x in range(1,len(questionlist)+1) + answered = x in range(1, len(questionlist)+1) if not answered: pywikibot.output(u'ERROR: "%s" is not valid' % answer) continue @@ -472,8 +480,8 @@ for choice in choices: answer = int(choice) # grab the template parameters - tpl = pywikibot.translate(pywikibot.getSite(), templates) \ - [questionlist[answer]] + tpl = pywikibot.translate(pywikibot.getSite(), + templates)[questionlist[answer]] if tpl['pos'] == 'top': pywikibot.output(u'prepending %s...' % questionlist[answer]) self.content = questionlist[answer] + '\n' + self.content @@ -482,13 +490,13 @@ self.content += '\n' + questionlist[answer] else: pywikibot.output( - u'ERROR: "pos" should be "top" or "bottom" for template %s. Contact a developer.' - % questionlist[answer]) + u'ERROR: "pos" should be "top" or "bottom" for template ' + u'%s. Contact a developer.' 
% questionlist[answer]) sys.exit("Exiting") summary += tpl['msg']+' ' pywikibot.output(u'Probably added %s' % questionlist[answer]) # pywikibot.output(newcontent) bug #2986247 - self.page.put(self.content, comment = summary) + self.page.put(self.content, comment=summary) pywikibot.output(u'with comment %s\n' % summary) def run(self): @@ -520,8 +528,8 @@ questions = '\n' questionlist = {} for t in pywikibot.translate(pywikibot.getSite(), templates): - i+=1 - questions += ( u'%s) %s\n' % (i,t) ) + i += 1 + questions += (u'%s) %s\n' % (i, t)) questionlist[i] = t question = questions + question

1 0

Jump to page:

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

Pywikipedia-svn March 2013