jenkins-bot has submitted this change and it was merged.
Change subject: Find correct line feed in text, fix for right placement of header with noreferences.py (Bug 55409) (update from compat gerrit 88121)
......................................................................
Find correct line feed in text,
fix for right placement of header with noreferences.py (Bug 55409)
(update from compat gerrit 88121)
Change-Id: I9de67bfb3713106c30d26af9c6faf30c0156c4df
---
M scripts/noreferences.py
1 file changed, 4 insertions(+), 4 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/noreferences.py b/scripts/noreferences.py
index 8c8e0dd..cb857e0 100755
--- a/scripts/noreferences.py
+++ b/scripts/noreferences.py
@@ -480,7 +480,7 @@
"""
# Is there an existing section where we can add the references tag?
for section in pywikibot.translate(self.site, referencesSections):
- sectionR = re.compile(r'\r\n=+ *%s *=+ *\r\n' % section)
+ sectionR = re.compile(r'\r?\n=+ *%s *=+ *\r?\n' % section)
index = 0
while index < len(oldText):
match = sectionR.search(oldText, index)
@@ -502,7 +502,7 @@
# Create a new section for the references tag
for section in pywikibot.translate(self.site, placeBeforeSections):
# Find out where to place the new section
- sectionR = re.compile(r'\r\n(?P<ident>=+) *%s *(?P=ident) *\r\n'
+ sectionR = re.compile(r'\r?\n(?P<ident>=+) *%s *(?P=ident) *\r?\n'
% section)
index = 0
while index < len(oldText):
@@ -541,9 +541,9 @@
### {{commons}} or {{commonscat}} are part of Weblinks section
### * {{template}} is mostly part of a section
### so templatePattern must be fixed
- templatePattern = r'\r\n{{((?!}}).)+?}}\s*'
+ templatePattern = r'\r?\n{{((?!}}).)+?}}\s*'
commentPattern = r'<!--((?!-->).)*?-->\s*'
- metadataR = re.compile(r'(\r\n)?(%s|%s|%s|%s)$'
+ metadataR = re.compile(r'(\r?\n)?(%s|%s|%s|%s)$'
% (categoryPattern, interwikiPattern,
templatePattern, commentPattern), re.DOTALL)
tmpText = oldText
--
To view, visit https://gerrit.wikimedia.org/r/88122
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9de67bfb3713106c30d26af9c6faf30c0156c4df
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: synchronize with compat
......................................................................
synchronize with compat
Change-Id: I3acc2d20025bbbf1c9959f8e02fb7f816807eecd
---
M pywikibot/pagegenerators.py
1 file changed, 75 insertions(+), 59 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index bd24b0b..d749dbc 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
-"""This module offers a wide variety of page generators. A page generator is an
+"""
+This module offers a wide variety of page generators. A page generator is an
object that is iterable (see http://www.python.org/dev/peps/pep-0255/ ) and
that yields page objects on which other scripts can then work.
@@ -12,7 +13,7 @@
&params;
"""
#
-# (C) Pywikipedia bot team, 2008-2012
+# (C) Pywikipedia bot team, 2008-2013
#
# Distributed under the terms of the MIT license.
#
@@ -94,14 +95,6 @@
-newpages Work on the most recent new pages. If given as -newpages:x,
will work on the x newest pages.
--random Work on random pages returned by [[Special:Random]]
- Can also be given as "-random:n" where n is the number
- of pages to be returned, otherwise the default is 10 pages.
-
--randomredirect Work on random redirect pages returned by [[Special:RandomRedirect]].
- Can also be given as "-randomredirect:n" where n is the number
- of pages to be returned, else 10 pages are returned.
-
-recentchanges Work on the pages with the most recent changes. If
given as -recentchanges:x, will work on the x most recently
changed pages.
@@ -140,12 +133,22 @@
-usercontribs Work on all articles that were edited by a certain user :
Example : -usercontribs:DumZiBoT
+
-weblink Work on all articles that contain an external link to
a given URL; may be given as "-weblink:url"
-withoutinterwiki Work on all pages that don't have interlanguage links.
Argument can be given as "-withoutinterwiki:n" where
n is some number (??).
+
+-random Work on random pages returned by [[Special:Random]].
+ Can also be given as "-random:n" where n is the number
+ of pages to be returned, otherwise the default is 10 pages.
+
+-randomredirect Work on random redirect pages returned by
+ [[Special:RandomRedirect]]. Can also be given as
+ "-randomredirect:n" where n is the number of pages to be
+ returned, else 10 pages are returned.
-google Work on all pages that are found in a Google search.
You need a Google Web API license key. Note that Google
@@ -999,10 +1002,10 @@
class YahooSearchPageGenerator:
- '''
- To use this generator, install pYsearch
- '''
- def __init__(self, query=None, count=100, site=None): # values larger than 100 fail
+ """ To use this generator, install pYsearch """
+
+ # values larger than 100 fail
+ def __init__(self, query=None, count=100, site=None):
self.query = query or pywikibot.input(u'Please enter the search query:')
self.count = count
if site is None:
@@ -1012,7 +1015,6 @@
def queryYahoo(self, query):
from yahoo.search.web import WebSearch
srch = WebSearch(config.yahoo_appid, query=query, results=self.count)
-
dom = srch.get_results()
results = srch.parse_results(dom)
for res in results:
@@ -1032,12 +1034,14 @@
class GoogleSearchPageGenerator:
- '''
+ """
To use this generator, you must install the pyGoogle module from
http://pygoogle.sf.net/ and get a Google Web API license key from
http://www.google.com/apis/index.html . The google_key must be set to your
license key in your configuration.
- '''
+
+ """
+
def __init__(self, query=None, site=None):
self.query = query or pywikibot.input(u'Please enter the search query:')
if site is None:
@@ -1045,8 +1049,9 @@
self.site = site
#########
- # partially commented out because it is probably not in compliance with Google's "Terms of
- # service" (see 5.3, http://www.google.com/accounts/TOS?loc=US)
+ # partially commented out because it is probably not in compliance with
+ # Google's "Terms of service"
+ # (see 5.3, http://www.google.com/accounts/TOS?loc=US)
def queryGoogle(self, query):
#if config.google_key:
if True:
@@ -1065,22 +1070,24 @@
google.LICENSE_KEY = config.google_key
offset = 0
estimatedTotalResultsCount = None
- while not estimatedTotalResultsCount \
- or offset < estimatedTotalResultsCount:
- while (True):
+ while not estimatedTotalResultsCount or \
+ offset < estimatedTotalResultsCount:
+ while True:
# Google often yields 502 errors.
try:
pywikibot.output(u'Querying Google, offset %i' % offset)
- data = google.doGoogleSearch(query, start=offset, filter=False)
+ data = google.doGoogleSearch(query, start=offset,
+ filter=False)
break
except KeyboardInterrupt:
raise
except:
- # SOAPpy.Errors.HTTPError or SOAP.HTTPError (502 Bad Gateway)
- # can happen here, depending on the module used. It's not easy
- # to catch this properly because pygoogle decides which one of
- # the soap modules to use.
- pywikibot.output(u"An error occured. Retrying in 10 seconds...")
+ # SOAPpy.Errors.HTTPError or SOAP.HTTPError
+ # (502 Bad Gateway) can happen here, depending on the module
+ # used. It's not easy to catch this properly because
+ # pygoogle decides which one of the soap modules to use.
+ pywikibot.output(u"An error occured. "
+ u"Retrying in 10 seconds...")
time.sleep(10)
continue
@@ -1089,40 +1096,48 @@
yield result.URL
# give an estimate of pages to work on, but only once.
if not estimatedTotalResultsCount:
- pywikibot.output(u'Estimated total result count: %i pages.' % data.meta.estimatedTotalResultsCount)
+ pywikibot.output(u'Estimated total result count: %i pages.'
+ % data.meta.estimatedTotalResultsCount)
estimatedTotalResultsCount = data.meta.estimatedTotalResultsCount
#print 'estimatedTotalResultsCount: ', estimatedTotalResultsCount
offset += 10
- #########
- # commented out because it is probably not in compliance with Google's "Terms of
- # service" (see 5.3, http://www.google.com/accounts/TOS?loc=US)
-
- #def queryViaWeb(self, query):
- #"""
- #Google has stopped giving out API license keys, and sooner or later
- #they will probably shut down the service.
- #This is a quick and ugly solution: we just grab the search results from
- #the normal web interface.
- #"""
- #linkR = re.compile(r'<a href="([^>"]+?)" class=l>', re.IGNORECASE)
- #offset = 0
-
- #while True:
- #pywikibot.output("Google: Querying page %d" % (offset / 100 + 1))
- #address = "http://www.google.com/search?q=%s&num=100&hl=en&start=%d" % (urllib.quote_plus(query), offset)
- ## we fake being Firefox because Google blocks unknown browsers
- #request = urllib2.Request(address, None, {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.8) Gecko/20051128 SUSE/1.5-0.1 Firefox/1.5'})
- #urlfile = urllib2.urlopen(request)
- #page = urlfile.read()
- #urlfile.close()
- #for url in linkR.findall(page):
- #yield url
- #if "<div id=nn>" in page: # Is there a "Next" link for next page of results?
- #offset += 100 # Yes, go to next page of results.
- #else:
- #return
- #########
+#############
+## commented out because it is probably not in compliance with Google's
+## "Terms of service" (see 5.3, http://www.google.com/accounts/TOS?loc=US)
+##
+## def queryViaWeb(self, query):
+## """
+## Google has stopped giving out API license keys, and sooner or later
+## they will probably shut down the service.
+## This is a quick and ugly solution: we just grab the search results from
+## the normal web interface.
+## """
+## linkR = re.compile(r'<a href="([^>"]+?)" class=l>', re.IGNORECASE)
+## offset = 0
+##
+## while True:
+## pywikibot.output("Google: Querying page %d" % (offset / 100 + 1))
+## address = "http://www.google.com/search?q=%s&num=100&hl=en&start=%d" \
+## % (urllib.quote_plus(query), offset)
+## # we fake being Firefox because Google blocks unknown browsers
+## request = urllib2.Request(
+## address, None,
+## {'User-Agent':
+## 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.8) Gecko/20051128 '
+## 'SUSE/1.5-0.1 Firefox/1.5'})
+## urlfile = urllib2.urlopen(request)
+## page = urlfile.read()
+## urlfile.close()
+## for url in linkR.findall(page):
+## yield url
+##
+## # Is there a "Next" link for next page of results?
+## if "<div id=nn>" in page:
+## offset += 100 # Yes, go to next page of results.
+## else:
+## return
+#############
def __iter__(self):
# restrict query to local site
@@ -1133,7 +1148,8 @@
if url[:len(base)] == base:
title = url[len(base):]
page = pywikibot.Page(pywikibot.Link(title, self.site))
- # Google contains links in the format http://de.wikipedia.org/wiki/en:Foobar
+ # Google contains links in the format
+ # http://de.wikipedia.org/wiki/en:Foobar
if page.site == self.site:
yield page
--
To view, visit https://gerrit.wikimedia.org/r/89506
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I3acc2d20025bbbf1c9959f8e02fb7f816807eecd
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: pep8 fixes
......................................................................
pep8 fixes
Fix for Ic7afd8a11b23d55e5bc9089b041e16bef3ad3d0e
Change-Id: I07f7555953737b7f5a7c76356b7c3988bbea0780
---
M scripts/interwiki.py
1 file changed, 3 insertions(+), 2 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index 0908443..edf08ab 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -1753,8 +1753,9 @@
smallWikiAllowed = True
else:
pywikibot.output(
- u'NOTE: number of edits are restricted at %s'
- % page.site.sitename())
+ u'NOTE: number of edits are restricted at %s'
+ % page.site.sitename()
+ )
# if we have an account for this site
if site.family.name in config.usernames and \
--
To view, visit https://gerrit.wikimedia.org/r/90694
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I07f7555953737b7f5a7c76356b7c3988bbea0780
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot
Xqt has submitted this change and it was merged.
Change subject: Invalid literal found for minutesDiff
......................................................................
Invalid literal found for minutesDiff
Fixed bug 55208 in core.
Change-Id: Ic7afd8a11b23d55e5bc9089b041e16bef3ad3d0e
---
M scripts/interwiki.py
1 file changed, 8 insertions(+), 22 deletions(-)
Approvals:
Xqt: Verified; Looks good to me, approved
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index ff94157..0908443 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -348,6 +348,7 @@
import re
import os
import time
+import datetime
import codecs
import pickle
import socket
@@ -1604,21 +1605,6 @@
"""
- #from clean_sandbox
- def minutesDiff(time1, time2):
- if type(time1) is long:
- time1 = str(time1)
- if type(time2) is long:
- time2 = str(time2)
-
- t1 = ((((int(time1[0:4]) * 12 + int(time1[4:6])) * 30 +
- int(time1[6:8])) * 24 + int(time1[8:10])) * 60 +
- int(time1[10:12]))
- t2 = ((((int(time2[0:4]) * 12 + int(time2[4:6])) * 30 +
- int(time2[6:8])) * 24 + int(time2[8:10])) * 60 + \
- int(time2[10:12]))
- return abs(t2 - t1)
-
if not self.isDone():
raise "Bugcheck: finish called before done"
if not self.workonme:
@@ -1756,19 +1742,19 @@
if not smallWikiAllowed:
import userlib
user = userlib.User(page.site, page.userName())
+ # erstmal auch keine namen mit bot
if not 'bot' in user.groups() \
- and not 'bot' in page.userName().lower(): # erstmal auch keine namen mit bot
+ and not 'bot' in page.userName().lower():
smallWikiAllowed = True
else:
- diff = minutesDiff(page.editTime(),
- time.strftime("%Y%m%d%H%M%S",
- time.gmtime()))
- if diff > 30 * 24 * 60:
+ _now = datetime.datetime.utcnow()
+ _editTime = page.editTime()
+ if abs((_now - _editTime).days) > 30:
smallWikiAllowed = True
else:
pywikibot.output(
-u'NOTE: number of edits are restricted at %s'
- % page.site.sitename())
+ u'NOTE: number of edits are restricted at %s'
+ % page.site.sitename())
# if we have an account for this site
if site.family.name in config.usernames and \
--
To view, visit https://gerrit.wikimedia.org/r/90214
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ic7afd8a11b23d55e5bc9089b041e16bef3ad3d0e
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: pep8 changes, synchronize with core
......................................................................
pep8 changes, synchronize with core
Change-Id: I274201bd4247ec56f6573cf3bc40d29ef72ac6e7
---
M pagegenerators.py
1 file changed, 232 insertions(+), 145 deletions(-)
Approvals:
Legoktm: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pagegenerators.py b/pagegenerators.py
index 10330b2..8decfb0 100644
--- a/pagegenerators.py
+++ b/pagegenerators.py
@@ -18,19 +18,24 @@
#
# Distributed under the terms of the MIT license.
#
-__version__='$Id$'
+__version__ = '$Id$'
import re
import sys
import codecs
+import date
import datetime
-import urllib, urllib2, time
+import time
+import urllib
+import urllib2
import traceback
import wikipedia as pywikibot
import config
from pywikibot import i18n
from pywikibot.support import deprecate_arg
-import date, catlib, userlib, query
+import catlib
+import userlib
+import query
parameterHelp = u"""\
-cat Work on all pages which are in a specific category.
@@ -162,10 +167,10 @@
-random Work on random pages returned by [[Special:Random]].
Can also be given as "-random:n" where n is the number
- of pages to be returned, else 10 pages are returned.
+ of pages to be returned, otherwise the default is 10 pages.
--randomredirect Work on random redirect target pages returned by
- [[Special:Randomredirect]]. Can also be given as
+-randomredirect Work on random redirect pages returned by
+ [[Special:RandomRedirect]]. Can also be given as
"-randomredirect:n" where n is the number of pages to be
returned, else 10 pages are returned.
@@ -204,9 +209,10 @@
# For python 2.4 compatibility
# see http://www.mail-archive.com/python-dev@python.org/msg12668.html
try:
- GeneratorExit
+ GeneratorExit
except NameError:
- class GeneratorExit(Exception): pass
+ class GeneratorExit(Exception):
+ pass
class GeneratorFactory(object):
@@ -223,7 +229,7 @@
return map(int, self.namespaces)
def getCombinedGenerator(self, gen=None):
- """Returns the combination of all accumulated generators,
+ """Return the combination of all accumulated generators,
that have been created in the process of handling arguments.
Only call this after all arguments have been parsed.
@@ -239,7 +245,8 @@
gensList = CombinedPageGenerator(self.gens)
genToReturn = DuplicateFilterPageGenerator(gensList, total=self.limit)
if (self.getNamespaces()):
- genToReturn = NamespaceFilterPageGenerator(genToReturn, self.getNamespaces())
+ genToReturn = NamespaceFilterPageGenerator(genToReturn,
+ self.getNamespaces())
return genToReturn
def getCategoryGen(self, arg, length, recurse=False):
@@ -275,8 +282,7 @@
cat = catlib.Category(site,
"%s:%s" % (site.namespace(14), categoryname))
- return SubCategoriesPageGenerator(cat,
- start=startfrom, recurse=recurse)
+ return SubCategoriesPageGenerator(cat, start=startfrom, recurse=recurse)
def handleArg(self, arg):
"""Parse one argument at a time.
@@ -308,12 +314,12 @@
if len(arg) == 12:
gen = UnusedFilesGenerator()
else:
- gen = UnusedFilesGenerator(number = int(arg[13:]))
+ gen = UnusedFilesGenerator(number=int(arg[13:]))
elif arg.startswith('-unwatched'):
if len(arg) == 10:
gen = UnwatchedPagesPageGenerator()
else:
- gen = UnwatchedPagesPageGenerator(number = int(arg[11:]))
+ gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
elif arg.startswith('-usercontribs'):
args = arg[14:].split(';')
number = None
@@ -321,33 +327,34 @@
number = int(args[1])
except:
number = 250
- gen = UserContributionsGenerator(args[0], number, namespaces=self.getNamespaces)
+ gen = UserContributionsGenerator(args[0], number,
+ namespaces=self.getNamespaces)
elif arg.startswith('-withoutinterwiki'):
if len(arg) == 17:
gen = WithoutInterwikiPageGenerator()
else:
- gen = WithoutInterwikiPageGenerator(number = int(arg[18:]))
+ gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
elif arg.startswith('-interwiki'):
title = arg[11:]
if not title:
title = i18n.input('pywikibot-enter-page-processing')
page = pywikibot.Page(site, title)
gen = InterwikiPageGenerator(page)
- elif arg.startswith('-randomredirect'):
- if len(arg) == 15:
- gen = RandomRedirectPageGenerator()
- else:
- gen = RandomRedirectPageGenerator(number=int(arg[16:]))
elif arg.startswith('-random'):
if len(arg) == 7:
gen = RandomPageGenerator()
else:
gen = RandomPageGenerator(number=int(arg[8:]))
- elif arg.startswith('-recentchanges'):
- if len(arg) == 14:
- gen = RecentchangesPageGenerator()
+ elif arg.startswith('-randomredirect'):
+ if len(arg) == 15:
+ gen = RandomRedirectPageGenerator()
else:
+ gen = RandomRedirectPageGenerator(number=int(arg[16:]))
+ elif arg.startswith('-recentchanges'):
+ if len(arg) >= 15:
gen = RecentchangesPageGenerator(number=int(arg[15:]))
+ else:
+ gen = RecentchangesPageGenerator()
gen = DuplicateFilterPageGenerator(gen)
elif arg.startswith('-file'):
textfilename = arg[6:]
@@ -376,13 +383,13 @@
self.limit = int(arg[len('-limit:'):])
return True
elif arg.startswith('-catr'):
- gen = self.getCategoryGen(arg, len('-catr'), recurse = True)
+ gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
elif arg.startswith('-category'):
gen = self.getCategoryGen(arg, len('-category'))
elif arg.startswith('-cat'):
gen = self.getCategoryGen(arg, len('-cat'))
elif arg.startswith('-subcatsr'):
- gen = self.setSubCategoriesGen(arg, 9, recurse = True)
+ gen = self.setSubCategoriesGen(arg, 9, recurse=True)
elif arg.startswith('-subcats'):
gen = self.setSubCategoriesGen(arg, 8)
elif arg.startswith('-page'):
@@ -426,16 +433,16 @@
transclusionPageTitle = pywikibot.input(
u'Pages that transclude which page should be processed?')
transclusionPage = pywikibot.Page(site,
- "%s:%s" % (site.namespace(10),
- transclusionPageTitle))
+ "%s:%s" % (site.namespace(10),
+ transclusionPageTitle))
gen = ReferringPageGenerator(transclusionPage,
onlyTemplateInclusion=True)
elif arg.startswith('-gorandom'):
- for firstPage in RandomPageGenerator(number = 1):
+ for firstPage in RandomPageGenerator(number=1):
firstPageTitle = firstPage.title()
namespace = pywikibot.Page(site, firstPageTitle).namespace()
- firstPageTitle = pywikibot.Page(site,
- firstPageTitle).title(withNamespace=False)
+ firstPageTitle = pywikibot.Page(site, firstPageTitle
+ ).title(withNamespace=False)
gen = AllpagesPageGenerator(firstPageTitle, namespace,
includeredirects=False)
elif arg.startswith('-start'):
@@ -448,8 +455,8 @@
else:
namespace = pywikibot.Page(site, firstPageTitle).namespace()
- firstPageTitle = pywikibot.Page(site,
- firstPageTitle).title(withNamespace=False)
+ firstPageTitle = pywikibot.Page(site, firstPageTitle
+ ).title(withNamespace=False)
gen = AllpagesPageGenerator(firstPageTitle, namespace,
includeredirects=False)
elif arg.startswith('-redirectonly'):
@@ -458,8 +465,8 @@
firstPageTitle = pywikibot.input(
u'At which page do you want to start?')
namespace = pywikibot.Page(site, firstPageTitle).namespace()
- firstPageTitle = pywikibot.Page(site,
- firstPageTitle).title(withNamespace=False)
+ firstPageTitle = pywikibot.Page(site, firstPageTitle
+ ).title(withNamespace=False)
gen = AllpagesPageGenerator(firstPageTitle, namespace,
includeredirects='only')
elif arg.startswith('-prefixindex'):
@@ -468,16 +475,16 @@
if not prefix:
prefix = pywikibot.input(
u'What page names are you looking for?')
- gen = PrefixingPageGenerator(prefix = prefix)
+ gen = PrefixingPageGenerator(prefix=prefix)
elif arg.startswith('-newimages'):
limit = arg[11:] or pywikibot.input(
u'How many images do you want to load?')
- gen = NewimagesPageGenerator(number = int(limit))
+ gen = NewimagesPageGenerator(number=int(limit))
elif arg == ('-new') or arg.startswith('-new:'):
- if len(arg) >=5:
- gen = NewpagesPageGenerator(number = int(arg[5:]))
+ if len(arg) >= 5:
+ gen = NewpagesPageGenerator(number=int(arg[5:]))
else:
- gen = NewpagesPageGenerator(number = 60)
+ gen = NewpagesPageGenerator(number=60)
elif arg.startswith('-imagelinks'):
imagelinkstitle = arg[len('-imagelinks:'):]
if not imagelinkstitle:
@@ -490,7 +497,8 @@
if not mediawikiQuery:
mediawikiQuery = pywikibot.input(
u'What do you want to search for?')
- gen = SearchPageGenerator(mediawikiQuery, number=None, namespaces=self.getNamespaces)
+ gen = SearchPageGenerator(mediawikiQuery, number=None,
+ namespaces=self.getNamespaces)
elif arg.startswith('-google'):
gen = GoogleSearchPageGenerator(arg[8:])
elif arg.startswith('-titleregex'):
@@ -503,7 +511,8 @@
gen = YahooSearchPageGenerator(arg[7:])
elif arg.startswith('-'):
mode, log, user = arg.partition('log')
- if log == 'log' and mode not in ['-', '-no']: #exclude -log, -nolog
+ # exclude -log, -nolog
+ if log == 'log' and mode not in ['-', '-no']:
number = 500
if not user:
user = None
@@ -528,7 +537,7 @@
return False
-def AllpagesPageGenerator(start ='!', namespace=None, includeredirects=True,
+def AllpagesPageGenerator(start='!', namespace=None, includeredirects=True,
site=None):
"""
Iterate Page objects for all titles in a single namespace.
@@ -542,6 +551,7 @@
includeredirects=includeredirects):
yield page
+
def PrefixingPageGenerator(prefix, namespace=None, includeredirects=True,
site=None):
if site is None:
@@ -550,8 +560,10 @@
if namespace is None:
namespace = prefixpage.namespace()
title = prefixpage.title(withNamespace=False)
- for page in site.prefixindex(prefix=title, namespace=namespace, includeredirects=includeredirects):
+ for page in site.prefixindex(prefix=title, namespace=namespace,
+ includeredirects=includeredirects):
yield page
+
def LogpagesPageGenerator(number=500, mode='', user=None, repeat=False,
site=None, namespace=[]):
@@ -561,7 +573,8 @@
repeat=repeat, namespace=namespace):
yield page[0]
-@deprecate_arg("get_redirect", None) #20120822
+
+@deprecate_arg("get_redirect", None) # 20120822
def NewpagesPageGenerator(number=100, repeat=False, site=None, namespace=0):
"""
Iterate Page objects for all new titles in a single namespace.
@@ -572,6 +585,7 @@
for item in site.newpages(number=number, repeat=repeat, namespace=namespace,
rcshow=['!redirect']):
yield item[0]
+
def RecentchangesPageGenerator(number=100, site=None):
"""Generate pages that are in the recent changes list.
@@ -584,25 +598,32 @@
for item in site.recentchanges(number=number):
yield item[0]
+
def FileLinksGenerator(referredImagePage):
for page in referredImagePage.usingPages():
yield page
+
def ImagesPageGenerator(pageWithImages):
- for imagePage in pageWithImages.imagelinks(followRedirects=False, loose=True):
+ for imagePage in pageWithImages.imagelinks(followRedirects=False,
+ loose=True):
yield imagePage
-def UnusedFilesGenerator(number = 100, repeat = False, site = None, extension = None):
+
+def UnusedFilesGenerator(number=100, repeat=False, site=None, extension=None):
if site is None:
site = pywikibot.getSite()
- for page in site.unusedfiles(number=number, repeat=repeat, extension=extension):
+ for page in site.unusedfiles(number=number, repeat=repeat,
+ extension=extension):
yield pywikibot.ImagePage(page.site(), page.title())
+
def InterwikiPageGenerator(page):
"""Iterator over all interwiki (non-language) links on a page."""
yield page
for link in page.interwiki():
yield link
+
def ReferringPageGenerator(referredPage, followRedirects=False,
withTemplateInclusion=True,
@@ -612,6 +633,7 @@
withTemplateInclusion,
onlyTemplateInclusion):
yield page
+
def CategorizedPageGenerator(category, recurse=False, start=None):
"""Yield all pages in a specific category.
@@ -630,6 +652,7 @@
if start is None or a.title() >= start:
yield a
+
def SubCategoriesPageGenerator(category, recurse=False, start=None):
"""Yield all subcategories in a specific category.
@@ -646,16 +669,19 @@
for s in category.subcategories(recurse=recurse, startFrom=start):
yield s
+
def LinkedPageGenerator(linkingPage):
"""Yield all pages linked from a specific page."""
for page in linkingPage.linkedPages():
yield page
-def NewimagesPageGenerator(number = 100, repeat = False, site = None):
+
+def NewimagesPageGenerator(number=100, repeat=False, site=None):
if site is None:
site = pywikibot.getSite()
for page in site.newimages(number, repeat=repeat):
yield page[0]
+
def TextfilePageGenerator(filename=None, site=None):
"""Iterate pages from a list in a text file.
@@ -674,7 +700,9 @@
if site is None:
site = pywikibot.getSite()
f = codecs.open(filename, 'r', config.textfile_encoding)
- R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)') # title ends either before | or before ]]
+
+ # title ends either before | or before ]]
+ R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)')
pageTitle = None
for pageTitle in R.findall(f.read()):
# If the link is in interwiki format, the Page object may reside
@@ -693,11 +721,13 @@
yield pywikibot.Page(site, title)
f.close()
-def WithoutInterwikiPageGenerator(number = 100, repeat = False, site = None):
+
+def WithoutInterwikiPageGenerator(number=100, repeat=False, site=None):
if site is None:
site = pywikibot.getSite()
for page in site.withoutinterwiki(number=number, repeat=repeat):
yield page
+
def UnCategorizedCategoryGenerator(number=100, repeat=False, site=None):
if site is None:
@@ -705,11 +735,13 @@
for page in site.uncategorizedcategories(number=number, repeat=repeat):
yield page
+
def UnCategorizedImageGenerator(number=100, repeat=False, site=None):
if site is None:
site = pywikibot.getSite()
for page in site.uncategorizedimages(number=number, repeat=repeat):
yield page
+
def UnCategorizedPageGenerator(number=100, repeat=False, site=None):
if site is None:
@@ -717,11 +749,13 @@
for page in site.uncategorizedpages(number=number, repeat=repeat):
yield page
+
def UnCategorizedTemplatesGenerator(number=100, repeat=False, site=None):
if site is None:
site = pywikibot.getSite()
for page in site.uncategorizedtemplates(number=number, repeat=repeat):
yield page
+
def LonelyPagesPageGenerator(number=100, repeat=False, site=None):
if site is None:
@@ -729,11 +763,13 @@
for page in site.lonelypages(number=number, repeat=repeat):
yield page
+
def UnwatchedPagesPageGenerator(number=100, repeat=False, site=None):
if site is None:
site = pywikibot.getSite()
for page in site.unwatchedpages(number=number, repeat=repeat):
yield page
+
def AncientPagesPageGenerator(number=100, repeat=False, site=None):
if site is None:
@@ -741,36 +777,41 @@
for page in site.ancientpages(number=number, repeat=repeat):
yield page[0]
-def DeadendPagesPageGenerator(number = 100, repeat = False, site = None):
+
+def DeadendPagesPageGenerator(number=100, repeat=False, site=None):
if site is None:
site = pywikibot.getSite()
for page in site.deadendpages(number=number, repeat=repeat):
yield page
-def LongPagesPageGenerator(number = 100, repeat = False, site = None):
+
+def LongPagesPageGenerator(number=100, repeat=False, site=None):
if site is None:
site = pywikibot.getSite()
for page in site.longpages(number=number, repeat=repeat):
yield page[0]
-def ShortPagesPageGenerator(number = 100, repeat = False, site = None):
+
+def ShortPagesPageGenerator(number=100, repeat=False, site=None):
if site is None:
site = pywikibot.getSite()
for page in site.shortpages(number=number, repeat=repeat):
yield page[0]
-def RandomPageGenerator(number = 10, site = None):
+def RandomPageGenerator(number=10, site=None):
if site is None:
site = pywikibot.getSite()
for i in xrange(number):
yield site.randompage()
-def RandomRedirectPageGenerator(number = 10, site = None):
+
+def RandomRedirectPageGenerator(number=10, site=None):
if site is None:
site = pywikibot.getSite()
for i in xrange(number):
yield site.randomredirectpage()
+
def PagesFromTitlesGenerator(iterable, site=None):
"""Generate pages from the titles (unicode strings) yielded by iterable."""
@@ -781,6 +822,7 @@
break
yield pywikibot.Page(site, title)
+
def LinksearchPageGenerator(link, step=500, site=None):
"""Yields all pages that include a specified link, according to
[[Special:Linksearch]].
@@ -790,7 +832,8 @@
for page in site.linksearch(link, limit=step):
yield page
-def UserContributionsGenerator(username, number = 250, namespaces = [], site = None ):
+
+def UserContributionsGenerator(username, number=250, namespaces=[], site=None):
"""
Yields number unique pages edited by user:username
namespaces : List of namespace numbers to fetch contribs from. Also accepted
@@ -805,7 +848,8 @@
for page in user.contributions(number, namespaces):
yield page[0]
-def SearchPageGenerator(query, number = 100, namespaces = None, site = None):
+
+def SearchPageGenerator(query, number=100, namespaces=None, site=None):
"""
Provides a list of results using the internal MediaWiki search engine.
@@ -817,14 +861,15 @@
site = pywikibot.getSite()
if callable(namespaces):
namespaces = namespaces()
- for page in site.search(query, number=number, namespaces = namespaces):
+ for page in site.search(query, number=number, namespaces=namespaces):
yield page[0]
+
class YahooSearchPageGenerator:
- '''
- To use this generator, install pYsearch
- '''
- def __init__(self, query = None, count = 100, site = None): # values larger than 100 fail
+ """ To use this generator, install pYsearch """
+
+ # values larger than 100 fail
+ def __init__(self, query=None, count=100, site=None):
self.query = query or pywikibot.input(u'Please enter the search query:')
self.count = count
if site is None:
@@ -832,41 +877,45 @@
self.site = site
def queryYahoo(self, query):
- from yahoo.search.web import WebSearch
- srch = WebSearch(config.yahoo_appid, query=query, results=self.count)
-
- dom = srch.get_results()
- results = srch.parse_results(dom)
- for res in results:
- url = res.Url
- yield url
+ from yahoo.search.web import WebSearch
+ srch = WebSearch(config.yahoo_appid, query=query, results=self.count)
+ dom = srch.get_results()
+ results = srch.parse_results(dom)
+ for res in results:
+ url = res.Url
+ yield url
def __iter__(self):
# restrict query to local site
localQuery = '%s site:%s' % (self.query, self.site.hostname())
- base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address(''))
+ base = 'http://%s%s' % (self.site.hostname(),
+ self.site.nice_get_address(''))
for url in self.queryYahoo(localQuery):
if url[:len(base)] == base:
title = url[len(base):]
page = pywikibot.Page(self.site, title)
yield page
+
class GoogleSearchPageGenerator:
- '''
+ """
To use this generator, you must install the pyGoogle module from
http://pygoogle.sf.net/ and get a Google Web API license key from
http://www.google.com/apis/index.html . The google_key must be set to your
license key in your configuration.
- '''
- def __init__(self, query = None, site = None):
+
+ """
+
+ def __init__(self, query=None, site=None):
self.query = query or pywikibot.input(u'Please enter the search query:')
if site is None:
site = pywikibot.getSite()
self.site = site
#########
- # partially commented out because it is probably not in compliance with Google's "Terms of
- # service" (see 5.3, http://www.google.com/accounts/TOS?loc=US)
+ # partially commented out because it is probably not in compliance with
+ # Google's "Terms of service"
+ # (see 5.3, http://www.google.com/accounts/TOS?loc=US)
def queryGoogle(self, query):
#if config.google_key:
if True:
@@ -887,15 +936,17 @@
url = u'http://ajax.googleapis.com/ajax/services/search/web?'
params = {
'key': config.google_key,
- 'v':'1.0',
+ 'v': '1.0',
'q': query,
}
url += urllib.urlencode(params)
while True:
try:
- pywikibot.output(u'Querying Google AJAX Search API...') #, offset %i' % offset)
- result = json.loads(self.site.getUrl(url, refer = config.google_api_refer, no_hostname=True))
+ pywikibot.output(u'Querying Google AJAX Search API...')
+ result = json.loads(
+ self.site.getUrl(url, refer=config.google_api_refer,
+ no_hostname=True))
for res in result['responseData']['results']:
yield res['url']
except:
@@ -908,22 +959,24 @@
google.LICENSE_KEY = config.google_key
offset = 0
estimatedTotalResultsCount = None
- while not estimatedTotalResultsCount \
- or offset < estimatedTotalResultsCount:
- while (True):
+ while not estimatedTotalResultsCount or \
+ offset < estimatedTotalResultsCount:
+ while True:
# Google often yields 502 errors.
try:
pywikibot.output(u'Querying Google, offset %i' % offset)
- data = google.doGoogleSearch(query, start = offset, filter = False)
+ data = google.doGoogleSearch(query, start=offset,
+ filter=False)
break
except KeyboardInterrupt:
raise
except:
- # SOAPpy.Errors.HTTPError or SOAP.HTTPError (502 Bad Gateway)
- # can happen here, depending on the module used. It's not easy
- # to catch this properly because pygoogle decides which one of
- # the soap modules to use.
- pywikibot.output(u"An error occured. Retrying in 10 seconds...")
+ # SOAPpy.Errors.HTTPError or SOAP.HTTPError
+ # (502 Bad Gateway) can happen here, depending on the module
+ # used. It's not easy to catch this properly because
+ # pygoogle decides which one of the soap modules to use.
+ pywikibot.output(u"An error occured. "
+ u"Retrying in 10 seconds...")
time.sleep(10)
continue
@@ -932,40 +985,48 @@
yield result.URL
# give an estimate of pages to work on, but only once.
if not estimatedTotalResultsCount:
- pywikibot.output(u'Estimated total result count: %i pages.' % data.meta.estimatedTotalResultsCount)
+ pywikibot.output(u'Estimated total result count: %i pages.'
+ % data.meta.estimatedTotalResultsCount)
estimatedTotalResultsCount = data.meta.estimatedTotalResultsCount
#print 'estimatedTotalResultsCount: ', estimatedTotalResultsCount
offset += 10
- #########
- # commented out because it is probably not in compliance with Google's "Terms of
- # service" (see 5.3, http://www.google.com/accounts/TOS?loc=US)
-
- #def queryViaWeb(self, query):
- #"""
- #Google has stopped giving out API license keys, and sooner or later
- #they will probably shut down the service.
- #This is a quick and ugly solution: we just grab the search results from
- #the normal web interface.
- #"""
- #linkR = re.compile(r'<a href="([^>"]+?)" class=l>', re.IGNORECASE)
- #offset = 0
-
- #while True:
- #pywikibot.output("Google: Querying page %d" % (offset / 100 + 1))
- #address = "http://www.google.com/search?q=%s&num=100&hl=en&start=%d" % (urllib.quote_plus(query), offset)
- ## we fake being Firefox because Google blocks unknown browsers
- #request = urllib2.Request(address, None, {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.8) Gecko/20051128 SUSE/1.5-0.1 Firefox/1.5'})
- #urlfile = urllib2.urlopen(request)
- #page = urlfile.read()
- #urlfile.close()
- #for url in linkR.findall(page):
- #yield url
- #if "<div id=nn>" in page: # Is there a "Next" link for next page of results?
- #offset += 100 # Yes, go to next page of results.
- #else:
- #return
- #########
+#############
+## commented out because it is probably not in compliance with Google's
+## "Terms of service" (see 5.3, http://www.google.com/accounts/TOS?loc=US)
+##
+## def queryViaWeb(self, query):
+## """
+## Google has stopped giving out API license keys, and sooner or later
+## they will probably shut down the service.
+## This is a quick and ugly solution: we just grab the search results from
+## the normal web interface.
+## """
+## linkR = re.compile(r'<a href="([^>"]+?)" class=l>', re.IGNORECASE)
+## offset = 0
+##
+## while True:
+## pywikibot.output("Google: Querying page %d" % (offset / 100 + 1))
+## address = "http://www.google.com/search?q=%s&num=100&hl=en&start=%d" \
+## % (urllib.quote_plus(query), offset)
+## # we fake being Firefox because Google blocks unknown browsers
+## request = urllib2.Request(
+## address, None,
+## {'User-Agent':
+## 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.8) Gecko/20051128 '
+## 'SUSE/1.5-0.1 Firefox/1.5'})
+## urlfile = urllib2.urlopen(request)
+## page = urlfile.read()
+## urlfile.close()
+## for url in linkR.findall(page):
+## yield url
+##
+## # Is there a "Next" link for next page of results?
+## if "<div id=nn>" in page:
+## offset += 100 # Yes, go to next page of results.
+## else:
+## return
+#############
def __iter__(self):
# restrict query to local site
@@ -976,17 +1037,19 @@
if url[:len(base)] == base:
title = url[len(base):]
page = pywikibot.Page(self.site, title)
- # Google contains links in the format http://de.wikipedia.org/wiki/en:Foobar
- if page.site() == self.site:
+ # Google contains links in the format
+ # http://de.wikipedia.org/wiki/en:Foobar
+ if page.site == self.site:
yield page
-def MySQLPageGenerator(query, site = None):
+
+def MySQLPageGenerator(query, site=None):
import MySQLdb as mysqldb
if site is None:
site = pywikibot.getSite()
- conn = mysqldb.connect(config.db_hostname, db = site.dbName(),
- user = config.db_username,
- passwd = config.db_password)
+ conn = mysqldb.connect(config.db_hostname, db=site.dbName(),
+ user=config.db_username,
+ passwd=config.db_password)
cursor = conn.cursor()
pywikibot.output(u'Executing query:\n%s' % query)
query = query.encode(site.encoding())
@@ -1009,7 +1072,8 @@
page = pywikibot.Page(site, pageTitle)
yield page
-def YearPageGenerator(start = 1, end = 2050, site = None):
+
+def YearPageGenerator(start=1, end=2050, site=None):
if site is None:
site = pywikibot.getSite()
pywikibot.output(u"Starting with year %i" % start)
@@ -1018,10 +1082,11 @@
pywikibot.output(u'Preparing %i...' % i)
# There is no year 0
if i != 0:
- current_year = date.formatYear(site.lang, i )
+ current_year = date.formatYear(site.lang, i)
yield pywikibot.Page(site, current_year)
-def DayPageGenerator(startMonth = 1, endMonth = 12, site = None):
+
+def DayPageGenerator(startMonth=1, endMonth=12, site=None):
if site is None:
site = pywikibot.getSite()
fd = date.FormatDate(site)
@@ -1031,7 +1096,8 @@
for day in xrange(1, date.getNumberOfDaysInMonth(month)+1):
yield pywikibot.Page(site, fd(month, day))
-def NamespaceFilterPageGenerator(generator, namespaces, site = None):
+
+def NamespaceFilterPageGenerator(generator, namespaces, site=None):
"""
Wraps around another generator. Yields only those pages that are in one
of the given namespaces.
@@ -1056,6 +1122,7 @@
if page.namespace() in namespaces:
yield page
+
def PageTitleFilterPageGenerator(generator, ignoreList):
"""
Wraps around another generator. Yields only those pages are not
@@ -1067,7 +1134,8 @@
"""
def isIgnored(page):
- if not (page.site().family.name in ignoreList and page.site().lang in ignoreList[page.site().family.name]):
+ if not (page.site().family.name in ignoreList and
+ page.site().lang in ignoreList[page.site().family.name]):
return False
for ig in ignoreList[page.site().family.name][page.site().lang]:
@@ -1082,13 +1150,17 @@
else:
yield page
+
def RedirectFilterPageGenerator(generator):
"""
- Wraps around another generator. Yields only those pages that are not redirects.
+ Wraps around another generator. Yields only those pages that are not
+ redirects.
+
"""
for page in generator:
if not page.isRedirectPage():
yield page
+
def DuplicateFilterPageGenerator(generator, total=None):
"""
@@ -1098,7 +1170,8 @@
seenPages = dict()
count = 0
for page in generator:
- _page = u"%s:%s:%s" % (page._site.family.name, page._site.lang, page._title)
+ _page = u"%s:%s:%s" % (page._site.family.name, page._site.lang,
+ page._title)
if _page not in seenPages:
seenPages[_page] = True
if total:
@@ -1107,7 +1180,9 @@
break
yield page
-def RegexFilterPageGenerator(generator, regex, inverse=False, ignore_namespace=True):
+
+def RegexFilterPageGenerator(generator, regex, inverse=False,
+ ignore_namespace=True):
"""
Wraps around another generator. Yields only those pages, the titles of
which are positively matched to any regex in list. If invert is False,
@@ -1121,13 +1196,13 @@
regex = [regex]
# test if regex is already compiled
if isinstance(regex[0], basestring):
- reg = [ re.compile(r, re.I) for r in regex ]
+ reg = [re.compile(r, re.I) for r in regex]
else:
reg = regex
for page in generator:
# get the page title
- title = page.title(withNamespace = not ignore_namespace)
+ title = page.title(withNamespace=not ignore_namespace)
if inverse:
# yield page if NOT matched by all regex
@@ -1145,21 +1220,29 @@
yield page
break
-def EdittimeFilterPageGenerator(generator, begintime=datetime.datetime.min, endtime=datetime.datetime.max):
+
+def EdittimeFilterPageGenerator(generator, begintime=datetime.datetime.min,
+ endtime=datetime.datetime.max):
"""
Wraps around another generator. Yields only those pages which were changed
between begintime and endtime.
@param generator: A generator object
- @param begintime: A datetime object. Only pages after this time will be returned.
- @param endtime: A datetime object Only pages before this time will be returned.
+ @param begintime: A datetime object. Only pages after this time will be
+ returned.
+ @param endtime: A datetime object Only pages before this time will be
+ returned.
+
"""
for page in generator:
- if page.editTime(datetime=True)==None:
+ if page.editTime(datetime=True) is None:
# FIXME: The page object should probably handle this
page.get()
- if page.editTime(datetime=True) and begintime < page.editTime(datetime=True) and page.editTime(datetime=True) < endtime:
+ if page.editTime(datetime=True) and \
+ begintime < page.editTime(datetime=True) and \
+ page.editTime(datetime=True) < endtime:
yield page
+
def CombinedPageGenerator(generators):
"""
@@ -1171,6 +1254,7 @@
for page in generator:
yield page
+
def CategoryGenerator(generator):
"""
Wraps around another generator. Yields the same pages, but as Category
@@ -1180,6 +1264,7 @@
for page in generator:
yield catlib.Category(page.site(), page.title())
+
def ImageGenerator(generator):
"""
Wraps around another generator. Yields the same pages, but as Image
@@ -1188,6 +1273,7 @@
"""
for page in generator:
yield pywikibot.ImagePage(page.site(), page.title())
+
def PageWithTalkPageGenerator(generator):
"""
@@ -1209,6 +1295,7 @@
pages, etc. Thus, it is not necessary to load each page separately.
Operates asynchronously, so the next batch of pages is loaded in the
background before the first batch is fully consumed.
+
"""
@deprecate_arg("lookahead", None)
def __init__(self, generator, pageNumber=60):
@@ -1247,9 +1334,9 @@
# Query the sites one by one.
site = page_list[0].site()
pagesThisSite = [page for page in page_list
- if page.site() == site]
+ if page.site() == site]
page_list = [page for page in page_list
- if page.site() != site]
+ if page.site() != site]
pywikibot.getall(site, pagesThisSite)
for page in pagesThisSite:
yield page
@@ -1262,7 +1349,6 @@
self.preload(page_list, retry=True)
# Ignore this error, and get the pages the traditional way later.
pass
-
def main(*args):
@@ -1278,12 +1364,13 @@
i = 0
for page in gen:
i += 1
- pywikibot.output("%4d: %s" % (i, page.title()), toStdout = True)
+ pywikibot.output("%4d: %s" % (i, page.title()),
+ toStdout=True)
else:
pywikibot.showHelp()
finally:
pywikibot.stopme()
-if __name__=="__main__":
+if __name__ == "__main__":
main()
--
To view, visit https://gerrit.wikimedia.org/r/89505
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I274201bd4247ec56f6573cf3bc40d29ef72ac6e7
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: jenkins-bot