Pywikipedia-svn December 2009

pywikipedia-svn@lists.wikimedia.org

6 participants
127 discussions

SVN: [7789] trunk/pywikipedia/redirect.py
by xqt＠svn.wikimedia.org 17 Dec '09

17 Dec '09

Revision: 7789 Author: xqt Date: 2009-12-17 19:20:12 +0000 (Thu, 17 Dec 2009) Log Message: ----------- * change comparison that title not have to exist; * break outer loop in get_redirect_pageids_via_api() if title >= api_until * raise API error and empty results * put move_regex into the function Modified Paths: -------------- trunk/pywikipedia/redirect.py Modified: trunk/pywikipedia/redirect.py =================================================================== --- trunk/pywikipedia/redirect.py 2009-12-17 18:13:05 UTC (rev 7788) +++ trunk/pywikipedia/redirect.py 2009-12-17 19:20:12 UTC (rev 7789) @@ -283,16 +283,16 @@ params['apnamespace'] = ns if self.api_start: params['apfrom'] = self.api_start - while True: + done = False + while not done: data = query.GetData(params, self.site) if "limits" in data: # process aplimit = max params['aplimit'] = int(data['limits']['allpages']) for x in data['query']['allpages']: - if self.api_until and x['title'] == self.api_until: - break + done = self.api_until and x['title'] >= self.api_until + if done: break yield x['pageid'] - - if 'query-continue' in data: + if not done and 'query-continue' in data: params['apfrom'] = data['query-continue']['allpages']['apfrom'] else: break @@ -341,6 +341,10 @@ for apiQ in self._next_redirects_via_api_commandline(): params['pageids'] = apiQ data = query.GetData(params, self.site) + if 'error' in data: + raise RuntimeError("API query error: %s" % data) + if data == []: + raise RuntimeError("No results given.") redirects = {} pages = {} redirects = dict((x['from'], x['to']) @@ -452,14 +456,14 @@ wikipedia.output(u'\nChecking redirect %i of %i...' % (num + 1, len(redict))) - move_regex = re.compile( - r'moved <a href.*?>(.*?)</a> to <a href=.*?>.*?</a>.*?</li>') - def get_moved_pages_redirects(self): '''generate redirects to recently-moved pages''' # this will run forever, until user interrupts it import datetime + move_regex = re.compile( + r'moved <a href.*?>(.*?)</a> to <a href=.*?>.*?</a>.*?</li>') + if self.offset <= 0: self.offset = 1 offsetpattern = re.compile( @@ -482,7 +486,7 @@ import traceback wikipedia.output(unicode(traceback.format_exc())) return - g = self.move_regex.findall(move_list) + g = move_regex.findall(move_list) if wikipedia.verbose: wikipedia.output(u"%s moved pages" % len(g)) for moved_title in g: @@ -684,8 +688,7 @@ sd_tagging_sum) targetPage.put(content, summ) redir.put(content, summ) - else: - break # TODO Better implement loop redirect + break # TODO Better implement loop redirect else: newRedir = targetPage continue #

1 0

SVN: [7788] trunk/pywikipedia/redirect.py
by russblau＠svn.wikimedia.org 17 Dec '09

17 Dec '09

Revision: 7788 Author: russblau Date: 2009-12-17 18:13:05 +0000 (Thu, 17 Dec 2009) Log Message: ----------- Refactor: Python is an object-oriented language; it makes no sense to pass around object attributes as arguments to the object's own methods. Modified Paths: -------------- trunk/pywikipedia/redirect.py Modified: trunk/pywikipedia/redirect.py =================================================================== --- trunk/pywikipedia/redirect.py 2009-12-17 15:18:04 UTC (rev 7787) +++ trunk/pywikipedia/redirect.py 2009-12-17 18:13:05 UTC (rev 7788) @@ -187,6 +187,8 @@ self.site = wikipedia.getSite() self.xmlFilename = xmlFilename self.namespaces = namespaces + if use_api and self.namespaces == []: + self.namespaces = [ 0 ] self.offset = offset self.use_move_log = use_move_log self.use_api = use_api @@ -263,35 +265,30 @@ else: return redict - def get_redirect_pageids_via_api(self, number=u'max', namespaces=[], - start=None, until=None): + def get_redirect_pageids_via_api(self): """ - Generator which will yield page IDs of Pages that are redirects. - Get number of page ids in one go. - Iterates over namespaces, Main if an empty list. - In each namespace, start alphabetically from a pagetitle start, - which need not exist. + Return generator that yields page IDs of Pages that are redirects. """ - if namespaces == []: - namespaces = [ 0 ] params = { - 'action':'query', - 'list':'allpages', - 'apfilterredir':'redirects', - 'aplimit':number, - 'apdir':'ascending', + 'action': 'query', + 'list': 'allpages', + 'apfilterredir': 'redirects', + 'aplimit': self.api_number, + 'apdir': 'ascending', #'':'', } - for ns in namespaces: + if self.api_number is None: + params['aplimit'] = "max" + for ns in self.namespaces: params['apnamespace'] = ns - if start: - params['apfrom'] = start + if self.api_start: + params['apfrom'] = self.api_start while True: data = query.GetData(params, self.site) if "limits" in data: # process aplimit = max params['aplimit'] = int(data['limits']['allpages']) for x in data['query']['allpages']: - if until and x['title'] == until: + if self.api_until and x['title'] == self.api_until: break yield x['pageid'] @@ -300,17 +297,13 @@ else: break - def _next_redirects_via_api_commandline(self, number='max', namespaces=[], - start=None, until=None ): + def _next_redirects_via_api_commandline(self): """ Return a generator that retrieves pageids from the API 500 at a time and yields them as a list """ - if namespaces == []: - namespaces = [ 0 ] apiQ = [] - for pageid in self.get_redirect_pageids_via_api(number, namespaces, - start, until): + for pageid in self.get_redirect_pageids_via_api(): apiQ.append(pageid) if len(apiQ) >= 500: yield apiQ @@ -318,8 +311,7 @@ if apiQ: yield apiQ - def get_redirects_via_api(self, number=u'max', namespaces=[], start=None, - until=None, maxlen=8): + def get_redirects_via_api(self, maxlen=8): """ Return a generator that yields tuples of data about redirect Pages: 0 - page title of a redirect page @@ -339,21 +331,14 @@ 2 - target page title of the redirect, or chain (may not exist) 3 - target page of the redirect, or end of chain, or page title where chain or loop detecton was halted, or None if unknown - Get number of page ids in one go. - Iterates over namespaces, Main if an empty list. - In each namespace, start alphabetically from a pagetitle start, which - need not exist. """ import urllib - if namespaces == []: - namespaces = [ 0 ] params = { 'action':'query', 'redirects':1, #'':'', } - for apiQ in self._next_redirects_via_api_commandline( - number, namespaces, start, until): + for apiQ in self._next_redirects_via_api_commandline(): params['pageids'] = apiQ data = query.GetData(params, self.site) redirects = {} @@ -389,10 +374,7 @@ if self.use_api: count = 0 for (pagetitle, type, target, final) \ - in self.get_redirects_via_api( - namespaces=self.namespaces, - start=self.api_start, - until=self.api_until, maxlen=2): + in self.get_redirects_via_api(maxlen=2): if type == 0: yield pagetitle if self.api_number: @@ -403,7 +385,7 @@ elif self.xmlFilename == None: # retrieve information from the live wiki's maintenance page # broken redirect maintenance page's URL - path = self.site.broken_redirects_address(default_limit = False) + path = self.site.broken_redirects_address(default_limit=False) wikipedia.output(u'Retrieving special page...') maintenance_txt = self.site.getUrl(path) @@ -430,9 +412,7 @@ if self.use_api: count = 0 for (pagetitle, type, target, final) \ - in self.get_redirects_via_api( - namespaces=self.namespaces, start=self.api_start, - until=self.api_until, maxlen = 2): + in self.get_redirects_via_api(maxlen=2): if type != 0 and type != 1: yield pagetitle if self.api_number: @@ -752,11 +732,7 @@ delete_reason = wikipedia.translate(self.site, reason_broken) count = 0 for (redir_name, code, target, final)\ - in self.generator.get_redirects_via_api( - namespaces=self.generator.namespaces, - start=self.generator.api_start, - until=self.generator.api_until, - maxlen = 2): + in self.generator.get_redirects_via_api(maxlen=2): if code == 1: continue elif code == 0: @@ -765,8 +741,7 @@ else: self.fix_1_double_redirect(redir_name) count += 1 - # print ('%s .. %s' % (count, self.number)) - if self.exiting or ( self.number and count >= self.number ): + if self.exiting or (self.number and count >= self.number): break def run(self):

1 0

SVN: [7787] trunk/pywikipedia/redirect.py
by russblau＠svn.wikimedia.org 17 Dec '09

17 Dec '09

Revision: 7787 Author: russblau Date: 2009-12-17 15:18:04 +0000 (Thu, 17 Dec 2009) Log Message: ----------- Bugfix: prevent infinite loop when using -start parameter Modified Paths: -------------- trunk/pywikipedia/redirect.py Modified: trunk/pywikipedia/redirect.py =================================================================== --- trunk/pywikipedia/redirect.py 2009-12-17 15:15:51 UTC (rev 7786) +++ trunk/pywikipedia/redirect.py 2009-12-17 15:18:04 UTC (rev 7787) @@ -283,16 +283,13 @@ #'':'', } for ns in namespaces: - # print (ns) params['apnamespace'] = ns - # print (apiQns) + if start: + params['apfrom'] = start while True: - if start: - params['apfrom'] = start data = query.GetData(params, self.site) if "limits" in data: # process aplimit = max params['aplimit'] = int(data['limits']['allpages']) - # wikipedia.output(u'===RESULT===\n%s\n' % data) for x in data['query']['allpages']: if until and x['title'] == until: break

1 0

SVN: [7786] trunk/pywikipedia/redirect.py
by russblau＠svn.wikimedia.org 17 Dec '09

17 Dec '09

Revision: 7786 Author: russblau Date: 2009-12-17 15:15:51 +0000 (Thu, 17 Dec 2009) Log Message: ----------- Cleanup code formatting, whitespace, and break long lines; fix some incomprehensible or incorrect docstrings. Modified Paths: -------------- trunk/pywikipedia/redirect.py Modified: trunk/pywikipedia/redirect.py =================================================================== --- trunk/pywikipedia/redirect.py 2009-12-17 14:20:01 UTC (rev 7785) +++ trunk/pywikipedia/redirect.py 2009-12-17 15:15:51 UTC (rev 7786) @@ -69,7 +69,7 @@ # Summary message for fixing double redirects msg_double={ - 'als':u'Bötli: Uflösig vun de doppleti Wyterleitig zue %s', + 'als':u'Bötli: Uflösig vun de doppleti Wyterleitig zue %s', 'ar': u'روبوت: تصليح تحويلة مزدوجة → %s', 'bat-smg': u'Robots: Taisuoms dvėgobs paradresavėms → %s', 'be-x-old': u'Робат: выпраўленьне падвойнага перанакіраваньня → %s', @@ -121,7 +121,7 @@ 'be-x-old': u'Робат: мэта перанакіраваньня не існуе', 'cs': u'Přerušené přesměrování', 'de': u'Bot: Weiterleitungsziel existiert nicht', - 'en': u'Robot: Redirect target doesn\'t exist', + 'en': u'[[WP:CSD#G8|G8]]: [[Wikipedia:Redirect|Redirect]] to a deleted or non-existent page', 'es': u'Robot: La página a la que redirige no existe', 'fa': u'ربات:تغییرمسیر مقصد ندارد', 'fi': u'Botti: Ohjauksen kohdesivua ei ole olemassa', @@ -182,10 +182,9 @@ class RedirectGenerator: def __init__(self, xmlFilename=None, namespaces=[], offset=-1, - use_move_log=False, - use_api=False, start=None, until=None, number=None): + use_move_log=False, use_api=False, start=None, until=None, + number=None): self.site = wikipedia.getSite() - self.xmlFilename = xmlFilename self.namespaces = namespaces self.offset = offset @@ -195,7 +194,7 @@ self.api_until = until self.api_number = number - def get_redirects_from_dump(self, alsoGetPageTitles = False): + def get_redirects_from_dump(self, alsoGetPageTitles=False): ''' Load a local XML dump file, look at all pages which have the redirect flag set, and find out where they're pointing at. Return @@ -264,14 +263,15 @@ else: return redict - def get_redirect_pageids_via_api(self, number = u'max', namespaces = [], start = None, until = None ): + def get_redirect_pageids_via_api(self, number=u'max', namespaces=[], + start=None, until=None): """ Generator which will yield page IDs of Pages that are redirects. Get number of page ids in one go. Iterates over namespaces, Main if an empty list. - In each namespace, start alphabetically from a pagetitle start, wich need not exist. + In each namespace, start alphabetically from a pagetitle start, + which need not exist. """ - # wikipedia.output(u'====> get_redirect_pageids_via_api(number=%s, #ns=%d, start=%s, until=%s)' % (number, len(namespaces), start, until)) if namespaces == []: namespaces = [ 0 ] params = { @@ -282,7 +282,6 @@ 'apdir':'ascending', #'':'', } - for ns in namespaces: # print (ns) params['apnamespace'] = ns @@ -290,32 +289,31 @@ while True: if start: params['apfrom'] = start - # print (apiQ) data = query.GetData(params, self.site) if "limits" in data: # process aplimit = max params['aplimit'] = int(data['limits']['allpages']) - # wikipedia.output(u'===RESULT===\n%s\n' % result) + # wikipedia.output(u'===RESULT===\n%s\n' % data) for x in data['query']['allpages']: if until and x['title'] == until: break yield x['pageid'] - + if 'query-continue' in data: params['apfrom'] = data['query-continue']['allpages']['apfrom'] else: break - - def _next_redirects_via_api_commandline(self, number = 'max', namespaces = [], start = None, until = None ): + def _next_redirects_via_api_commandline(self, number='max', namespaces=[], + start=None, until=None ): """ - yields commands to the api for checking a set op page ids. + Return a generator that retrieves pageids from the API 500 at a time + and yields them as a list """ - # wikipedia.output(u'====> _next_redirects_via_api_commandline(apiQi=%s, number=%s, #ns=%d, start=%s, until=%s)' % (apiQi, number, len(namespaces), start, until)) if namespaces == []: namespaces = [ 0 ] - #maxurllen = 1018 # accomodate "GET " + apiQ + CR + LF in 1024 bytes. apiQ = [] - for pageid in self.get_redirect_pageids_via_api(number, namespaces, start, until): + for pageid in self.get_redirect_pageids_via_api(number, namespaces, + start, until): apiQ.append(pageid) if len(apiQ) >= 500: yield apiQ @@ -323,28 +321,32 @@ if apiQ: yield apiQ - def get_redirects_via_api(self, number = u'max', namespaces = [], start = None, until = None, maxlen = 8 ): + def get_redirects_via_api(self, number=u'max', namespaces=[], start=None, + until=None, maxlen=8): """ - Generator which will yield a tuple of data about Pages that are redirects: + Return a generator that yields tuples of data about redirect Pages: 0 - page title of a redirect page 1 - type of redirect: 0 - broken redirect, target page title missing - 1 - normal redirect, target page exists and is not a redirect + 1 - normal redirect, target page exists and is not a + redirect 2..maxlen - start of a redirect chain of that many redirects - (currently, the API seems not to return sufficient data - to make these return values possible, but that may change) + (currently, the API seems not to return sufficient + data to make these return values possible, but + that may change) maxlen+1 - start of an even longer chain, or a loop - (currently, the API seems not to return sufficient data - to allow this return vaules, but that may change) + (currently, the API seems not to return sufficient + data to allow this return values, but that may + change) None - start of a redirect chain of unknown length, or loop 2 - target page title of the redirect, or chain (may not exist) 3 - target page of the redirect, or end of chain, or page title where chain or loop detecton was halted, or None if unknown Get number of page ids in one go. Iterates over namespaces, Main if an empty list. - In each namespace, start alphabetically from a pagetitle start, wich need not exist. + In each namespace, start alphabetically from a pagetitle start, which + need not exist. """ - # wikipedia.output(u'====> get_redirects_via_api(number=%s, #ns=%d, start=%s, until=%s, maxlen=%s)' % (number, len(namespaces), start, until, maxlen)) import urllib if namespaces == []: namespaces = [ 0 ] @@ -353,17 +355,16 @@ 'redirects':1, #'':'', } - for apiQ in self._next_redirects_via_api_commandline(number, namespaces, start, until): - # wikipedia.output (u'===apiQ=%s' % apiQ) + for apiQ in self._next_redirects_via_api_commandline( + number, namespaces, start, until): params['pageids'] = apiQ data = query.GetData(params, self.site) - # wikipedia.output(u'===RESULT===\n%s\n' % result) redirects = {} pages = {} - redirects = dict([[x['from'], x['to']] for x in data['query']['redirects']]) - + redirects = dict((x['from'], x['to']) + for x in data['query']['redirects']) + for pagetitle in data['query']['pages'].values(): - # wikipedia.output (u'M: %s' % pagetitle) if 'missing' in pagetitle and 'pageid' not in pagetitle: pages[pagetitle['title']] = False else: @@ -386,15 +387,15 @@ result = None pass yield (redirect, result, target, final) - # wikipedia.output (u'X%d: %s => %s ----> %s' % (result, redirect, target, final)) def retrieve_broken_redirects(self): if self.use_api: count = 0 - for (pagetitle, type, target, final) in self.get_redirects_via_api( - namespaces = self.namespaces, - start = self.api_start, - until = self.api_until, maxlen = 2): + for (pagetitle, type, target, final) \ + in self.get_redirects_via_api( + namespaces=self.namespaces, + start=self.api_start, + until=self.api_until, maxlen=2): if type == 0: yield pagetitle if self.api_number: @@ -431,10 +432,10 @@ def retrieve_double_redirects(self): if self.use_api: count = 0 - for (pagetitle, type, target, final) in self.get_redirects_via_api( - namespaces = self.namespaces, - start = self.api_start, - until = self.api_until, maxlen = 2): + for (pagetitle, type, target, final) \ + in self.get_redirects_via_api( + namespaces=self.namespaces, start=self.api_start, + until=self.api_until, maxlen = 2): if type != 0 and type != 1: yield pagetitle if self.api_number: @@ -474,11 +475,8 @@ wikipedia.output(u'\nChecking redirect %i of %i...' % (num + 1, len(redict))) - # /wiki/ - wiki = re.escape(wikipedia.getSite().nice_get_address('')) - # /w/index.php - index = re.escape(wikipedia.getSite().path()) - move_regex = re.compile(r'moved <a href.*?>(.*?)</a> to <a href=.*?>.*?</a>.*?</li>') + move_regex = re.compile( + r'moved <a href.*?>(.*?)</a> to <a href=.*?>.*?</a>.*?</li>') def get_moved_pages_redirects(self): '''generate redirects to recently-moved pages''' @@ -487,9 +485,12 @@ if self.offset <= 0: self.offset = 1 - offsetpattern = re.compile(r"""$<a href="/w/index\.php\?title=Special:Log&offset=(\d+)&limit=500&type=move" title="Special:Log" rel="next">older 500</a>$""") + offsetpattern = re.compile( + r"""$<a href="/w/index\.php\?title=Special:Log&offset=(\d+)""" + r"""&limit=500&type=move" title="Special:Log" rel="next">""" + r"""older 500</a>$""") start = datetime.datetime.utcnow() \ - - datetime.timedelta(0, self.offset*3600) + - datetime.timedelta(0, self.offset*3600) # self.offset hours ago offset_time = start.strftime("%Y%m%d%H%M%S") while True: @@ -519,7 +520,8 @@ # moved_page is now a redirect, so any redirects pointing # to it need to be changed try: - for page in moved_page.getReferences(follow_redirects=True, redirectsOnly=True): + for page in moved_page.getReferences(follow_redirects=True, + redirectsOnly=True): yield page except wikipedia.NoPage: # original title must have been deleted after move @@ -531,9 +533,7 @@ class RedirectRobot: def __init__(self, action, generator, always=False, number=None): - self.site = wikipedia.getSite() - self.action = action self.generator = generator self.always = always @@ -542,7 +542,8 @@ def prompt(self, question): if not self.always: - choice = wikipedia.inputChoice(question, ['Yes', 'No', 'All', 'Quit'], + choice = wikipedia.inputChoice(question, + ['Yes', 'No', 'All', 'Quit'], ['y', 'N', 'a', 'q'], 'N') if choice == 'n': return False @@ -565,7 +566,8 @@ redir_page = wikipedia.Page(self.site, redir_name) # Show the title of the page we're working on. # Highlight the title in purple. - wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % redir_page.title()) + wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" + % redir_page.title()) try: targetPage = redir_page.getRedirectTarget() except wikipedia.IsNotRedirectPage: @@ -576,26 +578,36 @@ try: targetPage.get() except wikipedia.NoPage: - if self.prompt(u'Redirect target %s does not exist. Do you want to delete %s?' - % (targetPage.aslink(), redir_page.aslink())): + if self.prompt( + u'Redirect target %s does not exist. Do you want to delete %s?' + % (targetPage.aslink(), + redir_page.aslink())): try: redir_page.delete(reason, prompt = False) except wikipedia.NoUsername: - if targetPage.site().lang in sd_template and targetPage.site().lang in sd_tagging_sum: - wikipedia.output("No sysop in user-config.py, put page to speedy deletion.") + if targetPage.site().lang in sd_template \ + and targetPage.site().lang in sd_tagging_sum: + wikipedia.output( + u"No sysop in user-config.py, put page to speedy deletion.") content = redir_page.get(get_redirect=True) - content = wikipedia.translate(targetPage.site().lang,sd_template)+"\n"+content - summary = wikipedia.translate(targetPage.site().lang,sd_tagging_sum) + content = wikipedia.translate( + targetPage.site().lang, + sd_template)+"\n"+content + summary = wikipedia.translate( + targetPage.site().lang, + sd_tagging_sum) redir_page.put(content, summary) except wikipedia.IsRedirectPage: wikipedia.output( - u'Redirect target %s is also a redirect! Won\'t delete anything.' % targetPage.aslink()) + u'Redirect target %s is also a redirect! Won\'t delete anything.' + % targetPage.aslink()) else: #we successfully get the target page, meaning that #it exists and is not a redirect: no reason to touch it. wikipedia.output( - u'Redirect target %s does exist! Won\'t delete anything.' % targetPage.aslink()) + u'Redirect target %s does exist! Won\'t delete anything.' + % targetPage.aslink()) wikipedia.output(u'') def fix_double_redirects(self): @@ -608,7 +620,8 @@ redir = wikipedia.Page(self.site, redir_name) # Show the title of the page we're working on. # Highlight the title in purple. - wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % redir.title()) + wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" + % redir.title()) newRedir = redir redirList = [] # bookkeeping to detect loops while True: @@ -638,7 +651,7 @@ #sometimes this error occures. Invalid Title starting with a '#' except wikipedia.InvalidTitle, err: wikipedia.output(u'Warning: %s' % err) - break + break except wikipedia.NoPage: if len(redirList) == 1: wikipedia.output(u'Skipping: Page %s does not exist.' @@ -662,7 +675,8 @@ u' Links to: %s.' % targetPage.aslink()) if targetPage.site() != self.site: - wikipedia.output(u'Warning: redirect target (%s) is on a different site.' + wikipedia.output( + u'Warning: redirect target (%s) is on a different site.' % (targetPage.aslink())) if self.always: break # skip if automatic @@ -686,8 +700,11 @@ and targetPage.site().lang in sd_tagging_sum: wikipedia.output(u"Tagging redirect for deletion") # Delete the two redirects - content = wikipedia.translate(targetPage.site().lang, sd_template)+"\n"+content - summ = wikipedia.translate(targetPage.site().lang, sd_tagging_sum) + content = wikipedia.translate( + targetPage.site().lang, + sd_template)+"\n"+content + summ = wikipedia.translate(targetPage.site().lang, + sd_tagging_sum) targetPage.put(content, summ) redir.put(content, summ) else: @@ -707,7 +724,8 @@ oldText) if text == oldText: break - summary = wikipedia.translate(self.site, msg_double) % targetPage.aslink() + summary = wikipedia.translate(self.site, msg_double)\ + % targetPage.aslink() wikipedia.showDiff(oldText, text) if self.prompt(u'Do you want to accept the changes?'): try: @@ -715,17 +733,20 @@ except wikipedia.LockedPage: wikipedia.output(u'%s is locked.' % redir.title()) except wikipedia.SpamfilterError, error: - wikipedia.output(u"Saving page [[%s]] prevented by spam filter: %s" - % (redir.title(), error.url)) + wikipedia.output( + u"Saving page [[%s]] prevented by spam filter: %s" + % (redir.title(), error.url)) except wikipedia.PageNotSaved, error: wikipedia.output(u"Saving page [[%s]] failed: %s" - % (redir.title(), error)) + % (redir.title(), error)) except wikipedia.NoUsername: - wikipedia.output(u"Page [[%s]] not saved; sysop privileges required." - % redir.title()) + wikipedia.output( + u"Page [[%s]] not saved; sysop privileges required." + % redir.title()) except wikipedia.Error, error: - wikipedia.output(u"Unexpected error occurred trying to save [[%s]]: %s" - % (redir.title(), error)) + wikipedia.output( + u"Unexpected error occurred trying to save [[%s]]: %s" + % (redir.title(), error)) break def fix_double_or_delete_broken_redirects(self): @@ -733,10 +754,12 @@ # get reason for deletion text delete_reason = wikipedia.translate(self.site, reason_broken) count = 0 - for (redir_name, code, target, final) in self.generator.get_redirects_via_api( - namespaces = self.generator.namespaces, - start = self.generator.api_start, - until = self.generator.api_until, maxlen = 2): + for (redir_name, code, target, final)\ + in self.generator.get_redirects_via_api( + namespaces=self.generator.namespaces, + start=self.generator.api_start, + until=self.generator.api_until, + maxlen = 2): if code == 1: continue elif code == 0: @@ -802,8 +825,9 @@ if ns == '': ## "-namespace:" does NOT yield -namespace:0 further down the road! ns = wikipedia.input( - u'Please enter a namespace by its number: ') -# u'Please enter a namespace by its name or number: ') TODO! at least for some generators. + u'Please enter a namespace by its number: ') +# u'Please enter a namespace by its name or number: ') +# TODO! at least for some generators. if ns == '': ns = '0' try: @@ -826,10 +850,12 @@ else: wikipedia.output(u'Unknown argument: %s' % arg) - if not action or (api and moved_pages) or (xmlFilename and moved_pages) or (api and xmlFilename): + if not action or (api and moved_pages) or (xmlFilename and moved_pages)\ + or (api and xmlFilename): wikipedia.showHelp('redirect') else: - gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages, api, start, until, number) + gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages, + api, start, until, number) bot = RedirectRobot(action, gen, always, number) bot.run() @@ -838,4 +864,3 @@ main() finally: wikipedia.stopme() -

1 0

SVN: [7785] trunk/pywikipedia/blockpageschecker.py
by xqt＠svn.wikimedia.org 17 Dec '09

17 Dec '09

Revision: 7785 Author: xqt Date: 2009-12-17 14:20:01 +0000 (Thu, 17 Dec 2009) Log Message: ----------- -help option enabled Modified Paths: -------------- trunk/pywikipedia/blockpageschecker.py Modified: trunk/pywikipedia/blockpageschecker.py =================================================================== --- trunk/pywikipedia/blockpageschecker.py 2009-12-17 12:43:48 UTC (rev 7784) +++ trunk/pywikipedia/blockpageschecker.py 2009-12-17 14:20:01 UTC (rev 7785) @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- """ This is a script originally written by Wikihermit and then rewritten by Filnik, -to delete the templates used to warn in the pages that a page is blocked, -when the page isn't blocked at all. Indeed, very often sysops block the pages -for a setted time but then the forget to delete the warning! This script is useful -if you want to delete those useless warning left in these pages. +to delete the templates used to warn in the pages that a page is blocked, when +the page isn't blocked at all. Indeed, very often sysops block the pages for a +setted time but then the forget to delete the warning! This script is useful if +you want to delete those useless warning left in these pages. Parameters: @@ -20,21 +20,25 @@ Argument can also be given as "-page:pagetitle". You can give this parameter multiple times to edit multiple pages. --protectedpages: Check all the blocked pages (useful when you have not categories - or when you have problems with them. (add the namespace after ":" where - you want to check - default checks all protected pages) +-protectedpages: Check all the blocked pages; useful when you have not + categories or when you have problems with them. (add the + namespace after ":" where you want to check - default checks + all protected pages.) -moveprotected: Same as -protectedpages, for moveprotected pages Furthermore, the following command line parameters are supported: --always Doesn't ask every time if the bot should make the change or not, do it always. +-always Doesn't ask every time if the bot should make the change or not, + do it always. --debug When the bot can't delete the template from the page (wrong regex or something like that) - it will ask you if it should open the page on your browser. - (attention: pages included may give false positives..) +-debug When the bot can't delete the template from the page (wrong + regex or something like that) it will ask you if it should open + the page on your browser. + (attention: pages included may give false positives!) --move The bot will check if the page is blocked also for the move option, not only for edit +-move The bot will check if the page is blocked also for the move + option, not only for edit --- Warning! --- You have to edit this script in order to add your preferences @@ -59,7 +63,7 @@ # # Distributed under the terms of the MIT license. # -__version__ = '$Id: blockpageschecker.py,v 1.5 2008/04/24 19.40.00 filnik Exp$' +__version__ = '$Id$' # import re, webbrowser @@ -207,9 +211,6 @@ """ Main Function """ # Loading the comments global categoryToCheck, comment, project_inserted - if config.mylang not in project_inserted: - pywikibot.output(u"Your project is not supported by this script. You have to edit the script and add it!") - return # always, define a generator to understand if the user sets one, defining what's genFactory always = False; generator = False; debug = False moveBlockCheck = False; genFactory = pagegenerators.GeneratorFactory() @@ -242,6 +243,10 @@ else: genFactory.handleArg(arg) + if config.mylang not in project_inserted: + pywikibot.output(u"Your project is not supported by this script.\nYou have to edit the script and add it!") + return + # Load the right site site = pywikibot.getSite()

1 0

SVN: [7784] trunk/pywikipedia/wikipedia.py
by xqt＠svn.wikimedia.org 17 Dec '09

17 Dec '09

Revision: 7784 Author: xqt Date: 2009-12-17 12:43:48 +0000 (Thu, 17 Dec 2009) Log Message: ----------- handle 504 gateway timeout during getUrl processing Modified Paths: -------------- trunk/pywikipedia/wikipedia.py Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-12-17 09:11:31 UTC (rev 7783) +++ trunk/pywikipedia/wikipedia.py 2009-12-17 12:43:48 UTC (rev 7784) @@ -5946,15 +5946,26 @@ except urllib2.HTTPError, e: if e.code in [401, 404]: raise PageNotFound(u'Page %s could not be retrieved. Check your family file ?' % url) - output(u"Result:%s %s" % (e.code, e.msg)) - raise + elif e.code == 504: + output(u'HTTPError: %s %s' % (e.code, e.msg)) + if retry: + output(u"""WARNING: Could not open '%s'.Maybe the server or\n your connection is down. Retrying in %i minutes...""" + % (url, retry_idle_time)) + time.sleep(retry_idle_time * 60) + # Next time wait longer, but not longer than half an hour + if retry_idle_time > 30: + retry_idle_time = 30 + continue + raise + else: + output(u"Result: %s %s" % (e.code, e.msg)) + raise except Exception, e: output(u'%s' %e) if retry: output(u"""WARNING: Could not open '%s'. Maybe the server or\n your connection is down. Retrying in %i minutes...""" % (url, retry_idle_time)) time.sleep(retry_idle_time * 60) - # Next time wait longer, but not longer than half an hour retry_idle_time *= 2 if retry_idle_time > 30: retry_idle_time = 30

1 0

SVN: [7783] trunk/pywikipedia/interwiki.py
by xqt＠svn.wikimedia.org 17 Dec '09

17 Dec '09

Revision: 7783 Author: xqt Date: 2009-12-17 09:11:31 +0000 (Thu, 17 Dec 2009) Log Message: ----------- always-option: don't ask for the rest of (only) one subject Modified Paths: -------------- trunk/pywikipedia/interwiki.py Modified: trunk/pywikipedia/interwiki.py =================================================================== --- trunk/pywikipedia/interwiki.py 2009-12-17 07:50:12 UTC (rev 7782) +++ trunk/pywikipedia/interwiki.py 2009-12-17 09:11:31 UTC (rev 7783) @@ -536,6 +536,7 @@ """ autonomous = False confirm = False + always = False select = False debug = True followredirect = True @@ -1538,6 +1539,7 @@ updatedSites = [] notUpdatedSites = [] # Process all languages here + globalvar.always = False if globalvar.limittwo: lclSite = self.originPage.site() lclSiteDone = False @@ -1723,7 +1725,7 @@ ask = True if globalvar.force: ask = False - if globalvar.confirm: + if globalvar.confirm and not globalvar.always: ask = True # If we need to ask, do so if ask: @@ -1732,8 +1734,8 @@ answer = 'n' else: answer = pywikibot.inputChoice(u'Submit?', - ['Yes', 'No', 'open in Browser', 'Give up'], - ['y', 'n', 'b', 'g']) + ['Yes', 'No', 'open in Browser', 'Give up', 'Always'], + ['y', 'n', 'b', 'g', 'a']) if answer == 'b': webbrowser.open("http://%s%s" % ( page.site().hostname(), @@ -1741,6 +1743,10 @@ )) pywikibot.input("Press Enter when finished in browser.") return True + elif answer == 'a': + # don't ask for the rest of this subject + globalvar.always = True + answer = 'y' else: # If we do not need to ask, allow answer = 'y'

1 0

SVN: [7782] trunk/pywikipedia/wikipedia.py
by xqt＠svn.wikimedia.org 17 Dec '09

17 Dec '09

Revision: 7782 Author: xqt Date: 2009-12-17 07:50:12 +0000 (Thu, 17 Dec 2009) Log Message: ----------- handle 504 gateway timeout during postData processing Modified Paths: -------------- trunk/pywikipedia/wikipedia.py Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-12-16 21:46:01 UTC (rev 7781) +++ trunk/pywikipedia/wikipedia.py 2009-12-17 07:50:12 UTC (rev 7782) @@ -5803,7 +5803,7 @@ if e.code in [401, 404]: raise PageNotFound(u'Page %s could not be retrieved. Check your family file ?' % url) # just check for HTTP Status 500 (Internal Server Error)? - elif e.code == 500: + elif e.code in [500, 504]: output(u'HTTPError: %s %s' % (e.code, e.msg)) if config.retry_on_fail: output(u"""WARNING: Could not open '%s'.\nMaybe the server is down. Retrying in %i minutes..."""

1 0

SVN: [7781] trunk/pywikipedia/imagecopy.py
by multichill＠svn.wikimedia.org 16 Dec '09

16 Dec '09

Revision: 7781 Author: multichill Date: 2009-12-16 21:46:01 +0000 (Wed, 16 Dec 2009) Log Message: ----------- *Fixed always option *Hacked in category option Modified Paths: -------------- trunk/pywikipedia/imagecopy.py Modified: trunk/pywikipedia/imagecopy.py =================================================================== --- trunk/pywikipedia/imagecopy.py 2009-12-16 21:12:25 UTC (rev 7780) +++ trunk/pywikipedia/imagecopy.py 2009-12-16 21:46:01 UTC (rev 7781) @@ -245,9 +245,10 @@ class imageTransfer (threading.Thread): - def __init__ ( self, imagePage, newname): + def __init__ ( self, imagePage, newname, category): self.imagePage = imagePage self.newname = newname + self.category = category threading.Thread.__init__ ( self ) def run(self): @@ -275,6 +276,11 @@ # I want every picture to be tagged with the bottemplate so i can check my contributions later. CH=u'\n\n{{BotMoveToCommons|'+ self.imagePage.site().language() + '.' + self.imagePage.site().family.name +'|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}' + CH + + if self.category: + CH = CH.replace(u'{{subst:Unc}} ', u'') + CH = CH + u'[[Category:' + self.category + u']]' + bot = UploadRobot(url=self.imagePage.fileUrl(), description=CH, useFilename=self.newname, keepFilename=True, verifyDescription=False, ignoreWarning = True, targetSite = wikipedia.getSite('commons', 'commons')) bot.run() @@ -462,12 +468,16 @@ generator = None; #newname = ""; imagepage = None; + always = False + category = u'' # Load a lot of default generators genFactory = pagegenerators.GeneratorFactory() for arg in wikipedia.handleArgs(): if arg == '-always': always = True + elif arg.startswith('-cc:'): + category = arg [len('-cc:'):] else: genFactory.handleArg(arg) @@ -526,7 +536,7 @@ # We dont overwrite images, pick another name, go to the start of the loop if not skip: - imageTransfer(imagepage, newname).start() + imageTransfer(imagepage, newname, category).start() wikipedia.output(u'Still ' + str(threading.activeCount()) + u' active threads, lets wait') for openthread in threading.enumerate():

1 0

SVN: [7780] trunk/pywikipedia/imagecopy.py
by multichill＠svn.wikimedia.org 16 Dec '09

16 Dec '09

Revision: 7780 Author: multichill Date: 2009-12-16 21:12:25 +0000 (Wed, 16 Dec 2009) Log Message: ----------- Added -always option. Won't prompt the user. Use with care! Modified Paths: -------------- trunk/pywikipedia/imagecopy.py Modified: trunk/pywikipedia/imagecopy.py =================================================================== --- trunk/pywikipedia/imagecopy.py 2009-12-15 17:33:40 UTC (rev 7779) +++ trunk/pywikipedia/imagecopy.py 2009-12-16 21:12:25 UTC (rev 7780) @@ -478,6 +478,7 @@ pregenerator = pagegenerators.PreloadingGenerator(generator) for page in pregenerator: + skip = False if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()) : imagepage = wikipedia.ImagePage(page.site(), page.title()) @@ -492,31 +493,37 @@ except NotImplementedError: #No API, using the page file instead (datetime, username, resolution, size, comment) = imagepage.getFileVersionHistory().pop() - while True: + if always: + newname=imagepage.titleWithoutNamespace() + CommonsPage=wikipedia.Page(wikipedia.getSite('commons', 'commons'), u'File:'+newname) + if CommonsPage.exists(): + skip = True + else: + while True: + + # Do the Tkdialog to accept/reject and change te name + (newname, skip)=Tkdialog(imagepage.titleWithoutNamespace(), imagepage.get(), username, imagepage.permalink(), imagepage.templates()).getnewname() - # Do the Tkdialog to accept/reject and change te name - (newname, skip)=Tkdialog(imagepage.titleWithoutNamespace(), imagepage.get(), username, imagepage.permalink(), imagepage.templates()).getnewname() + if skip: + wikipedia.output('Skipping this image') + break - if skip: - wikipedia.output('Skipping this image') - break - - # Did we enter a new name? - if len(newname)==0: - #Take the old name - newname=imagepage.titleWithoutNamespace() - else: - newname = newname.decode('utf-8') + # Did we enter a new name? + if len(newname)==0: + #Take the old name + newname=imagepage.titleWithoutNamespace() + else: + newname = newname.decode('utf-8') - # Check if the image already exists - CommonsPage=wikipedia.Page( - wikipedia.getSite('commons', 'commons'), - 'Image:'+newname) - if not CommonsPage.exists(): - break - else: - wikipedia.output('Image already exists, pick another name or skip this image') - # We dont overwrite images, pick another name, go to the start of the loop + # Check if the image already exists + CommonsPage=wikipedia.Page( + wikipedia.getSite('commons', 'commons'), + u'File:'+newname) + if not CommonsPage.exists(): + break + else: + wikipedia.output('Image already exists, pick another name or skip this image') + # We dont overwrite images, pick another name, go to the start of the loop if not skip: imageTransfer(imagepage, newname).start()

1 0

← Newer
1
...
4
5
6
7
8
9
10
...
13
Older →

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

Pywikipedia-svn December 2009