Revision: 6313
Author: russblau
Date: 2009-01-29 22:53:53 +0000 (Thu, 29 Jan 2009)

Log Message:
-----------
Fixing bug 2539701 Exception handling

Modified Paths:
--------------
trunk/pywikipedia/redirect.py

Modified: trunk/pywikipedia/redirect.py
===================================================================
--- trunk/pywikipedia/redirect.py 2009-01-29 22:47:54 UTC (rev 6312)
+++ trunk/pywikipedia/redirect.py 2009-01-29 22:53:53 UTC (rev 6313)
@@ -22,8 +22,9 @@
-namespace:n Namespace to process. Works only with an XML dump.
--offset:n Number of redirect to restart with (see progress). Works only - with an XML dump or with -moves. +-offset:n With -xml, the number of the redirect to restart with (see + progress). With -moves, the number of hours ago to start + scanning moved pages. Otherwise, ignored.
-moves Instead of using Special:Doubleredirects, use the page move log to find double-redirect candidates (only works with @@ -291,30 +292,39 @@ # this will run forever, until user interrupts it import datetime
+ if not self.offset: + self.offset = 1 offsetpattern = re.compile( r"""(<a href="/w/index\.php\?title=Special:Log&offset=(\d+)&limit=500&type=move" title="Special:Log" rel="next">older 500</a>)""") - start = datetime.datetime.utcnow() - datetime.timedelta(0, 3600) - # one hour ago - offset = start.strftime("%Y%m%d%H%M%S") + start = datetime.datetime.utcnow() \ + - datetime.timedelta(0, self.offset*3600) + # self.offset hours ago + offset_time = start.strftime("%Y%m%d%H%M%S") site = wikipedia.getSite() while True: move_url = \ site.path() + "?title=Special:Log&limit=500&offset=%s&type=move"\ - % offset + % offset_time try: move_list = site.getUrl(move_url) -# wikipedia.output(u"[%s]" % offset) + if wikipedia.verbose: + wikipedia.output(u"[%s]" % offset) except: import traceback - traceback.print_exc() + wikipedia.output(unicode(traceback.format_exc())) return - for moved_page in self.move_regex.findall(move_list): + g = self.move_regex.findall(move_list) + if wikipedia.verbose: + wikipedia.output(u"%s moved pages" % len(g)) + for moved_title in g: + moved_page = wikipedia.Page(site, moved_title) + if not moved_page.isRedirectPage(): + continue # moved_page is now a redirect, so any redirects pointing # to it need to be changed try: - for page in wikipedia.Page(site, moved_page - ).getReferences(follow_redirects=True, - redirectsOnly=True): + for page in moved_page.getReferences(follow_redirects=True, + redirectsOnly=True): yield page except wikipedia.NoPage: # original title must have been deleted after move @@ -322,7 +332,7 @@ m = offsetpattern.search(move_list) if not m: break - offset = m.group(1) + offset_time = m.group(1)
class RedirectRobot: @@ -444,13 +454,21 @@ wikipedia.output( u'Warning: Redirect target %s forms a redirect loop.' % targetPage.aslink()) - - content=targetPage.get(get_redirect=True) - if sd_template.has_key(targetPage.site().lang) and sd_tagging_sum.has_key(targetPage.site().lang): + try: + content = targetPage.get(get_redirect=True) + except wikipedia.SectionError: + content = wikipedia.Page( + targetPage.site(), + targetPage.sectionFreeTitle() + ).get(get_redirect=True) + if sd_template.has_key(targetPage.site().lang) \ + and sd_tagging_sum.has_key(targetPage.site().lang): wikipedia.output(u"Tagging redirect for deletion") # Delete the two redirects - content = wikipedia.translate(targetPage.site().lang,sd_template)+"\n"+content - summary = wikipedia.translate(targetPage.site().lang,sd_tagging_sum) + content = wikipedia.translate(targetPage.site().lang, + sd_template)+"\n"+content + summary = wikipedia.translate(targetPage.site().lang, + sd_tagging_sum) targetPage.put(content, summary) redir.put(content, summary) else: @@ -462,8 +480,8 @@ text = mysite.redirectRegex().sub( '#%s %s' % (mysite.redirect( True ), - targetPage.aslink()), - oldText) + targetPage.aslink()), + oldText) if text == oldText: break wikipedia.showDiff(oldText, text)