http://www.mediawiki.org/wiki/Special:Code/pywikipedia/8940
Revision: 8940
Author:   russblau
Date:     2011-02-08 16:25:26 +0000 (Tue, 08 Feb 2011)

Log Message:
-----------
remove checks for non-existent InvalidTitle exception
Modified Paths:
--------------
    branches/rewrite/scripts/cosmetic_changes.py
    branches/rewrite/scripts/interwiki.py
    branches/rewrite/scripts/redirect.py
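All three diffs below drop references to pywikibot.InvalidTitle, an exception class that, per the log message, the rewrite branch never defines. Such a guard is worse than no guard at all: the except clause is only evaluated when an error actually arrives, and at that moment the missing attribute raises its own AttributeError and masks the original failure. The stand-alone sketch below is purely illustrative (the fake namespace and function names are invented here, not part of pywikibot) and just demonstrates that failure mode.

    class _FakeNamespace:
        """Stand-in for a module that defines Error but not InvalidTitle."""
        class Error(Exception):
            pass

    pywikibot = _FakeNamespace()

    def guarded_lookup():
        try:
            raise pywikibot.Error("something went wrong building the page")
        except pywikibot.InvalidTitle:  # evaluated only when an error arrives...
            return None                 # ...and then fails: the class is missing

    try:
        guarded_lookup()
    except AttributeError as exc:
        # The handler itself blew up: the original Error is replaced by an
        # AttributeError about the missing InvalidTitle attribute.
        print("broken handler:", exc)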
Modified: branches/rewrite/scripts/cosmetic_changes.py
===================================================================
--- branches/rewrite/scripts/cosmetic_changes.py   2011-02-08 15:51:41 UTC (rev 8939)
+++ branches/rewrite/scripts/cosmetic_changes.py   2011-02-08 16:25:26 UTC (rev 8940)
@@ -217,10 +217,7 @@
             # [[page_title|link_text]]trailing_chars
             # We only work on namespace 0 because pipes and linktrails work
             # differently for images and categories.
-            try:
-                page = pywikibot.Page(pywikibot.Link(titleWithSection, self.site))
-            except pywikibot.InvalidTitle:
-                return match.group()
+            page = pywikibot.Page(pywikibot.Link(titleWithSection, self.site))
             if page.namespace() == 0:
                 # Replace underlines by spaces, also multiple underlines
                 titleWithSection = re.sub('_+', ' ', titleWithSection)
@@ -532,7 +529,7 @@
             text,
             r"([\r\n])== *(Licensing|License information|{{int:license-header}}) *==",
             r"\1== {{int:license}} ==", exceptions, True)
-        
+
         # frequent field values to {{int:}} versions
         text = pywikibot.replaceExcept(
             text,
@@ -542,10 +539,10 @@
             text,
             r'(| *Permission *=) *(?:[Ss]ee below|[Ss]iehe unten) *([\r\n])',
             r'\1\2', exceptions, True)
-        
+
         # added to transwikied pages
         text = pywikibot.replaceExcept(text, r'__NOTOC__', '', exceptions, True)
-        
+
         # tracker element for js upload form
         text = pywikibot.replaceExcept(
             text,
@@ -553,7 +550,7 @@
             '', exceptions[1:], True)
         text = pywikibot.replaceExcept(text, r'{{ImageUpload|(?:basic|full)}}',
                                        '', exceptions, True)
-        
+
         # duplicated section headers
         text = pywikibot.replaceExcept(
             text,
Modified: branches/rewrite/scripts/interwiki.py
===================================================================
--- branches/rewrite/scripts/interwiki.py   2011-02-08 15:51:41 UTC (rev 8939)
+++ branches/rewrite/scripts/interwiki.py   2011-02-08 16:25:26 UTC (rev 8940)
@@ -71,7 +71,7 @@
 -number:       used as -number:#, specifies that the robot should process
                that amount of pages and then stop. This is only useful in
                combination with -start. The default is not to stop.
-               
+
 -until:        used as -until:title, specifies that the robot should
                process pages in wiki default sort order up to, and
                including, "title" and then stop. This is only useful in
@@ -227,13 +227,13 @@
                    only when you are sure you have first gotten the
                    interwiki links on the starting page exactly right).
                    (note: without ending colon)
-                   
+
     -hintsareright do not follow interwiki links to sites for which hints
                    on existing pages are given. Note that, hints given
                    interactively, via the -askhint command line option,
                    are only effective once they have been entered, thus
                    interwiki links on the starting page are followed
-                   regardess of hints given when prompted. 
+                   regardess of hints given when prompted.
                    (Warning! Should be used with caution!)
                    (note: without ending colon)
@@ -598,7 +598,7 @@
     memory when a big number of Page objects will be loaded at the same time.
     """
-    
+
     # Please prefix the class members names by SP
     # to avoid possible name clashes with pywikibot.Page
@@ -608,8 +608,8 @@
     SPstore = None
     # attributes created by pywikibot.Page.__init__
-    SPcopy = [ '_editrestriction', 
-               '_site', 
+    SPcopy = [ '_editrestriction',
+               '_site',
                '_namespace',
                '_section',
                '_title',
@@ -622,7 +622,7 @@
                '_startTime',
                '_revisionId',
                '_deletedRevs' ]
-    
+
     def SPdeleteStore():
         if StoredPage.SPpath:
             del StoredPage.SPstore
@@ -637,7 +637,7 @@
         import shelve
         index = 1
         while True:
-            path = config.datafilepath('cache', 'pagestore' + str(index)) 
+            path = config.datafilepath('cache', 'pagestore' + str(index))
             if not os.path.exists(path): break
             index += 1
         StoredPage.SPpath = path
@@ -666,7 +666,7 @@
     """
     def __init__(self):
         # self.tree :
-        # Dictionary: 
+        # Dictionary:
         # keys: Site
         # values: list of pages
         # All pages found within Site are kept in
@@ -676,7 +676,7 @@
         # the remove() operation,
         # keeping list values is important, because
         # the order in which the pages were found matters:
-        # the earlier a page is found, the closer it is to the 
+        # the earlier a page is found, the closer it is to the
         # Subject.originPage. Chances are that pages found within
         # 2 interwiki distance from the originPage are more related
         # to the original topic than pages found later on, after
@@ -691,7 +691,7 @@
     def filter(self, site):
         """
         Iterates over pages that are in Site site
-        """ 
+        """
         try:
             for page in self.tree[site]:
                 yield page
@@ -731,7 +731,7 @@
         """
         for site, d in self.tree.iteritems():
             yield site, len(d)
-    
+
     def __iter__(self):
         for site, plist in self.tree.iteritems():
             for page in plist:
@@ -753,7 +753,7 @@
     pseudocode:
         todo <- [originPage]
         done <- []
-        while todo != []: 
+        while todo != []:
             pending <- todo
             todo <-NL(pending) / done
             done <- NL(pending) U done
@@ -761,23 +761,23 @@
     There is, however, one limitation that is induced by implementation:
-    to compute efficiently NL(P), one has to load the page contents of 
-    pages in P. 
+    to compute efficiently NL(P), one has to load the page contents of
+    pages in P.
     (Not only the langlinks have to be parsed from each Page, but we also
     want to know if the Page is a redirect, a disambiguation, etc...)
-    Because of this, the pages in pending have to be preloaded. 
+    Because of this, the pages in pending have to be preloaded.
     However, because the pages in pending are likely to be in several sites
     we cannot "just" preload them as a batch.
-    Instead of doing "pending <- todo" at each iteration, we have to elect a 
-    Site, and we put in pending all the pages from todo that belong to that 
+    Instead of doing "pending <- todo" at each iteration, we have to elect a
+    Site, and we put in pending all the pages from todo that belong to that
     Site:
     Code becomes:
         todo <- {originPage.site:[originPage]}
         done <- []
-        while todo != {}: 
+        while todo != {}:
             site <- electSite()
             pending <- todo[site]
@@ -789,10 +789,10 @@
     Subject objects only operate on pages that should have been preloaded
     before.
-    In fact, at any time: 
+    In fact, at any time:
     * todo contains new Pages that have not been loaded yet
     * done contains Pages that have been loaded, and that have been treated.
-    * If batch preloadings are successful, Page._get() is never called from 
+    * If batch preloadings are successful, Page._get() is never called from
       this Object.
     """
@@ -904,7 +904,7 @@
     def whatsNextPageBatch(self, site):
         """
         By calling this method, you 'promise' this instance that you will
-        preload all the 'site' Pages that are in the todo list. 
+        preload all the 'site' Pages that are in the todo list.
         This routine will return a list of pages that can be treated.
         """
@@ -1149,7 +1149,7 @@
     def batchLoaded(self, counter):
         """
         This is called by a worker to tell us that the promised batch of
-        pages was loaded. 
+        pages was loaded.
         In other words, all the pages in self.pending have already
         been preloaded.
@@ -1190,8 +1190,8 @@
                 for site, count in self.todo.siteCounts():
                     counter.minus(site, count)
                 self.todo = PageTree()
-                # In some rare cases it might be we already did check some 'automatic' links 
-                self.done = PageTree() 
+                # In some rare cases it might be we already did check some 'automatic' links
+                self.done = PageTree()
                 continue
             elif page.isRedirectPage() or page.isCategoryRedirect():
@@ -1204,13 +1204,6 @@
                     redirectTargetPage = page.getRedirectTarget()
                 else:
                     redirectTargetPage = page.getCategoryRedirectTarget()
-                except pywikibot.InvalidTitle:
-                    # MW considers #redirect [[en:#foo]] as a redirect page,
-                    # but we can't do anything useful with such pages
-                    if not globalvar.quiet:
-                        pywikibot.output(u"NOTE: %s redirects to an invalid title"
-                                         % page)
-                    continue
                 if not globalvar.quiet:
                     pywikibot.output(u"NOTE: %s is %sredirect to %s"
                                      % (page, redir, redirectTargetPage))
@@ -1252,7 +1245,7 @@
                 for site, count in self.todo.siteCounts():
                     counter.minus(site, count)
                 self.todo = PageTree()
-                self.done = PageTree() 
+                self.done = PageTree()
                 continue
             elif page.section():
@@ -1298,8 +1291,8 @@
             elif globalvar.autonomous and duplicate and not skip:
                 pywikibot.output(u"Stopping work on %s because duplicate pages"\
-                                 " %s and %s are found" % (self.originPage, 
-                                                           duplicate, 
+                                 " %s and %s are found" % (self.originPage,
+                                                           duplicate,
                                                            page))
                 self.makeForcedStop(counter)
                 try:
@@ -1312,7 +1305,7 @@
                     f.write(u" [%s%s graph]" % (config.interwiki_graph_url, filename))
                     f.write("\n")
                     f.close()
-                # FIXME: What errors are we catching here? 
+                # FIXME: What errors are we catching here?
                 # except: should be avoided!!
                 except:
                     #raise
@@ -1653,12 +1646,12 @@
         Delete the contents that are stored on disk for this Subject.
         We cannot afford to define this in a StoredPage destructor because
-        StoredPage instances can get referenced cyclicly: that would stop the 
+        StoredPage instances can get referenced cyclicly: that would stop the
         garbage collector from destroying some of those objects.
         It's also not necessary to set these lines as a Subject destructor:
         deleting all stored content one entry by one entry when bailing out
-        after a KeyboardInterrupt for example is redundant, because the 
+        after a KeyboardInterrupt for example is redundant, because the
         whole storage file will be eventually removed.
         """
         if globalvar.contentsondisk:
@@ -1699,7 +1692,7 @@
     for iw in re.finditer('<!-- *\[\[(.*?:.*?)\]\] *-->', pagetext):
         try:
             ignorepage = pywikibot.Page(page.site, iw.groups()[0])
-        except (pywikibot.NoSuchSite, pywikibot.InvalidTitle):
+        except (pywikibot.NoSuchSite, ):
             continue
         try:
@@ -2227,7 +2220,7 @@
     #Version info marks bots without unicode error
     #This also prevents abuse filter blocking on de-wiki
     if not pywikibot.unicode_error:
-        mcomment += u'r%s) (' % sys.version.split()[0] 
+        mcomment += u'r%s) (' % sys.version.split()[0]
     mcomment += globalvar.summary
@@ -2237,7 +2230,7 @@
         mcomment += i18n.twtranslate(insite.lang, commentname) % changes
         mods = i18n.twtranslate('en', commentname) % changes
-    
+
     return mods, mcomment, adding, removing, modifying
 def botMayEdit (page):
@@ -2353,7 +2346,7 @@
         globalvar.summary = u''
     elif globalvar.summary:
         globalvar.summary += u'; '
-    
+
     # ensure that we don't try to change main page
     try:
         site = pywikibot.getSite()
@@ -2364,7 +2357,7 @@
     if newPages is not None:
         if len(namespaces) == 0:
-            ns = 0 
+            ns = 0
         elif len(namespaces) == 1:
             ns = namespaces[0]
             if ns != 'all':
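The Subject docstring quoted in the hunks above describes the interwiki traversal in pseudocode: keep a per-site todo map, elect one site per round, treat its pending pages as one preloaded batch, and feed newly found langlinks back into todo unless they are already done. The stand-alone sketch below only illustrates that loop; the names interwiki_graph_walk and neighbours are invented here and are not part of interwiki.py.

    from collections import defaultdict

    def interwiki_graph_walk(origin_site, origin_page, neighbours):
        """Toy version of the todo/pending/done loop from the Subject docstring.

        neighbours(site, page) must return an iterable of (site, page) pairs;
        it stands in for "preload the page and parse its langlinks" (NL).
        """
        todo = defaultdict(list)              # site -> pages not loaded yet
        todo[origin_site].append(origin_page)
        done = set()                          # (site, page) pairs already treated

        while todo:
            # electSite(): here simply the site with the most pending pages,
            # so one batch request covers as many pages as possible
            site = max(todo, key=lambda s: len(todo[s]))
            pending = todo.pop(site)
            done.update((site, page) for page in pending)

            for page in pending:
                for nsite, npage in neighbours(site, page):
                    if (nsite, npage) not in done and npage not in todo[nsite]:
                        todo[nsite].append(npage)
        return done

    # Tiny fake link graph, purely illustrative:
    links = {("en", "Foo"): [("de", "Foo"), ("fr", "Foo")],
             ("de", "Foo"): [("en", "Foo")]}
    print(interwiki_graph_walk("en", "Foo", lambda s, p: links.get((s, p), [])))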
Modified: branches/rewrite/scripts/redirect.py
===================================================================
--- branches/rewrite/scripts/redirect.py   2011-02-08 15:51:41 UTC (rev 8939)
+++ branches/rewrite/scripts/redirect.py   2011-02-08 16:25:26 UTC (rev 8940)
@@ -477,10 +477,6 @@
                 pywikibot.output(
                     u'Warning: Redirect target %s is not a valid page title.'
                     % str(e)[10:])
-            #sometimes this error occures. Invalid Title starting with a '#'
-            except pywikibot.InvalidTitle, err:
-                pywikibot.output(u'Warning: %s' % err)
-                break
             except pywikibot.NoPage:
                 if len(redirList) == 1:
                     pywikibot.output(u'Skipping: Page %s does not exist.'
pywikipedia-svn@lists.wikimedia.org