http://www.mediawiki.org/wiki/Special:Code/pywikipedia/8940
Revision: 8940
Author:   russblau
Date:     2011-02-08 16:25:26 +0000 (Tue, 08 Feb 2011)

Log Message:
-----------
remove checks for non-existent InvalidTitle exception
Modified Paths:
--------------
    branches/rewrite/scripts/cosmetic_changes.py
    branches/rewrite/scripts/interwiki.py
    branches/rewrite/scripts/redirect.py
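All three diffs below drop references to pywikibot.InvalidTitle, an exception class that, per the log message, the rewrite branch never defines. Such a guard is worse than no guard at all: the except clause is only evaluated when an error actually arrives, and at that moment the missing attribute raises its own AttributeError and masks the original failure. The stand-alone sketch below is purely illustrative (the fake namespace and function names are invented here, not part of pywikibot) and just demonstrates that failure mode.

    class _FakeNamespace:
        """Stand-in for a module that defines Error but not InvalidTitle."""
        class Error(Exception):
            pass

    pywikibot = _FakeNamespace()

    def guarded_lookup():
        try:
            raise pywikibot.Error("something went wrong building the page")
        except pywikibot.InvalidTitle:  # evaluated only when an error arrives...
            return None                 # ...and then fails: the class is missing

    try:
        guarded_lookup()
    except AttributeError as exc:
        # The handler itself blew up: the original Error is replaced by an
        # AttributeError about the missing InvalidTitle attribute.
        print("broken handler:", exc)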
Modified: branches/rewrite/scripts/cosmetic_changes.py
===================================================================
--- branches/rewrite/scripts/cosmetic_changes.py   2011-02-08 15:51:41 UTC (rev 8939)
+++ branches/rewrite/scripts/cosmetic_changes.py   2011-02-08 16:25:26 UTC (rev 8940)
@@ -217,10 +217,7 @@
             # [[page_title|link_text]]trailing_chars
             # We only work on namespace 0 because pipes and linktrails work
             # differently for images and categories.
-            try:
-                page = pywikibot.Page(pywikibot.Link(titleWithSection, self.site))
-            except pywikibot.InvalidTitle:
-                return match.group()
+            page = pywikibot.Page(pywikibot.Link(titleWithSection, self.site))
             if page.namespace() == 0:
                 # Replace underlines by spaces, also multiple underlines
                 titleWithSection = re.sub('_+', ' ', titleWithSection)
@@ -532,7 +529,7 @@
             text,
             r"([\r\n])== *(Licensing|License information|{{int:license-header}}) *==",
             r"\1== {{int:license}} ==", exceptions, True)
-        
+
         # frequent field values to {{int:}} versions
         text = pywikibot.replaceExcept(
             text,
@@ -542,10 +539,10 @@
             text,
             r'(| *Permission *=) *(?:[Ss]ee below|[Ss]iehe unten) *([\r\n])',
             r'\1\2', exceptions, True)
-        
+
         # added to transwikied pages
         text = pywikibot.replaceExcept(text, r'__NOTOC__', '', exceptions, True)
-        
+
         # tracker element for js upload form
         text = pywikibot.replaceExcept(
             text,
@@ -553,7 +550,7 @@
             '', exceptions[1:], True)
         text = pywikibot.replaceExcept(text, r'{{ImageUpload|(?:basic|full)}}',
                                        '', exceptions, True)
-        
+
         # duplicated section headers
         text = pywikibot.replaceExcept(
             text,
Modified: branches/rewrite/scripts/interwiki.py
===================================================================
--- branches/rewrite/scripts/interwiki.py   2011-02-08 15:51:41 UTC (rev 8939)
+++ branches/rewrite/scripts/interwiki.py   2011-02-08 16:25:26 UTC (rev 8940)
@@ -71,7 +71,7 @@
 -number:       used as -number:#, specifies that the robot should process
                that amount of pages and then stop. This is only useful in
                combination with -start. The default is not to stop.
-               
+
 -until:        used as -until:title, specifies that the robot should
                process pages in wiki default sort order up to, and
                including, "title" and then stop. This is only useful in
@@ -227,13 +227,13 @@
                    only when you are sure you have first gotten the
                    interwiki links on the starting page exactly right).
                    (note: without ending colon)
-                   
+
     -hintsareright do not follow interwiki links to sites for which hints
                    on existing pages are given. Note that, hints given
                    interactively, via the -askhint command line option,
                    are only effective once they have been entered, thus
                    interwiki links on the starting page are followed
-                   regardess of hints given when prompted. 
+                   regardess of hints given when prompted.
                    (Warning! Should be used with caution!)
                    (note: without ending colon)
@@ -598,7 +598,7 @@
     memory when a big number of Page objects will be loaded at the same time.
     """
-    
+
     # Please prefix the class members names by SP
     # to avoid possible name clashes with pywikibot.Page
@@ -608,8 +608,8 @@
     SPstore = None
     # attributes created by pywikibot.Page.__init__
-    SPcopy = [ '_editrestriction', 
-               '_site', 
+    SPcopy = [ '_editrestriction',
+               '_site',
                '_namespace',
                '_section',
                '_title',
@@ -622,7 +622,7 @@
                '_startTime',
                '_revisionId',
                '_deletedRevs' ]
-    
+
     def SPdeleteStore():
         if StoredPage.SPpath:
             del StoredPage.SPstore
@@ -637,7 +637,7 @@
         import shelve
         index = 1
         while True:
-            path = config.datafilepath('cache', 'pagestore' + str(index)) 
+            path = config.datafilepath('cache', 'pagestore' + str(index))
             if not os.path.exists(path): break
             index += 1
         StoredPage.SPpath = path
@@ -666,7 +666,7 @@
     """
     def __init__(self):
         # self.tree :
-        # Dictionary: 
+        # Dictionary:
         # keys: Site
         # values: list of pages
         # All pages found within Site are kept in
@@ -676,7 +676,7 @@
         # the remove() operation,
         # keeping list values is important, because
         # the order in which the pages were found matters:
-        # the earlier a page is found, the closer it is to the 
+        # the earlier a page is found, the closer it is to the
         # Subject.originPage. Chances are that pages found within
         # 2 interwiki distance from the originPage are more related
         # to the original topic than pages found later on, after
@@ -691,7 +691,7 @@
     def filter(self, site):
         """
         Iterates over pages that are in Site site
-        """ 
+        """
         try:
             for page in self.tree[site]:
                 yield page
@@ -731,7 +731,7 @@
         """
         for site, d in self.tree.iteritems():
             yield site, len(d)
-    
+
     def __iter__(self):
         for site, plist in self.tree.iteritems():
             for page in plist:
@@ -753,7 +753,7 @@
     pseudocode:
         todo <- [originPage]
         done <- []
-        while todo != []: 
+        while todo != []:
             pending <- todo
             todo <-NL(pending) / done
             done <- NL(pending) U done
@@ -761,23 +761,23 @@
     There is, however, one limitation that is induced by implementation:
-    to compute efficiently NL(P), one has to load the page contents of 
-    pages in P. 
+    to compute efficiently NL(P), one has to load the page contents of
+    pages in P.
     (Not only the langlinks have to be parsed from each Page, but we also
     want to know if the Page is a redirect, a disambiguation, etc...)
-    Because of this, the pages in pending have to be preloaded. 
+    Because of this, the pages in pending have to be preloaded.
     However, because the pages in pending are likely to be in several sites
     we cannot "just" preload them as a batch.
-    Instead of doing "pending <- todo" at each iteration, we have to elect a 
-    Site, and we put in pending all the pages from todo that belong to that 
+    Instead of doing "pending <- todo" at each iteration, we have to elect a
+    Site, and we put in pending all the pages from todo that belong to that
     Site:
     Code becomes:
         todo <- {originPage.site:[originPage]}
         done <- []
-        while todo != {}: 
+        while todo != {}:
             site <- electSite()
             pending <- todo[site]
@@ -789,10 +789,10 @@
     Subject objects only operate on pages that should have been preloaded
     before.
-    In fact, at any time: 
+    In fact, at any time:
     * todo contains new Pages that have not been loaded yet
     * done contains Pages that have been loaded, and that have been treated.
-    * If batch preloadings are successful, Page._get() is never called from 
+    * If batch preloadings are successful, Page._get() is never called from
       this Object.
     """
@@ -904,7 +904,7 @@
     def whatsNextPageBatch(self, site):
         """
         By calling this method, you 'promise' this instance that you will
-        preload all the 'site' Pages that are in the todo list. 
+        preload all the 'site' Pages that are in the todo list.
         This routine will return a list of pages that can be treated.
         """
@@ -1149,7 +1149,7 @@
     def batchLoaded(self, counter):
         """
         This is called by a worker to tell us that the promised batch of
-        pages was loaded. 
+        pages was loaded.
         In other words, all the pages in self.pending have already
         been preloaded.
@@ -1190,8 +1190,8 @@
                 for site, count in self.todo.siteCounts():
                     counter.minus(site, count)
                 self.todo = PageTree()
-                # In some rare cases it might be we already did check some 'automatic' links 
-                self.done = PageTree() 
+                # In some rare cases it might be we already did check some 'automatic' links
+                self.done = PageTree()
                 continue
             elif page.isRedirectPage() or page.isCategoryRedirect():
@@ -1204,13 +1204,6 @@
                     redirectTargetPage = page.getRedirectTarget()
                 else:
                     redirectTargetPage = page.getCategoryRedirectTarget()
-                except pywikibot.InvalidTitle:
-                    # MW considers #redirect [[en:#foo]] as a redirect page,
-                    # but we can't do anything useful with such pages
-                    if not globalvar.quiet:
-                        pywikibot.output(u"NOTE: %s redirects to an invalid title"
-                                         % page)
-                    continue
                 if not globalvar.quiet:
                     pywikibot.output(u"NOTE: %s is %sredirect to %s"
                                      % (page, redir, redirectTargetPage))
@@ -1252,7 +1245,7 @@
                 for site, count in self.todo.siteCounts():
                     counter.minus(site, count)
                 self.todo = PageTree()
-                self.done = PageTree() 
+                self.done = PageTree()
                 continue
             elif page.section():
@@ -1298,8 +1291,8 @@
             elif globalvar.autonomous and duplicate and not skip:
                 pywikibot.output(u"Stopping work on %s because duplicate pages"\
-                                 " %s and %s are found" % (self.originPage, 
-                                                           duplicate, 
+                                 " %s and %s are found" % (self.originPage,
+                                                           duplicate,
                                                            page))
                 self.makeForcedStop(counter)
                 try:
@@ -1312,7 +1305,7 @@
                     f.write(u" [%s%s graph]" % (config.interwiki_graph_url, filename))
                     f.write("\n")
                     f.close()
-                # FIXME: What errors are we catching here? 
+                # FIXME: What errors are we catching here?
                 # except: should be avoided!!
                 except:
                     #raise
@@ -1653,12 +1646,12 @@
         Delete the contents that are stored on disk for this Subject.
         We cannot afford to define this in a StoredPage destructor because
-        StoredPage instances can get referenced cyclicly: that would stop the 
+        StoredPage instances can get referenced cyclicly: that would stop the
         garbage collector from destroying some of those objects.
         It's also not necessary to set these lines as a Subject destructor:
         deleting all stored content one entry by one entry when bailing out
-        after a KeyboardInterrupt for example is redundant, because the 
+        after a KeyboardInterrupt for example is redundant, because the
         whole storage file will be eventually removed.
         """
         if globalvar.contentsondisk:
@@ -1699,7 +1692,7 @@
     for iw in re.finditer('<!-- *\[\[(.*?:.*?)\]\] *-->', pagetext):
         try:
             ignorepage = pywikibot.Page(page.site, iw.groups()[0])
-        except (pywikibot.NoSuchSite, pywikibot.InvalidTitle):
+        except (pywikibot.NoSuchSite, ):
             continue
         try:
@@ -2227,7 +2220,7 @@
     #Version info marks bots without unicode error
     #This also prevents abuse filter blocking on de-wiki
     if not pywikibot.unicode_error:
-        mcomment += u'r%s) (' % sys.version.split()[0] 
+        mcomment += u'r%s) (' % sys.version.split()[0]
     mcomment += globalvar.summary
@@ -2237,7 +2230,7 @@
         mcomment += i18n.twtranslate(insite.lang, commentname) % changes
         mods = i18n.twtranslate('en', commentname) % changes
-    
+
     return mods, mcomment, adding, removing, modifying
 def botMayEdit (page):
@@ -2353,7 +2346,7 @@
         globalvar.summary = u''
     elif globalvar.summary:
         globalvar.summary += u'; '
-    
+
     # ensure that we don't try to change main page
     try:
         site = pywikibot.getSite()
@@ -2364,7 +2357,7 @@
     if newPages is not None:
         if len(namespaces) == 0:
-            ns = 0 
+            ns = 0
         elif len(namespaces) == 1:
             ns = namespaces[0]
             if ns != 'all':
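The Subject docstring quoted in the hunks above describes the interwiki traversal in pseudocode: keep a per-site todo map, elect one site per round, treat its pending pages as one preloaded batch, and feed newly found langlinks back into todo unless they are already done. The stand-alone sketch below only illustrates that loop; the names interwiki_graph_walk and neighbours are invented here and are not part of interwiki.py.

    from collections import defaultdict

    def interwiki_graph_walk(origin_site, origin_page, neighbours):
        """Toy version of the todo/pending/done loop from the Subject docstring.

        neighbours(site, page) must return an iterable of (site, page) pairs;
        it stands in for "preload the page and parse its langlinks" (NL).
        """
        todo = defaultdict(list)              # site -> pages not loaded yet
        todo[origin_site].append(origin_page)
        done = set()                          # (site, page) pairs already treated

        while todo:
            # electSite(): here simply the site with the most pending pages,
            # so one batch request covers as many pages as possible
            site = max(todo, key=lambda s: len(todo[s]))
            pending = todo.pop(site)
            done.update((site, page) for page in pending)

            for page in pending:
                for nsite, npage in neighbours(site, page):
                    if (nsite, npage) not in done and npage not in todo[nsite]:
                        todo[nsite].append(npage)
        return done

    # Tiny fake link graph, purely illustrative:
    links = {("en", "Foo"): [("de", "Foo"), ("fr", "Foo")],
             ("de", "Foo"): [("en", "Foo")]}
    print(interwiki_graph_walk("en", "Foo", lambda s, p: links.get((s, p), [])))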
Modified: branches/rewrite/scripts/redirect.py
===================================================================
--- branches/rewrite/scripts/redirect.py   2011-02-08 15:51:41 UTC (rev 8939)
+++ branches/rewrite/scripts/redirect.py   2011-02-08 16:25:26 UTC (rev 8940)
@@ -477,10 +477,6 @@
                 pywikibot.output(
                     u'Warning: Redirect target %s is not a valid page title.'
                     % str(e)[10:])
-            #sometimes this error occures. Invalid Title starting with a '#'
-            except pywikibot.InvalidTitle, err:
-                pywikibot.output(u'Warning: %s' % err)
-                break
             except pywikibot.NoPage:
                 if len(redirList) == 1:
                     pywikibot.output(u'Skipping: Page %s does not exist.'
pywikipedia-svn@lists.wikimedia.org