SVN: [7788] trunk/pywikipedia/redirect.py - Pywikipedia-svn

17 Dec 2009

Revision: 7788
Author:   russblau
Date:     2009-12-17 18:13:05 +0000 (Thu, 17 Dec 2009)
Log Message:
-----------
Refactor: Python is an object-oriented language; it makes no sense to pass around object attributes as arguments to the object's own methods.
Modified Paths:
--------------
    trunk/pywikipedia/redirect.py
Modified: trunk/pywikipedia/redirect.py
===================================================================

--- trunk/pywikipedia/redirect.py	2009-12-17 15:18:04 UTC (rev 7787)
+++ trunk/pywikipedia/redirect.py	2009-12-17 18:13:05 UTC (rev 7788)
@@ -187,6 +187,8 @@
         self.site = wikipedia.getSite()
         self.xmlFilename = xmlFilename
         self.namespaces = namespaces
+        if use_api and self.namespaces == []:
+            self.namespaces = [ 0 ]
         self.offset = offset
         self.use_move_log = use_move_log
         self.use_api = use_api
@@ -263,35 +265,30 @@
         else:
             return redict
-    def get_redirect_pageids_via_api(self, number=u'max', namespaces=[],
-                                     start=None, until=None):
+    def get_redirect_pageids_via_api(self):
         """
-        Generator which will yield page IDs of Pages that are redirects.
-        Get number of page ids in one go.
-        Iterates over namespaces, Main if an empty list.
-        In each namespace, start alphabetically from a pagetitle start,
-        which need not exist.
+        Return generator that yields page IDs of Pages that are redirects.
         """
-        if namespaces == []:
-            namespaces = [ 0 ]
         params = {
-            'action':'query',
-            'list':'allpages',
-            'apfilterredir':'redirects',
-            'aplimit':number,
-            'apdir':'ascending',
+            'action': 'query',
+            'list': 'allpages',
+            'apfilterredir': 'redirects',
+            'aplimit': self.api_number,
+            'apdir': 'ascending',
             #'':'',
         }
-        for ns in namespaces:
+        if self.api_number is None:
+            params['aplimit'] = "max"
+        for ns in self.namespaces:
             params['apnamespace'] = ns
-            if start:
-                params['apfrom'] = start
+            if self.api_start:
+                params['apfrom'] = self.api_start
             while True:
                 data = query.GetData(params, self.site)
                 if "limits" in data: # process aplimit = max
                     params['aplimit'] = int(data['limits']['allpages'])
                 for x in data['query']['allpages']:
-                    if until and x['title'] == until:
+                    if self.api_until and x['title'] == self.api_until:
                         break
                     yield x['pageid']
@@ -300,17 +297,13 @@
                 else:
                     break
-    def _next_redirects_via_api_commandline(self, number='max', namespaces=[],
-                                            start=None, until=None ):
+    def _next_redirects_via_api_commandline(self):
         """
         Return a generator that retrieves pageids from the API 500 at a time
         and yields them as a list
         """
-        if namespaces == []:
-            namespaces = [ 0 ]
         apiQ = []
-        for pageid in self.get_redirect_pageids_via_api(number, namespaces,
-                                                        start, until):
+        for pageid in self.get_redirect_pageids_via_api():
             apiQ.append(pageid)
             if len(apiQ) >= 500:
                 yield apiQ
@@ -318,8 +311,7 @@
         if apiQ:
             yield apiQ
-    def get_redirects_via_api(self, number=u'max', namespaces=[], start=None,
-                              until=None, maxlen=8):
+    def get_redirects_via_api(self, maxlen=8):
         """
         Return a generator that yields tuples of data about redirect Pages:
             0 - page title of a redirect page
@@ -339,21 +331,14 @@
             2 - target page title of the redirect, or chain (may not exist)
             3 - target page of the redirect, or end of chain, or page title where
                 chain or loop detecton was halted, or None if unknown
-        Get number of page ids in one go.
-        Iterates over namespaces, Main if an empty list.
-        In each namespace, start alphabetically from a pagetitle start, which
-        need not exist.
         """
         import urllib
-        if namespaces == []:
-            namespaces = [ 0 ]
         params = {
             'action':'query',
             'redirects':1,
             #'':'',
         }
-        for apiQ in self._next_redirects_via_api_commandline(
-                                number, namespaces, start, until):
+        for apiQ in self._next_redirects_via_api_commandline():
             params['pageids'] = apiQ
             data = query.GetData(params, self.site)
             redirects = {}
@@ -389,10 +374,7 @@
         if self.use_api:
             count = 0
             for (pagetitle, type, target, final) \
-                    in self.get_redirects_via_api(
-                        namespaces=self.namespaces,
-                        start=self.api_start,
-                        until=self.api_until, maxlen=2):
+                    in self.get_redirects_via_api(maxlen=2):
                 if type == 0:
                     yield pagetitle
                     if self.api_number:
@@ -403,7 +385,7 @@
         elif self.xmlFilename == None:
             # retrieve information from the live wiki's maintenance page
             # broken redirect maintenance page's URL
-            path = self.site.broken_redirects_address(default_limit = False)
+            path = self.site.broken_redirects_address(default_limit=False)
             wikipedia.output(u'Retrieving special page...')
             maintenance_txt = self.site.getUrl(path)
@@ -430,9 +412,7 @@
         if self.use_api:
             count = 0
             for (pagetitle, type, target, final) \
-                    in self.get_redirects_via_api(
-                         namespaces=self.namespaces, start=self.api_start,
-                         until=self.api_until, maxlen = 2):
+                    in self.get_redirects_via_api(maxlen=2):
                 if type != 0 and type != 1:
                     yield pagetitle
                     if self.api_number:
@@ -752,11 +732,7 @@
         delete_reason = wikipedia.translate(self.site, reason_broken)
         count = 0
         for (redir_name, code, target, final)\
-                in self.generator.get_redirects_via_api(
-                     namespaces=self.generator.namespaces,
-                     start=self.generator.api_start,
-                     until=self.generator.api_until,
-                     maxlen = 2):
+                in self.generator.get_redirects_via_api(maxlen=2):
             if code == 1:
                 continue
             elif code == 0:
@@ -765,8 +741,7 @@
             else:
                 self.fix_1_double_redirect(redir_name)
                 count += 1
-            # print ('%s .. %s' % (count, self.number))
-            if self.exiting or ( self.number and count >= self.number ):
+            if self.exiting or (self.number and count >= self.number):
                 break
def run(self):