[Pywikipedia-l] SVN: [6346] trunk/pywikipedia

purodha at svn.wikimedia.org purodha at svn.wikimedia.org
Fri Feb 13 12:58:57 UTC 2009


Revision: 6346
Author:   purodha
Date:     2009-02-13 12:58:56 +0000 (Fri, 13 Feb 2009)

Log Message:
-----------
Adding a Randompages page generator.
Solving tracker issue 1262584, see:
https://sourceforge.net/tracker2/?func=detail&aid=1262584&group_id=93107&atid=603141

Modified Paths:
--------------
    trunk/pywikipedia/family.py
    trunk/pywikipedia/pagegenerators.py
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py	2009-02-12 21:56:26 UTC (rev 6345)
+++ trunk/pywikipedia/family.py	2009-02-13 12:58:56 UTC (rev 6346)
@@ -3373,6 +3373,9 @@
         else:
             return '%s?useskin=monobook&title=%s:BrokenRedirects&limit=%d' % (self.path(code), self.special_namespace_url(code), config.special_page_limit)
 
+    def random_address(self, code):
+        return "%s?useskin=monobook&title=%s:Random" % (self.path(code), self.special_namespace_url(code))
+
     def allmessages_address(self, code):
         return "%s?useskin=monobook&title=%s:Allmessages&ot=html" % (self.path(code), self.special_namespace_url(code))
 

Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py	2009-02-12 21:56:26 UTC (rev 6345)
+++ trunk/pywikipedia/pagegenerators.py	2009-02-13 12:58:56 UTC (rev 6346)
@@ -122,6 +122,10 @@
 -withoutinterwiki Work on all pages that don't have interlanguage links.
                   Argument can be given as "-withoutinterwiki:n" where
                   n is some number (??).
+
+-random           Work on random pages returned by [[Special:Random]].
+                  Can also be given as "-random:n" where n is the number
+                  of pages to return; if n is omitted, 100 pages are returned.
 """
 
 
@@ -381,6 +385,12 @@
     for page in linkingPage.linkedPages():
         yield page
 
+def RandomPageGenerator(number = 100, repeat = False, site = None):
+    if site is None:
+        site = wikipedia.getSite()
+    for page in site.randompages(number=number, repeat=repeat):
+        yield page
+
 def TextfilePageGenerator(filename=None, site=None):
     '''
     Read a file of page links between double-square-brackets, and return
@@ -909,6 +919,11 @@
                 title = wikipedia.input(u'Which page should be processed?')
             page = wikipedia.Page(site, title)
             gen = InterwikiPageGenerator(page)
+        elif arg.startswith('-random'):
+            if len(arg) == 7:
+                gen = RandomPageGenerator()
+            else:
+                gen = RandomPageGenerator(number = int(arg[8:]))
         elif arg.startswith('-file'):
             textfilename = arg[6:]
             if not textfilename:

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2009-02-12 21:56:26 UTC (rev 6345)
+++ trunk/pywikipedia/wikipedia.py	2009-02-13 12:58:56 UTC (rev 6346)
@@ -4224,6 +4224,7 @@
             ImagePage objects)
         unusedcategories(): Special:Unusuedcategories (yields Category)
         unusedfiles(): Special:Unusedimages (yields ImagePage)
+        randompages: Special:Random
         withoutinterwiki: Special:Withoutinterwiki
         linksearch: Special:Linksearch
 
@@ -4290,6 +4291,7 @@
         upload_address: Special:Upload.
         double_redirects_address: Special:Doubleredirects.
         broken_redirects_address: Special:Brokenredirects.
+        random_address: Special:Random.
         login_address: Special:Userlogin.
         captcha_image_address(id): Special:Captcha for image 'id'.
         watchlist_address: Special:Watchlist editor.
@@ -5374,7 +5376,29 @@
             if not repeat:
                 break
 
+    def randompages(self, number=1, repeat=False):
+        """Yield random pages via Special:Random."""
+        seen = set()
+        path = self.random_address()
+        entryR = re.compile('var wgPageName = "(?P<title>.+?)";')
+        while True:
+            for ignored in range(number):
+                # MediaWiki advances its random pages only every second.
+                time.sleep(1)
+                html = self.getUrl(path)
+                # output(u' html=%s' % (html))
+                m = entryR.search(html)
+                if m != None:
+                    title = m.group('title')
+                    # output(u' title=%s' % ( title ))
+                    if title not in seen:
+                        seen.add(title)
+                        page = Page(self, title)
+                        yield page
+            if not repeat:
+                break
 
+
     def allpages(self, start='!', namespace=None, includeredirects=True,
                  throttle=True):
         """
@@ -5937,6 +5961,10 @@
         """Return path to Special:Brokenredirects."""
         return self.family.broken_redirects_address(self.lang, default_limit)
 
+    def random_address(self):
+        """Return path to Special:Random."""
+        return self.family.random_address(self.lang)
+
     def login_address(self):
         """Return path to Special:Userlogin."""
         return self.family.login_address(self.lang)





More information about the Pywikipedia-l mailing list