Revision: 6346
Author: purodha
Date: 2009-02-13 12:58:56 +0000 (Fri, 13 Feb 2009)
Log Message:
-----------
Adding a Randompages page generator.
Solving tracker issue 1262584, see:
https://sourceforge.net/tracker2/?func=detail&aid=1262584&group_id=…
Modified Paths:
--------------
trunk/pywikipedia/family.py
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2009-02-12 21:56:26 UTC (rev 6345)
+++ trunk/pywikipedia/family.py 2009-02-13 12:58:56 UTC (rev 6346)
@@ -3373,6 +3373,9 @@
else:
return '%s?useskin=monobook&title=%s:BrokenRedirects&limit=%d' % (self.path(code), self.special_namespace_url(code), config.special_page_limit)
+ def random_address(self, code):
+ return "%s?useskin=monobook&title=%s:Random" % (self.path(code), self.special_namespace_url(code))
+
def allmessages_address(self, code):
return "%s?useskin=monobook&title=%s:Allmessages&ot=html" % (self.path(code), self.special_namespace_url(code))
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2009-02-12 21:56:26 UTC (rev 6345)
+++ trunk/pywikipedia/pagegenerators.py 2009-02-13 12:58:56 UTC (rev 6346)
@@ -122,6 +122,10 @@
-withoutinterwiki Work on all pages that don't have interlanguage links.
Argument can be given as "-withoutinterwiki:n" where
n is some number (??).
+
+-random Work on random pages returned by [[Special:Random]].
+ Can also be given as "-random:n" where n is the number
+ of pages to be returned, else 100 pages are returned.
"""
@@ -381,6 +385,12 @@
for page in linkingPage.linkedPages():
yield page
+def RandomPageGenerator(number = 100, repeat = False, site = None):
+ if site is None:
+ site = wikipedia.getSite()
+ for page in site.randompages(number=number, repeat=repeat):
+ yield page
+
def TextfilePageGenerator(filename=None, site=None):
'''
Read a file of page links between double-square-brackets, and return
@@ -909,6 +919,11 @@
title = wikipedia.input(u'Which page should be processed?')
page = wikipedia.Page(site, title)
gen = InterwikiPageGenerator(page)
+ elif arg.startswith('-random'):
+ if len(arg) == 7:
+ gen = RandomPageGenerator()
+ else:
+ gen = RandomPageGenerator(number = int(arg[8:]))
elif arg.startswith('-file'):
textfilename = arg[6:]
if not textfilename:
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-02-12 21:56:26 UTC (rev 6345)
+++ trunk/pywikipedia/wikipedia.py 2009-02-13 12:58:56 UTC (rev 6346)
@@ -4224,6 +4224,7 @@
ImagePage objects)
unusedcategories(): Special:Unusuedcategories (yields Category)
unusedfiles(): Special:Unusedimages (yields ImagePage)
+ randompages: Special:Random
withoutinterwiki: Special:Withoutinterwiki
linksearch: Special:Linksearch
@@ -4290,6 +4291,7 @@
upload_address: Special:Upload.
double_redirects_address: Special:Doubleredirects.
broken_redirects_address: Special:Brokenredirects.
+ random_address: Special:Random.
login_address: Special:Userlogin.
captcha_image_address(id): Special:Captcha for image 'id'.
watchlist_address: Special:Watchlist editor.
@@ -5374,7 +5376,29 @@
if not repeat:
break
+ def randompages(self, number=1, repeat=False):
+ """Yield random pages via Special:Random."""
+ seen = set()
+ path = self.random_address()
+ entryR = re.compile('var wgPageName = "(?P<title>.+?)";')
+ while True:
+ for ignored in range(number):
+ # MediaWiki advances its random pages only every second.
+ time.sleep(1)
+ html = self.getUrl(path)
+ # output(u' html=%s' % (html))
+ m = entryR.search(html)
+ if m != None:
+ title = m.group('title')
+ # output(u' title=%s' % ( title ))
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
+ yield page
+ if not repeat:
+ break
+
def allpages(self, start='!', namespace=None, includeredirects=True,
throttle=True):
"""
@@ -5937,6 +5961,10 @@
"""Return path to Special:Brokenredirects."""
return self.family.broken_redirects_address(self.lang, default_limit)
+ def random_address(self):
+ """Return path to Special:Random."""
+ return self.family.random_address(self.lang)
+
def login_address(self):
"""Return path to Special:Userlogin."""
return self.family.login_address(self.lang)