Revision: 6347
Author: purodha
Date: 2009-02-13 15:29:12 +0000 (Fri, 13 Feb 2009)
Log Message:
-----------
Add -randomredirect page generator.
Modified Paths:
--------------
trunk/pywikipedia/family.py
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2009-02-13 12:58:56 UTC (rev 6346)
+++ trunk/pywikipedia/family.py 2009-02-13 15:29:12 UTC (rev 6347)
@@ -3376,6 +3376,9 @@
def random_address(self, code):
return "%s?useskin=monobook&title=%s:Random" % (self.path(code),
self.special_namespace_url(code))
+ def randomredirect_address(self, code):
+     """Return the URL path of Special:RandomRedirect for site code `code`."""
+     script_path = self.path(code)
+     special_ns = self.special_namespace_url(code)
+     return "%s?useskin=monobook&title=%s:RandomRedirect" % (
+         script_path, special_ns)
+
def allmessages_address(self, code):
return "%s?useskin=monobook&title=%s:Allmessages&ot=html" %
(self.path(code), self.special_namespace_url(code))
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2009-02-13 12:58:56 UTC (rev 6346)
+++ trunk/pywikipedia/pagegenerators.py 2009-02-13 15:29:12 UTC (rev 6347)
@@ -126,6 +126,11 @@
-random Work on random pages returned by [[Special:Random]].
Can also be given as "-random:n" where n is the number
of pages to be returned, else 100 pages are returned.
+
+-randomredirect Work on random redirect target pages returned by
+ [[Special:Randomredirect]]. Can also be given as
+ "-randomredirect:n" where n is the number of pages to be
+ returned, else 100 pages are returned.
"""
@@ -391,6 +396,12 @@
for page in site.randompages(number=number, repeat=repeat):
yield page
+def RandomRedirectPageGenerator(number = 100, repeat = False, site = None):
+    """
+    Yield the pages produced by site.randomredirectpages().
+
+    number and repeat are passed through unchanged; when no site is
+    given, the one returned by wikipedia.getSite() is used.
+    """
+    source = site
+    if source is None:
+        source = wikipedia.getSite()
+    redirects = source.randomredirectpages(number=number, repeat=repeat)
+    for redirect_page in redirects:
+        yield redirect_page
+
def TextfilePageGenerator(filename=None, site=None):
'''
Read a file of page links between double-square-brackets, and return
@@ -919,6 +930,11 @@
title = wikipedia.input(u'Which page should be processed?')
page = wikipedia.Page(site, title)
gen = InterwikiPageGenerator(page)
+ elif arg.startswith('-randomredirect'):
+     # '-randomredirect' is 15 characters long; an optional ':n' page
+     # count starts at index 16 (the lengths 7/8 belong to '-random').
+     if len(arg) == 15:
+         gen = RandomRedirectPageGenerator()
+     else:
+         gen = RandomRedirectPageGenerator(number = int(arg[16:]))
elif arg.startswith('-random'):
if len(arg) == 7:
gen = RandomPageGenerator()
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-02-13 12:58:56 UTC (rev 6346)
+++ trunk/pywikipedia/wikipedia.py 2009-02-13 15:29:12 UTC (rev 6347)
@@ -4225,6 +4225,7 @@
unusedcategories(): Special:Unusuedcategories (yields Category)
unusedfiles(): Special:Unusedimages (yields ImagePage)
randompages: Special:Random
+ randomredirectpages: Special:RandomRedirect
withoutinterwiki: Special:Withoutinterwiki
linksearch: Special:Linksearch
@@ -4292,6 +4293,7 @@
double_redirects_address: Special:Doubleredirects.
broken_redirects_address: Special:Brokenredirects.
random_address: Special:Random.
+ randomredirect_address: Special:RandomRedirect.
login_address: Special:Userlogin.
captcha_image_address(id): Special:Captcha for image 'id'.
watchlist_address: Special:Watchlist editor.
@@ -5376,10 +5378,13 @@
if not repeat:
break
- def randompages(self, number=1, repeat=False):
- """Yield irandom pages via Special:Random."""
+ def randompages(self, number=1, repeat=False, randomredirect=False):
+     """Yield random pages via Special:Random or Special:RandomRedirect.
+
+     number         -- how many requests are made per pass
+     repeat         -- if false, stop after a single pass
+     randomredirect -- if true, query Special:RandomRedirect instead of
+                       Special:Random
+
+     Titles that were already seen are not yielded again, so a pass may
+     yield fewer than `number` pages.
+     """
seen = set()
- path = self.random_address()
+     # The flag must be spelt exactly like the parameter above; a
+     # mismatch raises NameError as soon as the generator is iterated.
+     if randomredirect:
+         path = self.randomredirect_address()
+     else:
+         path = self.random_address()
entryR = re.compile('var wgPageName =
"(?P<title>.+?)";')
while True:
for ignored in range(number):
@@ -5394,6 +5399,28 @@
if title not in seen:
seen.add(title)
page = Page(self, title)
yield page
if not repeat:
break
+
+ def randomredirectpages(self, number=1, repeat=False, randomredirect=True):
+     """Yield random redirect pages via Special:RandomRedirect.
+
+     number         -- how many requests are made per pass
+     repeat         -- if false, stop after a single pass
+     randomredirect -- if true (the default), query
+                       Special:RandomRedirect; if false, query
+                       Special:Random instead
+
+     The page title is read from the wgPageName JavaScript variable in
+     the returned HTML. Titles that were already seen are not yielded
+     again, so a pass may yield fewer than `number` pages.
+     """
+     seen = set()
+     if randomredirect:
+         path = self.randomredirect_address()
+     else:
+         path = self.random_address()
+     entryR = re.compile('var wgPageName = "(?P<title>.+?)";')
+     while True:
+         for ignored in range(number):
+             # MediaWiki advances its random pages only every second.
+             time.sleep(1)
+             html = self.getUrl(path)
+             m = entryR.search(html)
+             if m is not None:
+                 title = m.group('title')
+                 if title not in seen:
+                     seen.add(title)
+                     page = Page(self, title)
+                     yield page
+         if not repeat:
+             break
@@ -5965,6 +5992,10 @@
"""Return path to Special:Random."""
return self.family.random_address(self.lang)
+ def randomredirect_address(self):
+     """Return the Special:RandomRedirect path for this site's language."""
+     site_family = self.family
+     return site_family.randomredirect_address(self.lang)
+
def login_address(self):
"""Return path to Special:Userlogin."""
return self.family.login_address(self.lang)