Revision: 6347 Author: purodha Date: 2009-02-13 15:29:12 +0000 (Fri, 13 Feb 2009)
Log Message: ----------- Add -randomredirect page generator.
Modified Paths: -------------- trunk/pywikipedia/family.py trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py =================================================================== --- trunk/pywikipedia/family.py 2009-02-13 12:58:56 UTC (rev 6346) +++ trunk/pywikipedia/family.py 2009-02-13 15:29:12 UTC (rev 6347) @@ -3376,6 +3376,9 @@ def random_address(self, code): return "%s?useskin=monobook&title=%s:Random" % (self.path(code), self.special_namespace_url(code))
+ def randomredirect_address(self, code): + return "%s?useskin=monobook&title=%s:RandomRedirect" % (self.path(code), self.special_namespace_url(code)) + def allmessages_address(self, code): return "%s?useskin=monobook&title=%s:Allmessages&ot=html" % (self.path(code), self.special_namespace_url(code))
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2009-02-13 12:58:56 UTC (rev 6346) +++ trunk/pywikipedia/pagegenerators.py 2009-02-13 15:29:12 UTC (rev 6347) @@ -126,6 +126,11 @@ -random Work on random pages returned by [[Special:Random]]. Can also be given as "-random:n" where n is the number of pages to be returned, else 100 pages are returned. + +-randomredirect Work on random redirect target pages returned by + [[Special:Randomredirect]]. Can also be given as + "-randomredirect:n" where n is the number of pages to be + returned, else 100 pages are returned. """
@@ -391,6 +396,12 @@ for page in site.randompages(number=number, repeat=repeat): yield page
+def RandomRedirectPageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = wikipedia.getSite() + for page in site.randomredirectpages(number=number, repeat=repeat): + yield page + def TextfilePageGenerator(filename=None, site=None): ''' Read a file of page links between double-square-brackets, and return @@ -919,6 +930,11 @@ title = wikipedia.input(u'Which page should be processed?') page = wikipedia.Page(site, title) gen = InterwikiPageGenerator(page) + elif arg.startswith('-randomredirect'): + if len(arg) == 7: + gen = RandomRedirectPageGenerator() + else: + gen = RandomRedirectPageGenerator(number = int(arg[8:])) elif arg.startswith('-random'): if len(arg) == 7: gen = RandomPageGenerator()
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-02-13 12:58:56 UTC (rev 6346) +++ trunk/pywikipedia/wikipedia.py 2009-02-13 15:29:12 UTC (rev 6347) @@ -4225,6 +4225,7 @@ unusedcategories(): Special:Unusuedcategories (yields Category) unusedfiles(): Special:Unusedimages (yields ImagePage) randompages: Special:Random + randomredirectpages: Special:RandomRedirect withoutinterwiki: Special:Withoutinterwiki linksearch: Special:Linksearch
@@ -4292,6 +4293,7 @@ double_redirects_address: Special:Doubleredirects. broken_redirects_address: Special:Brokenredirects. random_address: Special:Random. + randomredirect_address: Special:RandomRedirect. login_address: Special:Userlogin. captcha_image_address(id): Special:Captcha for image 'id'. watchlist_address: Special:Watchlist editor. @@ -5376,10 +5378,13 @@ if not repeat: break
- def randompages(self, number=1, repeat=False): - """Yield irandom pages via Special:Random.""" + def randompages(self, number=1, repeat=False, randmoredirect=False): + """Yield random pages via Special:Random, or Special:RandomRedirect.""" seen = set() - path = self.random_address() + if randomredirect: + path = self.randomredirect_address() + else: + path = self.random_address() entryR = re.compile('var wgPageName = "(?P<title>.+?)";') while True: for ignored in range(number): @@ -5394,6 +5399,28 @@ if title not in seen: seen.add(title) page = Page(self, title) + + def randomredirectpages(self, number=1, repeat=False, randmoredirect=True): + """Yield random pages via Special:Random, or Special:RandomRedirect.""" + seen = set() + if randomredirect: + path = self.randomredirect_address() + else: + path = self.random_address() + entryR = re.compile('var wgPageName = "(?P<title>.+?)";') + while True: + for ignored in range(number): + # MediaWiki advances its random pages only every second. + time.sleep(1) + html = self.getUrl(path) + # output(u' html=%s' % (html)) + m = entryR.search(html) + if m != None: + title = m.group('title') + # output(u' title=%s' % ( title )) + if title not in seen: + seen.add(title) + page = Page(self, title) yield page if not repeat: break @@ -5965,6 +5992,10 @@ """Return path to Special:Random.""" return self.family.random_address(self.lang)
+ def randomredirect_address(self): + """Return path to Special:RandomRedirect.""" + return self.family.randomredirect_address(self.lang) + def login_address(self): """Return path to Special:Userlogin.""" return self.family.login_address(self.lang)