Revision: 6346 Author: purodha Date: 2009-02-13 12:58:56 +0000 (Fri, 13 Feb 2009)
Log Message: ----------- Adding a Randompages page generator. Solving tracker issue 1262584, see: https://sourceforge.net/tracker2/?func=detail&aid=1262584&group_id=9...
Modified Paths: -------------- trunk/pywikipedia/family.py trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py =================================================================== --- trunk/pywikipedia/family.py 2009-02-12 21:56:26 UTC (rev 6345) +++ trunk/pywikipedia/family.py 2009-02-13 12:58:56 UTC (rev 6346) @@ -3373,6 +3373,9 @@ else: return '%s?useskin=monobook&title=%s:BrokenRedirects&limit=%d' % (self.path(code), self.special_namespace_url(code), config.special_page_limit)
+ def random_address(self, code): + return "%s?useskin=monobook&title=%s:Random" % (self.path(code), self.special_namespace_url(code)) + def allmessages_address(self, code): return "%s?useskin=monobook&title=%s:Allmessages&ot=html" % (self.path(code), self.special_namespace_url(code))
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2009-02-12 21:56:26 UTC (rev 6345) +++ trunk/pywikipedia/pagegenerators.py 2009-02-13 12:58:56 UTC (rev 6346) @@ -122,6 +122,10 @@ -withoutinterwiki Work on all pages that don't have interlanguage links. Argument can be given as "-withoutinterwiki:n" where n is some number (??). + +-random Work on random pages returned by [[Special:Random]]. + Can also be given as "-random:n" where n is the number + of pages to be returned, else 100 pages are returned. """
@@ -381,6 +385,12 @@ for page in linkingPage.linkedPages(): yield page
+def RandomPageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = wikipedia.getSite() + for page in site.randompages(number=number, repeat=repeat): + yield page + def TextfilePageGenerator(filename=None, site=None): ''' Read a file of page links between double-square-brackets, and return @@ -909,6 +919,11 @@ title = wikipedia.input(u'Which page should be processed?') page = wikipedia.Page(site, title) gen = InterwikiPageGenerator(page) + elif arg.startswith('-random'): + if len(arg) == 7: + gen = RandomPageGenerator() + else: + gen = RandomPageGenerator(number = int(arg[8:])) elif arg.startswith('-file'): textfilename = arg[6:] if not textfilename:
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-02-12 21:56:26 UTC (rev 6345) +++ trunk/pywikipedia/wikipedia.py 2009-02-13 12:58:56 UTC (rev 6346) @@ -4224,6 +4224,7 @@ ImagePage objects) unusedcategories(): Special:Unusuedcategories (yields Category) unusedfiles(): Special:Unusedimages (yields ImagePage) + randompages: Special:Random withoutinterwiki: Special:Withoutinterwiki linksearch: Special:Linksearch
@@ -4290,6 +4291,7 @@ upload_address: Special:Upload. double_redirects_address: Special:Doubleredirects. broken_redirects_address: Special:Brokenredirects. + random_address: Special:Random. login_address: Special:Userlogin. captcha_image_address(id): Special:Captcha for image 'id'. watchlist_address: Special:Watchlist editor. @@ -5374,7 +5376,29 @@ if not repeat: break
+ def randompages(self, number=1, repeat=False): + """Yield random pages via Special:Random, skipping titles already seen.""" + seen = set() + path = self.random_address() + entryR = re.compile('var wgPageName = "(?P<title>.+?)";') + while True: + for ignored in range(number): + # MediaWiki advances its random pages only every second. + time.sleep(1) + html = self.getUrl(path) + # output(u' html=%s' % (html)) + m = entryR.search(html) + if m != None: + title = m.group('title') + # output(u' title=%s' % ( title )) + if title not in seen: + seen.add(title) + page = Page(self, title) + yield page + if not repeat: + break
+ def allpages(self, start='!', namespace=None, includeredirects=True, throttle=True): """ @@ -5937,6 +5961,10 @@ """Return path to Special:Brokenredirects.""" return self.family.broken_redirects_address(self.lang, default_limit)
+ def random_address(self): + """Return path to Special:Random.""" + return self.family.random_address(self.lang) + def login_address(self): """Return path to Special:Userlogin.""" return self.family.login_address(self.lang)