Revision: 6778 Author: cosoleto Date: 2009-04-30 14:55:19 +0000 (Thu, 30 Apr 2009)
Log Message: ----------- Rewrote site.randompages() and site.randomredirectpages(), renamed them to randompage() and randomredirectpage(), and changed the related generators accordingly. The functions now return a single page only, as these Site class methods should be derived from MediaWiki's 'Special:' pages. They return as soon as possible, instead of sleeping 1 second in addition to the download time (I couldn't reproduce that problem using a very fast line). Deleted useless parts and duplicated code (repeat, set...).
Modified Paths: -------------- trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2009-04-30 13:48:48 UTC (rev 6777) +++ trunk/pywikipedia/pagegenerators.py 2009-04-30 14:55:19 UTC (rev 6778) @@ -393,17 +393,17 @@ for page in linkingPage.linkedPages(): yield page
-def RandomPageGenerator(number = 100, repeat = False, site = None): +def RandomPageGenerator(number = 100, site = None): if site is None: site = wikipedia.getSite() - for page in site.randompages(number=number, repeat=repeat): - yield page + for i in range(number): + yield site.randompage()
-def RandomRedirectPageGenerator(number = 100, repeat = False, site = None): +def RandomRedirectPageGenerator(number = 100, site = None): if site is None: site = wikipedia.getSite() - for page in site.randomredirectpages(number=number, repeat=repeat): - yield page + for i in range(number): + yield site.randomredirectpage()
def TextfilePageGenerator(filename=None, site=None): '''
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-04-30 13:48:48 UTC (rev 6777) +++ trunk/pywikipedia/wikipedia.py 2009-04-30 14:55:19 UTC (rev 6778) @@ -4270,8 +4270,8 @@ ImagePage objects) unusedcategories(): Special:Unusuedcategories (yields Category) unusedfiles(): Special:Unusedimages (yields ImagePage) - randompages: Special:Random - randomredirectpages: Special:Random + randompage: Special:Random + randomredirectpage: Special:RandomRedirect withoutinterwiki: Special:Withoutinterwiki linksearch: Special:Linksearch
@@ -5434,56 +5434,20 @@ if not repeat: break
- def randompages(self, number=1, repeat=False, randomredirect=False): - """Yield random pages via Special:Random, or Special:RandomRedirect.""" - seen = set() - if randomredirect: - path = self.randomredirect_address() - else: - path = self.random_address() - entryR = re.compile('var wgPageName = "(?P<title>.+?)";') - while True: - for ignored in range(number): - # MediaWiki advances its random pages only every second. - time.sleep(1) - html = self.getUrl(path) - # output(u' html=%s' % (html)) - m = entryR.search(html) - if m is not None: - title = m.group('title') - # output(u' title=%s' % ( title )) - if title not in seen: - seen.add(title) - yield Page(self, title) - if not repeat: - break + def randompage(self): + """Yield random page via Special:Random""" + html = self.getUrl(self.random_address()) + m = re.search('var wgPageName = "(?P<title>.+?)";', html) + if m is not None: + return Page(self, m.group('title'))
- def randomredirectpages(self, number=1, repeat=False, randomredirect=True): - """Yield random pages via Special:Random, or Special:RandomRedirect.""" - seen = set() - if randomredirect: - path = self.randomredirect_address() - else: - path = self.random_address() - entryR = re.compile('var wgPageName = "(?P<title>.+?)";') - while True: - for ignored in range(number): - # MediaWiki advances its random pages only every second. - time.sleep(1) - html = self.getUrl(path) - # output(u' html=%s' % (html)) - m = entryR.search(html) - if m is not None: - title = m.group('title') - # output(u' title=%s' % ( title )) - if title not in seen: - seen.add(title) - page = Page(self, title) - yield page - if not repeat: - break + def randomredirectpage(self): + """Yield random redirect page via Special:RandomRedirect.""" + html = self.getUrl(self.randomredirect_address()) + m = re.search('var wgPageName = "(?P<title>.+?)";', html) + if m is not None: + return Page(self, m.group('title'))
- def allpages(self, start='!', namespace=None, includeredirects=True, throttle=True): """
pywikipedia-svn@lists.wikimedia.org