Revision: 4208
Author:   wikipedian
Date:     2007-09-07 13:22:08 +0000 (Fri, 07 Sep 2007)
Log Message:
-----------
improved working on multiple sites
Modified Paths:
--------------
    trunk/pywikipedia/pagegenerators.py
    trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py	2007-09-07 13:08:19 UTC (rev 4207)
+++ trunk/pywikipedia/pagegenerators.py	2007-09-07 13:22:08 UTC (rev 4208)
@@ -499,10 +499,16 @@
     def preload(self, pages):
         try:
-            site = pages[0].site()
-            # filter out pages that are on other sites
-            pages = filter(lambda p: p.site() == site, pages)
-            wikipedia.getall(site, pages, throttle=False)
+            while len(pages) > 0:
+                # It might be that the pages are on different sites,
+                # e.g. because the -interwiki parameter was used.
+                # Query the sites one by one.
+                site = pages[0].site()
+                pagesThisSite = [page for page in pages if page.site() == site]
+                pages = [page for page in pages if page.site() != site]
+                wikipedia.getall(site, pagesThisSite, throttle=False)
+                for page in pagesThisSite:
+                    yield page
         except IndexError:
             # Can happen if the pages list is empty. Don't care.
             pass
 
@@ -520,14 +526,12 @@
                 # We don't want to load too many pages at once using XML export.
                 # We only get a maximum number at a time.
                 if len(somePages) >= self.pageNumber:
-                    self.preload(somePages)
-                    for refpage in somePages:
+                    for refpage in self.preload(somePages):
                         self.queue.put(refpage)
                     somePages = []
             if somePages:
                 # preload remaining pages
-                self.preload(somePages)
-                for refpage in somePages:
+                for refpage in self.preload(somePages):
                     self.queue.put(refpage)
             self.queue.put(None) # to signal end of list
         except Exception, e:
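The heart of the change: preload() becomes a generator that repeatedly peels off the batch of pages sharing the first page's site, bulk-fetches that batch, and yields its pages before moving on to the next site. Below is a minimal standalone sketch of that grouping pattern; Page, fetch_batch, and the site strings are stand-in stubs for pywikipedia's Page objects and wikipedia.getall, not part of the patch.

    # Sketch of the per-site batching pattern from the patch above.
    # Page and fetch_batch are hypothetical stand-ins, not pywikipedia APIs.

    class Page:
        def __init__(self, site, title):
            self._site = site
            self.title = title

        def site(self):
            return self._site

    def fetch_batch(site, pages):
        # Placeholder for wikipedia.getall(site, pages, throttle=False):
        # one bulk query per site instead of one request per page.
        print("fetching %d page(s) from %s" % (len(pages), site))

    def preload(pages):
        # Pages may come from several sites (e.g. via -interwiki), so
        # repeatedly peel off the batch that shares the first page's site.
        while pages:
            site = pages[0].site()
            pages_this_site = [p for p in pages if p.site() == site]
            pages = [p for p in pages if p.site() != site]
            fetch_batch(site, pages_this_site)
            for page in pages_this_site:
                yield page

    pages = [Page("en", "Foo"), Page("de", "Bar"), Page("en", "Baz")]
    for page in preload(pages):
        print(page.title)

Because each pass removes at least the first page from the list, the loop always terminates, and each distinct site is queried exactly once per preload() call.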
Modified: trunk/pywikipedia/replace.py
===================================================================
--- trunk/pywikipedia/replace.py	2007-09-07 13:08:19 UTC (rev 4207)
+++ trunk/pywikipedia/replace.py	2007-09-07 13:22:08 UTC (rev 4208)
@@ -223,21 +223,21 @@
                 # Load the page's text from the wiki
                 original_text = page.get()
                 if not page.canBeEdited():
-                    wikipedia.output(u'Skipping locked page %s' % page.title())
+                    wikipedia.output(u'Skipping locked page %s' % page.aslink())
                     continue
             except wikipedia.NoPage:
-                wikipedia.output(u'Page %s not found' % page.title())
+                wikipedia.output(u'Page %s not found' % page.aslink())
                 continue
             except wikipedia.IsRedirectPage:
                 original_text = page.get(get_redirect=True)
             match = self.checkExceptions(original_text)
             # skip all pages that contain certain texts
             if match:
-                wikipedia.output(u'Skipping %s because it contains %s' % (page.title(), match))
+                wikipedia.output(u'Skipping %s because it contains %s' % (page.aslink(), match))
             else:
                 new_text = self.doReplacements(original_text)
                 if new_text == original_text:
-                    wikipedia.output('No changes were necessary in %s' % page.title())
+                    wikipedia.output('No changes were necessary in %s' % page.aslink())
                 else:
                     if self.recursive:
                         newest_text = self.doReplacements(new_text)
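For context on the replace.py hunk: page.aslink() renders the page as a wiki link that can carry a site prefix, so log lines stay unambiguous when a single run touches pages on several sites, which is exactly the multi-site case the pagegenerators.py change enables. A hedged illustration follows; this Page stub and its output format merely approximate pywikipedia's behaviour and are not taken verbatim from its source.

    # Hypothetical stub showing why aslink() beats title() in multi-site logs.

    class Page:
        def __init__(self, lang, title):
            self.lang = lang
            self._title = title

        def title(self):
            return self._title

        def aslink(self):
            # Rough approximation of pywikipedia's Page.aslink() output.
            return u'[[%s:%s]]' % (self.lang, self._title)

    page = Page('de', 'Beispiel')
    print(u'Skipping locked page %s' % page.title())   # Skipping locked page Beispiel
    print(u'Skipping locked page %s' % page.aslink())  # Skipping locked page [[de:Beispiel]]

With title() alone, two pages named "Beispiel" on different wikis would be indistinguishable in the log; aslink() disambiguates them.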