Revision: 5302
Author: russblau
Date: 2008-05-03 17:43:01 +0000 (Sat, 03 May 2008)

Log Message:
-----------
Turn off threading on PreloadingGenerator because of bugs

Modified Paths:
--------------
    trunk/pywikipedia/pagegenerators.py
    trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2008-05-03 13:05:23 UTC (rev 5301) +++ trunk/pywikipedia/pagegenerators.py 2008-05-03 17:43:01 UTC (rev 5302) @@ -728,7 +728,8 @@ if not page.isTalkPage(): yield page.toggleTalkPage()
-class PreloadingGenerator(ThreadedGenerator): + +class PreloadingGenerator(object): """ Yields the same pages as generator generator. Retrieves 60 pages (or another number specified by pageNumber), loads them using @@ -740,10 +741,10 @@ def __init__(self, generator, pageNumber=60, lookahead=10): self.wrapped_gen = generator self.pageNumber = pageNumber - ThreadedGenerator.__init__(self, name="Preloading-Thread", - qsize=lookahead) +# ThreadedGenerator.__init__(self, name="Preloading-Thread", +# qsize=lookahead)
- def generator(self): + def __iter__(self): try: # this array will contain up to pageNumber pages and will be flushed # after these pages have been preloaded and yielded.
Modified: trunk/pywikipedia/replace.py =================================================================== --- trunk/pywikipedia/replace.py 2008-05-03 13:05:23 UTC (rev 5301) +++ trunk/pywikipedia/replace.py 2008-05-03 17:43:01 UTC (rev 5302) @@ -332,76 +332,73 @@ """ # Run the generator which will yield Pages which might need to be # changed. - try: - for page in self.generator: - if self.isTitleExcepted(page.title()): - wikipedia.output( - u'Skipping %s because the title is on the exceptions list.' - % page.aslink()) - continue - try: - # Load the page's text from the wiki - original_text = page.get(get_redirect=True) - if not page.canBeEdited(): - wikipedia.output(u"You can't edit page %s" - % page.aslink()) - continue - except wikipedia.NoPage: - wikipedia.output(u'Page %s not found' % page.aslink()) - continue - if self.isTextExcepted(original_text): - wikipedia.output( - u'Skipping %s because it contains text that is on the exceptions list.' - % page.aslink()) - continue - new_text = self.doReplacements(original_text) - if new_text == original_text: - wikipedia.output('No changes were necessary in %s' + for page in self.generator: + if self.isTitleExcepted(page.title()): + wikipedia.output( + u'Skipping %s because the title is on the exceptions list.' + % page.aslink()) + continue + try: + # Load the page's text from the wiki + original_text = page.get(get_redirect=True) + if not page.canBeEdited(): + wikipedia.output(u"You can't edit page %s" % page.aslink()) continue - if self.recursive: + except wikipedia.NoPage: + wikipedia.output(u'Page %s not found' % page.aslink()) + continue + if self.isTextExcepted(original_text): + wikipedia.output( +u'Skipping %s because it contains text that is on the exceptions list.' 
+ % page.aslink()) + continue + new_text = self.doReplacements(original_text) + if new_text == original_text: + wikipedia.output('No changes were necessary in %s' + % page.aslink()) + continue + if self.recursive: + newest_text = self.doReplacements(new_text) + while (newest_text!=new_text): + new_text = newest_text newest_text = self.doReplacements(new_text) - while (newest_text!=new_text): - new_text = newest_text - newest_text = self.doReplacements(new_text)
- if hasattr(self, "addedCat"): - cats = page.categories(nofollow_redirects=True) - if self.addedCat not in cats: - cats.append(self.addedCat) - new_text = wikipedia.replaceCategoryLinks(new_text, - cats) - # Show the title of the page we're working on. - # Highlight the title in purple. - wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" - % page.title()) - wikipedia.showDiff(original_text, new_text) - if not self.acceptall: - choice = wikipedia.inputChoice( - u'Do you want to accept these changes?', - ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N') - if choice in ['a', 'A']: - self.acceptall = True - if choice in ['y', 'Y']: - page.put_async(new_text) - if self.acceptall: - try: - page.put(new_text) - except wikipedia.EditConflict: - wikipedia.output(u'Skipping %s because of edit conflict' - % (page.title(),)) - except wikipedia.SpamfilterError, e: - wikipedia.output( - u'Cannot change %s because of blacklist entry %s' - % (page.title(), e.url)) - except wikipedia.PageNotSaved, error: - wikipedia.output(u'Error putting page: %s' - % (error.args,)) - except wikipedia.LockedPage: - wikipedia.output(u'Skipping %s (locked page)' - % (page.title(),)) - finally: - self.generator.stop() + if hasattr(self, "addedCat"): + cats = page.categories(nofollow_redirects=True) + if self.addedCat not in cats: + cats.append(self.addedCat) + new_text = wikipedia.replaceCategoryLinks(new_text, + cats) + # Show the title of the page we're working on. + # Highlight the title in purple. 
+ wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" + % page.title()) + wikipedia.showDiff(original_text, new_text) + if not self.acceptall: + choice = wikipedia.inputChoice( + u'Do you want to accept these changes?', + ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N') + if choice in ['a', 'A']: + self.acceptall = True + if choice in ['y', 'Y']: + page.put_async(new_text) + if self.acceptall: + try: + page.put(new_text) + except wikipedia.EditConflict: + wikipedia.output(u'Skipping %s because of edit conflict' + % (page.title(),)) + except wikipedia.SpamfilterError, e: + wikipedia.output( + u'Cannot change %s because of blacklist entry %s' + % (page.title(), e.url)) + except wikipedia.PageNotSaved, error: + wikipedia.output(u'Error putting page: %s' + % (error.args,)) + except wikipedia.LockedPage: + wikipedia.output(u'Skipping %s (locked page)' + % (page.title(),))
def prepareRegexForMySQL(pattern): pattern = pattern.replace('\s', '[:space:]')
pywikipedia-l@lists.wikimedia.org