Revision: 7576
Author: wikipedian
Date: 2009-10-31 00:35:16 +0000 (Sat, 31 Oct 2009)
Log Message:
-----------
Used page generators to make the ignore list feature available to other
scripts.
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/solve_disambiguation.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2009-10-30 23:57:42 UTC (rev 7575)
+++ trunk/pywikipedia/pagegenerators.py 2009-10-31 00:35:16 UTC (rev 7576)
@@ -734,6 +734,32 @@
if page.namespace() in namespaces:
yield page
+def PageTitleFilterPageGenerator(generator, ignoreList):
+ """
+ Wraps around another generator. Yields only those pages that are not
+ listed in the ignore list.
+
+ The ignoreList is a dictionary. Family names are mapped to
+ dictionaries in which language codes are mapped to lists of
+ page titles.
+ """
+
+ def isIgnored(page):
+ if not (page.site().family.name in ignoreList and page.site().lang in
ignoreList[page.site().family.name]):
+ return False
+
+ for ig in ignoreList[page.site().family.name][page.site().lang]:
+ if re.match(ig, page.title()):
+ return True
+ return False
+
+ for page in generator:
+ if isIgnored(page):
+ if wikipedia.verbose:
+ wikipedia.output('Ignoring page %s' % page.title())
+ else:
+ yield page
+
def RedirectFilterPageGenerator(generator):
"""
Wraps around another generator. Yields only those pages that are not redirects.
Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py 2009-10-30 23:57:42 UTC (rev 7575)
+++ trunk/pywikipedia/solve_disambiguation.py 2009-10-31 00:35:16 UTC (rev 7576)
@@ -412,28 +412,14 @@
class ReferringPageGeneratorWithIgnore:
def __init__(self, disambPage, primary=False, minimum = 0):
self.disambPage = disambPage
- # if run with the -primary argument, enable the ignore manager
- self.primaryIgnoreManager = PrimaryIgnoreManager(disambPage,
- enabled=primary)
self.minimum = minimum
-
+
def __iter__(self):
- # TODO: start yielding before all referring pages have been found
- refs = [page for page in self.disambPage.getReferences(follow_redirects = False,
withTemplateInclusion = False)]
- pywikibot.output(u"Found %d references." % len(refs))
- # Remove ignorables
- if self.disambPage.site().family.name in ignore_title and
self.disambPage.site().lang in ignore_title[self.disambPage.site().family.name]:
- for ig in
ignore_title[self.disambPage.site().family.name][self.disambPage.site().lang]:
- for i in range(len(refs)-1, -1, -1):
- if re.match(ig, refs[i].title()):
- if pywikibot.verbose:
- pywikibot.output('Ignoring page %s'
- % refs[i].title())
- del refs[i]
- for i in range(len(refs)-1, -1, -1):
- if self.primaryIgnoreManager.isIgnored(refs[i]):
- #pywikibot.output('Ignoring page %s because it was skipped
before' % refs[i].title())
- del refs[i]
+ generator = pagegenerators.ReferringPageGenerator(self.disambPage,
followRedirects = False, withTemplateInclusion = False)
+ generator = pagegenerators.PageTitleFilterPageGenerator(generator, ignore_title)
+
+ refs = [page for page in generator]
+
if len(refs) < self.minimum:
pywikibot.output(u"Found only %d pages to work on; skipping." %
len(refs))
return
Show replies by date