Revision: 8652
Author: xqt
Date: 2010-10-14 20:44:04 +0000 (Thu, 14 Oct 2010)
Log Message:
-----------
Add the possibility to compare against page.title() instead
of page.titleWithoutNamespace() in RegexFilterPageGenerator (patch from bug #3084727,
submitted by DrTrigon. Thanks)
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2010-10-14 20:34:24 UTC (rev 8651)
+++ trunk/pywikipedia/pagegenerators.py 2010-10-14 20:44:04 UTC (rev 8652)
@@ -1160,25 +1160,36 @@
seenPages[_page] = True
yield page
-def RegexFilterPageGenerator(generator, regex, inverse=False):
+def RegexFilterPageGenerator(generator, regex, inverse=False, ignore_namespace=True):
"""
Wraps around another generator. Yields only those pages, the titles of
which are positively matched to any regex in list. If invert is False,
yields all pages matched by any regex, if True, yields all pages matched
- none of the regex.
+ none of the regex. If ignore_namespace is False, the whole page title
+ is compared.
"""
# test for backwards compatibility
if isinstance(regex, basestring):
regex = [regex]
- reg = [ re.compile(r, re.I) for r in regex ]
+ # test if regex is already compiled
+ if isinstance(regex[0], basestring):
+ reg = [ re.compile(r, re.I) for r in regex ]
+ else:
+ reg = regex
for page in generator:
+ # get the page title
+ if ignore_namespace:
+ title = page.titleWithoutNamespace()
+ else:
+ title = page.title()
+
if inverse:
# yield page if NOT matched by all regex
skip = False
for r in reg:
- if r.match(page.titleWithoutNamespace()):
+ if r.match(title):
skip = True
break
if not skip:
@@ -1186,7 +1197,7 @@
else:
# yield page if matched by any regex
for r in reg:
- if r.match(page.titleWithoutNamespace()):
+ if r.match(title):
yield page
break
Show replies by date