Revision: 4743 Author: rotem Date: 2007-12-21 18:58:54 +0000 (Fri, 21 Dec 2007)
Log Message: ----------- Creating wikipedia.site.prefixindex (which still uses wikipedia.site.allpages), and making pagegenerators.PrefixingPageGenerator use it.
Modified Paths: -------------- trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2007-12-21 18:46:24 UTC (rev 4742) +++ trunk/pywikipedia/pagegenerators.py 2007-12-21 18:58:54 UTC (rev 4743) @@ -117,18 +117,22 @@ """ if site is None: site = wikipedia.getSite() + page = wikipedia.Page(site, start) if namespace is None: - namespace = wikipedia.Page(site, start).namespace() - title = wikipedia.Page(site, start).titleWithoutNamespace() - for page in site.allpages(start=title, namespace=namespace, includeredirects = includeredirects): + namespace = page.namespace() + title = page.titleWithoutNamespace() + for page in site.allpages(start = title, namespace = namespace, includeredirects = includeredirects): yield page
def PrefixingPageGenerator(prefix, namespace = None, includeredirects = True, site = None): - for page in AllpagesPageGenerator(prefix, namespace, includeredirects, site): - if page.titleWithoutNamespace().startswith(prefix): - yield page - else: - break + if site is None: + site = wikipedia.getSite() + page = wikipedia.Page(site, prefix) + if namespace is None: + namespace = page.namespace() + title = page.titleWithoutNamespace() + for page in site.prefixindex(prefix = title, namespace = namespace, includeredirects = includeredirects): + yield page
def NewpagesPageGenerator(number = 100, get_redirect = False, repeat = False, site = None): if site is None:
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2007-12-21 18:46:24 UTC (rev 4742) +++ trunk/pywikipedia/wikipedia.py 2007-12-21 18:58:54 UTC (rev 4743) @@ -3491,6 +3491,7 @@
search(query): query results from Special:Search allpages(): Special:Allpages + prefixindex(): Special:Prefixindex newpages(): Special:Newpages newimages(): Special:Log&type=upload longpages(): Special:Longpages @@ -4456,6 +4457,29 @@ else: break
+ def prefixindex(self, prefix, namespace=0, includeredirects=True): + """Yield all pages with a given prefix. + + Parameters: + prefix The prefix of the pages. + namespace Namespace number; defaults to 0. + MediaWiki software will only return pages in one namespace + at a time. + + If includeredirects is False, redirects will not be found. + If includeredirects equals the string 'only', only redirects + will be found. Note that this has not been tested on older + versions of the MediaWiki code. + + It is advised not to use this directly, but to use the + PrefixingPageGenerator from pagegenerators.py instead. + """ + for page in self.allpages(start = prefix, namespace = namespace, includeredirects = includeredirects): + if page.titleWithoutNamespace().startswith(prefix): + yield page + else: + break + def linksearch(self, siteurl): """Yield Pages from results of Special:Linksearch for 'siteurl'.""" if siteurl.startswith('*.'):