Revision: 8801 Author: xqt Date: 2010-12-26 14:14:49 +0000 (Sun, 26 Dec 2010) Log Message: ----------- Enable unlimited MediaWiki search, but limit all generators with -limit:n
Modified Paths: -------------- trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2010-12-26 12:30:13 UTC (rev 8800) +++ trunk/pywikipedia/pagegenerators.py 2010-12-26 14:14:49 UTC (rev 8801) @@ -84,6 +84,9 @@ pages on several wiki sites, this is not well tested, so check your edits!
+-limit:n When used with any other argument that specifies a set + of pages, work on no more than n pages in total + -links Work on all pages that are linked from a certain page. Argument can also be given as "-links:linkingpagetitle".
@@ -298,6 +301,7 @@ def __init__(self): self.gens = [] self.namespaces = [] + self.limit = None
def getCombinedGenerator(self, gen=None): """Returns the combination of all accumulated generators, @@ -314,7 +318,7 @@ gensList = self.gens[0] else: gensList = CombinedPageGenerator(self.gens) - genToReturn = DuplicateFilterPageGenerator(gensList) + genToReturn = DuplicateFilterPageGenerator(gensList, total=self.limit) if (self.namespaces): genToReturn = NamespaceFilterPageGenerator(genToReturn, map(int, self.namespaces)) return genToReturn @@ -443,6 +447,12 @@ else: self.namespaces.extend(arg[len('-ns:'):].split(",")) return True + elif arg.startswith('-limit'): + if len(arg) == len('-limit'): + self.limit = int(pywikibot.input("What is the limit value?")) + else: + self.limit = int(arg[len('-limit:'):]) + return True elif arg.startswith('-catr'): gen = self.getCategoryGen(arg, len('-catr'), recurse = True) elif arg.startswith('-category'): @@ -565,7 +575,7 @@ mediawikiQuery = pywikibot.input( u'What do you want to search for?') # In order to be useful, all namespaces are required - gen = SearchPageGenerator(mediawikiQuery, namespaces = []) + gen = SearchPageGenerator(mediawikiQuery, number=None, namespaces=[]) elif arg.startswith('-google'): gen = GoogleSearchPageGenerator(arg[8:]) elif arg.startswith('-titleregex'): @@ -1148,16 +1158,21 @@ if not page.isRedirectPage(): yield page
-def DuplicateFilterPageGenerator(generator): +def DuplicateFilterPageGenerator(generator, total=None): """ Wraps around another generator. Yields all pages, but prevents duplicates. """ seenPages = dict() + count = 0 for page in generator: _page = u"%s:%s:%s" % (page._site.family.name, page._site.lang, page._title) if _page not in seenPages: seenPages[_page] = True + if total: + count += 1 + if count > total: + break yield page
def RegexFilterPageGenerator(generator, regex, inverse=False, ignore_namespace=True): @@ -1309,8 +1324,10 @@ else: gen = genFactory.getCombinedGenerator() if gen: + i = 0 for page in gen: - pywikibot.output(page.title(), toStdout = True) + i+=1 + pywikibot.output("%s: %s" % (repr(i).rjust(4), page.title()), toStdout = True) else: pywikibot.showHelp('pagegenerators') finally:
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2010-12-26 12:30:13 UTC (rev 8800) +++ trunk/pywikipedia/wikipedia.py 2010-12-26 14:14:49 UTC (rev 8801) @@ -5938,7 +5938,7 @@
self._getUserDataOld(text, sysop = sysop, force = force)
- def search(self, key, number = 10, namespaces = None): + def search(self, key, number=10, namespaces=None): """ Yield search results for query. Use API when enabled use_api and version >= 1.11, @@ -5950,13 +5950,14 @@ 'action': 'query', 'list': 'search', 'srsearch': key, - 'srlimit': number } + if number: + params['srlimit'] = number if namespaces: params['srnamespace'] = namespaces
offset = 0 - while offset < number: + while offset < number or not number: params['sroffset'] = offset data = query.GetData(params, self)['query'] if 'error' in data: