http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11384
Revision: 11384
Author: valhallasw
Date: 2013-04-18 20:41:33 +0000 (Thu, 18 Apr 2013)
Log Message:
-----------
fixup for r11383: corrected docs for UserContributionsGenerator
The documentation referred to another implementation for the same issue. This
commit updates the documentation to the current implementation.
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2013-04-18 20:38:53 UTC (rev 11383)
+++ trunk/pywikipedia/pagegenerators.py 2013-04-18 20:41:33 UTC (rev 11384)
@@ -808,8 +808,10 @@
def SearchPageGenerator(query, number = 100, namespaces = None, site = None):
"""
Provides a list of results using the internal MediaWiki search engine.
- If the factory object is given, it is used to filter namespaces as defined
- by the GeneratorFactory
+
+ namespaces: List of namespace numbers to fetch contribs from. Also accepted
+ are None (default namespace), [] (all namespaces, default) and
+ a callable that returns a list of namespaces.
"""
if site is None:
site = pywikibot.getSite()
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11383
Revision: 11383
Author: valhallasw
Date: 2013-04-18 20:38:53 +0000 (Thu, 18 Apr 2013)
Log Message:
-----------
Add support for namespace-filtered page generators.
Before, generators had to get data for /all/ namespaces, and these results
were filtered. This commit adds support for communicating wanted namespaces
with generators.
Instead of creating a generator by passing a list of namespaces, a /function/
that returns a list is passed. This function is only called once the generator
starts generating Page objects, which is /after/ the GeneratorFactory has
completed parsing all command line arguments. Then, the getNamespaces function
is called, and the correct namespaces are used.
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2013-04-18 16:05:32 UTC (rev 11382)
+++ trunk/pywikipedia/pagegenerators.py 2013-04-18 20:38:53 UTC (rev 11383)
@@ -219,6 +219,9 @@
self.namespaces = []
self.limit = None
+ def getNamespaces(self):
+ return map(int, self.namespaces)
+
def getCombinedGenerator(self, gen=None):
"""Returns the combination of all accumulated generators,
that have been created in the process of handling arguments.
@@ -235,8 +238,8 @@
else:
gensList = CombinedPageGenerator(self.gens)
genToReturn = DuplicateFilterPageGenerator(gensList, total=self.limit)
- if (self.namespaces):
- genToReturn = NamespaceFilterPageGenerator(genToReturn, map(int, self.namespaces))
+ if (self.getNamespaces()):
+ genToReturn = NamespaceFilterPageGenerator(genToReturn, self.getNamespaces())
return genToReturn
def getCategoryGen(self, arg, length, recurse=False):
@@ -318,7 +321,7 @@
number = int(args[1])
except:
number = 250
- gen = UserContributionsGenerator(args[0], number)
+ gen = UserContributionsGenerator(args[0], number, namespaces=self.getNamespaces)
elif arg.startswith('-withoutinterwiki'):
if len(arg) == 17:
gen = WithoutInterwikiPageGenerator()
@@ -487,8 +490,7 @@
if not mediawikiQuery:
mediawikiQuery = pywikibot.input(
u'What do you want to search for?')
- # In order to be useful, all namespaces are required
- gen = SearchPageGenerator(mediawikiQuery, number=None, namespaces=[])
+ gen = SearchPageGenerator(mediawikiQuery, number=None, namespaces=self.getNamespaces)
elif arg.startswith('-google'):
gen = GoogleSearchPageGenerator(arg[8:])
elif arg.startswith('-titleregex'):
@@ -791,20 +793,28 @@
def UserContributionsGenerator(username, number = 250, namespaces = [], site = None ):
"""
Yields number unique pages edited by user:username
- namespaces : list of namespace numbers to fetch contribs from
+ namespaces : List of namespace numbers to fetch contribs from. Also accepted
+ are None (default namespace), [] (all namespaces, default) and
+ a callable that returns a list of namespaces.
"""
if site is None:
site = pywikibot.getSite()
+ if callable(namespaces):
+ namespaces = namespaces()
user = userlib.User(site, username)
for page in user.contributions(number, namespaces):
yield page[0]
def SearchPageGenerator(query, number = 100, namespaces = None, site = None):
"""
- Provides a list of results using the internal MediaWiki search engine
+ Provides a list of results using the internal MediaWiki search engine.
+ If the factory object is given, it is used to filter namespaces as defined
+ by the GeneratorFactory
"""
if site is None:
site = pywikibot.getSite()
+ if callable(namespaces):
+ namespaces = namespaces()
for page in site.search(query, number=number, namespaces = namespaces):
yield page[0]
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11380
Revision: 11380
Author: valhallasw
Date: 2013-04-18 09:21:17 +0000 (Thu, 18 Apr 2013)
Log Message:
-----------
API-based search: [] means /every/ namespace, not just 0
In the scrape-based search, the namespaces=[] parameter meant to search /every/
namespace, not just 0. This commit adapts the API search behavior to be
consistent with this.
Modified Paths:
--------------
branches/rewrite/pywikibot/site.py
trunk/pywikipedia/wikipedia.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2013-04-17 17:05:23 UTC (rev 11379)
+++ branches/rewrite/pywikibot/site.py 2013-04-18 09:21:17 UTC (rev 11380)
@@ -2250,7 +2250,7 @@
@param where: Where to search; value must be "text" or "titles" (many
wikis do not support title search)
@param namespaces: search only in these namespaces (defaults to 0)
- @type namespaces: list of ints
+ @type namespaces: list of ints, or an empty list to signal all namespaces
@param getredirects: if True, include redirects in results
@param content: if True, load the current content of each iterated page
(default False)
@@ -2260,6 +2260,8 @@
raise Error("search: searchstring cannot be empty")
if where not in ("text", "titles"):
raise Error("search: unrecognized 'where' value: %s" % where)
+ if namespaces == []:
+ namespaces = [ns for ns in self.namespaces().keys() if ns >= 0]
if not namespaces:
pywikibot.warning(u"search: namespaces cannot be empty; using [0].")
namespaces = [0]
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2013-04-17 17:05:23 UTC (rev 11379)
+++ trunk/pywikipedia/wikipedia.py 2013-04-18 09:21:17 UTC (rev 11380)
@@ -7018,6 +7018,8 @@
params['srlimit'] = number
if namespaces:
params['srnamespace'] = namespaces
+ if namespaces == []:
+ params['srnamespace'] = [ni for ni in (self.getNamespaceIndex(x) for x in self.namespaces()) if ni >= 0]
offset = 0
while offset < number or not number:
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11378
Revision: 11378
Author: amir
Date: 2013-04-17 15:48:52 +0000 (Wed, 17 Apr 2013)
Log Message:
-----------
removing parenthesis as a not-at-the-end-mark per bug #3610818
Modified Paths:
--------------
trunk/pywikipedia/pywikibot/textlib.py
Modified: trunk/pywikipedia/pywikibot/textlib.py
===================================================================
--- trunk/pywikipedia/pywikibot/textlib.py 2013-04-17 14:38:34 UTC (rev 11377)
+++ trunk/pywikipedia/pywikibot/textlib.py 2013-04-17 15:48:52 UTC (rev 11378)
@@ -837,7 +837,7 @@
# Note: While allowing dots inside URLs, MediaWiki will regard
# dots at the end of the URL as not part of that URL.
# The same applies to comma, colon and some other characters.
- notAtEnd = '\]\s\.:;,<>"\|\)'
+ notAtEnd = '\]\s\.:;,<>"\|'
# So characters inside the URL can be anything except whitespace,
# closing squared brackets, quotation marks, greater than and less
# than, and the last character also can't be parenthesis or another
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11377
Revision: 11377
Author: drtrigon
Date: 2013-04-17 14:38:34 +0000 (Wed, 17 Apr 2013)
Log Message:
-----------
bug fix; for cleanup with unicode data ('?\195?\164'), follow-up to r11214 (and all before that)
Modified Paths:
--------------
trunk/pywikipedia/sum_disc.py
Modified: trunk/pywikipedia/sum_disc.py
===================================================================
--- trunk/pywikipedia/sum_disc.py 2013-04-17 09:40:00 UTC (rev 11376)
+++ trunk/pywikipedia/sum_disc.py 2013-04-17 14:38:34 UTC (rev 11377)
@@ -36,10 +36,6 @@
will cause no problem for the bot, because the entries are also written to the
history.
-The following parameters are supported:
-
-&params;
-
All other parameters will be ignored.
Syntax example:
@@ -1116,7 +1112,7 @@
for line in text.splitlines():
try:
#date = time.strptime(u'abc', u'; %d. %B %Y')
- date = time.strptime(line, str(self._param['parse_msg'][u'start']))
+ date = time.strptime(line.encode('utf-8'), str(self._param['parse_msg'][u'start']))
#date = time.strptime(str(line), str(self._param['parse_msg'][u'start']))
date = datetime.datetime.fromtimestamp(time.mktime(date))
diff = (today - date).days