[Pywikipedia-svn] SVN: [7249] trunk/pywikipedia
alexsh at svn.wikimedia.org
Tue Sep 15 18:54:53 UTC 2009
Revision: 7249
Author: alexsh
Date: 2009-09-15 18:54:53 +0000 (Tue, 15 Sep 2009)
Log Message:
-----------
pagegenerators.py:
* move UserContributionsGenerator API codes to userlib.py.User().contributions()
* fix -start so it respects -namespace
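A minimal sketch of the new call path after the move (user name, limit and namespace values are placeholders):

    import wikipedia, userlib

    site = wikipedia.getSite()
    user = userlib.User(site, 'Example')
    # contributions() yields (Page, oldid, date, comment) tuples
    for page, oldid, date, comment in user.contributions(limit=50, namespace=[0]):
        wikipedia.output(page.title())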
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/userlib.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2009-09-15 18:51:10 UTC (rev 7248)
+++ trunk/pywikipedia/pagegenerators.py 2009-09-15 18:54:53 UTC (rev 7249)
@@ -161,7 +161,7 @@
import urllib, urllib2, time
# Application specific imports
-import wikipedia, date, catlib, query
+import wikipedia, date, catlib, userlib, query
import config
# For python 2.4 compatibility
@@ -482,37 +482,15 @@
Yields at most ``number'' unique pages edited by user:username
namespaces: list of namespace numbers to fetch contribs from
"""
- import urllib
+
if site is None:
site = wikipedia.getSite()
if number > 500:
# the api does not allow more than 500 results for anonymous users
number = 500
- params = {
- 'action': 'query',
- 'list': 'usercontribs',
- 'ucuser': username,
- 'ucprop': 'title',
- 'uclimit': int(number),
- 'ucdir': 'older',
- }
-
-
- if namespaces:
- params['ucnamespace'] = '|'.join(map(str, namespaces))
- # An user is likely to contribute on several pages,
- # keeping track of titles
- titleList = []
- while True:
- result = query.GetData(params, site)
- for contr in result['query']['usercontribs']:
- if not contr['title'] in titleList:
- titleList.append(contr['title'])
- yield wikipedia.Page(site, contr['title'])
- if result.has_key('query-continue'):
- params['ucstart'] = result['query-continue']['usercontribs']['ucstart']
- else:
- break
+ user = userlib.User(site, username)
+ for page in user.contributions(number, namespaces):
+ yield page[0]
def SearchPageGenerator(query, number = 100, namespaces = None, site = None):
"""
@@ -1072,7 +1050,11 @@
if not firstPageTitle:
firstPageTitle = wikipedia.input(
u'At which page do you want to start?')
- namespace = wikipedia.Page(site, firstPageTitle).namespace()
+ if self.namespaces != []:
+ namespace = self.namespaces[0]
+ else:
+ namespace = wikipedia.Page(site, firstPageTitle).namespace()
+
firstPageTitle = wikipedia.Page(site,
firstPageTitle).titleWithoutNamespace()
gen = AllpagesPageGenerator(firstPageTitle, namespace,
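The -namespace override is easiest to see through the GeneratorFactory that the bots use for these arguments; a minimal sketch, assuming the GeneratorFactory interface of this revision and a placeholder start title (note -namespace must be handled before -start so self.namespaces is populated):

    import wikipedia, pagegenerators

    genFactory = pagegenerators.GeneratorFactory()
    # an explicit -namespace now takes precedence over the namespace
    # parsed from the -start title
    genFactory.handleArg('-namespace:10')
    genFactory.handleArg('-start:Foo')
    for page in genFactory.getCombinedGenerator():
        wikipedia.output(page.title())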
Modified: trunk/pywikipedia/userlib.py
===================================================================
--- trunk/pywikipedia/userlib.py 2009-09-15 18:51:10 UTC (rev 7248)
+++ trunk/pywikipedia/userlib.py 2009-09-15 18:54:53 UTC (rev 7249)
@@ -41,8 +41,10 @@
site - a wikipedia.Site object
name - name of the user, without the trailing User:
"""
-
- self.site = site
+ if isinstance(site, basestring):
+ self.site = wikipedia.getSite(site)
+ else:
+ self.site = site
self.name = name
def __str__(self):
@@ -75,7 +77,7 @@
for page in self.contributions(limit):
yield page[0]
- def contributions(self, limit=500):
+ def contributions(self, limit=500, namespace=[]):
""" Yields pages that the user has edited, with an upper bound of ``limit''.
Pages returned are not guaranteed to be unique
(straight Special:Contributions parsing, in chunks of 500 items)."""
@@ -85,13 +87,23 @@
#an autoblock, so has no contribs.
raise AutoblockUserError
+ # please keep this commented out until the contribRX regex is fixed
+ #if wikipedia.config.use_api:
+ for pg, oldid, date, comment in self._apiContributions(limit, namespace):
+ yield pg, oldid, date, comment
+ return
+ #
+ #TODO: fix contribRX regex
+ #
offset = 0
step = min(limit,500)
older_str = None
- try:
+
+ if self.site.versionnumber() <= 11:
+ older_str = self.site.mediawiki_message('sp-contributions-older')
+ else:
older_str = self.site.mediawiki_message('pager-older-n')
- except wikipedia.KeyError:
- older_str = self.site.mediawiki_message('sp-contributions-older')
+
if older_str.startswith('{{PLURAL:$1'):
older_str = older_str[13:]
older_str = older_str[older_str.find('|')+1:]
@@ -99,8 +111,9 @@
older_str = older_str.replace('$1',str(step))
address = self.site.contribs_address(self.name,limit=step)
- contribRX = re.compile('<li[^>]*> *<a href="(?P<url>[^"]*?)" title="[^"]+">(?P<date>[^<]+)</a>.*>diff</a>\) *(<span class="[^"]+">[A-Za-z]</span>)* *<a href="[^"]+" (class="[^"]+" )?title="[^"]+">(?P<title>[^<]+)</a> *(?P<comment>.*?)(?P<top><strong> *\(top\) *</strong>)? *(<span class="mw-rollback-link">\[<a href="[^"]+token=(?P<rollbackToken>[^"]+)%2B%5C".*rollback</a>\]</span>)? *</li>')
+ contribRX = re.compile(r'<li[^>]*> *<a href="(?P<url>[^"]*?)" title="[^"]+">(?P<date>[^<]+)</a>.*>%s</a>\) *(<span class="[^"]+">[A-Za-z]</span>)* *<a href="[^"]+" (class="[^"]+" )?title="[^"]+">(?P<title>[^<]+)</a> *(?P<comment>.*?)(?P<top><strong> *\(top\) *</strong>)? *(<span class="mw-rollback-link">\[<a href="[^"]+token=(?P<rollbackToken>[^"]+)%2B%5C".*%s</a>\]</span>)? *</li>' % (self.site.mediawiki_message('diff'),self.site.mediawiki_message('rollback') ) )
+
while offset < limit:
data = self.site.getUrl(address)
for pg in contribRX.finditer(data):
@@ -108,13 +121,13 @@
oldid = url[url.find('&amp;oldid=')+11:]
date = pg.group('date')
comment = pg.group('comment')
- rollbackToken = pg.group('rollbackToken')
+ #rollbackToken = pg.group('rollbackToken')
top = None
if pg.group('top'):
top = True
# top, new, minor, should all go in a flags field
- yield wikipedia.Page(self.site,pg.group('title')), oldid, date, comment, rollbackToken
+ yield wikipedia.Page(self.site, pg.group('title')), oldid, date, comment
offset += 1
if offset == limit:
@@ -124,6 +137,33 @@
address = nextRX.group('address').replace('&amp;','&')
else:
break
+
+ def _apiContributions(self, limit=250, namespace=[]):
+
+ params = {
+ 'action': 'query',
+ 'list': 'usercontribs',
+ 'ucuser': self.name,
+ 'ucprop': 'ids|title|timestamp|comment', # later also |size|flags
+ 'uclimit': int(limit),
+ 'ucdir': 'older',
+ }
+
+ if namespace:
+ params['ucnamespace'] = '|'.join(map(str, namespace))
+ # count yielded contributions so we can stop at the limit
+ count = 0
+ while True:
+ result = wikipedia.query.GetData(params, self.site)
+ for c in result['query']['usercontribs']:
+ yield wikipedia.Page(self.site, c['title'], defaultNamespace=c['ns']), c['revid'], c['timestamp'], c['comment']
+ count += 1
+ if result.has_key('query-continue') and count <= limit:
+ params['ucstart'] = result['query-continue']['usercontribs']['ucstart']
+ else:
+ break
+ return
def uploadedImages(self, number = 10):
"""Yield ImagePages from Special:Log&type=upload"""