Revision: 7249 Author: alexsh Date: 2009-09-15 18:54:53 +0000 (Tue, 15 Sep 2009)
Log Message: ----------- pagegenerators.py: * move UserContributionsGenerator API code to userlib.py.User().contributions() * fix: -start cannot handle -namespace
Modified Paths: -------------- trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/userlib.py
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2009-09-15 18:51:10 UTC (rev 7248) +++ trunk/pywikipedia/pagegenerators.py 2009-09-15 18:54:53 UTC (rev 7249) @@ -161,7 +161,7 @@ import urllib, urllib2, time
# Application specific imports -import wikipedia, date, catlib, query +import wikipedia, date, catlib, userlib, query import config
# For python 2.4 compatibility @@ -482,37 +482,15 @@ Yields number unique pages edited by user:username namespaces : list of namespace numbers to fetch contribs from """ - import urllib + if site is None: site = wikipedia.getSite() if number > 500: # the api does not allow more than 500 results for anonymous users number = 500 - params = { - 'action': 'query', - 'list': 'usercontribs', - 'ucuser': username, - 'ucprop': 'title', - 'uclimit': int(number), - 'ucdir': 'older', - } - - - if namespaces: - params['ucnamespace'] = '|'.join(map(str, namespaces)) - # An user is likely to contribute on several pages, - # keeping track of titles - titleList = [] - while True: - result = query.GetData(params, site) - for contr in result['query']['usercontribs']: - if not contr['title'] in titleList: - titleList.append(contr['title']) - yield wikipedia.Page(site, contr['title']) - if result.has_key('query-continue'): - params['ucstart'] = result['query-continue']['usercontribs']['ucstart'] - else: - break + user = userlib.User(site, username) + for page in user.contributions(number, namespaces): + yield page[0]
def SearchPageGenerator(query, number = 100, namespaces = None, site = None): """ @@ -1072,7 +1050,11 @@ if not firstPageTitle: firstPageTitle = wikipedia.input( u'At which page do you want to start?') - namespace = wikipedia.Page(site, firstPageTitle).namespace() + if self.namespaces != []: + namespace = self.namespaces[0] + else: + namespace = wikipedia.Page(site, firstPageTitle).namespace() + firstPageTitle = wikipedia.Page(site, firstPageTitle).titleWithoutNamespace() gen = AllpagesPageGenerator(firstPageTitle, namespace,
Modified: trunk/pywikipedia/userlib.py =================================================================== --- trunk/pywikipedia/userlib.py 2009-09-15 18:51:10 UTC (rev 7248) +++ trunk/pywikipedia/userlib.py 2009-09-15 18:54:53 UTC (rev 7249) @@ -41,8 +41,10 @@ site - a wikipedia.Site object name - name of the user, without the trailing User: """ - - self.site = site + if type(site) == str: + self.site = wikipedia.getSite(site) + else: + self.site = site self.name = name
def __str__(self): @@ -75,7 +77,7 @@ for page in self.contributions(limit): yield page[0]
- def contributions(self, limit=500): + def contributions(self, limit=500, namespace = []): """ Yields pages that the user has edited, with an upper bound of ``limit''. Pages returned are not guaranteed to be unique (straight Special:Contributions parsing, in chunks of 500 items).""" @@ -85,13 +87,23 @@ #an autoblock, so has no contribs. raise AutoblockUserError
+ # please keep this commented out until the regex is fixed + #if wikipedia.config.use_api: + for pg, oldid, date, comment in self._apiContributions(limit): + yield pg, oldid, date, comment + return + # + #TODO: fix contribRX regex + # offset = 0 step = min(limit,500) older_str = None - try: + + if self.site.versionnumber() <= 11: + older_str = self.site.mediawiki_message('sp-contributions-older') + else: older_str = self.site.mediawiki_message('pager-older-n') - except wikipedia.KeyError: - older_str = self.site.mediawiki_message('sp-contributions-older') + if older_str.startswith('{{PLURAL:$1'): older_str = older_str[13:] older_str = older_str[older_str.find('|')+1:] @@ -99,8 +111,9 @@ older_str = older_str.replace('$1',str(step))
address = self.site.contribs_address(self.name,limit=step) - contribRX = re.compile('<li[^>]*> *<a href="(?P<url>[^"]*?)" title="[^"]+">(?P<date>[^<]+)</a>.*>diff</a>) *(<span class="[^"]+">[A-Za-z]</span>)* *<a href="[^"]+" (class="[^"]+" )?title="[^"]+">(?P<title>[^<]+)</a> *(?P<comment>.*?)(?P<top><strong> *(top) *</strong>)? *(<span class="mw-rollback-link">[<a href="[^"]+token=(?P<rollbackToken>[^"]+)%2B%5C".*rollback</a>]</span>)? *</li>') + contribRX = re.compile(r'<li[^>]*> *<a href="(?P<url>[^"]*?)" title="[^"]+">(?P<date>[^<]+)</a>.*>%s</a>) *(<span class="[^"]+">[A-Za-z]</span>)* *<a href="[^"]+" (class="[^"]+" )?title="[^"]+">(?P<title>[^<]+)</a> *(?P<comment>.*?)(?P<top><strong> *(top) *</strong>)? *(<span class="mw-rollback-link">[<a href="[^"]+token=(?P<rollbackToken>[^"]+)%2B%5C".*%s</a>]</span>)? *</li>' % (self.site.mediawiki_message('diff'),self.site.mediawiki_message('rollback') ) )
+ while offset < limit: data = self.site.getUrl(address) for pg in contribRX.finditer(data): @@ -108,13 +121,13 @@ oldid = url[url.find('&amp;oldid=')+11:] date = pg.group('date') comment = pg.group('comment') - rollbackToken = pg.group('rollbackToken') + #rollbackToken = pg.group('rollbackToken') top = None if pg.group('top'): top = True
# top, new, minor, should all go in a flags field - yield wikipedia.Page(self.site,pg.group('title')), oldid, date, comment, rollbackToken + yield wikipedia.Page(self.site, pg.group('title')), oldid, date, comment
offset += 1 if offset == limit: @@ -124,6 +137,33 @@ address = nextRX.group('address').replace('&amp;','&') else: break + + def _apiContributions(self, limit = 250, namespace = []): + + params = { + 'action': 'query', + 'list': 'usercontribs', + 'ucuser': self.name, + 'ucprop': 'ids|title|timestamp|comment',# |size|flags', + 'uclimit': int(limit), + 'ucdir': 'older', + } + + if namespace: + params['ucnamespace'] = '|'.join(namespace) + # A user is likely to contribute on several pages, + # keeping track of titles + count = 0 + while True: + result = wikipedia.query.GetData(params, self.site) + for c in result['query']['usercontribs']: + yield wikipedia.Page(self.site, c['title'], defaultNamespace=c['ns']), c['revid'], c['timestamp'], c['comment'] + count += 1 + if result.has_key('query-continue') and count <= limit: + params['ucstart'] = result['query-continue']['usercontribs']['ucstart'] + else: + break + return
def uploadedImages(self, number = 10): """Yield ImagePages from Special:Log&type=upload"""
pywikipedia-svn@lists.wikimedia.org