http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10494
Revision: 10494
Author: xqt
Date: 2012-08-22 17:09:33 +0000 (Wed, 22 Aug 2012)
Log Message:
-----------
remove unused get_redirect paramter from site.newpages() and mark it as deprecated;
remove code duplication from site.newpages() and use site.recentchanges() for api call
like in rewrite
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2012-08-22 16:59:37 UTC (rev 10493)
+++ trunk/pywikipedia/wikipedia.py 2012-08-22 17:09:33 UTC (rev 10494)
@@ -6476,8 +6476,11 @@
break
return
- def newpages(self, number = 10, get_redirect = False, repeat = False, namespace = 0,
rcshow = ['!bot','!redirect'], user = None, returndict = False):
- """Yield new articles (as Page objects) from Special:Newpages.
+ @deprecate_arg("get_redirect", None) #20120822
+ def newpages(self, user=None, returndict=False,
+ number=10, repeat=False, namespace=0,
+ rcshow = ['!bot','!redirect']):
+ """Yield new articles (as Page objects) from recent changes.
Starts with the newest article and fetches the number of articles
specified in the first argument. If repeat is True, it fetches
@@ -6485,42 +6488,35 @@
one, sleeping between subsequent fetches of Newpages.
The objects yielded are dependent on parmater returndict.
- When true, it yields a tuple composed of a Page object and a dict of attributes.
+ When true, it yields a tuple composed of a Page object and a dict of
+ attributes.
When false, it yields a tuple composed of the Page object,
timestamp (unicode), length (int), an empty unicode string, username
or IP address (str), comment (unicode).
"""
- # TODO: in recent MW versions Special:Newpages takes a namespace parameter,
- # and defaults to 0 if not specified.
+ # TODO: in recent MW versions Special:Newpages takes a namespace
+ # parameter, and defaults to 0 if not specified.
# TODO: Detection of unregistered users is broken
# TODO: Repeat mechanism doesn't make much sense as implemented;
# should use both offset and limit parameters, and have an
# option to fetch older rather than newer pages
- seen = set()
- while True:
- if self.has_api() and self.versionnumber() >= 10:
- params = {
- 'action': 'query',
- 'list': 'recentchanges',
- 'rctype': 'new',
- 'rcnamespace': namespace,
- 'rclimit': int(number),
- 'rcprop':
['ids','title','timestamp','sizes','user','comment'],
- 'rcshow': rcshow,
- }
- if user: params['rcuser'] = user
- data = query.GetData(params,
self)['query']['recentchanges']
- for np in data:
- if np['pageid'] not in seen:
- seen.add(np['pageid'])
- page = Page(self, np['title'],
defaultNamespace=np['ns'])
- if returndict:
- yield page, np
- else:
- yield page, np['timestamp'], np['newlen'],
u'', np['user'], np['comment']
- else:
+ # N.B. API still provides no way to access Special:Newpages content
+ # directly, so we get new pages indirectly through 'recentchanges'
+ if self.has_api() and self.versionnumber() >= 10:
+ gen = self.recentchanges(number=number, rcshow=rcshow, rctype='new',
+ namespace=namespace, repeat=repeat,
+ user=user, returndict=True)
+ for newpage, pageitem in gen:
+ if returndict:
+ yield (newpage, pageitem)
+ else:
+ yield (newpage, pageitem['timestamp'],
pageitem['newlen'],
+ u'', pageitem['user'],
pageitem['comment'])
+ else:
+ seen = set()
+ while True:
path = self.newpages_address(n=number, namespace=namespace)
# The throttling is important here, so always enabled.
get_throttle()
@@ -6542,8 +6538,8 @@
seen.add(title)
page = Page(self, title)
yield page, date, length, loggedIn, username, comment
- if not repeat:
- break
+ if not repeat:
+ break
def longpages(self, number = 10, repeat = False):
"""Yield Pages from Special:Longpages.
@@ -6797,6 +6793,7 @@
if rcshow: params['rcshow'] = rcshow
if rctype: params['rctype'] = rctype
+ seen = set()
while True:
data = query.GetData(params, self, encodeTitle = False)
if 'error' in data:
@@ -6807,14 +6804,16 @@
raise ServerError("The APIs don't return data, the site may be
down")
for i in rcData:
- page = Page(self, i['title'], defaultNamespace=i['ns'])
- if returndict:
- yield page, i
- else:
- comment = ''
- if 'comment' in i:
- comment = i['comment']
- yield page, i['timestamp'], i['newlen'], True,
i['user'], comment
+ if i['pageid'] not in seen:
+ seen.add(i['pageid'])
+ page = Page(self, i['title'],
defaultNamespace=i['ns'])
+ if returndict:
+ yield page, i
+ else:
+ comment = u''
+ if 'comment' in i:
+ comment = i['comment']
+ yield page, i['timestamp'], i['newlen'], True,
i['user'], comment
if not repeat:
break