http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10498
Revision: 10498
Author: xqt
Date: 2012-08-22 20:31:52 +0000 (Wed, 22 Aug 2012)
Log Message:
-----------
for now comment out rcprop=patrolled: patrol rights needed
Modified Paths:
--------------
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2012-08-22 20:02:17 UTC (rev 10497)
+++ branches/rewrite/pywikibot/site.py 2012-08-22 20:31:52 UTC (rev 10498)
@@ -4,7 +4,7 @@
on the same topic in different languages).
"""
#
-# (C) Pywikipedia bot team, 2008-2010
+# (C) Pywikipedia bot team, 2008-2012
#
# Distributed under the terms of the MIT license.
#
@@ -2099,6 +2099,7 @@
"recentchanges: start must be later than end with reverse=False")
rcgen = self._generator(api.ListGenerator, type_arg="recentchanges",
rcprop="user|comment|timestamp|title|ids"
+ #"|sizes|redirect|loginfo" - patrol rights needed
"|sizes|redirect|patrolled|loginfo"
"|flags",
namespaces=namespaces, step=step,
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10495
Revision: 10495
Author: xqt
Date: 2012-08-22 17:19:19 +0000 (Wed, 22 Aug 2012)
Log Message:
-----------
remove unused get_redirect parameter from pagegenerators.newpagesPageGenerator() and mark it as deprecated;
remove bot filter from this generator as requested in bug #3560612
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2012-08-22 17:09:33 UTC (rev 10494)
+++ trunk/pywikipedia/pagegenerators.py 2012-08-22 17:19:19 UTC (rev 10495)
@@ -558,16 +558,16 @@
repeat=repeat, namespace=namespace):
yield page[0]
-def NewpagesPageGenerator(number=100, get_redirect=False, repeat=False, site=None,
- namespace=0):
+@deprecate_arg("get_redirect", None) #20120822
+def NewpagesPageGenerator(number=100, repeat=False, site=None, namespace=0):
"""
Iterate Page objects for all new titles in a single namespace.
"""
# defaults to namespace 0 because that's how Special:Newpages defaults
if site is None:
site = pywikibot.getSite()
- for item in site.newpages(number=number, get_redirect=get_redirect,
- repeat=repeat, namespace=namespace):
+ for item in site.newpages(number=number, repeat=repeat, namespace=namespace,
+ rcshow=['!redirect']):
yield item[0]
def RecentchangesPageGenerator(number=100, site=None):
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10494
Revision: 10494
Author: xqt
Date: 2012-08-22 17:09:33 +0000 (Wed, 22 Aug 2012)
Log Message:
-----------
remove unused get_redirect parameter from site.newpages() and mark it as deprecated;
remove code duplication from site.newpages() and use site.recentchanges() for the API call, as in the rewrite branch
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2012-08-22 16:59:37 UTC (rev 10493)
+++ trunk/pywikipedia/wikipedia.py 2012-08-22 17:09:33 UTC (rev 10494)
@@ -6476,8 +6476,11 @@
break
return
- def newpages(self, number = 10, get_redirect = False, repeat = False, namespace = 0, rcshow = ['!bot','!redirect'], user = None, returndict = False):
- """Yield new articles (as Page objects) from Special:Newpages.
+ @deprecate_arg("get_redirect", None) #20120822
+ def newpages(self, user=None, returndict=False,
+ number=10, repeat=False, namespace=0,
+ rcshow = ['!bot','!redirect']):
+ """Yield new articles (as Page objects) from recent changes.
Starts with the newest article and fetches the number of articles
specified in the first argument. If repeat is True, it fetches
@@ -6485,42 +6488,35 @@
one, sleeping between subsequent fetches of Newpages.
The objects yielded are dependent on parmater returndict.
- When true, it yields a tuple composed of a Page object and a dict of attributes.
+ When true, it yields a tuple composed of a Page object and a dict of
+ attributes.
When false, it yields a tuple composed of the Page object,
timestamp (unicode), length (int), an empty unicode string, username
or IP address (str), comment (unicode).
"""
- # TODO: in recent MW versions Special:Newpages takes a namespace parameter,
- # and defaults to 0 if not specified.
+ # TODO: in recent MW versions Special:Newpages takes a namespace
+ # parameter, and defaults to 0 if not specified.
# TODO: Detection of unregistered users is broken
# TODO: Repeat mechanism doesn't make much sense as implemented;
# should use both offset and limit parameters, and have an
# option to fetch older rather than newer pages
- seen = set()
- while True:
- if self.has_api() and self.versionnumber() >= 10:
- params = {
- 'action': 'query',
- 'list': 'recentchanges',
- 'rctype': 'new',
- 'rcnamespace': namespace,
- 'rclimit': int(number),
- 'rcprop': ['ids','title','timestamp','sizes','user','comment'],
- 'rcshow': rcshow,
- }
- if user: params['rcuser'] = user
- data = query.GetData(params, self)['query']['recentchanges']
- for np in data:
- if np['pageid'] not in seen:
- seen.add(np['pageid'])
- page = Page(self, np['title'], defaultNamespace=np['ns'])
- if returndict:
- yield page, np
- else:
- yield page, np['timestamp'], np['newlen'], u'', np['user'], np['comment']
- else:
+ # N.B. API still provides no way to access Special:Newpages content
+ # directly, so we get new pages indirectly through 'recentchanges'
+ if self.has_api() and self.versionnumber() >= 10:
+ gen = self.recentchanges(number=number, rcshow=rcshow, rctype='new',
+ namespace=namespace, repeat=repeat,
+ user=user, returndict=True)
+ for newpage, pageitem in gen:
+ if returndict:
+ yield (newpage, pageitem)
+ else:
+ yield (newpage, pageitem['timestamp'], pageitem['newlen'],
+ u'', pageitem['user'], pageitem['comment'])
+ else:
+ seen = set()
+ while True:
path = self.newpages_address(n=number, namespace=namespace)
# The throttling is important here, so always enabled.
get_throttle()
@@ -6542,8 +6538,8 @@
seen.add(title)
page = Page(self, title)
yield page, date, length, loggedIn, username, comment
- if not repeat:
- break
+ if not repeat:
+ break
def longpages(self, number = 10, repeat = False):
"""Yield Pages from Special:Longpages.
@@ -6797,6 +6793,7 @@
if rcshow: params['rcshow'] = rcshow
if rctype: params['rctype'] = rctype
+ seen = set()
while True:
data = query.GetData(params, self, encodeTitle = False)
if 'error' in data:
@@ -6807,14 +6804,16 @@
raise ServerError("The APIs don't return data, the site may be down")
for i in rcData:
- page = Page(self, i['title'], defaultNamespace=i['ns'])
- if returndict:
- yield page, i
- else:
- comment = ''
- if 'comment' in i:
- comment = i['comment']
- yield page, i['timestamp'], i['newlen'], True, i['user'], comment
+ if i['pageid'] not in seen:
+ seen.add(i['pageid'])
+ page = Page(self, i['title'], defaultNamespace=i['ns'])
+ if returndict:
+ yield page, i
+ else:
+ comment = u''
+ if 'comment' in i:
+ comment = i['comment']
+ yield page, i['timestamp'], i['newlen'], True, i['user'], comment
if not repeat:
break