http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9105
Revision: 9105
Author: jayvdb
Date: 2011-03-20 07:19:05 +0000 (Sun, 20 Mar 2011)
Log Message:
-----------
Fix comment
Modified Paths:
--------------
trunk/pywikipedia/patrol.py
Modified: trunk/pywikipedia/patrol.py
===================================================================
--- trunk/pywikipedia/patrol.py 2011-03-20 07:15:33 UTC (rev 9104)
+++ trunk/pywikipedia/patrol.py 2011-03-20 07:19:05 UTC (rev 9105)
@@ -33,8 +33,9 @@
"""
Constructor. Parameters:
* feed - The changes feed to work on (Newpages or Recentchanges)
- * verbose - If True, doesn't do any real changes, but only shows
- what would have been changed.
+ * user - Limit whitelist parsing and patrolling to a specific user
+ * ask - If True, confirm each patrol action
+ * whitelist - page title for whitelist (optional)
"""
self.feed = feed
self.user = user
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9101
Revision: 9101
Author: jayvdb
Date: 2011-03-18 11:32:29 +0000 (Fri, 18 Mar 2011)
Log Message:
-----------
Add returndict param to site.newpages and site.recentchanges
When true, these generators yield (Page,dict)
minor other improvements to patrol.py
Modified Paths:
--------------
trunk/pywikipedia/patrol.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/patrol.py
===================================================================
--- trunk/pywikipedia/patrol.py 2011-03-18 11:26:19 UTC (rev 9100)
+++ trunk/pywikipedia/patrol.py 2011-03-18 11:32:29 UTC (rev 9101)
@@ -297,7 +297,8 @@
pywikibot.output(u"Page %s is a redirect; skipping." % page.aslink())
return
-def newpages_feed(site, number, namespace, user, repeat):
+# This should never be used
+def old_feed_repeater(site, number, namespace, user, repeat):
while True:
gen = site.newpages(number = number, namespace = namespace, user=user, rcshow = '!patrolled')
for page in gen:
@@ -308,14 +309,14 @@
else:
break
-def recentchanges_feed(site, number, namespace, user, repeat):
+def feed_repeater(gen, delay):
while True:
- gen = site.recentchanges(number = number, namespace=namespace, user=user, rcshow = '!patrolled')
for page in gen:
- yield page[0], page[2], page[5], page[6]
+ attrs = page[1]
+ yield page[0], attrs['user'], attrs['revid'], attrs['rcid']
if repeat:
- pywikibot.output('Sleeping for 10 minutes')
- time.sleep(60)
+ pywikibot.output('Sleeping for %d minutes', delay)
+ time.sleep(delay)
else:
break
@@ -382,12 +383,14 @@
if newpages or user:
pywikibot.output(u"Newpages:")
- feed = newpages_feed(site, number = newpage_count, namespace = namespace, user=user, repeat=repeat)
+ gen = site.newpages(number = newpage_count, namespace=namespace, user=user, rcshow = '!patrolled', returndict = True)
+ feed = feed_repeater(gen, delay=60)
bot.run(feed)
if recentchanges or user:
pywikibot.output(u"Recentchanges:")
- feed = recentchanges_feed(site, number = 1000, namespace = namespace, user=user, repeat=repeat)
+ gen = site.recentchanges(number = 1000, namespace=namespace, user=user, rcshow = '!patrolled', returndict = True)
+ feed = feed_repeater(gen, delay=60)
bot.run(feed)
pywikibot.output('%d/%d patrolled' % (bot.patrol_counter, bot.rc_item_counter))
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2011-03-18 11:26:19 UTC (rev 9100)
+++ trunk/pywikipedia/wikipedia.py 2011-03-18 11:32:29 UTC (rev 9101)
@@ -6082,7 +6082,7 @@
break
return
- def newpages(self, number = 10, get_redirect = False, repeat = False, namespace = 0, rcshow = ['!bot','!redirect'], user = None):
+ def newpages(self, number = 10, get_redirect = False, repeat = False, namespace = 0, rcshow = ['!bot','!redirect'], user = None, returndict = False):
"""Yield new articles (as Page objects) from Special:Newpages.
Starts with the newest article and fetches the number of articles
@@ -6090,7 +6090,9 @@
Newpages again. If there is no new page, it blocks until there is
one, sleeping between subsequent fetches of Newpages.
- The objects yielded are tuples composed of the Page object,
+ The objects yielded are dependent on parameter returndict.
+ When true, it yields a tuple composed of a Page object and a dict of attributes.
+ When false, it yields a tuple composed of the Page object,
timestamp (unicode), length (int), an empty unicode string, username
or IP address (str), comment (unicode).
@@ -6120,7 +6122,10 @@
if np['pageid'] not in seen:
seen.add(np['pageid'])
page = Page(self, np['title'], defaultNamespace=np['ns'])
- yield page, np['timestamp'], np['newlen'], u'', np['user'], np['comment']
+ if returndict:
+ yield page, np
+ else:
+ yield page, np['timestamp'], np['newlen'], u'', np['user'], np['comment']
else:
path = self.newpages_address(n=number, namespace=namespace)
# The throttling is important here, so always enabled.
@@ -6332,11 +6337,11 @@
yield o, t, u, c
return
- def recentchanges(self, number = 100, rcstart = None, rcend = None, rcshow = None, rcdir='older', rctype ='edit|new', namespace=None, includeredirects=True, repeat = False, user = None):
+ def recentchanges(self, number = 100, rcstart = None, rcend = None, rcshow = None, rcdir='older', rctype ='edit|new', namespace=None, includeredirects=True, repeat = False, user = None, returndict = False):
"""
Yield recent changes as Page objects
uses API call: action=query&list=recentchanges&rctype=edit|new&rclimit=500
-
+
Starts with the newest change and fetches the number of changes
specified in the first argument. If repeat is True, it fetches
again.
@@ -6369,7 +6374,9 @@
rctype - Which types of changes to show.
Values (separate with '|'): edit, new, log
- The objects yielded are tuples composed of the Page object,
+ The objects yielded are dependent on parameter returndict.
+ When true, it yields a tuple composed of a Page object and a dict of attributes.
+ When false, it yields a tuple composed of the Page object,
timestamp (unicode), length (int), an empty unicode string, username
or IP address (str), comment (unicode).
@@ -6402,11 +6409,14 @@
raise ServerError("The APIs don't return data, the site may be down")
for i in rcData:
- comment = ''
- if 'comment' in i:
- comment = i['comment']
page = Page(self, i['title'], defaultNamespace=i['ns'])
- yield page, i['timestamp'], i['newlen'], u'', i['user'], comment
+ if returndict:
+ yield page, i
+ else:
+ comment = ''
+ if 'comment' in i:
+ comment = i['comment']
+ yield page, i['timestamp'], i['newlen'], True, i['user'], comment
if not repeat:
break