Revision: 6942 Author: purodha Date: 2009-06-07 05:24:19 +0000 (Sun, 07 Jun 2009)
Log Message: ----------- Initial version of a recentchanges page generator (via API)
Modified Paths: -------------- trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2009-06-07 04:40:46 UTC (rev 6941) +++ trunk/pywikipedia/pagegenerators.py 2009-06-07 05:24:19 UTC (rev 6942) @@ -133,6 +133,10 @@
-gorandom Specifies that the robot should starting at the random pages returned by [[Special:Random]]. + +-recentchanges Work on new and edited pages returned by [[Special:Recentchanges]]. + Can also be given as "-recentchanges:n" where n is the number + of pages to be returned, else 100 pages are returned. """
def RecentchangesPageGenerator(number = 100, site = None):
    """
    Yield the pages listed in a site's recent changes.

    number -- how many recent changes to request from the site (default 100)
    site   -- the site to query; wikipedia.getSite() is used when None

    Each item produced by site.recentchanges() is a sequence whose first
    element is the page; only that first element is yielded.
    """
    source = site
    if source is None:
        source = wikipedia.getSite()
    for change in source.recentchanges(number=number):
        yield change[0]
def TextfilePageGenerator(filename=None, site=None): ''' @@ -954,6 +964,11 @@ gen = RandomPageGenerator() else: gen = RandomPageGenerator(number = int(arg[8:])) + elif arg.startswith('-recentchanges'): + if len(arg) == 14: + gen = RecentchangesPageGenerator() + else: + gen = RecentchangesPageGenerator(number = int(arg[15:])) elif arg.startswith('-file'): textfilename = arg[6:] if not textfilename:
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-06-07 04:40:46 UTC (rev 6941) +++ trunk/pywikipedia/wikipedia.py 2009-06-07 05:24:19 UTC (rev 6942) @@ -4274,6 +4274,7 @@ deadendpages(): Special:Deadendpages ancientpages(): Special:Ancientpages lonelypages(): Special:Lonelypages + recentchanges(): Special:Recentchanges unwatchedpages(): Special:Unwatchedpages (sysop accounts only) uncategorizedcategories(): Special:Uncategorizedcategories (yields Category objects) @@ -5342,6 +5343,76 @@ if not repeat: break
def recentchanges(self, number = 100, rcstart = None, rcend = None, rcshow = None, rctype ='edit|new', repeat = False):
    """
    Yield the site's recent changes, fetched through the API.

    Issues action=query&list=recentchanges and yields one tuple per
    returned change:

        (Page, timestamp, user, comment, loginfo)

    user, comment and loginfo are '' when the response item does not
    carry the corresponding key.

    Parameters (rc* values are passed straight to the API):
        number  - How many changes to request (sent as rclimit).
                  The API allows no more than 500 (5000 for bots).
        rcstart - The timestamp to start enumerating from.
        rcend   - The timestamp to end enumerating.
        rcshow  - Show only items that meet this criteria.
                  For example, to see only minor edits done by
                  logged-in users, set rcshow='minor|!anon'
                  Values (separate with '|'):
                      minor, !minor, bot, !bot, anon, !anon,
                      redirect, !redirect, patrolled, !patrolled
        rctype  - Which types of changes to show.
                  Values (separate with '|'): edit, new, log
                  None is treated as 'edit|new'.
        repeat  - If false, a single request is made. If true, the same
                  request is issued again each time the previous batch
                  has been yielded, without end.

    Raises ServerError if the response contains no
    query/recentchanges data.
    """
    if rctype is None:
        rctype = 'edit|new'
    params = {
        'action'  : 'query',
        'list'    : 'recentchanges',
        'rctype'  : rctype,
        'rcprop'  : 'user|comment|timestamp|title|ids|loginfo',
        'rclimit' : int(number),
    }
    # Only send the optional filters the caller actually supplied.
    for key, value in (('rcstart', rcstart), ('rcend', rcend), ('rcshow', rcshow)):
        if value is not None:
            params[key] = value

    while True:
        data = query.GetData(params, useAPI = True, encodeTitle = False)
        try:
            rcData = data['query']['recentchanges']
        except KeyError:
            raise ServerError("The APIs don't return data, the site may be down")

        for rcItem in rcData:
            # user is defaulted like comment and loginfo: the API can omit
            # it (e.g. when the user name has been hidden), and one such
            # entry must not abort the whole enumeration.
            yield (Page(self, rcItem['title']),
                   rcItem['timestamp'],
                   rcItem.get('user', ''),
                   rcItem.get('comment', ''),
                   rcItem.get('loginfo', ''))
        if not repeat:
            break