jenkins-bot has submitted this change and it was merged.
Change subject: Add RepeatingGenerator
......................................................................
Add RepeatingGenerator
In compat, we have many generators such as logpages(), recentchanges() with parameter `repeat`, while in core this parameter is missing. RepeatingGenerator is a way to bring its functionality back.
Change-Id: I3da94de6bec0d5638f7be39597db90f7a1e7bc6d --- M pywikibot/pagegenerators.py M pywikibot/site.py M tests/pagegenerators_tests.py 3 files changed, 72 insertions(+), 3 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved XZise: Looks good to me, but someone else must approve jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index a46e27c..a26c58d 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -22,6 +22,7 @@ import codecs import itertools import re +import time import pywikibot from pywikibot import date, config, i18n from pywikibot.tools import deprecate_arg @@ -983,6 +984,56 @@ yield page.toggleTalkPage()
+def RepeatingGenerator(generator, key_func=lambda x: x, sleep_duration=60, + total=None, **kwargs): + """Yield items in live time. + + The provided generator must support parameter 'start', 'end', + 'reverse', and 'total' such as site.recentchanges(), site.logevents(). + + For example: + + To fetch revisions in recentchanges in live time, call + gen = RepeatingGenerator(site.recentchanges, lambda x: x['revid']) + + To fetch new pages in live time, call + gen = RepeatingGenerator(site.newpages, lambda x: x[0]) + + Note that other parameters not listed below will be passed + to the generator function. Parameter 'reverse', 'start', 'end' + will always be discarded to prevent the generator yielding items + in wrong order. + + @param generator: a function returning a generator that will be queried + @param key_func: a function returning key that will be used to detect + duplicate entry + @param sleep_duration: duration between each query + @param total: if it is a positive number, iterate no more than this + number of items in total. 
Otherwise, iterate forever + @type total: int or None + @return: a generator yielding items in ascending order by time + """ + kwargs.pop('reverse', None) # always get newest item first + kwargs.pop('start', None) # don't set start time + kwargs.pop('end', None) # don't set stop time + + seen = set() + while total is None or len(seen) < total: + def filtered_generator(): + for item in generator(total=None if seen else 1, **kwargs): + key = key_func(item) + if key not in seen: + seen.add(key) + yield item + if len(seen) == total: + return + else: + break + time.sleep(sleep_duration) + for item in list(filtered_generator())[::-1]: + yield item + + @deprecate_arg("pageNumber", "step") @deprecate_arg("lookahead", None) def PreloadingGenerator(generator, step=50): diff --git a/pywikibot/site.py b/pywikibot/site.py index 13da02c..3d6ddef 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -4010,9 +4010,7 @@ """Yield new articles (as Page objects) from recent changes.
Starts with the newest article and fetches the number of articles - specified in the first argument. If repeat is True, it fetches - Newpages again. If there is no new page, it blocks until there is - one, sleeping between subsequent fetches of Newpages. + specified in the first argument.
The objects yielded are dependent on parameter returndict. When true, it yields a tuple composed of a Page object and a dict of diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py old mode 100644 new mode 100755 index 706ea11..1bcfa34 --- a/tests/pagegenerators_tests.py +++ b/tests/pagegenerators_tests.py @@ -128,6 +128,26 @@ quantifier='none') self.assertEqual(len(tuple(gen)), 9)
+ def test_RepeatingGenerator(self): + self.assertFunction("RepeatingGenerator") + # site.recentchanges() includes external edits (from wikidata), + # so total=4 is not too high + items = list( + pagegenerators.RepeatingGenerator(self.site.recentchanges, + key_func=lambda x: x['revid'], + sleep_duration=10, + reverse=True, + namespaces=[0], + total=4) + ) + self.assertEqual(len(items), 4) + timestamps = [pywikibot.Timestamp.fromISOformat(item['timestamp']) + for item in items] + self.assertEqual(sorted(timestamps), timestamps) + self.assertTrue(all(item['ns'] == 0 for item in items)) + self.assertEqual(len(set(item['revid'] for item in items)), 4) + + if __name__ == "__main__": try: unittest.main()
pywikibot-commits@lists.wikimedia.org