jenkins-bot has submitted this change and it was merged.
Change subject: Implement a preloading generator for ItemPages ......................................................................
Implement a preloading generator for ItemPages
Since it becomes nearly impossible to find which specific item lines up with which original page object, we just create new objects and yield those.
WikibasePage.__defined_by had to be renamed so it is no longer a private method. Change-Id: I05917eb982f41dcdc92d8e45292ba6c27eac47f7 --- M pywikibot/page.py M pywikibot/pagegenerators.py M pywikibot/site.py M tests/site_tests.py 4 files changed, 69 insertions(+), 2 deletions(-)
Approvals: Merlijn van Deen: Looks good to me, approved. jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py index 7956ae1..1244a72 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -2293,7 +2293,11 @@ del self._link._title return Page(self).title(**kwargs)
+ @deprecated("_defined_by") def __defined_by(self, singular=False): + return self._defined_by(singular=singular) + + def _defined_by(self, singular=False): """ returns the parameters needed by the API to identify an item. Once an item's "p/q##" is looked up, that will be used for all future @@ -2342,7 +2346,7 @@ args can be used to specify custom props. """ if force or not hasattr(self, '_content'): - data = self.repo.loadcontent(self.__defined_by(), *args) + data = self.repo.loadcontent(self._defined_by(), *args) self.id = data.keys()[0] self._content = data[self.id] if 'lastrevid' in self._content: @@ -2429,7 +2433,7 @@ baserevid = self.lastrevid else: baserevid = None - updates = self.repo.editEntity(self.__defined_by(singular=True), data, + updates = self.repo.editEntity(self._defined_by(singular=True), data, baserevid=baserevid, **kwargs) self.lastrevid = updates['entity']['lastrevid']
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index c6e8c7d..0fbfa63 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -797,16 +797,44 @@ site = page.site sites.setdefault(site, []).append(page) if len(sites[site]) >= step: + # if this site is at the step, process it group = sites[site] sites[site] = [] for i in site.preloadpages(group, step): yield i for site in sites: if sites[site]: + # process any leftover sites that never reached the step for i in site.preloadpages(sites[site], step): yield i
+def PreloadingItemGenerator(generator, step=50): + """ + Yield preloaded pages taken from another generator. + + Function basically is copied from above, but for ItemPage's + + @param generator: pages to iterate over + @param step: how many pages to preload at once + """ + sites = {} + for page in generator: + site = page.site + sites.setdefault(site, []).append(page) + if len(sites[site]) >= step: + # if this site is at the step, process it + group = sites[site] + sites[site] = [] + for i in site.preloaditempages(group, step): + yield i + for site in sites: + if sites[site]: + # process any leftover sites that never reached the step + for i in site.preloaditempages(sites[site], step): + yield i + + def NewimagesPageGenerator(step=None, total=None, site=None): if site is None: site = pywikibot.Site() diff --git a/pywikibot/site.py b/pywikibot/site.py index ef00047..920b414 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -3398,6 +3398,32 @@ raise pywikibot.data.api.APIError, data['errors'] return data['entities']
+ def preloaditempages(self, pagelist, groupsize=50): + """Yields ItemPages with content prefilled. + + Note that pages will be iterated in a different order + than in the underlying pagelist. + + @param pagelist: an iterable that yields ItemPage objects + @param groupsize: how many pages to query at a time + @type groupsize: int + """ + from pywikibot.tools import itergroup + for sublist in itergroup(pagelist, groupsize): + req = {'ids': [], 'titles': [], 'sites': []} + for p in sublist: + ident = p._defined_by() + for key in ident: + req[key].append(ident[key]) + + req = api.Request(site=self, action='wbgetentities', **req) + data = req.submit() + for qid in data['entities']: + item = pywikibot.ItemPage(self, qid) + item._content = data['entities'][qid] + item.get() # This parses the json and preloads the various properties + yield item + def getPropertyType(self, prop): """ This is used sepecifically because we can cache diff --git a/tests/site_tests.py b/tests/site_tests.py index cb29a0e..bf9e33f 100644 --- a/tests/site_tests.py +++ b/tests/site_tests.py @@ -202,6 +202,15 @@ if count >= 5: break
+ def testItemPreload(self): + """Test that ItemPage preloading works""" + + datasite = mysite.data_repository() + + items = [pywikibot.ItemPage(datasite, 'q' + str(num)) for num in range(1, 11)] + for page in datasite.preloaditempages(items): + self.assertTrue(hasattr(page, '_content')) + def testLinkMethods(self): """Test site methods for getting links to and from a page"""