jenkins-bot has submitted this change and it was merged.
Change subject: Implement a preloading generator for ItemPages
......................................................................
Implement a preloading generator for ItemPages
Since it becomes nearly impossible to find which specific
item lines up with which original page object, we just
create new objects and yield those.
WikibasePage.__defined_by had to be renamed so it is
no longer a private method.
Change-Id: I05917eb982f41dcdc92d8e45292ba6c27eac47f7
---
M pywikibot/page.py
M pywikibot/pagegenerators.py
M pywikibot/site.py
M tests/site_tests.py
4 files changed, 69 insertions(+), 2 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 7956ae1..1244a72 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -2293,7 +2293,11 @@
del self._link._title
return Page(self).title(**kwargs)
+ @deprecated("_defined_by")
def __defined_by(self, singular=False):
+ return self._defined_by(singular=singular)
+
+ def _defined_by(self, singular=False):
"""
returns the parameters needed by the API to identify an item.
Once an item's "p/q##" is looked up, that will be used for all
future
@@ -2342,7 +2346,7 @@
args can be used to specify custom props.
"""
if force or not hasattr(self, '_content'):
- data = self.repo.loadcontent(self.__defined_by(), *args)
+ data = self.repo.loadcontent(self._defined_by(), *args)
self.id = data.keys()[0]
self._content = data[self.id]
if 'lastrevid' in self._content:
@@ -2429,7 +2433,7 @@
baserevid = self.lastrevid
else:
baserevid = None
- updates = self.repo.editEntity(self.__defined_by(singular=True), data,
+ updates = self.repo.editEntity(self._defined_by(singular=True), data,
baserevid=baserevid, **kwargs)
self.lastrevid = updates['entity']['lastrevid']
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index c6e8c7d..0fbfa63 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -797,16 +797,44 @@
site = page.site
sites.setdefault(site, []).append(page)
if len(sites[site]) >= step:
+ # if this site is at the step, process it
group = sites[site]
sites[site] = []
for i in site.preloadpages(group, step):
yield i
for site in sites:
if sites[site]:
+ # process any leftover sites that never reached the step
for i in site.preloadpages(sites[site], step):
yield i
+def PreloadingItemGenerator(generator, step=50):
+ """
+ Yield preloaded pages taken from another generator.
+
+ Function basically is copied from above, but for ItemPage's
+
+ @param generator: pages to iterate over
+ @param step: how many pages to preload at once
+ """
+ sites = {}
+ for page in generator:
+ site = page.site
+ sites.setdefault(site, []).append(page)
+ if len(sites[site]) >= step:
+ # if this site is at the step, process it
+ group = sites[site]
+ sites[site] = []
+ for i in site.preloaditempages(group, step):
+ yield i
+ for site in sites:
+ if sites[site]:
+ # process any leftover sites that never reached the step
+ for i in site.preloaditempages(sites[site], step):
+ yield i
+
+
def NewimagesPageGenerator(step=None, total=None, site=None):
if site is None:
site = pywikibot.Site()
diff --git a/pywikibot/site.py b/pywikibot/site.py
index ef00047..920b414 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -3398,6 +3398,32 @@
raise pywikibot.data.api.APIError, data['errors']
return data['entities']
+ def preloaditempages(self, pagelist, groupsize=50):
+ """Yields ItemPages with content prefilled.
+
+ Note that pages will be iterated in a different order
+ than in the underlying pagelist.
+
+ @param pagelist: an iterable that yields ItemPage objects
+ @param groupsize: how many pages to query at a time
+ @type groupsize: int
+ """
+ from pywikibot.tools import itergroup
+ for sublist in itergroup(pagelist, groupsize):
+ req = {'ids': [], 'titles': [], 'sites': []}
+ for p in sublist:
+ ident = p._defined_by()
+ for key in ident:
+ req[key].append(ident[key])
+
+ req = api.Request(site=self, action='wbgetentities', **req)
+ data = req.submit()
+ for qid in data['entities']:
+ item = pywikibot.ItemPage(self, qid)
+ item._content = data['entities'][qid]
+ item.get() # This parses the json and preloads the various properties
+ yield item
+
def getPropertyType(self, prop):
"""
This is used specifically because we can cache
diff --git a/tests/site_tests.py b/tests/site_tests.py
index cb29a0e..bf9e33f 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -202,6 +202,15 @@
if count >= 5:
break
+ def testItemPreload(self):
+ """Test that ItemPage preloading works"""
+
+ datasite = mysite.data_repository()
+
+ items = [pywikibot.ItemPage(datasite, 'q' + str(num)) for num in range(1, 11)]
+ for page in datasite.preloaditempages(items):
+ self.assertTrue(hasattr(page, '_content'))
+
def testLinkMethods(self):
"""Test site methods for getting links to and from a
page"""
--
To view, visit
https://gerrit.wikimedia.org/r/80789
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I05917eb982f41dcdc92d8e45292ba6c27eac47f7
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot