jenkins-bot merged this change.

View Change

Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
[IMPR] Make preloading generators work with arbitrary entity types

Bug: T160397
Change-Id: Ie068ca3427063ff13ba4545544a1b3965ab7d88d
---
M pywikibot/pagegenerators.py
M pywikibot/site.py
M tests/site_tests.py
3 files changed, 47 insertions(+), 32 deletions(-)

diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index ab4ed51..429336a 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -14,7 +14,7 @@
&params;
"""
#
-# (C) Pywikibot team, 2008-2017
+# (C) Pywikibot team, 2008-2018
#
# Distributed under the terms of the MIT license.
#
@@ -2073,11 +2073,11 @@


@deprecated_args(step='groupsize')
-def PreloadingItemGenerator(generator, groupsize=50):
+def PreloadingEntityGenerator(generator, groupsize=50):
"""
Yield preloaded pages taken from another generator.

- Function basically is copied from above, but for ItemPage's
+ Function basically is copied from above, but for Wikibase entities.

@param generator: pages to iterate over
@param groupsize: how many pages to preload at once
@@ -2085,26 +2085,16 @@
"""
sites = {}
for page in generator:
- if not isinstance(page, pywikibot.page.WikibasePage):
- datasite = page.site.data_repository()
- if page.namespace() != datasite.item_namespace:
- pywikibot.output(
- u'PreloadingItemGenerator skipping %s as it is not in %s'
- % (page, datasite.item_namespace))
- continue
-
- page = pywikibot.ItemPage(datasite, page.title())
-
site = page.site
sites.setdefault(site, []).append(page)
if len(sites[site]) >= groupsize:
# if this site is at the groupsize, process it
group = sites.pop(site)
- for i in site.preloaditempages(group, groupsize):
+ for i in site.preload_entities(group, groupsize):
yield i
for site, pages in sites.items():
# process any leftover sites that never reached the groupsize
- for i in site.preloaditempages(pages, groupsize):
+ for i in site.preload_entities(pages, groupsize):
yield i


@@ -2982,6 +2972,8 @@
yield page


+PreloadingItemGenerator = redirect_func(PreloadingEntityGenerator,
+ old_name='PreloadingItemGenerator')
# Deprecated old names available for compatibility with compat.
ImageGenerator = redirect_func(PageClassGenerator, old_name='ImageGenerator')
FileGenerator = redirect_func(PageClassGenerator, old_name='FileGenerator')
diff --git a/pywikibot/site.py b/pywikibot/site.py
index 1eff176..152da7c 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -6,7 +6,7 @@
groups of wikis on the same topic in different languages.
"""
#
-# (C) Pywikibot team, 2008-2017
+# (C) Pywikibot team, 2008-2018
#
# Distributed under the terms of the MIT license.
#
@@ -7351,6 +7351,10 @@
super(DataSite, self).__init__(*args, **kwargs)
self._item_namespace = None
self._property_namespace = None
+ self._type_to_class = {
+ 'item': pywikibot.ItemPage,
+ 'property': pywikibot.PropertyPage,
+ }

def _cache_entity_namespaces(self):
"""Find namespaces for each known wikibase entity type."""
@@ -7587,9 +7591,9 @@
raise api.APIError(data['errors'])
return data['entities']

- def preloaditempages(self, pagelist, groupsize=50):
+ def preload_entities(self, pagelist, groupsize=50):
"""
- Yield ItemPages with content prefilled.
+ Yield subclasses of WikibasePage's with content prefilled.

Note that pages will be iterated in a different order
than in the underlying pagelist.
@@ -7607,23 +7611,34 @@
for key in ident:
req[key].append(ident[key])
else:
- assert p.site.has_data_repository, \
- 'Site must have a data repository'
- if (p.site == p.site.data_repository() and
- p.namespace() == p.data_repository.item_namespace):
+ if p.site == self and p.namespace() in (
+ self.item_namespace, self.property_namespace):
req['ids'].append(p.title(withNamespace=False))
else:
+ assert p.site.has_data_repository, \
+ 'Site must have a data repository'
req['sites'].append(p.site.dbName())
req['titles'].append(p._link._text)

req = self._simple_request(action='wbgetentities', **req)
data = req.submit()
- for qid in data['entities']:
- item = pywikibot.ItemPage(self, qid)
- item._content = data['entities'][qid]
+ for entity in data['entities']:
+ if 'missing' in data['entities'][entity]:
+ continue
+ cls = self._type_to_class[data['entities'][entity]['type']]
+ page = cls(self, entity)
# No api call is made because item._content is given
- item.get(get_redirect=True)
- yield item
+ page._content = data['entities'][entity]
+ try:
+ page.get() # cannot provide get_redirect=True (T145971)
+ except pywikibot.IsRedirectPage:
+ pass
+ yield page
+
+ @deprecated('DataSite.preload_entities')
+ def preloaditempages(self, pagelist, groupsize=50):
+ """DEPRECATED."""
+ return self.preload_entities(pagelist, groupsize)

def getPropertyType(self, prop):
"""
diff --git a/tests/site_tests.py b/tests/site_tests.py
index f2dd8ef..81700ca 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -3007,7 +3007,7 @@

class TestDataSitePreloading(WikidataTestCase):

- """Test DataSite.preloaditempages for repo pages."""
+ """Test DataSite.preload_entities for repo pages."""

def test_item(self):
"""Test that ItemPage preloading works for Item objects."""
@@ -3016,7 +3016,7 @@
for num in range(1, 6)]

seen = []
- for item in datasite.preloaditempages(items):
+ for item in datasite.preload_entities(items):
self.assertIsInstance(item, pywikibot.ItemPage)
self.assertTrue(hasattr(item, '_content'))
self.assertNotIn(item, seen)
@@ -3031,24 +3031,32 @@
for num in range(1, 6)]

seen = []
- for item in datasite.preloaditempages(pages):
+ for item in datasite.preload_entities(pages):
self.assertIsInstance(item, pywikibot.ItemPage)
self.assertTrue(hasattr(item, '_content'))
self.assertNotIn(item, seen)
seen.append(item)
self.assertEqual(len(seen), 5)

+ def test_property(self):
+ """Test that preloading works for properties."""
+ datasite = self.get_repo()
+ page = pywikibot.Page(datasite, 'P6')
+ property_page = next(datasite.preload_entities([page]))
+ self.assertIsInstance(property_page, pywikibot.PropertyPage)
+ self.assertTrue(hasattr(property_page, '_content'))
+

class TestDataSiteClientPreloading(DefaultWikidataClientTestCase):

- """Test DataSite.preloaditempages for client pages."""
+ """Test DataSite.preload_entities for client pages."""

def test_non_item(self):
"""Test that ItemPage preloading works with Page generator."""
mainpage = self.get_mainpage()
datasite = self.get_repo()

- item = next(datasite.preloaditempages([mainpage]))
+ item = next(datasite.preload_entities([mainpage]))
self.assertIsInstance(item, pywikibot.ItemPage)
self.assertTrue(hasattr(item, '_content'))
self.assertEqual(item.id, 'Q5296')

To view, visit change 342588. To unsubscribe, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie068ca3427063ff13ba4545544a1b3965ab7d88d
Gerrit-Change-Number: 342588
Gerrit-PatchSet: 9
Gerrit-Owner: Matěj Suchánek <matejsuchanek97@gmail.com>
Gerrit-Reviewer: Dalba <dalba.wiki@gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki@gmail.com>
Gerrit-Reviewer: Multichill <maarten@mdammers.nl>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot <>