XZise has submitted this change and it was merged.
Change subject: Support items not stored in repo ......................................................................
Support items not stored in repo
ItemPage initialiser allowed title to be None, however it would cause all methods including exists() to fail, throw different exceptions and 'id' would change after an exception.
Replace magic 'null' inside ItemPage.fromPage with '-1' as a consistent way to refer to an item which does not have a Qxx allocated by the repository. - ItemPage.__init__(site, title=None) uses title '-1' - update getID() and title() to output '-1' - always throw InvalidTitle for incorrect Item and Property titles
Add lazy_load param to ItemPage.fromPage which is used to synchronously raise NoPage if page does not exist or does not have an item in the repository, simplifying error handling in scripts. All related code can be found with a search for 'lazy_load'.
Move WikibasePage.title() logic for namespace()==0 into ItemPage.
After item.editEntity, refresh the item object if a Q was allocated to it by the repository.
This complements change Ibe77c51fc5cf3dae8ece533cc5787bf960f0ebed allowing an empty item to be instantiated and populated before submitting it to the repository.
Change-Id: Ibea8f84242a6e938882059f5d5fa28394b8a7a23 --- M pywikibot/page.py M tests/wikibase_edit_tests.py M tests/wikibase_tests.py 3 files changed, 208 insertions(+), 102 deletions(-)
Approvals: XZise: Looks good to me, approved
diff --git a/pywikibot/page.py b/pywikibot/page.py index d9287f5..8a083a4 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -2704,12 +2704,21 @@ """ Internal function to provide the API parameters to identify the entity.
+ The API parameters may be 'id' if the ItemPage has one, + or 'site'&'title' if instantiated via ItemPage.fromPage with + lazy_load enabled. + Once an item's "p/q##" is looked up, that will be used for all future requests. + + An empty dict is returned if the ItemPage is instantiated without + either ID (internally it has id = '-1') or site&title.
@param singular: Whether the parameter names should use the singular form @type singular: bool + @return: API parameters + @rtype: dict """ params = {} if singular: @@ -2720,18 +2729,24 @@ id = 'ids' site = 'sites' title = 'titles' + + lazy_loading_id = not hasattr(self, 'id') and hasattr(self, '_site') + # id overrides all if hasattr(self, 'id'): - params[id] = self.id - return params - - # the rest only applies to ItemPages, but is still needed here. - if hasattr(self, '_site') and hasattr(self, '_title'): + if self.id != '-1': + params[id] = self.id + elif lazy_loading_id: params[site] = self._site.dbName() params[title] = self._title else: - quit() - params[id] = self.getID() + # if none of the above applies, this item is in an invalid state + # which needs to be raise as an exception, but also logged in case + # an exception handler is catching the generic Error. + pywikibot.error('%s is in invalid state' % + self.__class__.__name__) + raise pywikibot.Error('%s is in invalid state' % + self.__class__.__name__)
return params
@@ -2767,9 +2782,16 @@ """ lazy_loading_id = not hasattr(self, 'id') and hasattr(self, '_site') if force or not hasattr(self, '_content'): - data = self.repo.loadcontent(self._defined_by(), *args) - self.id = list(data.keys())[0] - self._content = data[self.id] + identification = self._defined_by() + if not identification: + raise pywikibot.NoPage(self) + + data = self.repo.loadcontent(identification, *args) + item_index = list(data.keys())[0] + if lazy_loading_id or item_index != '-1': + self.id = item_index + + self._content = data[item_index] if 'lastrevid' in self._content: self.lastrevid = self._content['lastrevid'] else: @@ -2818,7 +2840,8 @@ if not hasattr(self, 'id') or force: self.get(force=force) if numeric: - return int(self.id[1:]) + return int(self.id[1:]) if self.id != '-1' else -1 + return self.id
def latestRevision(self): @@ -2911,6 +2934,10 @@ baserevid=baserevid, **kwargs) self.lastrevid = updates['entity']['lastrevid']
+ lazy_loading_id = not hasattr(self, 'id') and hasattr(self, '_site') + if lazy_loading_id or self.id == '-1': + self.__init__(self.site, title=updates['entity']['id']) + def editLabels(self, labels, **kwargs): """ Edit entity labels. @@ -2964,49 +2991,97 @@
@param site: data repository @type site: pywikibot.site.DataSite - @param title: id number of item, "Q###" + @param title: id number of item, "Q###", + -1 or None for an empty item. @type title: str """ + # Special case for empty item + if title is None or title == '-1': + super(ItemPage, self).__init__(site, u'-1', ns=0) + self.id = u'-1' + return + super(ItemPage, self).__init__(site, title, ns=site.item_namespace) - self.id = self._link.title.upper() + + # Link.__init__, called from Page.__init__, has cleaned the title + # stripping whitespace and uppercasing the first letter according + # to the namespace case=first-letter. + + # Validate the title is 'Q' and a positive integer. + if not re.match('^Q[1-9]\d*$', self._link.title): + raise pywikibot.InvalidTitle( + u"'%s' is not a valid item page title" + % self._link.title) + + self.id = self._link.title
def title(self, **kwargs): """ - Get the title of the page. + Return ID as title of the ItemPage. + + If the ItemPage was lazy-loaded via ItemPage.fromPage, this method + will fetch the wikibase item ID for the page, potentially raising + NoPage with the page on the linked wiki if it does not exist, or + does not have a corresponding wikibase item ID. + + This method also refreshes the title if the id property was set. + i.e. item.id = 'Q60'
All optional keyword parameters are passed to the superclass. """ - # If the item was instantiated without an ID, - # remove the existing Link title, force the Link text to be reparsed. - self.getID() - if self._link._text != self.id: - self._link._text = self.id - del self._link._title + # If instantiated via ItemPage.fromPage using site and title, + # _site and _title exist, and id does not exist. + lazy_loading_id = not hasattr(self, 'id') and hasattr(self, '_site') + + if lazy_loading_id or self._link._text != self.id: + # If the item is lazy loaded or has been modified, + # _link._text is stale. Removing _link._title + # forces Link to re-parse ._text into ._title. + if hasattr(self._link, '_title'): + del self._link._title + self._link._text = self.getID() + self._link.parse() + # Remove the temporary values that are no longer needed after + # the .getID() above has called .get(), which populated .id + if hasattr(self, '_site'): + del self._title + del self._site
return super(ItemPage, self).title(**kwargs)
@classmethod - def fromPage(cls, page): + def fromPage(cls, page, lazy_load=False): """ Get the ItemPage for a Page that links to it.
- @param page: Page + @exception NoPage There is no corresponding ItemPage for the page + @param page: Page to look for corresponding data item + @type page: pywikibot.Page + @param lazy_load: Do not raise NoPage if either page or corresponding + ItemPage does not exist. + @type lazy_load: bool @return: ItemPage """ if not page.site.has_transcluded_data: raise pywikibot.WikiBaseError(u'%s has no transcluded data' % page.site) + if not lazy_load and not page.exists(): + raise pywikibot.NoPage(page) + repo = page.site.data_repository() if hasattr(page, '_pageprops') and page.properties().get('wikibase_item'): # If we have already fetched the pageprops for something else, # we already have the id, so use it return cls(repo, page.properties().get('wikibase_item')) - i = cls(repo, 'null') + i = cls(repo) + # clear id, and temporarily store data needed to lazy loading the item del i.id i._site = page.site i._title = page.title() + if not lazy_load and not i.exists(): + raise pywikibot.NoPage(i) return i
def get(self, force=False, *args, **kwargs): @@ -3273,7 +3348,8 @@ Property.__init__(self, source, title) self.id = self.title(withNamespace=False).upper() if not self.id.startswith(u'P'): - raise ValueError(u"'%s' is not a property page!" % self.title()) + raise pywikibot.InvalidTitle( + u"'%s' is not an property page title" % title)
def get(self, force=False, *args): """ diff --git a/tests/wikibase_edit_tests.py b/tests/wikibase_edit_tests.py index 4a21a5d..b4e7247 100644 --- a/tests/wikibase_edit_tests.py +++ b/tests/wikibase_edit_tests.py @@ -94,9 +94,7 @@ } } } - item = pywikibot.ItemPage(testsite, 'null') - item._defined_by = lambda singular=None: {} - #del item.id + item = pywikibot.ItemPage(testsite) item.editEntity(data)
diff --git a/tests/wikibase_tests.py b/tests/wikibase_tests.py index bee7313..d367afb 100644 --- a/tests/wikibase_tests.py +++ b/tests/wikibase_tests.py @@ -13,7 +13,6 @@ from pywikibot import pagegenerators from pywikibot.page import WikibasePage from pywikibot.site import Namespace -from pywikibot.data.api import APIError import json import copy
@@ -119,16 +118,19 @@
class TestItemLoad(WikidataTestCase):
- """Test each of the three code paths for item creation: - 1. by Q id - 2. ItemPage.fromPage(page) - 3. ItemPage.fromPage(page_with_props_loaded) + """ + Test item creation.
- Test various invalid scenarios: - 1. invalid Q ids - 2. invalid pages to fromPage - 3. missing pages to fromPage - 4. unconnected pages to fromPage + Tests for item creation include: + 1. by Q id + 2. ItemPage.fromPage(page) + 3. ItemPage.fromPage(page_with_props_loaded) + + Test various invalid scenarios: + 1. invalid Q ids + 2. invalid pages to fromPage + 3. missing pages to fromPage + 4. unconnected pages to fromPage """
sites = { @@ -153,15 +155,16 @@ wikidata = self.get_repo() item = pywikibot.ItemPage(wikidata, 'Q60') self.assertEqual(item._link._title, 'Q60') + self.assertEqual(item._defined_by(), {u'ids': u'Q60'}) self.assertEqual(item.id, 'Q60') - self.assertEqual(hasattr(item, '_title'), False) - self.assertEqual(hasattr(item, '_site'), False) + self.assertFalse(hasattr(item, '_title')) + self.assertFalse(hasattr(item, '_site')) self.assertEqual(item.title(), 'Q60') self.assertEqual(item.getID(), 'Q60') self.assertEqual(item.getID(numeric=True), 60) - self.assertEqual(hasattr(item, '_content'), False) + self.assertFalse(hasattr(item, '_content')) item.get() - self.assertEqual(hasattr(item, '_content'), True) + self.assertTrue(hasattr(item, '_content'))
def test_load_item_set_id(self): """Test setting item.id attribute on empty item.""" @@ -169,11 +172,11 @@ item = pywikibot.ItemPage(wikidata, '-1') self.assertEqual(item._link._title, '-1') item.id = 'Q60' - self.assertEqual(hasattr(item, '_content'), False) + self.assertFalse(hasattr(item, '_content')) self.assertEqual(item.getID(), 'Q60') - self.assertEqual(hasattr(item, '_content'), False) + self.assertFalse(hasattr(item, '_content')) item.get() - self.assertEqual(hasattr(item, '_content'), True) + self.assertTrue(hasattr(item, '_content')) self.assertIn('en', item.labels) self.assertEqual(item.labels['en'], 'New York City') self.assertEqual(item.title(), 'Q60') @@ -209,33 +212,20 @@ # should not raise an error as the constructor only requires # the site parameter, with the title parameter defaulted to None wikidata = self.get_repo() - self.assertRaises(TypeError, pywikibot.ItemPage, wikidata) + item = pywikibot.ItemPage(wikidata) + self.assertEqual(item._link._title, '-1')
def test_item_invalid_titles(self): - wikidata = self.get_repo() - - def check(title, exception): - item = pywikibot.ItemPage(wikidata, title) - if title != '': - ucfirst_title = title[0].upper() + title[1:] - else: - ucfirst_title = title - self.assertEqual(item._link._title, ucfirst_title) - self.assertEqual(item.id, title.upper()) - self.assertEqual(item.title(), title.upper()) - self.assertEqual(hasattr(item, '_content'), False) - self.assertRaises(exception, item.get) - self.assertEqual(hasattr(item, '_content'), False) - self.assertEqual(item.title(), title.upper()) - - check('', KeyError) - - for title in ['-1', '1', 'Q0.5', 'NULL', 'null', 'Q', 'Q-1']: - check(title, APIError) + for title in ['null', 'NULL', 'None', '', + '-2', '1', '0', '+1', + 'Q0', 'Q0.5', 'Q', 'Q-1', 'Q+1']: + self.assertRaises(pywikibot.InvalidTitle, + pywikibot.ItemPage, wikidata, title)
def test_item_untrimmed_title(self): wikidata = self.get_repo() + # spaces in the title should not cause an error item = pywikibot.ItemPage(wikidata, ' Q60 ') self.assertEqual(item._link._title, 'Q60') self.assertEqual(item.title(), 'Q60') @@ -247,30 +237,44 @@ item = pywikibot.ItemPage(wikidata, 'Q404') self.assertEqual(item._link._title, 'Q404') self.assertEqual(item.title(), 'Q404') - self.assertEqual(hasattr(item, '_content'), False) + self.assertFalse(hasattr(item, '_content')) self.assertEqual(item.id, 'Q404') self.assertEqual(item.getID(), 'Q404') self.assertEqual(item.getID(numeric=True), 404) - self.assertEqual(hasattr(item, '_content'), False) + self.assertFalse(hasattr(item, '_content')) self.assertRaises(pywikibot.NoPage, item.get) - self.assertEqual(hasattr(item, '_content'), True) + self.assertTrue(hasattr(item, '_content')) + self.assertEqual(item.id, 'Q404') + self.assertEqual(item.getID(), 'Q404') self.assertEqual(item._link._title, 'Q404') self.assertEqual(item.title(), 'Q404') - self.assertEqual(item.exists(), False) + self.assertRaises(pywikibot.NoPage, item.get) + self.assertTrue(hasattr(item, '_content')) + self.assertEqual(item._link._title, 'Q404') + self.assertEqual(item.getID(), 'Q404') + self.assertEqual(item.title(), 'Q404') + + def test_item_never_existed(self): + wikidata = self.get_repo() + # this item has not been created + item = pywikibot.ItemPage(wikidata, 'Q9999999999999999999') + self.assertFalse(item.exists()) + self.assertEqual(item.getID(), 'Q9999999999999999999') + self.assertRaises(pywikibot.NoPage, item.get)
def test_fromPage_noprops(self): page = self.nyc item = pywikibot.ItemPage.fromPage(page) - self.assertEqual(item._link._title, 'Null') # not good - self.assertEqual(hasattr(item, 'id'), False) - self.assertEqual(hasattr(item, '_content'), False) + self.assertEqual(item._link._title, '-1') + self.assertTrue(hasattr(item, 'id')) + self.assertTrue(hasattr(item, '_content')) self.assertEqual(item.title(), 'Q60') - self.assertEqual(hasattr(item, '_content'), True) + self.assertTrue(hasattr(item, '_content')) self.assertEqual(item.id, 'Q60') self.assertEqual(item.getID(), 'Q60') self.assertEqual(item.getID(numeric=True), 60) item.get() - self.assertEqual(item.exists(), True) + self.assertTrue(item.exists())
def test_fromPage_props(self): page = self.nyc @@ -279,16 +283,32 @@ item = pywikibot.ItemPage.fromPage(page) self.assertEqual(item._link._title, 'Q60') self.assertEqual(item.id, 'Q60') - self.assertEqual(hasattr(item, '_content'), False) + self.assertFalse(hasattr(item, '_content')) self.assertEqual(item.title(), 'Q60') - self.assertEqual(hasattr(item, '_content'), False) + self.assertFalse(hasattr(item, '_content')) self.assertEqual(item.id, 'Q60') self.assertEqual(item.getID(), 'Q60') self.assertEqual(item.getID(numeric=True), 60) - self.assertEqual(hasattr(item, '_content'), False) + self.assertFalse(hasattr(item, '_content')) item.get() - self.assertEqual(hasattr(item, '_content'), True) - self.assertEqual(item.exists(), True) + self.assertTrue(hasattr(item, '_content')) + self.assertTrue(item.exists()) + + def test_fromPage_lazy(self): + page = pywikibot.Page(pywikibot.page.Link("New York City", self.site)) + item = pywikibot.ItemPage.fromPage(page, lazy_load=True) + self.assertEqual(item._defined_by(), + {'sites': u'enwiki', 'titles': u'New York City'}) + self.assertEqual(item._link._title, '-1') + self.assertFalse(hasattr(item, 'id')) + self.assertFalse(hasattr(item, '_content')) + self.assertEqual(item.title(), 'Q60') + self.assertTrue(hasattr(item, '_content')) + self.assertEqual(item.id, 'Q60') + self.assertEqual(item.getID(), 'Q60') + self.assertEqual(item.getID(numeric=True), 60) + item.get() + self.assertTrue(item.exists())
def test_fromPage_invalid_title(self): page = pywikibot.Page(pywikibot.page.Link("[]", self.site)) @@ -310,16 +330,17 @@ if props: page.properties()
- item = pywikibot.ItemPage.fromPage(page) - self.assertEqual(hasattr(item, 'id'), False) - self.assertEqual(hasattr(item, '_title'), True) - self.assertEqual(hasattr(item, '_site'), True) - self.assertEqual(hasattr(item, '_content'), False) + item = pywikibot.ItemPage.fromPage(page, lazy_load=True)
- self.assertEqual(item._link._title, 'Null') + self.assertFalse(hasattr(item, 'id')) + self.assertTrue(hasattr(item, '_title')) + self.assertTrue(hasattr(item, '_site')) + self.assertFalse(hasattr(item, '_content')) + + self.assertEqual(item._link._title, '-1') # the method 'exists' does not raise an exception if method == 'exists': - self.assertEqual(item.exists(), False) + self.assertFalse(item.exists()) else: self.assertRaises(pywikibot.NoPage, getattr(item, method))
@@ -333,9 +354,17 @@ if link.title != 'Test page': self.assertEqual(item._link._title, '-1')
- self.assertEqual(hasattr(item, '_content'), True) + self.assertTrue(hasattr(item, '_content'))
- self.assertEqual(item.exists(), False) + self.assertFalse(item.exists()) + + page = pywikibot.Page(link) + if props: + page.properties() + + # by default, fromPage should always raise the same exception + self.assertRaises(pywikibot.NoPage, + pywikibot.ItemPage.fromPage, page)
def test_fromPage_redirect(self): # this is a redirect, and should not have a wikidata item @@ -358,14 +387,21 @@ # this is a deleted page, and should not have a wikidata item link = pywikibot.page.Link("Test page", self.site) page = pywikibot.Page(link) - item = pywikibot.ItemPage.fromPage(page) + # ItemPage.fromPage should raise an exception when not lazy loading + # and that exception should refer to the source title 'Test page' + # not the Item being created. + self.assertRaisesRegexp(pywikibot.NoPage, 'Test page', + pywikibot.ItemPage.fromPage, + page, lazy_load=False) + + item = pywikibot.ItemPage.fromPage(page, lazy_load=True)
# Now verify that delay loading will result in the desired semantics. # It should not raise NoPage on the wikibase item which has a title # like '-1' or 'Null', as that is useless to determine the cause # without a full debug log. - # It should raise NoPage on the page, as that is what the - # bot operator needs to see in the log output. + # It should raise NoPage on the source page, with title 'Test page' + # as that is what the bot operator needs to see in the log output. self.assertRaisesRegexp(pywikibot.NoPage, 'Test page', item.get)
@@ -455,7 +491,7 @@
class TestPageMethods(WikidataTestCase):
- """Test cases to test methods of Page() behave correctly with Wikibase""" + """Test cases to test methods of Page() behave correctly with Wikibase."""
family = 'wikidata' code = 'test' @@ -583,7 +619,7 @@
class TestNamespaces(WikidataTestCase):
- """Test cases to test namespaces of Wikibase entities""" + """Test cases to test namespaces of Wikibase entities."""
def test_empty_wikibase_page(self): # As a base class it should be able to instantiate @@ -664,19 +700,15 @@ # TODO: These items have inappropriate titles, which should # raise an error. wikidata = self.get_repo() - item = pywikibot.ItemPage(wikidata, 'Invalid:Q1') - self.assertEqual(item.namespace(), 0) - self.assertEqual(item.id, 'INVALID:Q1') - self.assertEqual(item.title(), 'INVALID:Q1') - self.assertEqual(hasattr(item, '_content'), False) - self.assertRaises(APIError, item.get) - self.assertEqual(hasattr(item, '_content'), False) - self.assertEqual(item.title(), 'INVALID:Q1') + self.assertRaises(pywikibot.InvalidTitle, + pywikibot.ItemPage, + wikidata, + 'Invalid:Q1')
class TestAlternateNamespaces(TestCase):
- """Test cases to test namespaces of Wikibase entities""" + """Test cases to test namespaces of Wikibase entities."""
net = False
pywikibot-commits@lists.wikimedia.org