jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/627243 )
Change subject: [FEAT] Minimal working example for Structured data on Commons ......................................................................
[FEAT] Minimal working example for Structured data on Commons
Supported: - one-to-one file <-> mediainfo correspondence - file captions
Not supported: - statements (depicts, etc.) - supplying structured data during upload
Bug: T213904 Bug: T223820 Change-Id: I7b019a0e311ddab12b4fd19093622f7ed56386c3 --- M pywikibot/__init__.py M pywikibot/families/commons_family.py M pywikibot/page/__init__.py M pywikibot/page/_collections.py M pywikibot/site/_apisite.py M pywikibot/site/_datasite.py M tests/file_tests.py 7 files changed, 144 insertions(+), 24 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/__init__.py b/pywikibot/__init__.py index deaec89..b49938b 100644 --- a/pywikibot/__init__.py +++ b/pywikibot/__init__.py @@ -83,7 +83,7 @@ 'FilePage', 'handle_args', 'html2unicode', 'input', 'input_choice', 'input_yn', 'InterwikiRedirectPage', 'InvalidTitle', 'IsNotRedirectPage', 'IsRedirectPage', 'ItemPage', 'Link', 'LockedNoPage', 'LockedPage', 'log', - 'NoCreateError', 'NoMoveTarget', 'NoPage', 'NoUsername', + 'MediaInfo', 'NoCreateError', 'NoMoveTarget', 'NoPage', 'NoUsername', 'NoWikibaseEntity', 'OtherPageSaveError', 'output', 'Page', 'PageCreatedConflict', 'PageDeletedConflict', 'PageRelatedError', 'PageSaveRelatedError', 'PropertyPage', 'SectionError', 'Server414Error', @@ -1216,6 +1216,7 @@ FilePage, ItemPage, Link, + MediaInfo, Page, PropertyPage, SiteLink, diff --git a/pywikibot/families/commons_family.py b/pywikibot/families/commons_family.py index 67bdbb5..ee448cd 100644 --- a/pywikibot/families/commons_family.py +++ b/pywikibot/families/commons_family.py @@ -38,3 +38,7 @@ doc_subpages = { '_default': (('/doc', ), ['commons']), } + + def interface(self, code): + """Return 'DataSite'.""" + return 'DataSite' diff --git a/pywikibot/page/__init__.py b/pywikibot/page/__init__.py index 628eb62..ca64e91 100644 --- a/pywikibot/page/__init__.py +++ b/pywikibot/page/__init__.py @@ -15,6 +15,7 @@ # # Distributed under the terms of the MIT license. # +import json as jsonlib import logging import os.path import re @@ -2509,6 +2510,24 @@ """ return self.site.globalusage(self, total=total)
+ def data_item(self): + """ + Convenience function to get the associated Wikibase item of the file. + + If WikibaseMediaInfo extension is available (e.g. on Commons), + the method returns the associated mediainfo entity. Otherwise, + it falls back to behavior of BasePage.data_item. + + :rtype: pywikibot.page.WikibaseEntity + """ + if self.site.has_extension('WikibaseMediaInfo'): + if not hasattr(self, '_item'): + self._item = MediaInfo(self.site) + self._item._file = self + return self._item + + return super().data_item() +
class Category(Page):
@@ -3444,6 +3463,73 @@ return '{}{}'.format(self.repo.concept_base_uri, entity_id)
+class MediaInfo(WikibaseEntity): + + title_pattern = r'M[1-9]\d*' + DATA_ATTRIBUTES = { + 'labels': LanguageDict, + # TODO: 'statements': ClaimCollection, + } + + @property + def file(self) -> FilePage: + """Get the file associated with the mediainfo.""" + if not hasattr(self, '_file'): + if self.id == '-1': + # if the above doesn't apply, this entity is in an invalid + # state which needs to be raised as an exception, but also + # logged in case an exception handler is catching + # the generic Error + pywikibot.error('{} is in invalid state' + .format(self.__class__.__name__)) + raise Error('{} is in invalid state' + .format(self.__class__.__name__)) + + page_id = self.getID(numeric=True) + result = list(self.repo.load_pages_from_pageids([page_id])) + if not result: + raise Error('There is no existing page with id "{}"' + .format(page_id)) + + page = result.pop() + if page.namespace() != page.site.namespaces.FILE: + raise Error('Page with id "{}" is not a file'.format(page_id)) + + self._file = FilePage(page) + + return self._file + + def get(self, force: bool = False) -> dict: + if self.id == '-1': + if force: + if not self.file.exists(): + exc = NoPageError(self.file) + raise NoWikibaseEntityError(self) from exc + # get just the id for Wikibase API call + self.id = 'M' + str(self.file.pageid) + else: + try: + data = self.file.latest_revision.slots['mediainfo']['*'] + except NoPageError as exc: + raise NoWikibaseEntityError(self) from exc + + self._content = jsonlib.loads(data) + self.id = self._content['id'] + + return super().get(force=force) + + def getID(self, numeric=False): + """ + Get the entity identifier. + + :param numeric: Strip the first letter and return an int + :type numeric: bool + """ + if self.id == '-1': + self.get() + return super().getID(numeric=numeric) + + class WikibasePage(BasePage, WikibaseEntity):
""" diff --git a/pywikibot/page/_collections.py b/pywikibot/page/_collections.py index 93a8ab9..a8dae47 100644 --- a/pywikibot/page/_collections.py +++ b/pywikibot/page/_collections.py @@ -85,8 +85,10 @@ @classmethod def fromJSON(cls, data, repo=None): """Construct a new LanguageDict from JSON.""" - this = cls({key: value['value'] for key, value in data.items()}) - return this + if data != []: # workaround for T222159 + return cls({key: value['value'] for key, value in data.items()}) + else: + return cls()
@classmethod def normalizeData(cls, data: dict): diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py index da2f734..aa9d641 100644 --- a/pywikibot/site/_apisite.py +++ b/pywikibot/site/_apisite.py @@ -1009,7 +1009,8 @@
def is_data_repository(self): """Return True if its data repository is itself.""" - return self is self.data_repository() + # fixme: this was an identity check + return self == self.data_repository()
def page_from_repository(self, item): """ diff --git a/pywikibot/site/_datasite.py b/pywikibot/site/_datasite.py index 04466fb..ffe0e50 100644 --- a/pywikibot/site/_datasite.py +++ b/pywikibot/site/_datasite.py @@ -48,6 +48,7 @@ self._type_to_class = { 'item': pywikibot.ItemPage, 'property': pywikibot.PropertyPage, + 'mediainfo': pywikibot.MediaInfo, }
def _cache_entity_namespaces(self): @@ -164,18 +165,6 @@
return baserevid
- def data_repository(self): - """ - Override parent method. - - This avoids pointless API queries since the data repository - is this site by definition. - - :return: this Site object - :rtype: pywikibot.site.DataSite - """ - return self - def geo_shape_repository(self): """Return Site object for the geo-shapes repository e.g. commons.""" url = self.siteinfo['general'].get('wikibase-geoshapestoragebaseurl') @@ -219,12 +208,12 @@
def preload_entities(self, pagelist, groupsize=50): """ - Yield subclasses of WikibasePage's with content prefilled. + Yield subclasses of WikibaseEntity's with content prefilled.
Note that pages will be iterated in a different order than in the underlying pagelist.
- :param pagelist: an iterable that yields either WikibasePage objects, + :param pagelist: an iterable that yields either WikibaseEntity objects, or Page objects linked to an ItemPage. :param groupsize: how many pages to query at a time :type groupsize: int @@ -234,7 +223,7 @@ for sublist in itergroup(pagelist, groupsize): req = {'ids': [], 'titles': [], 'sites': []} for p in sublist: - if isinstance(p, pywikibot.page.WikibasePage): + if isinstance(p, pywikibot.page.WikibaseEntity): ident = p._defined_by() for key in ident: req[key].append(ident[key]) @@ -300,7 +289,7 @@
:param entity: Page to edit, or dict with API parameters to use for entity identification - :type entity: WikibasePage or dict + :type entity: WikibaseEntity or dict :param data: data updates :type data: dict :param bot: Whether to mark the edit as a bot edit @@ -310,7 +299,7 @@ """ # this changes the reference to a new object data = dict(data) - if isinstance(entity, pywikibot.page.WikibasePage): + if isinstance(entity, pywikibot.page.WikibaseEntity): params = entity._defined_by(singular=True) if 'id' in params and params['id'] == '-1': del params['id'] @@ -349,7 +338,7 @@ Add a claim.
:param entity: Entity to modify - :type entity: WikibasePage + :type entity: WikibaseEntity :param claim: Claim to be added :type claim: pywikibot.Claim :param bot: Whether to mark the edit as a bot edit @@ -747,7 +736,7 @@ Supported actions are: wbsetaliases, wbsetdescription, wbsetlabel and wbsetsitelink
- :param itemdef: Item to modify or create + :param itemdef: Entity to modify or create :type itemdef: str, WikibaseEntity or Page connected to such item :param action: wbset{action} to perform: 'wbsetaliases', 'wbsetdescription', 'wbsetlabel', 'wbsetsitelink' diff --git a/tests/file_tests.py b/tests/file_tests.py index 85b6b09..bee9347 100644 --- a/tests/file_tests.py +++ b/tests/file_tests.py @@ -10,7 +10,11 @@ from contextlib import suppress
import pywikibot -from pywikibot.exceptions import NoPageError, PageRelatedError +from pywikibot.exceptions import ( + NoPageError, + NoWikibaseEntityError, + PageRelatedError, +) from tests import join_images_path from tests.aspects import TestCase
@@ -274,6 +278,39 @@ page.download(filename)
+class TestFilePageDataItem(TestCase): + + """Test structured data of FilePage.""" + + family = 'commons' + code = 'commons' + + cached = True + + def test_data_item(self): + """Test associated data item.""" + page = pywikibot.FilePage(self.site, 'File:Albert Einstein.jpg') + item = page.data_item() + self.assertIsInstance(item, pywikibot.MediaInfo) + self.assertTrue(page._item is item) + self.assertTrue(item.file is page) + self.assertEqual('-1', item.id) + item.get() + self.assertEqual('M14634781', item.getID()) + self.assertIsInstance( + item.labels, pywikibot.page._collections.LanguageDict) + del item._file + self.assertEqual(page, item.file) + + def test_data_item_not_existing(self): + """Test data item associated to file that does not exist.""" + page = pywikibot.FilePage(self.site, + 'File:Albert Einstein.jpg_notexisting') + item = page.data_item() + with self.assertRaises(NoWikibaseEntityError): + item.get() + + if __name__ == '__main__': # pragma: no cover with suppress(SystemExit): unittest.main()
pywikibot-commits@lists.wikimedia.org