jenkins-bot submitted this change.

View Change

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified

[IMPR] move wikibase data structures to page/_collections.py

- split page.__init__.py and move wikibase data structures to
_collections.py file
- move their tests to collections_tests.py
- update documentation

Change-Id: Ia3bf9a496ed796e5b0896cd2e9aa11075d8c689b
---
M docs/api_ref/pywikibot.page.rst
M pywikibot/CONTENT.rst
M pywikibot/page/__init__.py
A pywikibot/page/_collections.py
M tests/__init__.py
A tests/collections_tests.py
M tests/wikibase_tests.py
M tox.ini
8 files changed, 743 insertions(+), 649 deletions(-)

diff --git a/docs/api_ref/pywikibot.page.rst b/docs/api_ref/pywikibot.page.rst
index 46ec207..512ecd2 100644
--- a/docs/api_ref/pywikibot.page.rst
+++ b/docs/api_ref/pywikibot.page.rst
@@ -6,6 +6,11 @@
Submodules
----------

+pywikibot.page.\_collections module
+-----------------------------------
+
+.. automodule:: pywikibot.page._collections
+
pywikibot.page.\_decorators module
----------------------------------

diff --git a/pywikibot/CONTENT.rst b/pywikibot/CONTENT.rst
index e49dd95..eb16410 100644
--- a/pywikibot/CONTENT.rst
+++ b/pywikibot/CONTENT.rst
@@ -102,6 +102,8 @@
+============================+======================================================+
| __init__.py | Objects representing MediaWiki pages |
+----------------------------+------------------------------------------------------+
+ | _collections.py | Structures holding data for Wikibase entities |
+ +----------------------------+------------------------------------------------------+
| _decorators.py | Decorators used by page objects |
+----------------------------+------------------------------------------------------+
| _revision.py | Object representing page revision |
diff --git a/pywikibot/page/__init__.py b/pywikibot/page/__init__.py
index 142d248..a155e4b 100644
--- a/pywikibot/page/__init__.py
+++ b/pywikibot/page/__init__.py
@@ -21,7 +21,6 @@
import unicodedata

from collections import Counter, defaultdict, OrderedDict
-from collections.abc import MutableMapping
from contextlib import suppress
from html.entities import name2codepoint
from itertools import chain
@@ -43,6 +42,12 @@
UserRightsError,
)
from pywikibot.family import Family
+from pywikibot.page._collections import (
+ AliasesDict,
+ ClaimCollection,
+ LanguageDict,
+ SiteLinkCollection,
+)
from pywikibot.page._decorators import allow_asynchronous
from pywikibot.page._revision import Revision
from pywikibot.site import DataSite, Namespace
@@ -3194,412 +3199,6 @@
return self.isRegistered() and 'bot' not in self.groups()


-class BaseDataDict(MutableMapping):
-
- """
- Base structure holding data for a Wikibase entity.
-
- Data are mappings from a language to a value. It will be
- specialised in subclasses.
- """
-
- def __init__(self, data=None):
- super().__init__()
- self._data = {}
- if data:
- self.update(data)
-
- @classmethod
- def new_empty(cls, repo):
- return cls()
-
- def __getitem__(self, key):
- key = self.normalizeKey(key)
- return self._data[key]
-
- def __setitem__(self, key, value):
- key = self.normalizeKey(key)
- self._data[key] = value
-
- def __delitem__(self, key):
- key = self.normalizeKey(key)
- del self._data[key]
-
- def __iter__(self):
- return iter(self._data)
-
- def __len__(self):
- return len(self._data)
-
- def __contains__(self, key):
- key = self.normalizeKey(key)
- return key in self._data
-
- def __repr__(self):
- return '{}({})'.format(type(self), self._data)
-
- @staticmethod
- def normalizeKey(key) -> str:
- """Helper function to return language codes of a site object."""
- if isinstance(key, pywikibot.site.BaseSite):
- key = key.lang
- return key
-
-
-class LanguageDict(BaseDataDict):
-
- """
- A structure holding language data for a Wikibase entity.
-
- Language data are mappings from a language to a string. It can be
- labels, descriptions and others.
- """
-
- @classmethod
- def fromJSON(cls, data, repo=None):
- this = cls({key: value['value'] for key, value in data.items()})
- return this
-
- @classmethod
- def normalizeData(cls, data):
- norm_data = {}
- for key, value in data.items():
- if isinstance(value, str):
- norm_data[key] = {'language': key, 'value': value}
- else:
- norm_data[key] = value
- return norm_data
-
- def toJSON(self, diffto=None):
- data = {}
- diffto = diffto or {}
- for key in diffto.keys() - self.keys():
- data[key] = {'language': key, 'value': ''}
- for key in self.keys() - diffto.keys():
- data[key] = {'language': key, 'value': self[key]}
- for key in self.keys() & diffto.keys():
- if self[key] != diffto[key]['value']:
- data[key] = {'language': key, 'value': self[key]}
- return data
-
-
-class AliasesDict(BaseDataDict):
-
- """
- A structure holding aliases for a Wikibase entity.
-
- It is a mapping from a language to a list of strings.
- """
-
- @classmethod
- def fromJSON(cls, data, repo=None):
- this = cls()
- for key, value in data.items():
- this[key] = [val['value'] for val in value]
- return this
-
- @classmethod
- def normalizeData(cls, data):
- norm_data = {}
- for key, values in data.items():
- if isinstance(values, list):
- strings = []
- for value in values:
- if isinstance(value, str):
- strings.append({'language': key, 'value': value})
- else:
- strings.append(value)
- norm_data[key] = strings
- return norm_data
-
- def toJSON(self, diffto=None):
- data = {}
- diffto = diffto or {}
- for lang in diffto.keys() & self.keys():
- if (sorted(val['value'] for val in diffto[lang])
- != sorted(self[lang])):
- data[lang] = [{'language': lang, 'value': i}
- for i in self[lang]]
- for lang in diffto.keys() - self.keys():
- data[lang] = [
- {'language': lang, 'value': i['value'], 'remove': ''}
- for i in diffto[lang]]
- for lang in self.keys() - diffto.keys():
- data[lang] = [{'language': lang, 'value': i} for i in self[lang]]
- return data
-
-
-class ClaimCollection(MutableMapping):
- """A structure holding claims for a Wikibase entity."""
-
- def __init__(self, repo):
- super().__init__()
- self.repo = repo
- self._data = {}
-
- @classmethod
- def fromJSON(cls, data, repo):
- this = cls(repo)
- for key, claims in data.items():
- this[key] = [Claim.fromJSON(repo, claim) for claim in claims]
- return this
-
- @classmethod
- def new_empty(cls, repo):
- return cls(repo)
-
- def __getitem__(self, key):
- return self._data[key]
-
- def __setitem__(self, key, value):
- self._data[key] = value
-
- def __delitem__(self, key):
- del self._data[key]
-
- def __iter__(self):
- return iter(self._data)
-
- def __len__(self):
- return len(self._data)
-
- def __contains__(self, key):
- return key in self._data
-
- def __repr__(self):
- return '{}({})'.format(type(self), self._data)
-
- @classmethod
- def normalizeData(cls, data):
- # no normalization here, should there be?
- return data
-
- def toJSON(self, diffto=None):
- claims = {}
- for prop in self:
- if len(self[prop]) > 0:
- claims[prop] = [claim.toJSON() for claim in self[prop]]
-
- if diffto:
- temp = defaultdict(list)
- props_add = set(claims.keys())
- props_orig = set(diffto.keys())
- for prop in (props_orig | props_add):
- if prop not in props_orig:
- temp[prop].extend(claims[prop])
- continue
- if prop not in props_add:
- temp[prop].extend(
- {'id': claim['id'], 'remove': ''}
- for claim in diffto[prop] if 'id' in claim)
- continue
-
- claim_ids = set()
- claim_map = {
- json['id']: json for json in diffto[prop]
- if 'id' in json}
- for claim, json in zip(self[prop], claims[prop]):
- if 'id' in json:
- claim_ids.add(json['id'])
- if json['id'] in claim_map:
- other = Claim.fromJSON(
- self.repo, claim_map[json['id']])
- if claim.same_as(other, ignore_rank=False,
- ignore_refs=False):
- continue
- temp[prop].append(json)
-
- for claim in diffto[prop]:
- if 'id' in claim and claim['id'] not in claim_ids:
- temp[prop].append({'id': claim['id'], 'remove': ''})
-
- claims = temp
-
- return claims
-
- def set_on_item(self, item):
- """Set Claim.on_item attribute for all claims in this collection."""
- for claims in self.values():
- for claim in claims:
- claim.on_item = item
-
-
-class SiteLinkCollection(MutableMapping):
- """A structure holding SiteLinks for a Wikibase item."""
-
- def __init__(self, repo, data=None):
- """
- Initializer.
-
- @param repo: the Wikibase site on which badges are defined
- @type repo: pywikibot.site.DataSite
- """
- super().__init__()
- self.repo = repo
- self._data = {}
- if data:
- self.update(data)
-
- @classmethod
- def new_empty(cls, repo):
- """Construct a new empty SiteLinkCollection."""
- return cls(repo)
-
- @classmethod
- def fromJSON(cls, data, repo):
- """Construct a new SiteLinkCollection from JSON."""
- return cls(repo, data)
-
- @staticmethod
- def getdbName(site):
- """
- Helper function to obtain a dbName for a Site.
-
- @param site: The site to look up.
- @type site: pywikibot.site.BaseSite or str
- """
- if isinstance(site, pywikibot.site.BaseSite):
- return site.dbName()
- return site
-
- def __getitem__(self, key):
- """
- Get the SiteLink with the given key.
-
- @param key: site key as Site instance or db key
- @type key: pywikibot.Site or str
- @rtype: pywikibot.page.SiteLink
- """
- key = self.getdbName(key)
- val = self._data[key]
- if isinstance(val, str):
- val = SiteLink(val, key)
- elif isinstance(val, dict):
- val = SiteLink.fromJSON(val, self.repo)
- else:
- return val
- self._data[key] = val
- return val
-
- def __setitem__(self, key, val):
- """
- Set the SiteLink for a given key.
-
- This only sets the value given as str, dict or SiteLink. If a
- str or dict is given the SiteLink object is created later in
- __getitem__ method.
-
- @param key: site key as Site instance or db key
- @type key: pywikibot.Site or str
- @param val: page name as a string or JSON containing SiteLink
- data or a SiteLink object
- @type val: Union[str, dict, SiteLink]
- """
- key = self.getdbName(key)
- if isinstance(val, SiteLink):
- assert val.site.dbName() == key
- self._data[key] = val
-
- def __delitem__(self, key):
- key = self.getdbName(key)
- del self._data[key]
-
- def __iter__(self):
- return iter(self._data)
-
- def __len__(self):
- return len(self._data)
-
- def __contains__(self, key):
- key = self.getdbName(key)
- return key in self._data
-
- @classmethod
- def _extract_JSON(cls, obj):
- if isinstance(obj, SiteLink):
- return obj.toJSON()
- elif isinstance(obj, BaseLink):
- db_name = cls.getdbName(obj.site)
- return {'site': db_name, 'title': obj.title}
- elif isinstance(obj, Page):
- db_name = cls.getdbName(obj.site)
- return {'site': db_name, 'title': obj.title()}
- else:
- return obj
-
- @classmethod
- def normalizeData(cls, data) -> dict:
- """
- Helper function to expand data into the Wikibase API structure.
-
- @param data: Data to normalize
- @type data: list or dict
-
- @return: The dict with normalized data
- """
- norm_data = {}
- if isinstance(data, dict):
- for key, obj in data.items():
- key = cls.getdbName(key)
- json = cls._extract_JSON(obj)
- if isinstance(json, str):
- json = {'site': key, 'title': json}
- elif key != json['site']:
- raise ValueError(
- "Key '{}' doesn't match the site of the value: '{}'"
- .format(key, json['site']))
- norm_data[key] = json
- else:
- for obj in data:
- json = cls._extract_JSON(obj)
- if not isinstance(json, dict):
- raise ValueError(
- "Couldn't determine the site and title of the value: "
- '{!r}'.format(json))
- db_name = json['site']
- norm_data[db_name] = json
- return norm_data
-
- def toJSON(self, diffto: Optional[dict] = None) -> dict:
- """
- Create JSON suitable for Wikibase API.
-
- When diffto is provided, JSON representing differences
- to the provided data is created.
-
- @param diffto: JSON containing entity data
- """
- data = {dbname: sitelink.toJSON()
- for (dbname, sitelink) in self.items()}
- if diffto:
- to_nuke = []
- for dbname, sitelink in data.items():
- if dbname in diffto:
- diffto_link = diffto[dbname]
- if diffto_link.get('title') == sitelink.get('title'):
- # compare badges
- tmp_badges = []
- diffto_badges = diffto_link.get('badges', [])
- badges = sitelink.get('badges', [])
- for badge in set(diffto_badges) - set(badges):
- tmp_badges.append('')
- for badge in set(badges) - set(diffto_badges):
- tmp_badges.append(badge)
- if tmp_badges:
- data[dbname]['badges'] = tmp_badges
- else:
- to_nuke.append(dbname)
- # find removed sitelinks
- for dbname in (set(diffto.keys()) - set(self.keys())):
- badges = [''] * len(diffto[dbname].get('badges', []))
- data[dbname] = {'site': dbname, 'title': ''}
- if badges:
- data[dbname]['badges'] = badges
- for dbname in to_nuke:
- del data[dbname]
- return data
-
-
class WikibaseEntity:

"""
diff --git a/pywikibot/page/_collections.py b/pywikibot/page/_collections.py
new file mode 100644
index 0000000..a6d49f5
--- /dev/null
+++ b/pywikibot/page/_collections.py
@@ -0,0 +1,468 @@
+"""Structures holding data for Wikibase entities."""
+#
+# (C) Pywikibot team, 2019-2021
+#
+# Distributed under the terms of the MIT license.
+#
+from collections.abc import MutableMapping
+from collections import defaultdict
+from typing import Optional
+
+import pywikibot
+
+
+__all__ = (
+ 'AliasesDict',
+ 'ClaimCollection',
+ 'LanguageDict',
+ 'SiteLinkCollection',
+)
+
+
+class BaseDataDict(MutableMapping):
+
+ """
+ Base structure holding data for a Wikibase entity.
+
+ Data are mappings from a language to a value. It will be
+ specialised in subclasses.
+ """
+
+ def __init__(self, data=None):
+ super().__init__()
+ self._data = {}
+ if data:
+ self.update(data)
+
+ @classmethod
+ def new_empty(cls, repo):
+ """Construct a new empty BaseDataDict."""
+ return cls()
+
+ def __getitem__(self, key):
+ key = self.normalizeKey(key)
+ return self._data[key]
+
+ def __setitem__(self, key, value):
+ key = self.normalizeKey(key)
+ self._data[key] = value
+
+ def __delitem__(self, key):
+ key = self.normalizeKey(key)
+ del self._data[key]
+
+ def __iter__(self):
+ return iter(self._data)
+
+ def __len__(self):
+ return len(self._data)
+
+ def __contains__(self, key):
+ key = self.normalizeKey(key)
+ return key in self._data
+
+ def __repr__(self):
+ return '{}({})'.format(type(self), self._data)
+
+ @staticmethod
+ def normalizeKey(key) -> str:
+ """Helper function to return language codes of a site object."""
+ if isinstance(key, pywikibot.site.BaseSite):
+ key = key.lang
+ return key
+
+
+class LanguageDict(BaseDataDict):
+
+ """
+ A structure holding language data for a Wikibase entity.
+
+ Language data are mappings from a language to a string. It can be
+ labels, descriptions and others.
+ """
+
+ @classmethod
+ def fromJSON(cls, data, repo=None):
+ """Construct a new LanguageDict from JSON."""
+ this = cls({key: value['value'] for key, value in data.items()})
+ return this
+
+ @classmethod
+ def normalizeData(cls, data: dict):
+ """Helper function to expand data into the Wikibase API structure.
+
+ @param data: Data to normalize
+ @return: The dict with normalized data
+ """
+ norm_data = {}
+ for key, value in data.items():
+ if isinstance(value, str):
+ norm_data[key] = {'language': key, 'value': value}
+ else:
+ norm_data[key] = value
+ return norm_data
+
+ def toJSON(self, diffto: Optional[dict] = None) -> dict:
+ """Create JSON suitable for Wikibase API.
+
+ When diffto is provided, JSON representing differences
+ to the provided data is created.
+
+ @param diffto: JSON containing entity data
+ """
+ data = {}
+ diffto = diffto or {}
+ for key in diffto.keys() - self.keys():
+ data[key] = {'language': key, 'value': ''}
+ for key in self.keys() - diffto.keys():
+ data[key] = {'language': key, 'value': self[key]}
+ for key in self.keys() & diffto.keys():
+ if self[key] != diffto[key]['value']:
+ data[key] = {'language': key, 'value': self[key]}
+ return data
+
+
+class AliasesDict(BaseDataDict):
+
+ """
+ A structure holding aliases for a Wikibase entity.
+
+ It is a mapping from a language to a list of strings.
+ """
+
+ @classmethod
+ def fromJSON(cls, data, repo=None):
+ """Construct a new AliasesDict from JSON."""
+ this = cls()
+ for key, value in data.items():
+ this[key] = [val['value'] for val in value]
+ return this
+
+ @classmethod
+ def normalizeData(cls, data: dict) -> dict:
+ """Helper function to expand data into the Wikibase API structure.
+
+ @param data: Data to normalize
+ @return: The dict with normalized data
+ """
+ norm_data = {}
+ for key, values in data.items():
+ if isinstance(values, list):
+ strings = []
+ for value in values:
+ if isinstance(value, str):
+ strings.append({'language': key, 'value': value})
+ else:
+ strings.append(value)
+ norm_data[key] = strings
+ return norm_data
+
+ def toJSON(self, diffto: Optional[dict] = None) -> dict:
+ """Create JSON suitable for Wikibase API.
+
+ When diffto is provided, JSON representing differences
+ to the provided data is created.
+
+ @param diffto: JSON containing entity data
+ """
+ data = {}
+ diffto = diffto or {}
+ for lang in diffto.keys() & self.keys():
+ if (sorted(val['value'] for val in diffto[lang])
+ != sorted(self[lang])):
+ data[lang] = [{'language': lang, 'value': i}
+ for i in self[lang]]
+ for lang in diffto.keys() - self.keys():
+ data[lang] = [
+ {'language': lang, 'value': i['value'], 'remove': ''}
+ for i in diffto[lang]]
+ for lang in self.keys() - diffto.keys():
+ data[lang] = [{'language': lang, 'value': i} for i in self[lang]]
+ return data
+
+
+class ClaimCollection(MutableMapping):
+ """A structure holding claims for a Wikibase entity."""
+
+ def __init__(self, repo):
+ """Initializer."""
+ super().__init__()
+ self.repo = repo
+ self._data = {}
+
+ @classmethod
+ def fromJSON(cls, data, repo):
+ """Construct a new ClaimCollection from JSON."""
+ this = cls(repo)
+ for key, claims in data.items():
+ this[key] = [pywikibot.page.Claim.fromJSON(repo, claim)
+ for claim in claims]
+ return this
+
+ @classmethod
+ def new_empty(cls, repo):
+ """Construct a new empty ClaimCollection."""
+ return cls(repo)
+
+ def __getitem__(self, key):
+ return self._data[key]
+
+ def __setitem__(self, key, value):
+ self._data[key] = value
+
+ def __delitem__(self, key):
+ del self._data[key]
+
+ def __iter__(self):
+ return iter(self._data)
+
+ def __len__(self):
+ return len(self._data)
+
+ def __contains__(self, key):
+ return key in self._data
+
+ def __repr__(self):
+ return '{}({})'.format(type(self), self._data)
+
+ @classmethod
+ def normalizeData(cls, data) -> dict:
+ """Helper function to expand data into the Wikibase API structure.
+
+ @param data: Data to normalize
+ @return: The dict with normalized data
+ """
+ # no normalization here, should there be?
+ return data
+
+ def toJSON(self, diffto: Optional[dict] = None) -> dict:
+ """Create JSON suitable for Wikibase API.
+
+ When diffto is provided, JSON representing differences
+ to the provided data is created.
+
+ @param diffto: JSON containing entity data
+ """
+ claims = {}
+ for prop in self:
+ if len(self[prop]) > 0:
+ claims[prop] = [claim.toJSON() for claim in self[prop]]
+
+ if diffto:
+ temp = defaultdict(list)
+ props_add = set(claims.keys())
+ props_orig = set(diffto.keys())
+ for prop in (props_orig | props_add):
+ if prop not in props_orig:
+ temp[prop].extend(claims[prop])
+ continue
+ if prop not in props_add:
+ temp[prop].extend(
+ {'id': claim['id'], 'remove': ''}
+ for claim in diffto[prop] if 'id' in claim)
+ continue
+
+ claim_ids = set()
+ claim_map = {
+ json['id']: json for json in diffto[prop]
+ if 'id' in json}
+ for claim, json in zip(self[prop], claims[prop]):
+ if 'id' in json:
+ claim_ids.add(json['id'])
+ if json['id'] in claim_map:
+ other = pywikibot.page.Claim.fromJSON(
+ self.repo, claim_map[json['id']])
+ if claim.same_as(other, ignore_rank=False,
+ ignore_refs=False):
+ continue
+ temp[prop].append(json)
+
+ for claim in diffto[prop]:
+ if 'id' in claim and claim['id'] not in claim_ids:
+ temp[prop].append({'id': claim['id'], 'remove': ''})
+
+ claims = temp
+
+ return claims
+
+ def set_on_item(self, item):
+ """Set Claim.on_item attribute for all claims in this collection."""
+ for claims in self.values():
+ for claim in claims:
+ claim.on_item = item
+
+
+class SiteLinkCollection(MutableMapping):
+ """A structure holding SiteLinks for a Wikibase item."""
+
+ def __init__(self, repo, data=None):
+ """
+ Initializer.
+
+ @param repo: the Wikibase site on which badges are defined
+ @type repo: pywikibot.site.DataSite
+ """
+ super().__init__()
+ self.repo = repo
+ self._data = {}
+ if data:
+ self.update(data)
+
+ @classmethod
+ def new_empty(cls, repo):
+ """Construct a new empty SiteLinkCollection."""
+ return cls(repo)
+
+ @classmethod
+ def fromJSON(cls, data, repo):
+ """Construct a new SiteLinkCollection from JSON."""
+ return cls(repo, data)
+
+ @staticmethod
+ def getdbName(site):
+ """
+ Helper function to obtain a dbName for a Site.
+
+ @param site: The site to look up.
+ @type site: pywikibot.site.BaseSite or str
+ """
+ if isinstance(site, pywikibot.site.BaseSite):
+ return site.dbName()
+ return site
+
+ def __getitem__(self, key):
+ """
+ Get the SiteLink with the given key.
+
+ @param key: site key as Site instance or db key
+ @type key: pywikibot.Site or str
+ @rtype: pywikibot.page.SiteLink
+ """
+ key = self.getdbName(key)
+ val = self._data[key]
+ if isinstance(val, str):
+ val = pywikibot.page.SiteLink(val, key)
+ elif isinstance(val, dict):
+ val = pywikibot.page.SiteLink.fromJSON(val, self.repo)
+ else:
+ return val
+ self._data[key] = val
+ return val
+
+ def __setitem__(self, key, val):
+ """
+ Set the SiteLink for a given key.
+
+ This only sets the value given as str, dict or SiteLink. If a
+ str or dict is given the SiteLink object is created later in
+ __getitem__ method.
+
+ @param key: site key as Site instance or db key
+ @type key: pywikibot.Site or str
+ @param val: page name as a string or JSON containing SiteLink
+ data or a SiteLink object
+ @type val: Union[str, dict, SiteLink]
+ """
+ key = self.getdbName(key)
+ if isinstance(val, pywikibot.page.SiteLink):
+ assert val.site.dbName() == key
+ self._data[key] = val
+
+ def __delitem__(self, key):
+ key = self.getdbName(key)
+ del self._data[key]
+
+ def __iter__(self):
+ return iter(self._data)
+
+ def __len__(self):
+ return len(self._data)
+
+ def __contains__(self, key):
+ key = self.getdbName(key)
+ return key in self._data
+
+ @classmethod
+ def _extract_json(cls, obj):
+ if isinstance(obj, pywikibot.page.SiteLink):
+ return obj.toJSON()
+ elif isinstance(obj, pywikibot.page.BaseLink):
+ db_name = cls.getdbName(obj.site)
+ return {'site': db_name, 'title': obj.title}
+ elif isinstance(obj, pywikibot.page.Page):
+ db_name = cls.getdbName(obj.site)
+ return {'site': db_name, 'title': obj.title()}
+ else:
+ return obj
+
+ @classmethod
+ def normalizeData(cls, data) -> dict:
+ """
+ Helper function to expand data into the Wikibase API structure.
+
+ @param data: Data to normalize
+ @type data: list or dict
+
+ @return: The dict with normalized data
+ """
+ norm_data = {}
+ if isinstance(data, dict):
+ for key, obj in data.items():
+ key = cls.getdbName(key)
+ json = cls._extract_json(obj)
+ if isinstance(json, str):
+ json = {'site': key, 'title': json}
+ elif key != json['site']:
+ raise ValueError(
+ "Key '{}' doesn't match the site of the value: '{}'"
+ .format(key, json['site']))
+ norm_data[key] = json
+ else:
+ for obj in data:
+ json = cls._extract_json(obj)
+ if not isinstance(json, dict):
+ raise ValueError(
+ "Couldn't determine the site and title of the value: "
+ '{!r}'.format(json))
+ db_name = json['site']
+ norm_data[db_name] = json
+ return norm_data
+
+ def toJSON(self, diffto: Optional[dict] = None) -> dict:
+ """
+ Create JSON suitable for Wikibase API.
+
+ When diffto is provided, JSON representing differences
+ to the provided data is created.
+
+ @param diffto: JSON containing entity data
+ """
+ data = {dbname: sitelink.toJSON()
+ for (dbname, sitelink) in self.items()}
+ if diffto:
+ to_nuke = []
+ for dbname, sitelink in data.items():
+ if dbname in diffto:
+ diffto_link = diffto[dbname]
+ if diffto_link.get('title') == sitelink.get('title'):
+ # compare badges
+ tmp_badges = []
+ diffto_badges = diffto_link.get('badges', [])
+ badges = sitelink.get('badges', [])
+ for badge in set(diffto_badges) - set(badges):
+ tmp_badges.append('')
+ for badge in set(badges) - set(diffto_badges):
+ tmp_badges.append(badge)
+ if tmp_badges:
+ data[dbname]['badges'] = tmp_badges
+ else:
+ to_nuke.append(dbname)
+ # find removed sitelinks
+ for dbname in (set(diffto.keys()) - set(self.keys())):
+ badges = [''] * len(diffto[dbname].get('badges', []))
+ data[dbname] = {'site': dbname, 'title': ''}
+ if badges:
+ data[dbname]['badges'] = badges
+ for dbname in to_nuke:
+ del data[dbname]
+ return data
diff --git a/tests/__init__.py b/tests/__init__.py
index 969f630..eaf45c2 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -67,6 +67,7 @@
'basesite',
'bot',
'category',
+ 'collections',
'cosmetic_changes',
'date',
'datasite',
diff --git a/tests/collections_tests.py b/tests/collections_tests.py
new file mode 100644
index 0000000..f21b0e2
--- /dev/null
+++ b/tests/collections_tests.py
@@ -0,0 +1,256 @@
+"""Tests for the Wikidata parts of the page module."""
+#
+# (C) Pywikibot team, 2019-2021
+#
+# Distributed under the terms of the MIT license.
+#
+import unittest
+
+from contextlib import suppress
+
+from pywikibot.page import (
+ LanguageDict, AliasesDict, ClaimCollection, SiteLinkCollection,
+)
+from tests.aspects import WikidataTestCase
+
+
+class DataCollectionTestCase(WikidataTestCase):
+
+ """Test case for a Wikibase collection class."""
+
+ collection_class = None
+
+ def _test_new_empty(self):
+ """Test that new_empty method returns empty collection."""
+ cls = self.collection_class
+ result = cls.new_empty(self.get_repo())
+ self.assertIsEmpty(result)
+
+
+class TestLanguageDict(DataCollectionTestCase):
+
+ """Test cases covering LanguageDict methods."""
+
+ collection_class = LanguageDict
+
+ family = 'wikipedia'
+ code = 'en'
+
+ dry = True
+
+ def setUp(self):
+ """Setup tests."""
+ super().setUp()
+ self.site = self.get_site()
+ self.lang_out = {'en': 'foo', 'zh': 'bar'}
+
+ def test_init(self):
+ """Test LanguageDict initializer."""
+ ld = LanguageDict()
+ self.assertLength(ld, 0)
+ ld = LanguageDict(self.lang_out)
+ self.assertLength(ld, 2)
+
+ def test_setitem(self):
+ """Test LanguageDict.__setitem__ metamethod."""
+ ld = LanguageDict(self.lang_out)
+ self.assertIn('en', ld)
+ ld[self.site] = 'bar'
+ self.assertIn('en', ld)
+
+ def test_getitem(self):
+ """Test LanguageDict.__getitem__ metamethod."""
+ ld = LanguageDict(self.lang_out)
+ self.assertEqual(ld['en'], 'foo')
+ self.assertEqual(ld[self.site], 'foo')
+ self.assertIsNone(ld.get('de'))
+
+ def test_delitem(self):
+ """Test LanguageDict.__delitem__ metamethod."""
+ ld = LanguageDict(self.lang_out)
+ ld.pop(self.site)
+ ld.pop('zh')
+ self.assertNotIn('en', ld)
+ self.assertNotIn('zh', ld)
+ self.assertLength(ld, 0)
+
+ def test_fromJSON(self):
+ """Test LanguageDict.fromJSON method."""
+ ld = LanguageDict.fromJSON(
+ {'en': {'language': 'en', 'value': 'foo'},
+ 'zh': {'language': 'zh', 'value': 'bar'}})
+ self.assertIsInstance(ld, LanguageDict)
+ self.assertEqual(ld, LanguageDict(self.lang_out))
+
+ def test_toJSON(self):
+ """Test LanguageDict.toJSON method."""
+ ld = LanguageDict()
+ self.assertEqual(ld.toJSON(), {})
+ ld = LanguageDict(self.lang_out)
+ self.assertEqual(
+ ld.toJSON(), {'en': {'language': 'en', 'value': 'foo'},
+ 'zh': {'language': 'zh', 'value': 'bar'}})
+
+ def test_toJSON_diffto(self):
+ """Test LanguageDict.toJSON method."""
+ ld = LanguageDict({'de': 'foo', 'zh': 'bar'})
+ diffto = {
+ 'de': {'language': 'de', 'value': 'bar'},
+ 'en': {'language': 'en', 'value': 'foo'}}
+ self.assertEqual(
+ ld.toJSON(diffto=diffto),
+ {'de': {'language': 'de', 'value': 'foo'},
+ 'en': {'language': 'en', 'value': ''},
+ 'zh': {'language': 'zh', 'value': 'bar'}})
+
+ def test_normalizeData(self):
+ """Test LanguageDict.normalizeData method."""
+ self.assertEqual(
+ LanguageDict.normalizeData(self.lang_out),
+ {'en': {'language': 'en', 'value': 'foo'},
+ 'zh': {'language': 'zh', 'value': 'bar'}})
+
+ def test_new_empty(self):
+ """Test that new_empty method returns empty collection."""
+ self._test_new_empty()
+
+
+class TestAliasesDict(DataCollectionTestCase):
+
+ """Test cases covering AliasesDict methods."""
+
+ collection_class = AliasesDict
+
+ family = 'wikipedia'
+ code = 'en'
+
+ dry = True
+
+ def setUp(self):
+ """Setup tests."""
+ super().setUp()
+ self.site = self.get_site()
+ self.lang_out = {'en': ['foo', 'bar'],
+ 'zh': ['foo', 'bar']}
+
+ def test_init(self):
+ """Test AliasesDict initializer."""
+ ad = AliasesDict()
+ self.assertLength(ad, 0)
+ ad = AliasesDict(self.lang_out)
+ self.assertLength(ad, 2)
+
+ def test_setitem(self):
+ """Test AliasesDict.__setitem__ metamethod."""
+ ad = AliasesDict(self.lang_out)
+ self.assertIn('en', ad)
+ self.assertIn('zh', ad)
+ ad[self.site] = ['baz']
+ self.assertIn('en', ad)
+
+ def test_getitem(self):
+ """Test AliasesDict.__getitem__ metamethod."""
+ ad = AliasesDict(self.lang_out)
+ self.assertEqual(ad['en'], ['foo', 'bar'])
+ self.assertEqual(ad[self.site], ['foo', 'bar'])
+ self.assertIsNone(ad.get('de'))
+
+ def test_delitem(self):
+ """Test AliasesDict.__delitem__ metamethod."""
+ ad = AliasesDict(self.lang_out)
+ ad.pop(self.site)
+ ad.pop('zh')
+ self.assertNotIn('en', ad)
+ self.assertNotIn('zh', ad)
+ self.assertLength(ad, 0)
+
+ def test_fromJSON(self):
+ """Test AliasesDict.fromJSON method."""
+ ad = AliasesDict.fromJSON(
+ {'en': [{'language': 'en', 'value': 'foo'},
+ {'language': 'en', 'value': 'bar'}],
+ 'zh': [{'language': 'zh', 'value': 'foo'},
+ {'language': 'zh', 'value': 'bar'}],
+ })
+ self.assertIsInstance(ad, AliasesDict)
+ self.assertEqual(ad, AliasesDict(self.lang_out))
+
+ def test_toJSON(self):
+ """Test AliasesDict.toJSON method."""
+ ad = AliasesDict()
+ self.assertEqual(ad.toJSON(), {})
+ ad = AliasesDict(self.lang_out)
+ self.assertEqual(
+ ad.toJSON(),
+ {'en': [{'language': 'en', 'value': 'foo'},
+ {'language': 'en', 'value': 'bar'}],
+ 'zh': [{'language': 'zh', 'value': 'foo'},
+ {'language': 'zh', 'value': 'bar'}],
+ })
+
+ def test_toJSON_diffto(self):
+ """Test AliasesDict.toJSON method."""
+ ad = AliasesDict(self.lang_out)
+ diffto = {
+ 'de': [
+ {'language': 'de', 'value': 'foo'},
+ {'language': 'de', 'value': 'bar'},
+ ],
+ 'en': [
+ {'language': 'en', 'value': 'foo'},
+ {'language': 'en', 'value': 'baz'},
+ ]}
+ self.assertEqual(
+ ad.toJSON(diffto=diffto),
+ {'de': [{'language': 'de', 'value': 'foo', 'remove': ''},
+ {'language': 'de', 'value': 'bar', 'remove': ''}],
+ 'en': [{'language': 'en', 'value': 'foo'},
+ {'language': 'en', 'value': 'bar'}],
+ 'zh': [{'language': 'zh', 'value': 'foo'},
+ {'language': 'zh', 'value': 'bar'}]
+ })
+
+ def test_normalizeData(self):
+ """Test AliasesDict.normalizeData method."""
+ data_in = {'en': [
+ {'language': 'en', 'value': 'foo'},
+ 'bar',
+ {'language': 'en', 'value': 'baz', 'remove': ''},
+ ]}
+ data_out = {'en': [
+ {'language': 'en', 'value': 'foo'},
+ {'language': 'en', 'value': 'bar'},
+ {'language': 'en', 'value': 'baz', 'remove': ''},
+ ]}
+ self.assertEqual(AliasesDict.normalizeData(data_in), data_out)
+
+ def test_new_empty(self):
+ """Test that new_empty method returns empty collection."""
+ self._test_new_empty()
+
+
+class TestClaimCollection(DataCollectionTestCase):
+
+ """Test cases covering ClaimCollection methods."""
+
+ collection_class = ClaimCollection
+
+ def test_new_empty(self):
+ """Test that new_empty method returns empty collection."""
+ self._test_new_empty()
+
+
+class TestSiteLinkCollection(DataCollectionTestCase):
+
+ """Test cases covering SiteLinkCollection methods."""
+
+ collection_class = SiteLinkCollection
+
+ def test_new_empty(self):
+ """Test that new_empty method returns empty collection."""
+ self._test_new_empty()
+
+
+if __name__ == '__main__': # pragma: no cover
+ with suppress(SystemExit):
+ unittest.main()
diff --git a/tests/wikibase_tests.py b/tests/wikibase_tests.py
index a0e7da8..9f19a85 100644
--- a/tests/wikibase_tests.py
+++ b/tests/wikibase_tests.py
@@ -6,6 +6,7 @@
#
import copy
import json
+import unittest

from contextlib import suppress
from decimal import Decimal
@@ -13,15 +14,12 @@
import pywikibot

from pywikibot import pagegenerators
-from pywikibot.page import (
- WikibasePage, ItemPage, PropertyPage, Page, LanguageDict, AliasesDict,
- ClaimCollection, SiteLinkCollection,
-)
+from pywikibot.page import ItemPage, Page, PropertyPage, WikibasePage
from pywikibot.site import Namespace, NamespacesDict
from pywikibot.tools import MediaWikiVersion

from tests import join_pages_path, mock
-from tests.aspects import TestCase, unittest, WikidataTestCase
+from tests.aspects import TestCase, WikidataTestCase

from tests.basepage import (
BasePageMethodsTestBase,
@@ -1757,243 +1755,6 @@
self.assertLength(wvlinks, 2)


-class DataCollectionTestCase(WikidataTestCase):
-
- """Test case for a Wikibase collection class."""
-
- collection_class = None
-
- def _test_new_empty(self):
- """Test that new_empty method returns empty collection."""
- cls = self.collection_class
- result = cls.new_empty(self.get_repo())
- self.assertIsEmpty(result)
-
-
-class TestLanguageDict(DataCollectionTestCase):
-
- """Test cases covering LanguageDict methods."""
-
- collection_class = LanguageDict
-
- family = 'wikipedia'
- code = 'en'
-
- dry = True
-
- def setUp(self):
- """Setup tests."""
- super().setUp()
- self.site = self.get_site()
- self.lang_out = {'en': 'foo', 'zh': 'bar'}
-
- def test_init(self):
- """Test LanguageDict initializer."""
- ld = LanguageDict()
- self.assertLength(ld, 0)
- ld = LanguageDict(self.lang_out)
- self.assertLength(ld, 2)
-
- def test_setitem(self):
- """Test LanguageDict.__setitem__ metamethod."""
- ld = LanguageDict(self.lang_out)
- self.assertIn('en', ld)
- ld[self.site] = 'bar'
- self.assertIn('en', ld)
-
- def test_getitem(self):
- """Test LanguageDict.__getitem__ metamethod."""
- ld = LanguageDict(self.lang_out)
- self.assertEqual(ld['en'], 'foo')
- self.assertEqual(ld[self.site], 'foo')
- self.assertIsNone(ld.get('de'))
-
- def test_delitem(self):
- """Test LanguageDict.__delitem__ metamethod."""
- ld = LanguageDict(self.lang_out)
- ld.pop(self.site)
- ld.pop('zh')
- self.assertNotIn('en', ld)
- self.assertNotIn('zh', ld)
- self.assertLength(ld, 0)
-
- def test_fromJSON(self):
- """Test LanguageDict.fromJSON method."""
- ld = LanguageDict.fromJSON(
- {'en': {'language': 'en', 'value': 'foo'},
- 'zh': {'language': 'zh', 'value': 'bar'}})
- self.assertIsInstance(ld, LanguageDict)
- self.assertEqual(ld, LanguageDict(self.lang_out))
-
- def test_toJSON(self):
- """Test LanguageDict.toJSON method."""
- ld = LanguageDict()
- self.assertEqual(ld.toJSON(), {})
- ld = LanguageDict(self.lang_out)
- self.assertEqual(
- ld.toJSON(), {'en': {'language': 'en', 'value': 'foo'},
- 'zh': {'language': 'zh', 'value': 'bar'}})
-
- def test_toJSON_diffto(self):
- """Test LanguageDict.toJSON method."""
- ld = LanguageDict({'de': 'foo', 'zh': 'bar'})
- diffto = {
- 'de': {'language': 'de', 'value': 'bar'},
- 'en': {'language': 'en', 'value': 'foo'}}
- self.assertEqual(
- ld.toJSON(diffto=diffto),
- {'de': {'language': 'de', 'value': 'foo'},
- 'en': {'language': 'en', 'value': ''},
- 'zh': {'language': 'zh', 'value': 'bar'}})
-
- def test_normalizeData(self):
- """Test LanguageDict.normalizeData method."""
- self.assertEqual(
- LanguageDict.normalizeData(self.lang_out),
- {'en': {'language': 'en', 'value': 'foo'},
- 'zh': {'language': 'zh', 'value': 'bar'}})
-
- def test_new_empty(self):
- """Test that new_empty method returns empty collection."""
- self._test_new_empty()
-
-
-class TestAliasesDict(DataCollectionTestCase):
-
- """Test cases covering AliasesDict methods."""
-
- collection_class = AliasesDict
-
- family = 'wikipedia'
- code = 'en'
-
- dry = True
-
- def setUp(self):
- """Setup tests."""
- super().setUp()
- self.site = self.get_site()
- self.lang_out = {'en': ['foo', 'bar'],
- 'zh': ['foo', 'bar']}
-
- def test_init(self):
- """Test AliasesDict initializer."""
- ad = AliasesDict()
- self.assertLength(ad, 0)
- ad = AliasesDict(self.lang_out)
- self.assertLength(ad, 2)
-
- def test_setitem(self):
- """Test AliasesDict.__setitem__ metamethod."""
- ad = AliasesDict(self.lang_out)
- self.assertIn('en', ad)
- self.assertIn('zh', ad)
- ad[self.site] = ['baz']
- self.assertIn('en', ad)
-
- def test_getitem(self):
- """Test AliasesDict.__getitem__ metamethod."""
- ad = AliasesDict(self.lang_out)
- self.assertEqual(ad['en'], ['foo', 'bar'])
- self.assertEqual(ad[self.site], ['foo', 'bar'])
- self.assertIsNone(ad.get('de'))
-
- def test_delitem(self):
- """Test AliasesDict.__delitem__ metamethod."""
- ad = AliasesDict(self.lang_out)
- ad.pop(self.site)
- ad.pop('zh')
- self.assertNotIn('en', ad)
- self.assertNotIn('zh', ad)
- self.assertLength(ad, 0)
-
- def test_fromJSON(self):
- """Test AliasesDict.fromJSON method."""
- ad = AliasesDict.fromJSON(
- {'en': [{'language': 'en', 'value': 'foo'},
- {'language': 'en', 'value': 'bar'}],
- 'zh': [{'language': 'zh', 'value': 'foo'},
- {'language': 'zh', 'value': 'bar'}],
- })
- self.assertIsInstance(ad, AliasesDict)
- self.assertEqual(ad, AliasesDict(self.lang_out))
-
- def test_toJSON(self):
- """Test AliasesDict.toJSON method."""
- ad = AliasesDict()
- self.assertEqual(ad.toJSON(), {})
- ad = AliasesDict(self.lang_out)
- self.assertEqual(
- ad.toJSON(),
- {'en': [{'language': 'en', 'value': 'foo'},
- {'language': 'en', 'value': 'bar'}],
- 'zh': [{'language': 'zh', 'value': 'foo'},
- {'language': 'zh', 'value': 'bar'}],
- })
-
- def test_toJSON_diffto(self):
- """Test AliasesDict.toJSON method."""
- ad = AliasesDict(self.lang_out)
- diffto = {
- 'de': [
- {'language': 'de', 'value': 'foo'},
- {'language': 'de', 'value': 'bar'},
- ],
- 'en': [
- {'language': 'en', 'value': 'foo'},
- {'language': 'en', 'value': 'baz'},
- ]}
- self.assertEqual(
- ad.toJSON(diffto=diffto),
- {'de': [{'language': 'de', 'value': 'foo', 'remove': ''},
- {'language': 'de', 'value': 'bar', 'remove': ''}],
- 'en': [{'language': 'en', 'value': 'foo'},
- {'language': 'en', 'value': 'bar'}],
- 'zh': [{'language': 'zh', 'value': 'foo'},
- {'language': 'zh', 'value': 'bar'}]
- })
-
- def test_normalizeData(self):
- """Test AliasesDict.normalizeData method."""
- data_in = {'en': [
- {'language': 'en', 'value': 'foo'},
- 'bar',
- {'language': 'en', 'value': 'baz', 'remove': ''},
- ]}
- data_out = {'en': [
- {'language': 'en', 'value': 'foo'},
- {'language': 'en', 'value': 'bar'},
- {'language': 'en', 'value': 'baz', 'remove': ''},
- ]}
- self.assertEqual(AliasesDict.normalizeData(data_in), data_out)
-
- def test_new_empty(self):
- """Test that new_empty method returns empty collection."""
- self._test_new_empty()
-
-
-class TestClaimCollection(DataCollectionTestCase):
-
- """Test cases covering ClaimCollection methods."""
-
- collection_class = ClaimCollection
-
- def test_new_empty(self):
- """Test that new_empty method returns empty collection."""
- self._test_new_empty()
-
-
-class TestSiteLinkCollection(DataCollectionTestCase):
-
- """Test cases covering SiteLinkCollection methods."""
-
- collection_class = SiteLinkCollection
-
- def test_new_empty(self):
- """Test that new_empty method returns empty collection."""
- self._test_new_empty()
-
-
class TestWriteNormalizeData(TestCase):

"""Test cases for routines that normalize data for writing to Wikidata.
diff --git a/tox.ini b/tox.ini
index 1465a93..8fbb637 100644
--- a/tox.ini
+++ b/tox.ini
@@ -142,6 +142,7 @@
pywikibot/logging.py : N803
pywikibot/login.py: N802, N816
pywikibot/page/__init__.py: N802
+ pywikibot/page/_collections.py: N802
pywikibot/pagegenerators.py : N802, N803, N806, N816
pywikibot/site/_apisite.py: N802
pywikibot/site/_datasite.py: N802
@@ -185,6 +186,7 @@
tests/archivebot_tests.py: N802, N813
tests/aspects.py: N802
tests/bot_tests.py: N802
+ tests/collections_tests.py: N802
tests/cosmetic_changes_tests.py: N802
tests/data_ingestion_tests.py: N802
tests/date_tests.py: N802

To view, visit change 667349. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ia3bf9a496ed796e5b0896cd2e9aa11075d8c689b
Gerrit-Change-Number: 667349
Gerrit-PatchSet: 5
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged