jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/825392 )
Change subject: [IMPR] raise KeyError instead of AttributeError if FileInfo is used as Mapping
......................................................................
[IMPR] raise KeyError instead of AttributeError if FileInfo is used as Mapping
For mapping types, if key is missing, KeyError should be raised, see
https://docs.python.org/3/reference/datamodel.html?highlight=__getitem__#ob…
Change-Id: I84aef0a0b7033318c2f1d4ab850c65cb283a9210
---
M pywikibot/page/_filepage.py
1 file changed, 11 insertions(+), 3 deletions(-)
Approvals:
Matěj Suchánek: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page/_filepage.py b/pywikibot/page/_filepage.py
index 1616bb7..bd627e4 100644
--- a/pywikibot/page/_filepage.py
+++ b/pywikibot/page/_filepage.py
@@ -360,21 +360,29 @@
<pywikibot.site._apisite.APISite.loadimageinfo>` for details.
.. note:: timestamp will be casted to :func:`pywikibot.Timestamp`.
+
+ .. versionchanged:: 7.7
+ raises KeyError instead of AttributeError if FileInfo is used as
+ Mapping.
"""
def __init__(self, file_revision) -> None:
- """Initiate the class using the dict from `APISite.loadimageinfo`."""
+ """Initiate the class using the dict from ``APISite.loadimageinfo``."""
self.__dict__.update(file_revision)
self.timestamp = pywikibot.Timestamp.fromISOformat(self.timestamp)
def __getitem__(self, key):
"""Give access to class values by key."""
- return getattr(self, key)
+ try:
+ result = getattr(self, key)
+ except AttributeError as e:
+ raise KeyError(str(e).replace('attribute', 'key')) from None
+ return result
def __repr__(self) -> str:
"""Return a more complete string representation."""
return repr(self.__dict__)
def __eq__(self, other) -> bool:
- """Test if two File_info objects are equal."""
+ """Test if two FileInfo objects are equal."""
return self.__dict__ == other.__dict__
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/825392
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I84aef0a0b7033318c2f1d4ab850c65cb283a9210
Gerrit-Change-Number: 825392
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/826987 )
Change subject: [IMPR] Use global -verbose and -debug levels
......................................................................
[IMPR] Use global -verbose and -debug levels
- Use global -verbose and -debug levels
- use format string method instead of modulo operator
- shorten some conditions
Change-Id: I9fa8e88a4e93614a1ecda8fcce767d5e2e606ff6
---
M scripts/create_isbn_edition.py
1 file changed, 109 insertions(+), 114 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/create_isbn_edition.py b/scripts/create_isbn_edition.py
index c5fd36e..2f9dbdc 100644
--- a/scripts/create_isbn_edition.py
+++ b/scripts/create_isbn_edition.py
@@ -282,10 +282,12 @@
#
import os # Operating system
import re # Regular expressions (very handy!)
+from itertools import islice
import pywikibot # API interface to Wikidata
from pywikibot import pagegenerators as pg # Wikidata Query interface
from pywikibot.backports import List
+from pywikibot.config import verbose_output as verbose
from pywikibot.data import api
try:
@@ -299,9 +301,6 @@
unidecode = e
# Initialisation
-debug = True # Show debugging information
-verbose = True # Verbose mode
-
booklib = 'goob' # Default digital library
# ISBN number: 10 or 13 digits with optional dashes (-)
@@ -336,10 +335,7 @@
:param checklist: List of values
:Returns: True when match
"""
- for seq in statement_list:
- if seq.getTarget().getID() in checklist:
- return True
- return False
+ return any(seq.getTarget().getID() in checklist for seq in statement_list)
def get_item_list(item_name, instance_id):
@@ -395,13 +391,9 @@
global targetx
isbn_number = isbn_number.strip()
- if isbn_number == '':
+ if not isbn_number:
return # Do nothing when the ISBN number is missing
- # Validate ISBN data
- if verbose:
- pywikibot.info()
-
try:
isbn_data = isbnlib.meta(isbn_number, service=booklib)
pywikibot.info(isbn_data)
@@ -420,36 +412,37 @@
if len(isbn_data) < 6:
pywikibot.error(
- 'Unknown or incomplete digital library registration for %s'
- % isbn_number)
+ 'Unknown or incomplete digital library registration for {}'
+ .format(isbn_number))
return
# Show the raw results
if verbose:
+ pywikibot.info()
for i in isbn_data:
- pywikibot.info('%s:\t%s' % (i, isbn_data[i]))
+ pywikibot.info('{}:\t{}'.format(i, isbn_data[i]))
# Get the book language from the ISBN book reference
booklang = mainlang # Default language
- if isbn_data['Language'] != '':
+ if isbn_data['Language']:
booklang = isbn_data['Language'].strip()
if booklang == 'iw': # Obsolete codes
booklang = 'he'
lang_list = list(get_item_list(booklang, propreqinst['P407']))
- if len(lang_list) == 1:
- target['P407'] = lang_list[0]
- elif len(lang_list) == 0:
- pywikibot.warning('Unknown language %s' % booklang)
+
+ if not lang_list:
+ pywikibot.warning('Unknown language ' + booklang)
return
- else:
- pywikibot.warning('Ambiguous language %s' % booklang)
+
+ if len(lang_list) != 1:
+ pywikibot.warning('Ambiguous language ' + booklang)
return
+ target['P407'] = lang_list[0]
+
# Get formatted ISBN number
isbn_number = isbn_data['ISBN-13'] # Numeric format
isbn_fmtd = isbnlib.mask(isbn_number) # Canonical format
- if verbose:
- pywikibot.info()
pywikibot.info(isbn_fmtd) # First one
# Get (sub)title when there is a dot
@@ -464,25 +457,25 @@
subtitle = titles[1].strip()
# pywikibot.info book titles
- if debug:
- pywikibot.info(objectname)
- pywikibot.info(subtitle) # Optional
- # print subsequent subtitles, when available
- for i in range(2, len(titles)):
- # Not stored in Wikidata...
- pywikibot.info(titles[i].strip())
+ pywikibot.debug(objectname)
+ pywikibot.debug(subtitle) # Optional
+
+ # print subsequent subtitles, when available
+ for title in islice(titles, 2, None):
+ # Not stored in Wikidata...
+ pywikibot.debug(title.strip())
# Search the ISBN number in Wikidata both canonical and numeric
# P212 should have canonical hyphenated format
isbn_query = ("""# Get ISBN number
-SELECT ?item WHERE {
- VALUES ?isbn_number {
- "%s"
- "%s"
- }
+SELECT ?item WHERE {{
+ VALUES ?isbn_number {{
+ "{}"
+ "{}"
+ }}
?item wdt:P212 ?isbn_number.
-}
-""" % (isbn_fmtd, isbn_number))
+}}
+""".format(isbn_fmtd, isbn_number))
pywikibot.info(isbn_query)
generator = pg.WikidataSPARQLPageGenerator(isbn_query, site=repo)
@@ -491,26 +484,25 @@
rescnt = 0
for rescnt, item in enumerate(generator, start=1):
qnumber = item.getID()
- pywikibot.warning('Found item: %s' % qnumber)
+ pywikibot.warning('Found item: {}'.format(qnumber))
# Create or amend the item
if rescnt == 1:
item.get(get_redirect=True) # Update item
- elif rescnt == 0:
+ elif not rescnt:
label = {}
label[booklang] = objectname
item = pywikibot.ItemPage(repo) # Create item
item.editEntity({'labels': label}, summary=transcmt)
qnumber = item.getID()
- pywikibot.warning('Creating item: %s' % qnumber)
+ pywikibot.warning('Creating item: {}'.format(qnumber))
else:
- pywikibot.critical('Ambiguous ISBN number %s' % isbn_fmtd)
+ pywikibot.critical('Ambiguous ISBN number {}'.format(isbn_fmtd))
return
# Add all P/Q values
# Make sure that labels are known in the native language
- if debug:
- pywikibot.info(target)
+ pywikibot.debug(target)
# Register statements
for propty in target:
@@ -520,12 +512,13 @@
targetx[propty] = pywikibot.ItemPage(repo, target[propty])
try:
- pywikibot.warning('Add %s (%s): %s (%s)'
- % (proptyx[propty].labels[booklang], propty,
- targetx[propty].labels[booklang],
- target[propty]))
+ pywikibot.warning('Add {} ({}): {} ({})'
+ .format(proptyx[propty].labels[booklang],
+ propty,
+ targetx[propty].labels[booklang],
+ target[propty]))
except: # noqa: B001, E722, H201
- pywikibot.warning('Add %s:%s' % (propty, target[propty]))
+ pywikibot.warning('Add {}:{}'.format(propty, target[propty]))
claim = pywikibot.Claim(repo, propty)
claim.setTarget(targetx[propty])
@@ -533,22 +526,22 @@
# Set formatted ISBN number
if 'P212' not in item.claims:
- pywikibot.warning('Add ISBN number (P212): %s' % (isbn_fmtd))
+ pywikibot.warning('Add ISBN number (P212): {}'.format(isbn_fmtd))
claim = pywikibot.Claim(repo, 'P212')
claim.setTarget(isbn_fmtd)
item.addClaim(claim, bot=True, summary=transcmt)
# Title
if 'P1476' not in item.claims:
- pywikibot.warning('Add Title (P1476): %s' % (objectname))
+ pywikibot.warning('Add Title (P1476): {}'.format(objectname))
claim = pywikibot.Claim(repo, 'P1476')
claim.setTarget(pywikibot.WbMonolingualText(text=objectname,
language=booklang))
item.addClaim(claim, bot=True, summary=transcmt)
# Subtitle
- if subtitle != '' and 'P1680' not in item.claims:
- pywikibot.warning('Add Subtitle (P1680): %s' % (subtitle))
+ if subtitle and 'P1680' not in item.claims:
+ pywikibot.warning('Add Subtitle (P1680): {}'.format(subtitle))
claim = pywikibot.Claim(repo, 'P1680')
claim.setTarget(pywikibot.WbMonolingualText(text=subtitle,
language=booklang))
@@ -556,9 +549,9 @@
# Date of publication
pub_year = isbn_data['Year']
- if pub_year != '' and 'P577' not in item.claims:
- pywikibot.warning('Add Year of publication (P577): %s'
- % (isbn_data['Year']))
+ if pub_year and 'P577' not in item.claims:
+ pywikibot.warning('Add Year of publication (P577): {}'
+ .format(isbn_data['Year']))
claim = pywikibot.Claim(repo, 'P577')
claim.setTarget(pywikibot.WbTime(year=int(pub_year), precision='year'))
item.addClaim(claim, bot=True, summary=transcmt)
@@ -567,7 +560,7 @@
author_cnt = 0
for author_name in isbn_data['Authors']:
author_name = author_name.strip()
- if author_name != '':
+ if author_name:
author_cnt += 1
author_list = list(get_item_list(author_name, propreqinst['P50']))
@@ -580,9 +573,9 @@
break
if add_author:
- pywikibot.warning('Add author %d (P50): %s (%s)'
- % (author_cnt, author_name,
- author_list[0]))
+ pywikibot.warning('Add author {} (P50): {} ({})'
+ .format(author_cnt, author_name,
+ author_list[0]))
claim = pywikibot.Claim(repo, 'P50')
claim.setTarget(pywikibot.ItemPage(repo, author_list[0]))
item.addClaim(claim, bot=True, summary=transcmt)
@@ -590,28 +583,28 @@
qualifier = pywikibot.Claim(repo, 'P1545')
qualifier.setTarget(str(author_cnt))
claim.addQualifier(qualifier, summary=transcmt)
- elif len(author_list) == 0:
- pywikibot.warning('Unknown author: %s' % author_name)
+ elif not author_list:
+ pywikibot.warning('Unknown author: {}'.format(author_name))
else:
- pywikibot.warning('Ambiguous author: %s' % author_name)
+ pywikibot.warning('Ambiguous author: {}'.format(author_name))
# Get the publisher
publisher_name = isbn_data['Publisher'].strip()
- if publisher_name != '':
+ if publisher_name:
publisher_list = list(get_item_list(publisher_name,
propreqinst['P123']))
if len(publisher_list) == 1:
if 'P123' not in item.claims:
- pywikibot.warning('Add publisher (P123): %s (%s)'
- % (publisher_name, publisher_list[0]))
+ pywikibot.warning('Add publisher (P123): {} ({})'
+ .format(publisher_name, publisher_list[0]))
claim = pywikibot.Claim(repo, 'P123')
claim.setTarget(pywikibot.ItemPage(repo, publisher_list[0]))
item.addClaim(claim, bot=True, summary=transcmt)
- elif len(publisher_list) == 0:
- pywikibot.warning('Unknown publisher: %s' % publisher_name)
+ elif not publisher_list:
+ pywikibot.warning('Unknown publisher: ' + publisher_name)
else:
- pywikibot.warning('Ambiguous publisher: %s' % publisher_name)
+ pywikibot.warning('Ambiguous publisher: ' + publisher_name)
# Get addional data from the digital library
isbn_cover = isbnlib.cover(isbn_number)
@@ -627,13 +620,13 @@
# Book cover images
for i in isbn_cover:
- pywikibot.info('%s:\t%s' % (i, isbn_cover[i]))
+ pywikibot.info('{}:\t{}'.format(i, isbn_cover[i]))
# Handle ISBN classification
isbn_classify = isbnlib.classify(isbn_number)
- if debug:
- for i in isbn_classify:
- pywikibot.info('%s:\t%s' % (i, isbn_classify[i]))
+
+ for i in isbn_classify:
+ pywikibot.debug('{}:\t{}'.format(i, isbn_classify[i]))
# ./create_isbn_edition.py '978-3-8376-5645-9' - de P407 Q188
# Q113460204
@@ -644,7 +637,8 @@
# Set the OCLC ID
if 'oclc' in isbn_classify and 'P243' not in item.claims:
- pywikibot.warning('Add OCLC ID (P243): %s' % (isbn_classify['oclc']))
+ pywikibot.warning('Add OCLC ID (P243): {}'
+ .format(isbn_classify['oclc']))
claim = pywikibot.Claim(repo, 'P243')
claim.setTarget(isbn_classify['oclc'])
item.addClaim(claim, bot=True, summary=transcmt)
@@ -658,8 +652,8 @@
# Edition should belong to only one single work
work = item.claims['P629'][0].getTarget()
# There doesn't exist a moveClaim method?
- pywikibot.warning('Move OCLC Work ID %s to work %s'
- % (oclcworkid, work.getID()))
+ pywikibot.warning('Move OCLC Work ID {} to work {}'
+ .format(oclcworkid, work.getID()))
# Keep current OCLC Work ID if present
if 'P5331' not in work.claims:
claim = pywikibot.Claim(repo, 'P5331')
@@ -668,10 +662,10 @@
# OCLC Work ID does not belong to edition
item.removeClaims(oclcwork, bot=True, summary=transcmt)
else:
- pywikibot.error('OCLC Work ID %s conflicts with OCLC ID %s and no '
+ pywikibot.error('OCLC Work ID {} conflicts with OCLC ID {} and no '
'work available'
- % (item.claims['P5331'][0].getTarget(),
- item.claims['P243'][0].getTarget()))
+ .format(item.claims['P5331'][0].getTarget(),
+ item.claims['P243'][0].getTarget()))
# OCLC work ID should not be registered for editions, only for works
if 'owi' not in isbn_classify:
@@ -680,20 +674,20 @@
# Edition should only have one single work
work = item.claims['P629'][0].getTarget()
if 'P5331' not in work.claims: # Assign the OCLC work ID if missing
- pywikibot.warning('Add OCLC work ID (P5331): %s to work %s'
- % (isbn_classify['owi'], work.getID()))
+ pywikibot.warning('Add OCLC work ID (P5331): {} to work {}'
+ .format(isbn_classify['owi'], work.getID()))
claim = pywikibot.Claim(repo, 'P5331')
claim.setTarget(isbn_classify['owi'])
work.addClaim(claim, bot=True, summary=transcmt)
elif 'P243' in item.claims:
- pywikibot.warning('OCLC Work ID %s ignored because of OCLC ID %s'
- % (isbn_classify['owi'],
- item.claims['P243'][0].getTarget()))
+ pywikibot.warning('OCLC Work ID {} ignored because of OCLC ID {}'
+ .format(isbn_classify['owi'],
+ item.claims['P243'][0].getTarget()))
# Assign the OCLC work ID only if there is no work, and no OCLC ID
# for edition
elif 'P5331' not in item.claims:
- pywikibot.warning('Add OCLC work ID (P5331): %s to edition'
- % (isbn_classify['owi']))
+ pywikibot.warning('Add OCLC work ID (P5331): {} to edition'
+ .format(isbn_classify['owi']))
claim = pywikibot.Claim(repo, 'P5331')
claim.setTarget(isbn_classify['owi'])
item.addClaim(claim, bot=True, summary=transcmt)
@@ -711,16 +705,16 @@
# Library of Congress Classification (works and editions)
if 'lcc' in isbn_classify and 'P8360' not in item.claims:
pywikibot.warning(
- 'Add Library of Congress Classification for edition (P8360): %s'
- % (isbn_classify['lcc']))
+ 'Add Library of Congress Classification for edition (P8360): {}'
+ .format(isbn_classify['lcc']))
claim = pywikibot.Claim(repo, 'P8360')
claim.setTarget(isbn_classify['lcc'])
item.addClaim(claim, bot=True, summary=transcmt)
# Dewey Decimale Classificatie
if 'ddc' in isbn_classify and 'P1036' not in item.claims:
- pywikibot.warning('Add Dewey Decimale Classificatie (P1036): %s'
- % (isbn_classify['ddc']))
+ pywikibot.warning('Add Dewey Decimale Classificatie (P1036): {}'
+ .format(isbn_classify['ddc']))
claim = pywikibot.Claim(repo, 'P1036')
claim.setTarget(isbn_classify['ddc'])
item.addClaim(claim, bot=True, summary=transcmt)
@@ -739,10 +733,10 @@
# Get the main subject
main_subject_query = ("""# Search the main subject
-SELECT ?item WHERE {
- ?item wdt:P2163 "%s".
-}
-""" % (fast_id))
+SELECT ?item WHERE {{
+ ?item wdt:P2163 "{}".
+}}
+""".format(fast_id))
pywikibot.info(main_subject_query)
generator = pg.WikidataSPARQLPageGenerator(main_subject_query,
@@ -754,20 +748,20 @@
qmain_subject = main_subject.getID()
try:
main_subject_label = main_subject.labels[booklang]
- pywikibot.info('Found main subject %s (%s) for Fast ID %s'
- % (main_subject_label, qmain_subject,
- fast_id))
+ pywikibot.info('Found main subject {} ({}) for Fast ID {}'
+ .format(main_subject_label, qmain_subject,
+ fast_id))
except: # noqa B001, E722, H201
main_subject_label = ''
- pywikibot.info('Found main subject (%s) for Fast ID %s'
- % (qmain_subject, fast_id))
- pywikibot.error('Missing label for item %s'
- % qmain_subject)
+ pywikibot.info('Found main subject ({}) for Fast ID {}'
+ .format(qmain_subject, fast_id))
+ pywikibot.error('Missing label for item {}'
+ .format(qmain_subject))
# Create or amend P921 statement
- if rescnt == 0:
- pywikibot.error('Main subject not found for Fast ID %s'
- % (fast_id))
+ if not rescnt:
+ pywikibot.error('Main subject not found for Fast ID {}'
+ .format(fast_id))
elif rescnt == 1:
add_main_subject = True
if 'P921' in item.claims: # Check for duplicates
@@ -777,21 +771,22 @@
break
if add_main_subject:
- pywikibot.warning('Add main subject (P921) %s (%s)'
- % (main_subject_label, qmain_subject))
+ pywikibot.warning('Add main subject (P921) {} ({})'
+ .format(main_subject_label,
+ qmain_subject))
claim = pywikibot.Claim(repo, 'P921')
claim.setTarget(main_subject)
item.addClaim(claim, bot=True, summary=transcmt)
else:
- pywikibot.info('Skipping main subject %s (%s)'
- % (main_subject_label, qmain_subject))
+ pywikibot.info('Skipping main subject {} ({})'
+ .format(main_subject_label, qmain_subject))
else:
- pywikibot.error('Ambiguous main subject for Fast ID %s'
- % (fast_id))
+ pywikibot.error('Ambiguous main subject for Fast ID {}'
+ .format(fast_id))
# Book description
isbn_description = isbnlib.desc(isbn_number)
- if isbn_description != '':
+ if isbn_description:
pywikibot.info()
pywikibot.info(isbn_description)
@@ -858,9 +853,9 @@
'P31' not in targetx[propty].claims
or not is_in_list(targetx[propty].claims['P31'],
propreqinst[propty])):
- pywikibot.critical('%s (%s) is not a language'
- % (targetx[propty].labels[mainlang],
- target[propty]))
+ pywikibot.critical('{} ({}) is not a language'
+ .format(targetx[propty].labels[mainlang],
+ target[propty]))
return
# check dependencies
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/826987
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I9fa8e88a4e93614a1ecda8fcce767d5e2e606ff6
Gerrit-Change-Number: 826987
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Geertivp <geertivp(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/826950 )
Change subject: [IMPR] improvements for create_isbn_edition.py
......................................................................
[IMPR] improvements for create_isbn_edition.py
- capitalize constants
- shorten is_in_list function
- repo and wikidata_site are the same object, drop one of them
- remove logger and use pywikibot.logger functions instead
- use enumerate to count loops
- fix some globals
- use return in main() instead of sys.exit()
- use bare return statements in amend_isbn_edition
because return result is not used
Change-Id: I7415d910bda1fb384b4076df09f899c24e270b56
---
M scripts/create_isbn_edition.py
1 file changed, 99 insertions(+), 107 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/create_isbn_edition.py b/scripts/create_isbn_edition.py
index 2aea6c1..c5fd36e 100644
--- a/scripts/create_isbn_edition.py
+++ b/scripts/create_isbn_edition.py
@@ -280,10 +280,8 @@
#
# Distributed under the terms of the MIT license.
#
-import logging # Error logging
import os # Operating system
import re # Regular expressions (very handy!)
-import sys # System calls
import pywikibot # API interface to Wikidata
from pywikibot import pagegenerators as pg # Wikidata Query interface
@@ -307,9 +305,9 @@
booklib = 'goob' # Default digital library
# ISBN number: 10 or 13 digits with optional dashes (-)
-isbnre = re.compile(r'[0-9-]{10,17}')
-propre = re.compile(r'P[0-9]+') # Wikidata P-number
-qsuffre = re.compile(r'Q[0-9]+') # Wikidata Q-number
+ISBNRE = re.compile(r'[0-9-]{10,17}')
+PROPRE = re.compile(r'P[0-9]+') # Wikidata P-number
+QSUFFRE = re.compile(r'Q[0-9]+') # Wikidata Q-number
# Other statements are added via command line parameters
target = {
@@ -340,11 +338,8 @@
"""
for seq in statement_list:
if seq.getTarget().getID() in checklist:
- isinlist = True
- break
- else:
- isinlist = False
- return isinlist
+ return True
+ return False
def get_item_list(item_name, instance_id):
@@ -364,7 +359,7 @@
'language': mainlang,
'search': item_name, # Get item list from label
}
- request = api.Request(site=wikidata_site, parameters=params)
+ request = api.Request(site=repo, parameters=params)
result = request.submit()
if 'search' in result:
@@ -396,13 +391,12 @@
:param isbn_number: ISBN number (string; 10 or 13 digits with
optional hyphens)
"""
- global logger
global proptyx
global targetx
isbn_number = isbn_number.strip()
if isbn_number == '':
- return 3 # Do nothing when the ISBN number is missing
+ return # Do nothing when the ISBN number is missing
# Validate ISBN data
if verbose:
@@ -410,7 +404,7 @@
try:
isbn_data = isbnlib.meta(isbn_number, service=booklib)
- logger.info(isbn_data)
+ pywikibot.info(isbn_data)
# {'ISBN-13': '9789042925564',
# 'Title': 'De Leuvense Vaart - Van De Vaartkom Tot Wijgmaal. '
# 'Aspecten Uit De Industriele Geschiedenis Van Leuven',
@@ -420,15 +414,15 @@
# 'Language': 'nl'}
except Exception as error:
# When the book is unknown the function returns
- logger.error(error)
+ pywikibot.error(error)
# raise ValueError(error)
- return 3
+ return
if len(isbn_data) < 6:
- logger.error(
+ pywikibot.error(
'Unknown or incomplete digital library registration for %s'
% isbn_number)
- return 3
+ return
# Show the raw results
if verbose:
@@ -445,11 +439,11 @@
if len(lang_list) == 1:
target['P407'] = lang_list[0]
elif len(lang_list) == 0:
- logger.warning('Unknown language %s' % booklang)
- return 3
+ pywikibot.warning('Unknown language %s' % booklang)
+ return
else:
- logger.warning('Ambiguous language %s' % booklang)
- return 3
+ pywikibot.warning('Ambiguous language %s' % booklang)
+ return
# Get formatted ISBN number
isbn_number = isbn_data['ISBN-13'] # Numeric format
@@ -471,12 +465,12 @@
# pywikibot.info book titles
if debug:
- pywikibot.info(objectname, file=sys.stderr)
- pywikibot.info(subtitle, file=sys.stderr) # Optional
+ pywikibot.info(objectname)
+ pywikibot.info(subtitle) # Optional
# print subsequent subtitles, when available
for i in range(2, len(titles)):
# Not stored in Wikidata...
- pywikibot.info(titles[i].strip(), file=sys.stderr)
+ pywikibot.info(titles[i].strip())
# Search the ISBN number in Wikidata both canonical and numeric
# P212 should have canonical hyphenated format
@@ -490,14 +484,14 @@
}
""" % (isbn_fmtd, isbn_number))
- logger.info(isbn_query)
- generator = pg.WikidataSPARQLPageGenerator(isbn_query, site=wikidata_site)
+ pywikibot.info(isbn_query)
+ generator = pg.WikidataSPARQLPageGenerator(isbn_query, site=repo)
+ # Main loop for all DISTINCT items
rescnt = 0
- for item in generator: # Main loop for all DISTINCT items
- rescnt += 1
+ for rescnt, item in enumerate(generator, start=1):
qnumber = item.getID()
- logger.warning('Found item: %s' % qnumber)
+ pywikibot.warning('Found item: %s' % qnumber)
# Create or amend the item
if rescnt == 1:
@@ -508,15 +502,15 @@
item = pywikibot.ItemPage(repo) # Create item
item.editEntity({'labels': label}, summary=transcmt)
qnumber = item.getID()
- logger.warning('Creating item: %s' % qnumber)
+ pywikibot.warning('Creating item: %s' % qnumber)
else:
- logger.critical('Ambiguous ISBN number %s' % isbn_fmtd)
- return 3
+ pywikibot.critical('Ambiguous ISBN number %s' % isbn_fmtd)
+ return
# Add all P/Q values
# Make sure that labels are known in the native language
if debug:
- pywikibot.info(target, file=sys.stderr)
+ pywikibot.info(target)
# Register statements
for propty in target:
@@ -526,12 +520,12 @@
targetx[propty] = pywikibot.ItemPage(repo, target[propty])
try:
- logger.warning('Add %s (%s): %s (%s)'
- % (proptyx[propty].labels[booklang], propty,
- targetx[propty].labels[booklang],
- target[propty]))
+ pywikibot.warning('Add %s (%s): %s (%s)'
+ % (proptyx[propty].labels[booklang], propty,
+ targetx[propty].labels[booklang],
+ target[propty]))
except: # noqa: B001, E722, H201
- logger.warning('Add %s:%s' % (propty, target[propty]))
+ pywikibot.warning('Add %s:%s' % (propty, target[propty]))
claim = pywikibot.Claim(repo, propty)
claim.setTarget(targetx[propty])
@@ -539,14 +533,14 @@
# Set formatted ISBN number
if 'P212' not in item.claims:
- logger.warning('Add ISBN number (P212): %s' % (isbn_fmtd))
+ pywikibot.warning('Add ISBN number (P212): %s' % (isbn_fmtd))
claim = pywikibot.Claim(repo, 'P212')
claim.setTarget(isbn_fmtd)
item.addClaim(claim, bot=True, summary=transcmt)
# Title
if 'P1476' not in item.claims:
- logger.warning('Add Title (P1476): %s' % (objectname))
+ pywikibot.warning('Add Title (P1476): %s' % (objectname))
claim = pywikibot.Claim(repo, 'P1476')
claim.setTarget(pywikibot.WbMonolingualText(text=objectname,
language=booklang))
@@ -554,7 +548,7 @@
# Subtitle
if subtitle != '' and 'P1680' not in item.claims:
- logger.warning('Add Subtitle (P1680): %s' % (subtitle))
+ pywikibot.warning('Add Subtitle (P1680): %s' % (subtitle))
claim = pywikibot.Claim(repo, 'P1680')
claim.setTarget(pywikibot.WbMonolingualText(text=subtitle,
language=booklang))
@@ -563,8 +557,8 @@
# Date of publication
pub_year = isbn_data['Year']
if pub_year != '' and 'P577' not in item.claims:
- logger.warning('Add Year of publication (P577): %s'
- % (isbn_data['Year']))
+ pywikibot.warning('Add Year of publication (P577): %s'
+ % (isbn_data['Year']))
claim = pywikibot.Claim(repo, 'P577')
claim.setTarget(pywikibot.WbTime(year=int(pub_year), precision='year'))
item.addClaim(claim, bot=True, summary=transcmt)
@@ -586,8 +580,9 @@
break
if add_author:
- logger.warning('Add author %d (P50): %s (%s)'
- % (author_cnt, author_name, author_list[0]))
+ pywikibot.warning('Add author %d (P50): %s (%s)'
+ % (author_cnt, author_name,
+ author_list[0]))
claim = pywikibot.Claim(repo, 'P50')
claim.setTarget(pywikibot.ItemPage(repo, author_list[0]))
item.addClaim(claim, bot=True, summary=transcmt)
@@ -596,9 +591,9 @@
qualifier.setTarget(str(author_cnt))
claim.addQualifier(qualifier, summary=transcmt)
elif len(author_list) == 0:
- logger.warning('Unknown author: %s' % author_name)
+ pywikibot.warning('Unknown author: %s' % author_name)
else:
- logger.warning('Ambiguous author: %s' % author_name)
+ pywikibot.warning('Ambiguous author: %s' % author_name)
# Get the publisher
publisher_name = isbn_data['Publisher'].strip()
@@ -608,15 +603,15 @@
if len(publisher_list) == 1:
if 'P123' not in item.claims:
- logger.warning('Add publisher (P123): %s (%s)'
- % (publisher_name, publisher_list[0]))
+ pywikibot.warning('Add publisher (P123): %s (%s)'
+ % (publisher_name, publisher_list[0]))
claim = pywikibot.Claim(repo, 'P123')
claim.setTarget(pywikibot.ItemPage(repo, publisher_list[0]))
item.addClaim(claim, bot=True, summary=transcmt)
elif len(publisher_list) == 0:
- logger.warning('Unknown publisher: %s' % publisher_name)
+ pywikibot.warning('Unknown publisher: %s' % publisher_name)
else:
- logger.warning('Ambiguous publisher: %s' % publisher_name)
+ pywikibot.warning('Ambiguous publisher: %s' % publisher_name)
# Get addional data from the digital library
isbn_cover = isbnlib.cover(isbn_number)
@@ -638,7 +633,7 @@
isbn_classify = isbnlib.classify(isbn_number)
if debug:
for i in isbn_classify:
- pywikibot.info('%s:\t%s' % (i, isbn_classify[i]), file=sys.stderr)
+ pywikibot.info('%s:\t%s' % (i, isbn_classify[i]))
# ./create_isbn_edition.py '978-3-8376-5645-9' - de P407 Q188
# Q113460204
@@ -649,7 +644,7 @@
# Set the OCLC ID
if 'oclc' in isbn_classify and 'P243' not in item.claims:
- logger.warning('Add OCLC ID (P243): %s' % (isbn_classify['oclc']))
+ pywikibot.warning('Add OCLC ID (P243): %s' % (isbn_classify['oclc']))
claim = pywikibot.Claim(repo, 'P243')
claim.setTarget(isbn_classify['oclc'])
item.addClaim(claim, bot=True, summary=transcmt)
@@ -663,8 +658,8 @@
# Edition should belong to only one single work
work = item.claims['P629'][0].getTarget()
# There doesn't exist a moveClaim method?
- logger.warning('Move OCLC Work ID %s to work %s'
- % (oclcworkid, work.getID()))
+ pywikibot.warning('Move OCLC Work ID %s to work %s'
+ % (oclcworkid, work.getID()))
# Keep current OCLC Work ID if present
if 'P5331' not in work.claims:
claim = pywikibot.Claim(repo, 'P5331')
@@ -673,10 +668,10 @@
# OCLC Work ID does not belong to edition
item.removeClaims(oclcwork, bot=True, summary=transcmt)
else:
- logger.error('OCLC Work ID %s conflicts with OCLC ID %s and no '
- 'work available'
- % (item.claims['P5331'][0].getTarget(),
- item.claims['P243'][0].getTarget()))
+ pywikibot.error('OCLC Work ID %s conflicts with OCLC ID %s and no '
+ 'work available'
+ % (item.claims['P5331'][0].getTarget(),
+ item.claims['P243'][0].getTarget()))
# OCLC work ID should not be registered for editions, only for works
if 'owi' not in isbn_classify:
@@ -685,20 +680,20 @@
# Edition should only have one single work
work = item.claims['P629'][0].getTarget()
if 'P5331' not in work.claims: # Assign the OCLC work ID if missing
- logger.warning('Add OCLC work ID (P5331): %s to work %s'
- % (isbn_classify['owi'], work.getID()))
+ pywikibot.warning('Add OCLC work ID (P5331): %s to work %s'
+ % (isbn_classify['owi'], work.getID()))
claim = pywikibot.Claim(repo, 'P5331')
claim.setTarget(isbn_classify['owi'])
work.addClaim(claim, bot=True, summary=transcmt)
elif 'P243' in item.claims:
- logger.warning('OCLC Work ID %s ignored because of OCLC ID %s'
- % (isbn_classify['owi'],
- item.claims['P243'][0].getTarget()))
+ pywikibot.warning('OCLC Work ID %s ignored because of OCLC ID %s'
+ % (isbn_classify['owi'],
+ item.claims['P243'][0].getTarget()))
# Assign the OCLC work ID only if there is no work, and no OCLC ID
# for edition
elif 'P5331' not in item.claims:
- logger.warning('Add OCLC work ID (P5331): %s to edition'
- % (isbn_classify['owi']))
+ pywikibot.warning('Add OCLC work ID (P5331): %s to edition'
+ % (isbn_classify['owi']))
claim = pywikibot.Claim(repo, 'P5331')
claim.setTarget(isbn_classify['owi'])
item.addClaim(claim, bot=True, summary=transcmt)
@@ -715,7 +710,7 @@
# Library of Congress Classification (works and editions)
if 'lcc' in isbn_classify and 'P8360' not in item.claims:
- logger.warning(
+ pywikibot.warning(
'Add Library of Congress Classification for edition (P8360): %s'
% (isbn_classify['lcc']))
claim = pywikibot.Claim(repo, 'P8360')
@@ -724,8 +719,8 @@
# Dewey Decimale Classificatie
if 'ddc' in isbn_classify and 'P1036' not in item.claims:
- logger.warning('Add Dewey Decimale Classificatie (P1036): %s'
- % (isbn_classify['ddc']))
+ pywikibot.warning('Add Dewey Decimale Classificatie (P1036): %s'
+ % (isbn_classify['ddc']))
claim = pywikibot.Claim(repo, 'P1036')
claim.setTarget(isbn_classify['ddc'])
item.addClaim(claim, bot=True, summary=transcmt)
@@ -749,29 +744,30 @@
}
""" % (fast_id))
- logger.info(main_subject_query)
+ pywikibot.info(main_subject_query)
generator = pg.WikidataSPARQLPageGenerator(main_subject_query,
- site=wikidata_site)
+ site=repo)
+ # Main loop for all DISTINCT items
rescnt = 0
- for main_subject in generator: # Main loop for all DISTINCT items
- rescnt += 1
+ for rescnt, main_subject in enumerate(generator, start=1):
qmain_subject = main_subject.getID()
try:
main_subject_label = main_subject.labels[booklang]
- logger.info('Found main subject %s (%s) for Fast ID %s'
- % (main_subject_label, qmain_subject, fast_id))
+ pywikibot.info('Found main subject %s (%s) for Fast ID %s'
+ % (main_subject_label, qmain_subject,
+ fast_id))
except: # noqa B001, E722, H201
main_subject_label = ''
- logger.info('Found main subject (%s) for Fast ID %s'
- % (qmain_subject, fast_id))
- logger.error('Missing label for item %s'
- % qmain_subject)
+ pywikibot.info('Found main subject (%s) for Fast ID %s'
+ % (qmain_subject, fast_id))
+ pywikibot.error('Missing label for item %s'
+ % qmain_subject)
# Create or amend P921 statement
if rescnt == 0:
- logger.error('Main subject not found for Fast ID %s'
- % (fast_id))
+ pywikibot.error('Main subject not found for Fast ID %s'
+ % (fast_id))
elif rescnt == 1:
add_main_subject = True
if 'P921' in item.claims: # Check for duplicates
@@ -781,17 +777,17 @@
break
if add_main_subject:
- logger.warning('Add main subject (P921) %s (%s)'
- % (main_subject_label, qmain_subject))
+ pywikibot.warning('Add main subject (P921) %s (%s)'
+ % (main_subject_label, qmain_subject))
claim = pywikibot.Claim(repo, 'P921')
claim.setTarget(main_subject)
item.addClaim(claim, bot=True, summary=transcmt)
else:
- logger.info('Skipping main subject %s (%s)'
- % (main_subject_label, qmain_subject))
+ pywikibot.info('Skipping main subject %s (%s)'
+ % (main_subject_label, qmain_subject))
else:
- logger.error('Ambiguous main subject for Fast ID %s'
- % (fast_id))
+ pywikibot.error('Ambiguous main subject for Fast ID %s'
+ % (fast_id))
# Book description
isbn_description = isbnlib.desc(isbn_number)
@@ -800,15 +796,14 @@
pywikibot.info(isbn_description)
# Currently does not work (service not available)
+ pywikibot.warning('BibTex unavailable')
+ return
+
try:
- logger.warning('BibTex unavailable')
- return 0
bibtex_metadata = isbnlib.doi2tex(isbn_doi)
pywikibot.info(bibtex_metadata)
except Exception as error:
- logger.error(error) # Data not available
-
- return 0
+ pywikibot.error(error) # Data not available
def main(*args: str) -> None:
@@ -819,21 +814,18 @@
:param args: command line arguments
"""
- # Error logging
- global logger
+ global booklib
+ global mainlang
global repo
+ global proptyx
global targetx
- global wikidata_site
-
- logger = logging.getLogger('create_isbn_edition')
# Get optional parameters
local_args = pywikibot.handle_args(*args)
# Login to Wikibase instance
- wikidata_site = pywikibot.Site('wikidata')
# Required for wikidata object access (item, property, statement)
- repo = wikidata_site.data_repository()
+ repo = pywikibot.Site('wikidata')
# Get the digital library
if local_args:
@@ -849,8 +841,8 @@
# Get additional P/Q parameters
while local_args:
- inpar = propre.findall(local_args.pop(0).upper())[0]
- target[inpar] = qsuffre.findall(local_args(0).upper())[0]
+ inpar = PROPRE.findall(local_args.pop(0).upper())[0]
+ target[inpar] = QSUFFRE.findall(local_args(0).upper())[0]
# Validate P/Q list
proptyx = {}
@@ -866,10 +858,10 @@
'P31' not in targetx[propty].claims
or not is_in_list(targetx[propty].claims['P31'],
propreqinst[propty])):
- logger.critical('%s (%s) is not a language'
- % (targetx[propty].labels[mainlang],
- target[propty]))
- sys.exit(12)
+ pywikibot.critical('%s (%s) is not a language'
+ % (targetx[propty].labels[mainlang],
+ target[propty]))
+ return
# check dependencies
for module in (isbnlib, unidecode):
@@ -881,7 +873,7 @@
# containing ISBN numbers
inputfile = pywikibot.input('Get list of item numbers')
# Extract all ISBN numbers
- itemlist = sorted(set(isbnre.findall(inputfile)))
+ itemlist = sorted(set(ISBNRE.findall(inputfile)))
for isbn_number in itemlist: # Process the next edition
amend_isbn_edition(isbn_number)
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/826950
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I7415d910bda1fb384b4076df09f899c24e270b56
Gerrit-Change-Number: 826950
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Geertivp <geertivp(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/826937 )
Change subject: [pep8] PEP8 changes for create_isbn_edition.py
......................................................................
[pep8] PEP8 changes for create_isbn_edition.py
- Code style issues
- clear trailing white space
- untabify file
- keep lines below 80 chars
- update function documentation and parameter list
- update shebang
- script documentation is in __doc__
- replace print statements by pywikibot.info
- add main() function, mostly needed for windows and for script tests
- add pywikibot.handle_args to handle global options and test -help
- add isbnlib dependency
- lazy import isbnlib and unidecode
- replace sys.stdin.read by pywikibot.input to show a input message
- create wikidata_site and repo after global args are read to prevent
site warning
Change-Id: I6917ec9b511db609c2f1828486b9a53998d1e376
---
M scripts/create_isbn_edition.py
M setup.py
M tests/script_tests.py
M tox.ini
4 files changed, 317 insertions(+), 212 deletions(-)
Approvals:
jenkins-bot: Verified
Xqt: Looks good to me, approved
diff --git a/scripts/create_isbn_edition.py b/scripts/create_isbn_edition.py
index ee6b18c..2aea6c1 100644
--- a/scripts/create_isbn_edition.py
+++ b/scripts/create_isbn_edition.py
@@ -1,15 +1,15 @@
-#!/home/geertivp/pwb/bin/python3
-
-codedoc = """
-Pywikibot client to load ISBN related data into Wikidata
+#!/usr/bin/python3
+"""Pywikibot script to load ISBN related data into Wikidata.
Pywikibot script to get ISBN data from a digital library,
and create or amend the related Wikidata item for edition
(with the P212=ISBN number as unique external ID).
-Use digital libraries to get ISBN data in JSON format, and integrate the results into Wikidata.
+Use digital libraries to get ISBN data in JSON format, and integrate the
+results into Wikidata.
-Then the resulting item number can be used e.g. to generate Wikipedia references using template Cite_Q.
+Then the resulting item number can be used e.g. to generate Wikipedia
+references using template Cite_Q.
Parameters:
@@ -34,39 +34,49 @@
Default LANG; e.g. en, nl, fr, de, es, it, etc.
P3 P4...: P/Q pairs to add additional claims (repeated)
- e.g. P921 Q107643461 (main subject: database management linked to P2163 Fast ID)
+ e.g. P921 Q107643461 (main subject: database
+ management linked to P2163 Fast ID)
stdin: ISBN numbers (International standard book number)
- Free text (e.g. Wikipedia references list, or publication list) is accepted.
- Identification is done via an ISBN regex expression.
+ Free text (e.g. Wikipedia references list, or publication list)
+ is accepted. Identification is done via an ISBN regex expression.
Functionality:
- * The ISBN number is used as a primary key (P212 where no duplicates are allowed)
- The item update is not performed when there is no unique match
- * Statements are added or merged incrementally; existing data is not overwritten.
- * Authors and publishers are searched to get their item number (ambiguous items are skipped)
+ * The ISBN number is used as a primary key (P212 where no duplicates
+ are allowed. The item update is not performed when there is no
+ unique match
+ * Statements are added or merged incrementally; existing data is not
+ overwritten.
+ * Authors and publishers are searched to get their item number
+ (ambiguous items are skipped)
* Book title and subtitle are separated with '.', ':', or '-'
* This script can be run incrementally with the same parameters
- Caveat: Take into account the Wikidata Query database replication delay.
- Wait for minimum 5 minutes to avoid creating duplicate objects.
+ Caveat: Take into account the Wikidata Query database
+ replication delay. Wait for minimum 5 minutes to avoid creating
+ duplicate objects.
Data quality:
- * Use https://query.wikidata.org/querybuilder/ to identify P212 duplicates
- Merge duplicate items before running the script again.
+ * Use https://query.wikidata.org/querybuilder/ to identify P212
+ duplicates. Merge duplicate items before running the script
+ again.
* The following properties should only be used for written works
P5331: OCLC work ID (editions should only have P243)
- P8383: Goodreads-identificatiecode for work (editions should only have P2969)
+ P8383: Goodreads-identificatiecode for work (editions should
+ only have P2969)
Examples:
- # Default library (Google Books), language (LANG), no additional statements
+ # Default library (Google Books), language (LANG), no additional
+ statements
+
./create_isbn_edition.py
9789042925564
# Wikimedia, language Dutch, main subject: database management
+
./create_isbn_edition.py wiki en P921 Q107643461
978-0-596-10089-6
@@ -109,10 +119,11 @@
P1036: Dewey Decimal Classification
P2163: Fast ID (inverse lookup via Wikidata Query) -> P921: main subject
P2969: Goodreads-identificatiecode
-
+
(only for written works)
P5331: OCLC work ID (editions should only have P243)
- P8383: Goodreads-identificatiecode for work (editions should only have P2969)
+ P8383: Goodreads-identificatiecode for work (editions should only
+ have P2969)
Author:
@@ -154,7 +165,7 @@
https://pypi.org/search/?q=isbnlib_
pip install isbnlib (mandatory)
-
+
(optional)
pip install isbnlib-bol
pip install isbnlib-bnf
@@ -169,24 +180,32 @@
* Better use the ISO 639-1 language code parameter as a default
The language code is not always available from the digital library.
* SPARQL queries run on a replicated database
- Possible important replication delay; wait 5 minutes before retry -- otherwise risk for creating duplicates.
+ Possible important replication delay; wait 5 minutes before retry
+ -- otherwise risk for creating duplicates.
Known problems:
* Unknown ISBN, e.g. 9789400012820
- * No ISBN data available for an edition either causes no output (goob = Google Books), or an error message (wiki, openl)
+ * No ISBN data available for an edition either causes no output
+ (goob = Google Books), or an error message (wiki, openl)
The script is taking care of both
* Only 6 ISBN attributes are listed by the webservice(s)
missing are e.g.: place of publication, number of pages
- * Not all ISBN atttributes have data (authos, publisher, date of publication, language)
- * The script uses multiple webservice calls (script might take time, but it is automated)
- * Need to amend ISBN items that have no author, publisher, or other required data (which additional services to use?)
+    * Not all ISBN attributes have data (authors, publisher, date of
+ publication, language)
+ * The script uses multiple webservice calls (script might take time,
+ but it is automated)
+ * Need to amend ISBN items that have no author, publisher, or other
+ required data (which additional services to use?)
* How to add still more digital libraries?
- * Does the KBR has a public ISBN service (Koninklijke Bibliotheek van België)?
- * Filter for work properties -- need to amend Q47461344 (written work) instance and P629 (edition of) + P747 (has edition) statements
- https://www.wikidata.org/wiki/Q63413107
+    * Does the KBR have a public ISBN service (Koninklijke
+ Bibliotheek van België)?
+ * Filter for work properties -- need to amend Q47461344 (written
+ work) instance and P629 (edition of) + P747 (has edition)
+ statements https://www.wikidata.org/wiki/Q63413107
['9781282557246', '9786612557248', '9781847196057', '9781847196040']
- P8383: Goodreads-identificatiecode voor work 13957943 (should have P2969)
+ P8383: Goodreads-identificatiecode voor work 13957943 (should
+ have P2969)
P5331: OCLC-identificatiecode voor work 793965595 (should have P243)
To do:
@@ -205,7 +224,7 @@
Environment:
The python script can run on the following platforms:
-
+
Linux client
Google Chromebook (Linux container)
Toolforge Portal
@@ -238,7 +257,7 @@
Related projects:
https://phabricator.wikimedia.org/T314942 (this script)
-
+
(other projects)
https://phabricator.wikimedia.org/T282719
https://phabricator.wikimedia.org/T214802
@@ -254,64 +273,71 @@
https://en.wikipedia.org/wiki/bibliographic_database
https://www.titelbank.nl/pls/ttb/f?p=103:4012:::NO::P4012_TTEL_ID:3496019&c…
+.. versionadded:: 7.7
"""
-
+#
+# (C) Pywikibot team, 2022
+#
+# Distributed under the terms of the MIT license.
+#
import logging # Error logging
import os # Operating system
-import re # Regular expressions (very handy!)
+import re # Regular expressions (very handy!)
import sys # System calls
-import unidecode # Unicode
-import pywikibot # API interface to Wikidata
-
-from isbnlib import * # ISBN data
-from pywikibot import pagegenerators as pg # Wikidata Query interface
+import pywikibot # API interface to Wikidata
+from pywikibot import pagegenerators as pg # Wikidata Query interface
+from pywikibot.backports import List
from pywikibot.data import api
+try:
+ import isbnlib
+except ImportError as e:
+ isbnlib = e
+
+try:
+ from unidecode import unidecode
+except ImportError as e:
+ unidecode = e
+
# Initialisation
debug = True # Show debugging information
verbose = True # Verbose mode
booklib = 'goob' # Default digital library
-isbnre = re.compile(r'[0-9-]{10,17}') # ISBN number: 10 or 13 digits with optional dashes (-)
+
+# ISBN number: 10 or 13 digits with optional dashes (-)
+isbnre = re.compile(r'[0-9-]{10,17}')
propre = re.compile(r'P[0-9]+') # Wikidata P-number
qsuffre = re.compile(r'Q[0-9]+') # Wikidata Q-number
# Other statements are added via command line parameters
target = {
-'P31':'Q3331189', # Is an instance of an edition
+ 'P31': 'Q3331189', # Is an instance of an edition
}
# Statement property and instance validation rules
propreqinst = {
-'P50':'Q5', # Author requires human
-'P123':{'Q2085381', 'Q1114515', 'Q1320047'},# Publisher requires publisher
-'P407':{'Q34770', 'Q33742', 'Q1288568'}, # Edition language requires at least one of (living, natural) language
+ 'P50': 'Q5', # Author requires human
+ # Publisher requires publisher
+ 'P123': {'Q2085381', 'Q1114515', 'Q1320047'},
+ # Edition language requires at least one of (living, natural) language
+ 'P407': {'Q34770', 'Q33742', 'Q1288568'},
}
mainlang = os.getenv('LANG', 'en')[:2] # Default description language
# Connect to database
-transcmt = '#pwb Create ISBN edition' # Wikidata transaction comment
-wikidata_site = pywikibot.Site('wikidata', 'wikidata') # Login to Wikibase instance
-repo = wikidata_site.data_repository() # Required for wikidata object access (item, property, statement)
+transcmt = '#pwb Create ISBN edition' # Wikidata transaction comment
-def is_in_list(statement_list, checklist):
+def is_in_list(statement_list, checklist: List[str]) -> bool:
+ """Verify if statement list contains at least one item from the checklist.
+
+ :param statement_list: Statement list
+ :param checklist: List of values
+ :Returns: True when match
"""
-Verify if statement list contains at least one item from the checklist
-
-Parameters:
-
- statement_list: Statement list
-
- checklist: List of values (string)
-
-Returns:
-
- Boolean (True when match)
- """
-
for seq in statement_list:
if seq.getTarget().getID() in checklist:
isinlist = True
@@ -322,84 +348,92 @@
def get_item_list(item_name, instance_id):
+ """Get list of items by name, belonging to an instance (list).
+
+ :param item_name: Item name (string; case sensitive)
+ :param instance_id: Instance ID (string, set, or list)
+ :Returns: Set of items (Q-numbers)
"""
-Get list of items by name, belonging to an instance (list)
-
-Parameters:
-
- item_name: Item name (string; case sensitive)
-
- instance_id: Instance ID (string, set, or list)
-
-Returns:
-
- Set of items (Q-numbers)
- """
-
item_list = set() # Empty set
- params = {'action': 'wbsearchentities', 'format': 'json', 'type': 'item', 'strictlanguage': False,
- 'language': mainlang, # All languages are searched, but labels are in native language
- 'search': item_name} # Get item list from label
+ params = {
+ 'action': 'wbsearchentities',
+ 'format': 'json',
+ 'type': 'item',
+ 'strictlanguage': False,
+ # All languages are searched, but labels are in native language
+ 'language': mainlang,
+ 'search': item_name, # Get item list from label
+ }
request = api.Request(site=wikidata_site, parameters=params)
result = request.submit()
if 'search' in result:
for res in result['search']:
item = pywikibot.ItemPage(repo, res['id'])
- item.get(get_redirect = True)
+ item.get(get_redirect=True)
if 'P31' in item.claims:
- for seq in item.claims['P31']: # Loop through instances
- if seq.getTarget().getID() in instance_id: # Matching instance
- for lang in item.labels: # Search all languages
- if unidecode.unidecode(item_name.lower()) == unidecode.unidecode(item.labels[lang].lower()): # Ignore label case and accents
- item_list.add(item.getID()) # Label math
+ for seq in item.claims['P31']: # Loop through instances
+ # Matching instance
+ if seq.getTarget().getID() in instance_id:
+ for lang in item.labels: # Search all languages
+ # Ignore label case and accents
+ if (unidecode(item_name.lower())
+ == unidecode(item.labels[lang].lower())):
+                        item_list.add(item.getID())  # Label match
for lang in item.aliases:
- if item_name in item.aliases[lang]: # Case sensitive for aliases
- item_list.add(item.getID()) # Alias match
+ # Case sensitive for aliases
+ if item_name in item.aliases[lang]:
+ item_list.add(item.getID()) # Alias match
return item_list
-def amend_isbn_edition(isbn_number):
- """
-Amend ISBN registration.
-
-Parameters:
-
- isbn_number: ISBN number (string; 10 or 13 digits with optional hyphens)
-
-Result:
+def amend_isbn_edition(isbn_number): # noqa: C901
+ """Amend ISBN registration.
Amend Wikidata, by registering the ISBN-13 data via P212,
depending on the data obtained from the digital library.
+
+ :param isbn_number: ISBN number (string; 10 or 13 digits with
+ optional hyphens)
"""
+ global logger
global proptyx
+ global targetx
isbn_number = isbn_number.strip()
if isbn_number == '':
- return 3 # Do nothing when the ISBN number is missing
-
+ return 3 # Do nothing when the ISBN number is missing
+
# Validate ISBN data
if verbose:
- print()
+ pywikibot.info()
try:
- isbn_data = meta(isbn_number, service=booklib)
+ isbn_data = isbnlib.meta(isbn_number, service=booklib)
logger.info(isbn_data)
- # {'ISBN-13': '9789042925564', 'Title': 'De Leuvense Vaart - Van De Vaartkom Tot Wijgmaal. Aspecten Uit De Industriele Geschiedenis Van Leuven', 'Authors': ['A. Cresens'], 'Publisher': 'Peeters Pub & Booksellers', 'Year': '2012', 'Language': 'nl'}
+ # {'ISBN-13': '9789042925564',
+ # 'Title': 'De Leuvense Vaart - Van De Vaartkom Tot Wijgmaal. '
+ # 'Aspecten Uit De Industriele Geschiedenis Van Leuven',
+ # 'Authors': ['A. Cresens'],
+ # 'Publisher': 'Peeters Pub & Booksellers',
+ # 'Year': '2012',
+ # 'Language': 'nl'}
except Exception as error:
# When the book is unknown the function returns
logger.error(error)
- #raise ValueError(error)
+ # raise ValueError(error)
return 3
if len(isbn_data) < 6:
- logger.error('Unknown or incomplete digital library registration for %s' % isbn_number)
+ logger.error(
+ 'Unknown or incomplete digital library registration for %s'
+ % isbn_number)
return 3
# Show the raw results
if verbose:
for i in isbn_data:
- print('%s:\t%s' % (i, isbn_data[i]))
+ pywikibot.info('%s:\t%s' % (i, isbn_data[i]))
# Get the book language from the ISBN book reference
booklang = mainlang # Default language
@@ -419,10 +453,10 @@
# Get formatted ISBN number
isbn_number = isbn_data['ISBN-13'] # Numeric format
- isbn_fmtd = mask(isbn_number) # Canonical format
+ isbn_fmtd = isbnlib.mask(isbn_number) # Canonical format
if verbose:
- print()
- print(isbn_fmtd) # First one
+ pywikibot.info()
+ pywikibot.info(isbn_fmtd) # First one
# Get (sub)title when there is a dot
titles = isbn_data['Title'].split('. ') # goob is using a '.'
@@ -435,14 +469,17 @@
if len(titles) > 1:
subtitle = titles[1].strip()
- # Print book titles
+ # pywikibot.info book titles
if debug:
- print(objectname, file=sys.stderr)
- print(subtitle, file=sys.stderr) # Optional
- for i in range(2,len(titles)): # Print subsequent subtitles, when available
- print(titles[i].strip(), file=sys.stderr) # Not stored in Wikidata...
+ pywikibot.info(objectname, file=sys.stderr)
+ pywikibot.info(subtitle, file=sys.stderr) # Optional
+ # print subsequent subtitles, when available
+ for i in range(2, len(titles)):
+ # Not stored in Wikidata...
+ pywikibot.info(titles[i].strip(), file=sys.stderr)
# Search the ISBN number in Wikidata both canonical and numeric
+ # P212 should have canonical hyphenated format
isbn_query = ("""# Get ISBN number
SELECT ?item WHERE {
VALUES ?isbn_number {
@@ -451,13 +488,13 @@
}
?item wdt:P212 ?isbn_number.
}
-""" % (isbn_fmtd, isbn_number)) # P212 should have canonical hyphenated format
+""" % (isbn_fmtd, isbn_number))
logger.info(isbn_query)
generator = pg.WikidataSPARQLPageGenerator(isbn_query, site=wikidata_site)
rescnt = 0
- for item in generator: # Main loop for all DISTINCT items
+ for item in generator: # Main loop for all DISTINCT items
rescnt += 1
qnumber = item.getID()
logger.warning('Found item: %s' % qnumber)
@@ -479,7 +516,7 @@
# Add all P/Q values
# Make sure that labels are known in the native language
if debug:
- print(target, file=sys.stderr)
+ pywikibot.info(target, file=sys.stderr)
# Register statements
for propty in target:
@@ -489,8 +526,11 @@
targetx[propty] = pywikibot.ItemPage(repo, target[propty])
try:
- logger.warning('Add %s (%s): %s (%s)' % (proptyx[propty].labels[booklang], propty, targetx[propty].labels[booklang], target[propty]))
- except:
+ logger.warning('Add %s (%s): %s (%s)'
+ % (proptyx[propty].labels[booklang], propty,
+ targetx[propty].labels[booklang],
+ target[propty]))
+ except: # noqa: B001, E722, H201
logger.warning('Add %s:%s' % (propty, target[propty]))
claim = pywikibot.Claim(repo, propty)
@@ -508,20 +548,23 @@
if 'P1476' not in item.claims:
logger.warning('Add Title (P1476): %s' % (objectname))
claim = pywikibot.Claim(repo, 'P1476')
- claim.setTarget(pywikibot.WbMonolingualText(text=objectname, language=booklang))
+ claim.setTarget(pywikibot.WbMonolingualText(text=objectname,
+ language=booklang))
item.addClaim(claim, bot=True, summary=transcmt)
# Subtitle
if subtitle != '' and 'P1680' not in item.claims:
logger.warning('Add Subtitle (P1680): %s' % (subtitle))
claim = pywikibot.Claim(repo, 'P1680')
- claim.setTarget(pywikibot.WbMonolingualText(text=subtitle, language=booklang))
+ claim.setTarget(pywikibot.WbMonolingualText(text=subtitle,
+ language=booklang))
item.addClaim(claim, bot=True, summary=transcmt)
# Date of publication
pub_year = isbn_data['Year']
if pub_year != '' and 'P577' not in item.claims:
- logger.warning('Add Year of publication (P577): %s' % (isbn_data['Year']))
+ logger.warning('Add Year of publication (P577): %s'
+ % (isbn_data['Year']))
claim = pywikibot.Claim(repo, 'P577')
claim.setTarget(pywikibot.WbTime(year=int(pub_year), precision='year'))
item.addClaim(claim, bot=True, summary=transcmt)
@@ -543,7 +586,8 @@
break
if add_author:
- logger.warning('Add author %d (P50): %s (%s)' % (author_cnt, author_name, author_list[0]))
+ logger.warning('Add author %d (P50): %s (%s)'
+ % (author_cnt, author_name, author_list[0]))
claim = pywikibot.Claim(repo, 'P50')
claim.setTarget(pywikibot.ItemPage(repo, author_list[0]))
item.addClaim(claim, bot=True, summary=transcmt)
@@ -559,11 +603,13 @@
# Get the publisher
publisher_name = isbn_data['Publisher'].strip()
if publisher_name != '':
- publisher_list = list(get_item_list(publisher_name, propreqinst['P123']))
+ publisher_list = list(get_item_list(publisher_name,
+ propreqinst['P123']))
if len(publisher_list) == 1:
if 'P123' not in item.claims:
- logger.warning('Add publisher (P123): %s (%s)' % (publisher_name, publisher_list[0]))
+ logger.warning('Add publisher (P123): %s (%s)'
+ % (publisher_name, publisher_list[0]))
claim = pywikibot.Claim(repo, 'P123')
claim.setTarget(pywikibot.ItemPage(repo, publisher_list[0]))
item.addClaim(claim, bot=True, summary=transcmt)
@@ -573,30 +619,33 @@
logger.warning('Ambiguous publisher: %s' % publisher_name)
# Get addional data from the digital library
- isbn_cover = cover(isbn_number)
- isbn_editions = editions(isbn_number, service='merge')
- isbn_doi = doi(isbn_number)
- isbn_info = info(isbn_number)
+ isbn_cover = isbnlib.cover(isbn_number)
+ isbn_editions = isbnlib.editions(isbn_number, service='merge')
+ isbn_doi = isbnlib.doi(isbn_number)
+ isbn_info = isbnlib.info(isbn_number)
if verbose:
- print()
- print(isbn_info)
- print(isbn_doi)
- print(isbn_editions)
+ pywikibot.info()
+ pywikibot.info(isbn_info)
+ pywikibot.info(isbn_doi)
+ pywikibot.info(isbn_editions)
# Book cover images
for i in isbn_cover:
- print('%s:\t%s' % (i, isbn_cover[i]))
+ pywikibot.info('%s:\t%s' % (i, isbn_cover[i]))
# Handle ISBN classification
- isbn_classify = classify(isbn_number)
+ isbn_classify = isbnlib.classify(isbn_number)
if debug:
for i in isbn_classify:
- print('%s:\t%s' % (i, isbn_classify[i]), file=sys.stderr)
+ pywikibot.info('%s:\t%s' % (i, isbn_classify[i]), file=sys.stderr)
# ./create_isbn_edition.py '978-3-8376-5645-9' - de P407 Q188
# Q113460204
- # {'owi': '11103651812', 'oclc': '1260160983', 'lcc': 'TK5105.8882', 'ddc': '300', 'fast': {'1175035': 'Wikis (Computer science)', '1795979': 'Wikipedia', '1122877': 'Social sciences'}}
+ # {'owi': '11103651812', 'oclc': '1260160983', 'lcc': 'TK5105.8882',
+ # 'ddc': '300', 'fast': {'1175035': 'Wikis (Computer science)',
+ # '1795979': 'Wikipedia',
+ # '1122877': 'Social sciences'}}
# Set the OCLC ID
if 'oclc' in isbn_classify and 'P243' not in item.claims:
@@ -608,54 +657,75 @@
# OCLC ID and OCLC work ID should not be both assigned
if 'P243' in item.claims and 'P5331' in item.claims:
if 'P629' in item.claims:
- oclcwork = item.claims['P5331'][0] # OCLC Work should be unique
- oclcworkid = oclcwork.getTarget() # Get the OCLC Work ID from the edition
- work = item.claims['P629'][0].getTarget() # Edition should belong to only one single work
- logger.warning('Move OCLC Work ID %s to work %s' % (oclcworkid, work.getID())) # There doesn't exist a moveClaim method?
- if 'P5331' not in work.claims: # Keep current OCLC Work ID if present
+ oclcwork = item.claims['P5331'][0] # OCLC Work should be unique
+ # Get the OCLC Work ID from the edition
+ oclcworkid = oclcwork.getTarget()
+ # Edition should belong to only one single work
+ work = item.claims['P629'][0].getTarget()
+ # There doesn't exist a moveClaim method?
+ logger.warning('Move OCLC Work ID %s to work %s'
+ % (oclcworkid, work.getID()))
+ # Keep current OCLC Work ID if present
+ if 'P5331' not in work.claims:
claim = pywikibot.Claim(repo, 'P5331')
claim.setTarget(oclcworkid)
work.addClaim(claim, bot=True, summary=transcmt)
- item.removeClaims(oclcwork, bot=True, summary=transcmt) # OCLC Work ID does not belong to edition
+ # OCLC Work ID does not belong to edition
+ item.removeClaims(oclcwork, bot=True, summary=transcmt)
else:
- logger.error('OCLC Work ID %s conflicts with OCLC ID %s and no work available' % (item.claims['P5331'][0].getTarget(), item.claims['P243'][0].getTarget()))
+ logger.error('OCLC Work ID %s conflicts with OCLC ID %s and no '
+ 'work available'
+ % (item.claims['P5331'][0].getTarget(),
+ item.claims['P243'][0].getTarget()))
# OCLC work ID should not be registered for editions, only for works
if 'owi' not in isbn_classify:
pass
- elif 'P629' in item.claims: # Get the work related to the edition
- work = item.claims['P629'][0].getTarget() # Edition should only have one single work
- if 'P5331' not in work.claims: # Assign the OCLC work ID if missing
- logger.warning('Add OCLC work ID (P5331): %s to work %s' % (isbn_classify['owi'], work.getID()))
+ elif 'P629' in item.claims: # Get the work related to the edition
+ # Edition should only have one single work
+ work = item.claims['P629'][0].getTarget()
+ if 'P5331' not in work.claims: # Assign the OCLC work ID if missing
+ logger.warning('Add OCLC work ID (P5331): %s to work %s'
+ % (isbn_classify['owi'], work.getID()))
claim = pywikibot.Claim(repo, 'P5331')
claim.setTarget(isbn_classify['owi'])
work.addClaim(claim, bot=True, summary=transcmt)
elif 'P243' in item.claims:
- logger.warning('OCLC Work ID %s ignored because of OCLC ID %s' % (isbn_classify['owi'], item.claims['P243'][0].getTarget()))
- elif 'P5331' not in item.claims: # Assign the OCLC work ID only if there is no work, and no OCLC ID for edition
- logger.warning('Add OCLC work ID (P5331): %s to edition' % (isbn_classify['owi']))
+ logger.warning('OCLC Work ID %s ignored because of OCLC ID %s'
+ % (isbn_classify['owi'],
+ item.claims['P243'][0].getTarget()))
+ # Assign the OCLC work ID only if there is no work, and no OCLC ID
+ # for edition
+ elif 'P5331' not in item.claims:
+ logger.warning('Add OCLC work ID (P5331): %s to edition'
+ % (isbn_classify['owi']))
claim = pywikibot.Claim(repo, 'P5331')
claim.setTarget(isbn_classify['owi'])
item.addClaim(claim, bot=True, summary=transcmt)
- # Reverse logic for moving OCLC ID and P212 (ISBN) from work to edition is more difficult because of 1:M relationship...
+ # Reverse logic for moving OCLC ID and P212 (ISBN) from work to
+ # edition is more difficult because of 1:M relationship...
# Same logic as for OCLC (work) ID
# Goodreads-identificatiecode (P2969)
- # Goodreads-identificatiecode for work (P8383) should not be registered for editions; should rather use P2969
+ # Goodreads-identificatiecode for work (P8383) should not be
+ # registered for editions; should rather use P2969
# Library of Congress Classification (works and editions)
if 'lcc' in isbn_classify and 'P8360' not in item.claims:
- logger.warning('Add Library of Congress Classification for edition (P8360): %s' % (isbn_classify['lcc']))
+ logger.warning(
+ 'Add Library of Congress Classification for edition (P8360): %s'
+ % (isbn_classify['lcc']))
claim = pywikibot.Claim(repo, 'P8360')
claim.setTarget(isbn_classify['lcc'])
item.addClaim(claim, bot=True, summary=transcmt)
# Dewey Decimale Classificatie
if 'ddc' in isbn_classify and 'P1036' not in item.claims:
- logger.warning('Add Dewey Decimale Classificatie (P1036): %s' % (isbn_classify['ddc']))
+ logger.warning('Add Dewey Decimale Classificatie (P1036): %s'
+ % (isbn_classify['ddc']))
claim = pywikibot.Claim(repo, 'P1036')
claim.setTarget(isbn_classify['ddc'])
item.addClaim(claim, bot=True, summary=transcmt)
@@ -666,7 +736,8 @@
# https://www.oclc.org/research/areas/data-science/fast.html
# https://www.oclc.org/content/dam/oclc/fast/FAST-quick-start-guide-2022.pdf
- # Authority control identifier from WorldCat's “FAST Linked Data” authority file (external ID P2163)
+ # Authority control identifier from WorldCat's “FAST Linked Data”
+ # authority file (external ID P2163)
# Corresponding to P921 (Wikidata main subject)
if 'fast' in isbn_classify:
for fast_id in isbn_classify['fast']:
@@ -679,109 +750,142 @@
""" % (fast_id))
logger.info(main_subject_query)
- generator = pg.WikidataSPARQLPageGenerator(main_subject_query, site=wikidata_site)
+ generator = pg.WikidataSPARQLPageGenerator(main_subject_query,
+ site=wikidata_site)
rescnt = 0
- for main_subject in generator: # Main loop for all DISTINCT items
+ for main_subject in generator: # Main loop for all DISTINCT items
rescnt += 1
qmain_subject = main_subject.getID()
try:
main_subject_label = main_subject.labels[booklang]
- logger.info('Found main subject %s (%s) for Fast ID %s' % (main_subject_label, qmain_subject, fast_id))
- except:
+ logger.info('Found main subject %s (%s) for Fast ID %s'
+ % (main_subject_label, qmain_subject, fast_id))
+ except: # noqa B001, E722, H201
main_subject_label = ''
- logger.info('Found main subject (%s) for Fast ID %s' % (qmain_subject, fast_id))
- logger.error('Missing label for item %s' % qmain_subject)
+ logger.info('Found main subject (%s) for Fast ID %s'
+ % (qmain_subject, fast_id))
+ logger.error('Missing label for item %s'
+ % qmain_subject)
# Create or amend P921 statement
if rescnt == 0:
- logger.error('Main subject not found for Fast ID %s' % (fast_id))
+ logger.error('Main subject not found for Fast ID %s'
+ % (fast_id))
elif rescnt == 1:
add_main_subject = True
- if 'P921' in item.claims: # Check for duplicates
+ if 'P921' in item.claims: # Check for duplicates
for seq in item.claims['P921']:
if seq.getTarget().getID() == qmain_subject:
add_main_subject = False
break
if add_main_subject:
- logger.warning('Add main subject (P921) %s (%s)' % (main_subject_label, qmain_subject))
+ logger.warning('Add main subject (P921) %s (%s)'
+ % (main_subject_label, qmain_subject))
claim = pywikibot.Claim(repo, 'P921')
claim.setTarget(main_subject)
item.addClaim(claim, bot=True, summary=transcmt)
else:
- logger.info('Skipping main subject %s (%s)' % (main_subject_label, qmain_subject))
+ logger.info('Skipping main subject %s (%s)'
+ % (main_subject_label, qmain_subject))
else:
- logger.error('Ambiguous main subject for Fast ID %s' % (fast_id))
+ logger.error('Ambiguous main subject for Fast ID %s'
+ % (fast_id))
# Book description
- isbn_description = desc(isbn_number)
+ isbn_description = isbnlib.desc(isbn_number)
if isbn_description != '':
- print()
- print(isbn_description)
+ pywikibot.info()
+ pywikibot.info(isbn_description)
# Currently does not work (service not available)
try:
logger.warning('BibTex unavailable')
return 0
- bibtex_metadata = doi2tex(isbn_doi)
- print(bibtex_metadata)
+ bibtex_metadata = isbnlib.doi2tex(isbn_doi)
+ pywikibot.info(bibtex_metadata)
except Exception as error:
logger.error(error) # Data not available
return 0
-# Error logging
-logger = logging.getLogger('create_isbn_edition')
-#logging.basicConfig(level=logging.DEBUG) # Uncomment for debugging
-##logger.setLevel(logging.DEBUG)
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    # Error logging
    global logger
    global repo
    global targetx
    global wikidata_site
    # These are read as module-level globals by amend_isbn_edition() and
    # get_item_list(); without the declarations the assignments below
    # would only create locals and the parsed parameters would be ignored.
    global booklib
    global mainlang
    global proptyx

    logger = logging.getLogger('create_isbn_edition')

    # Get optional parameters
    local_args = pywikibot.handle_args(*args)

    # Login to Wikibase instance
    wikidata_site = pywikibot.Site('wikidata')
    # Required for wikidata object access (item, property, statement)
    repo = wikidata_site.data_repository()

    # Get the digital library ('-' keeps the default, Google Books)
    if local_args:
        booklib = local_args.pop(0)
        if booklib == '-':
            booklib = 'goob'

    # Get the native language
    # The language code is only required when P/Q parameters are added,
    # or different from the LANG code
    if local_args:
        mainlang = local_args.pop(0)

    # Get additional P/Q parameters (property number followed by its
    # item-number value, repeated)
    while local_args:
        inpar = propre.findall(local_args.pop(0).upper())[0]
        # The Q-number is the *next* argument; pop it as well
        # (local_args(0) would try to call the list object).
        target[inpar] = qsuffre.findall(local_args.pop(0).upper())[0]

    # Validate P/Q list
    proptyx = {}
    targetx = {}

    # Validate the property/instance pair
    for propty in target:
        if propty not in proptyx:
            proptyx[propty] = pywikibot.PropertyPage(repo, propty)
        targetx[propty] = pywikibot.ItemPage(repo, target[propty])
        targetx[propty].get(get_redirect=True)
        if propty in propreqinst and (
                'P31' not in targetx[propty].claims
                or not is_in_list(targetx[propty].claims['P31'],
                                  propreqinst[propty])):
            logger.critical('%s (%s) is not a language'
                            % (targetx[propty].labels[mainlang],
                               target[propty]))
            sys.exit(12)

    # check dependencies; the modules are replaced by the ImportError
    # instance when the optional package is missing
    for module in (isbnlib, unidecode):
        if isinstance(module, ImportError):
            raise module

    # Get list of item numbers
    # Typically the Appendix list of references of e.g. a Wikipedia page
    # containing ISBN numbers
    inputfile = pywikibot.input('Get list of item numbers')
    # Extract all ISBN numbers
    itemlist = sorted(set(isbnre.findall(inputfile)))

    for isbn_number in itemlist:  # Process the next edition
        amend_isbn_edition(isbn_number)


if __name__ == '__main__':
    main()
diff --git a/setup.py b/setup.py
index 21779d9..00a1cb9 100755
--- a/setup.py
+++ b/setup.py
@@ -97,6 +97,7 @@
# ------- setup extra_requires for scripts ------- #
script_deps = {
+ 'create_isbn_edition.py': ['isbnlib', 'unidecode'],
'commons_information.py': extra_deps['mwparserfromhell'],
'patrol.py': extra_deps['mwparserfromhell'],
'weblinkchecker.py': extra_deps['memento'],
diff --git a/tests/script_tests.py b/tests/script_tests.py
index 94d0b80..d499269 100755
--- a/tests/script_tests.py
+++ b/tests/script_tests.py
@@ -26,6 +26,7 @@
# These dependencies are not always the package name which is in setup.py.
# Here, the name given to the module which will be imported is required.
script_deps = {
+ 'create_isbn_edition': ['isbnlib', 'unidecode'],
'commons_information': ['mwparserfromhell'],
'patrol': ['mwparserfromhell'],
'weblinkchecker': ['memento_client'],
@@ -374,7 +375,7 @@
# Here come scripts requiring and missing dependencies, that haven't been
# fixed to output -help in that case.
_expected_failures = {'version'}
- _allowed_failures = ['create_isbn_edition']
+ _allowed_failures = []
_arguments = '-help'
_results = None
diff --git a/tox.ini b/tox.ini
index ecf4bfc..3b35408 100644
--- a/tox.ini
+++ b/tox.ini
@@ -164,7 +164,6 @@
scripts/clean_sandbox.py: N816
scripts/commonscat.py: N802, N806, N816
scripts/cosmetic_changes.py: N816
- scripts/create_isbn_edition.py: C901, D100, E402, E501, F405, T201
scripts/dataextend.py: C901, D101, D102, E126, E127, E131, E501
scripts/harvest_template.py: N802, N816
scripts/interwiki.py: N802, N803, N806, N816
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/826937
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I6917ec9b511db609c2f1828486b9a53998d1e376
Gerrit-Change-Number: 826937
Gerrit-PatchSet: 17
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Geertivp <geertivp(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/826631 )
Change subject: Pywikibot client to load ISBN related data into Wikidata
......................................................................
Pywikibot client to load ISBN related data into Wikidata
Pywikibot script to get ISBN data from a digital library,
and create or amend the related Wikidata item for edition
(with the P212=ISBN number as unique external ID).
Use digital libraries to get ISBN data in JSON format, and integrate the results into Wikidata.
Then the resulting item number can be used e.g. to generate Wikipedia references using template Cite_Q.
Bug: T314942
Change-Id: Ic88ab7125b764e8d296121d7a3d47fb0e53877ab
---
A scripts/create_isbn_edition.py
1 file changed, 787 insertions(+), 0 deletions(-)
Approvals:
Xqt: Verified; Looks good to me, approved
diff --git a/scripts/create_isbn_edition.py b/scripts/create_isbn_edition.py
new file mode 100644
index 0000000..ee6b18c
--- /dev/null
+++ b/scripts/create_isbn_edition.py
@@ -0,0 +1,787 @@
+#!/home/geertivp/pwb/bin/python3
+
+codedoc = """
+Pywikibot client to load ISBN related data into Wikidata
+
+Pywikibot script to get ISBN data from a digital library,
+and create or amend the related Wikidata item for edition
+(with the P212=ISBN number as unique external ID).
+
+Use digital libraries to get ISBN data in JSON format, and integrate the results into Wikidata.
+
+Then the resulting item number can be used e.g. to generate Wikipedia references using template Cite_Q.
+
+Parameters:
+
+ All parameters are optional:
+
+ P1: digital library (default goob "-")
+
+ bnf Catalogue General (France)
+ bol Bol.com
+ dnb Deutsche National Library
+ goob Google Books
+ kb National Library of the Netherlands
+ loc Library of Congress US
+ mcues Ministerio de Cultura (Spain)
+ openl OpenLibrary.org
+ porbase urn.porbase.org Portugal
+ sbn Servizio Bibliotecario Nazionale
+ wiki wikipedia.org
+ worldcat WorldCat
+
+ P2: ISO 639-1 language code
+ Default LANG; e.g. en, nl, fr, de, es, it, etc.
+
+ P3 P4...: P/Q pairs to add additional claims (repeated)
+ e.g. P921 Q107643461 (main subject: database management linked to P2163 Fast ID)
+
+ stdin: ISBN numbers (International standard book number)
+
+ Free text (e.g. Wikipedia references list, or publication list) is accepted.
+ Identification is done via an ISBN regex expression.
+
+Functionality:
+
+ * The ISBN number is used as a primary key (P212 where no duplicates are allowed)
+ The item update is not performed when there is no unique match
+ * Statements are added or merged incrementally; existing data is not overwritten.
+ * Authors and publishers are searched to get their item number (ambiguous items are skipped)
+ * Book title and subtitle are separated with '.', ':', or '-'
+ * This script can be run incrementally with the same parameters
+ Caveat: Take into account the Wikidata Query database replication delay.
+ Wait for minimum 5 minutes to avoid creating duplicate objects.
+
+Data quality:
+
+ * Use https://query.wikidata.org/querybuilder/ to identify P212 duplicates
+ Merge duplicate items before running the script again.
+ * The following properties should only be used for written works
+ P5331: OCLC work ID (editions should only have P243)
+ P8383: Goodreads-identificatiecode for work (editions should only have P2969)
+
+Examples:
+
+ # Default library (Google Books), language (LANG), no additional statements
+ ./create_isbn_edition.py
+ 9789042925564
+
+ # Wikimedia, language Dutch, main subject: database management
+ ./create_isbn_edition.py wiki en P921 Q107643461
+ 978-0-596-10089-6
+
+Return status:
+
+ The following status codes are returned to the shell:
+
+ 3 Invalid or missing parameter
+ 12 Item does not exist
+
+Standard ISBN properties:
+
+ P31:Q3331189: instance of edition
+ P50: author
+ P123: publisher
+ P212: canonical ISBN number (lookup via Wikidata Query)
+ P407: language of work (Qnumber linked to ISO 639-1 language code)
+ P577: date of publication (year)
+ P1476: book title
+ P1680: subtitle
+
+Other ISBN properties:
+
+ P291: place of publication
+ P921: main subject (inverse lookup from external Fast ID P2163)
+ P629: work for edition
+ P747: edition of work
+ P1104: number of pages
+
+Qualifiers:
+
+ P1545: (author) sequence number
+
+External identifiers:
+
+ P213: ISNI ID
+ P243: OCLC ID
+ P496: ORCID iD
+ P675: Google Books-identificatiecode
+ P1036: Dewey Decimal Classification
+ P2163: Fast ID (inverse lookup via Wikidata Query) -> P921: main subject
+ P2969: Goodreads-identificatiecode
+
+ (only for written works)
+ P5331: OCLC work ID (editions should only have P243)
+ P8383: Goodreads-identificatiecode for work (editions should only have P2969)
+
+Author:
+
+ Geert Van Pamel, 2022-08-04, GNU General Public License v3.0, User:Geertivp
+
+Documentation:
+
+ https://en.wikipedia.org/wiki/ISBN
+ https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
+ https://www.geeksforgeeks.org/searching-books-with-python/
+ https://www.freecodecamp.org/news/python-json-how-to-convert-a-string-to-js…
+ https://pypi.org/project/isbnlib/
+ https://buildmedia.readthedocs.org/media/pdf/isbnlib/v3.4.5/isbnlib.pdf
+ https://isbntools.readthedocs.io/en/latest/info.html
+ https://www.wikidata.org/wiki/Property:P212
+ https://www.wikidata.org/wiki/Wikidata:WikiProject_Books
+ WikiProject Books: https://www.wikidata.org/wiki/Q21831105
+ https://www.wikidata.org/wiki/Wikidata:List_of_properties/work
+ https://www.wikidata.org/wiki/Template:Book_properties
+ https://www.wikidata.org/wiki/Template:Bibliographic_properties
+ http://classify.oclc.org/classify2/ClassifyDemo
+ https://www.wikidata.org/wiki/Wikidata:WikiProject_Source_MetaData
+ https://www.wikidata.org/wiki/Help:Sources
+ https://www.wikidata.org/wiki/Q22696135
+ https://meta.wikimedia.org/wiki/Community_Wishlist_Survey_2021/Wikidata/Bib…
+ https://doc.wikimedia.org/pywikibot/master/api_ref/pywikibot.html
+ https://doc.wikimedia.org/pywikibot/master/
+ https://docs.python.org/3/howto/logging.html
+ https://wikitech.wikimedia.org/wiki/Portal:Toolforge
+ http://www.isbn.org/standards/home/isbn/international/hyphenation-instructi…
+ https://www.wikidata.org/wiki/Wikidata:Pywikibot_-_Python_3_Tutorial/Settin…
+ https://www.wikidata.org/wiki/Wikidata:Pywikibot_-_Python_3_Tutorial/Settin…
+
+Prerequisites:
+
+ pywikibot
+
+ Install the following ISBN lib packages:
+ https://pypi.org/search/?q=isbnlib_
+
+ pip install isbnlib (mandatory)
+
+ (optional)
+ pip install isbnlib-bol
+ pip install isbnlib-bnf
+ pip install isbnlib-dnb
+ pip install isbnlib-kb
+ pip install isbnlib-loc
+ pip install isbnlib-worldcat2
+ etc.
+
+Restrictions:
+
+ * Better use the ISO 639-1 language code parameter as a default
+ The language code is not always available from the digital library.
+ * SPARQL queries run on a replicated database
+ Possible important replication delay; wait 5 minutes before retrying -- otherwise you risk creating duplicates.
+
+Known problems:
+
+ * Unknown ISBN, e.g. 9789400012820
+ * No ISBN data available for an edition either causes no output (goob = Google Books), or an error message (wiki, openl)
+ The script is taking care of both
+ * Only 6 ISBN attributes are listed by the webservice(s)
+ missing are e.g.: place of publication, number of pages
+ * Not all ISBN attributes have data (authors, publisher, date of publication, language)
+ * The script uses multiple webservice calls (script might take time, but it is automated)
+ * Need to amend ISBN items that have no author, publisher, or other required data (which additional services to use?)
+ * How to add still more digital libraries?
+ * Does the KBR have a public ISBN service (Koninklijke Bibliotheek van België)?
+ * Filter for work properties -- need to amend Q47461344 (written work) instance and P629 (edition of) + P747 (has edition) statements
+ https://www.wikidata.org/wiki/Q63413107
+ ['9781282557246', '9786612557248', '9781847196057', '9781847196040']
+ P8383: Goodreads-identificatiecode voor work 13957943 (should have P2969)
+ P5331: OCLC-identificatiecode voor work 793965595 (should have P243)
+
+To do:
+
+ * Add source reference (digital library instance)
+
+Algorithm:
+
+ Get parameters
+ Validate parameters
+ Get ISBN data
+ Convert ISBN data
+ Get additional data
+ Register ISBN data into Wikidata (create or amend items or claims)
+
+Environment:
+
+ The python script can run on the following platforms:
+
+ Linux client
+ Google Chromebook (Linux container)
+ Toolforge Portal
+ PAWS
+
+ LANG: ISO 639-1 language code
+
+Source code:
+
+ https://github.com/geertivp/Pywikibot/blob/main/create_isbn_edition.py
+
+Applications:
+
+ Generate a book reference
+ Example: {{Cite Q|Q63413107}} (wp.en)
+ See also:
+ https://meta.wikimedia.org/wiki/WikiCite
+ https://www.wikidata.org/wiki/Q21831105 (WikiCite)
+ https://www.wikidata.org/wiki/Q22321052 (Cite_Q)
+ https://www.mediawiki.org/wiki/Global_templates
+ https://www.wikidata.org/wiki/Wikidata:WikiProject_Source_MetaData
+ https://phabricator.wikimedia.org/tag/wikicite/
+ https://meta.wikimedia.org/wiki/WikiCite/Shared_Citations
+
+Wikidata Query:
+
+ List of editions about musicians: https://w.wiki/5aaz
+ List of editions having ISBN number: https://w.wiki/5akq
+
+Related projects:
+
+ https://phabricator.wikimedia.org/T314942 (this script)
+
+ (other projects)
+ https://phabricator.wikimedia.org/T282719
+ https://phabricator.wikimedia.org/T214802
+ https://phabricator.wikimedia.org/T208134
+ https://phabricator.wikimedia.org/T138911
+ https://phabricator.wikimedia.org/T20814
+ https://en.wikipedia.org/wiki/User:Citation_bot
+ https://meta.wikimedia.org/wiki/Community_Wishlist_Survey_2021/Wikidata/Bib…
+ https://zenodo.org/record/55004#.YvwO4hTP1D8
+
+Other systems:
+
+ https://en.wikipedia.org/wiki/bibliographic_database
+ https://www.titelbank.nl/pls/ttb/f?p=103:4012:::NO::P4012_TTEL_ID:3496019&c…
+
+"""
+
+import logging # Error logging
+import os # Operating system
+import re # Regular expressions (very handy!)
+import sys # System calls
+import unidecode # Unicode
+
+import pywikibot # API interface to Wikidata
+
+from isbnlib import * # ISBN data
+from pywikibot import pagegenerators as pg # Wikidata Query interface
+from pywikibot.data import api
+
+# Initialisation
+debug = True # Show debugging information
+verbose = True # Verbose mode
+
+booklib = 'goob' # Default digital library (Google Books)
+isbnre = re.compile(r'[0-9-]{10,17}') # ISBN number: 10 or 13 digits with optional dashes (-)
+propre = re.compile(r'P[0-9]+') # Wikidata P-number
+qsuffre = re.compile(r'Q[0-9]+') # Wikidata Q-number
+
+# Other statements are added via command line parameters
+target = {
+'P31':'Q3331189', # Instance of (P31): edition (Q3331189)
+}
+
+# Statement property and instance validation rules
+propreqinst = {
+'P50':'Q5', # Author (P50) requires human (Q5)
+'P123':{'Q2085381', 'Q1114515', 'Q1320047'},# Publisher (P123) requires one of the publisher instance types
+'P407':{'Q34770', 'Q33742', 'Q1288568'}, # Edition language requires at least one of (living, natural) language
+}
+
+mainlang = os.getenv('LANG', 'en')[:2] # Default description language from the LANG environment variable
+
+# Connect to database
+transcmt = '#pwb Create ISBN edition' # Wikidata transaction comment
+wikidata_site = pywikibot.Site('wikidata', 'wikidata') # Login to Wikibase instance
+repo = wikidata_site.data_repository() # Required for wikidata object access (item, property, statement)
+
+
def is_in_list(statement_list, checklist):
    """Check whether any statement targets an item from the checklist.

    :param statement_list: statements whose targets are inspected
    :param checklist: container of item ID strings to match against
    :return: True when at least one statement target is in the checklist
    """
    return any(statement.getTarget().getID() in checklist
               for statement in statement_list)
+
+
def get_item_list(item_name, instance_id):
    """Search items by label, restricted to the given instance type(s).

    :param item_name: item label to search for (case sensitive)
    :param instance_id: accepted P31 instance ID(s) (string, set, or list)
    :return: set of matching item Q-numbers
    """
    matches = set()
    # wbsearchentities searches across all languages, but labels are
    # returned in the native language.
    request = api.Request(site=wikidata_site, parameters={
        'action': 'wbsearchentities',
        'format': 'json',
        'type': 'item',
        'strictlanguage': False,
        'language': mainlang,
        'search': item_name,
    })
    reply = request.submit()

    normalized_name = unidecode.unidecode(item_name.lower())
    for found in reply.get('search', []):
        candidate = pywikibot.ItemPage(repo, found['id'])
        candidate.get(get_redirect=True)
        if 'P31' not in candidate.claims:
            continue
        # Keep only items whose instance (P31) matches the requirement.
        if not any(claim.getTarget().getID() in instance_id
                   for claim in candidate.claims['P31']):
            continue
        # Label comparison ignores case and accents ...
        for label in candidate.labels.values():
            if normalized_name == unidecode.unidecode(label.lower()):
                matches.add(candidate.getID())
        # ... while alias comparison is exact (case sensitive).
        for alias_list in candidate.aliases.values():
            if item_name in alias_list:
                matches.add(candidate.getID())
    return matches
+
+
+def amend_isbn_edition(isbn_number):
+ """
+Amend ISBN registration.
+
+Parameters:
+
+ isbn_number: ISBN number (string; 10 or 13 digits with optional hyphens)
+
+Result:
+
+ Amend Wikidata, by registering the ISBN-13 data via P212,
+ depending on the data obtained from the digital library.
+ """
+ global proptyx
+
+ isbn_number = isbn_number.strip()
+ if isbn_number == '':
+ return 3 # Do nothing when the ISBN number is missing
+
+ # Validate ISBN data
+ if verbose:
+ print()
+
+ try:
+ isbn_data = meta(isbn_number, service=booklib)
+ logger.info(isbn_data)
+ # {'ISBN-13': '9789042925564', 'Title': 'De Leuvense Vaart - Van De Vaartkom Tot Wijgmaal. Aspecten Uit De Industriele Geschiedenis Van Leuven', 'Authors': ['A. Cresens'], 'Publisher': 'Peeters Pub & Booksellers', 'Year': '2012', 'Language': 'nl'}
+ except Exception as error:
+ # When the book is unknown the function returns
+ logger.error(error)
+ #raise ValueError(error)
+ return 3
+
+ if len(isbn_data) < 6:
+ logger.error('Unknown or incomplete digital library registration for %s' % isbn_number)
+ return 3
+
+ # Show the raw results
+ if verbose:
+ for i in isbn_data:
+ print('%s:\t%s' % (i, isbn_data[i]))
+
+ # Get the book language from the ISBN book reference
+ booklang = mainlang # Default language
+ if isbn_data['Language'] != '':
+ booklang = isbn_data['Language'].strip()
+ if booklang == 'iw': # Obsolete codes
+ booklang = 'he'
+ lang_list = list(get_item_list(booklang, propreqinst['P407']))
+ if len(lang_list) == 1:
+ target['P407'] = lang_list[0]
+ elif len(lang_list) == 0:
+ logger.warning('Unknown language %s' % booklang)
+ return 3
+ else:
+ logger.warning('Ambiguous language %s' % booklang)
+ return 3
+
+ # Get formatted ISBN number
+ isbn_number = isbn_data['ISBN-13'] # Numeric format
+ isbn_fmtd = mask(isbn_number) # Canonical format
+ if verbose:
+ print()
+ print(isbn_fmtd) # First one
+
+ # Get (sub)title when there is a dot
+ titles = isbn_data['Title'].split('. ') # goob is using a '.'
+ if len(titles) == 1:
+ titles = isbn_data['Title'].split(': ') # Extract subtitle
+ if len(titles) == 1:
+ titles = isbn_data['Title'].split(' - ') # Extract subtitle
+ objectname = titles[0].strip()
+ subtitle = ''
+ if len(titles) > 1:
+ subtitle = titles[1].strip()
+
+ # Print book titles
+ if debug:
+ print(objectname, file=sys.stderr)
+ print(subtitle, file=sys.stderr) # Optional
+ for i in range(2,len(titles)): # Print subsequent subtitles, when available
+ print(titles[i].strip(), file=sys.stderr) # Not stored in Wikidata...
+
+ # Search the ISBN number in Wikidata both canonical and numeric
+ isbn_query = ("""# Get ISBN number
+SELECT ?item WHERE {
+ VALUES ?isbn_number {
+ "%s"
+ "%s"
+ }
+ ?item wdt:P212 ?isbn_number.
+}
+""" % (isbn_fmtd, isbn_number)) # P212 should have canonical hyphenated format
+
+ logger.info(isbn_query)
+ generator = pg.WikidataSPARQLPageGenerator(isbn_query, site=wikidata_site)
+
+ rescnt = 0
+ for item in generator: # Main loop for all DISTINCT items
+ rescnt += 1
+ qnumber = item.getID()
+ logger.warning('Found item: %s' % qnumber)
+
+ # Create or amend the item
+ if rescnt == 1:
+ item.get(get_redirect=True) # Update item
+ elif rescnt == 0:
+ label = {}
+ label[booklang] = objectname
+ item = pywikibot.ItemPage(repo) # Create item
+ item.editEntity({'labels': label}, summary=transcmt)
+ qnumber = item.getID()
+ logger.warning('Creating item: %s' % qnumber)
+ else:
+ logger.critical('Ambiguous ISBN number %s' % isbn_fmtd)
+ return 3
+
+ # Add all P/Q values
+ # Make sure that labels are known in the native language
+ if debug:
+ print(target, file=sys.stderr)
+
+ # Register statements
+ for propty in target:
+ if propty not in item.claims:
+ if propty not in proptyx:
+ proptyx[propty] = pywikibot.PropertyPage(repo, propty)
+ targetx[propty] = pywikibot.ItemPage(repo, target[propty])
+
+ try:
+ logger.warning('Add %s (%s): %s (%s)' % (proptyx[propty].labels[booklang], propty, targetx[propty].labels[booklang], target[propty]))
+ except:
+ logger.warning('Add %s:%s' % (propty, target[propty]))
+
+ claim = pywikibot.Claim(repo, propty)
+ claim.setTarget(targetx[propty])
+ item.addClaim(claim, bot=True, summary=transcmt)
+
+ # Set formatted ISBN number
+ if 'P212' not in item.claims:
+ logger.warning('Add ISBN number (P212): %s' % (isbn_fmtd))
+ claim = pywikibot.Claim(repo, 'P212')
+ claim.setTarget(isbn_fmtd)
+ item.addClaim(claim, bot=True, summary=transcmt)
+
+ # Title
+ if 'P1476' not in item.claims:
+ logger.warning('Add Title (P1476): %s' % (objectname))
+ claim = pywikibot.Claim(repo, 'P1476')
+ claim.setTarget(pywikibot.WbMonolingualText(text=objectname, language=booklang))
+ item.addClaim(claim, bot=True, summary=transcmt)
+
+ # Subtitle
+ if subtitle != '' and 'P1680' not in item.claims:
+ logger.warning('Add Subtitle (P1680): %s' % (subtitle))
+ claim = pywikibot.Claim(repo, 'P1680')
+ claim.setTarget(pywikibot.WbMonolingualText(text=subtitle, language=booklang))
+ item.addClaim(claim, bot=True, summary=transcmt)
+
+ # Date of publication
+ pub_year = isbn_data['Year']
+ if pub_year != '' and 'P577' not in item.claims:
+ logger.warning('Add Year of publication (P577): %s' % (isbn_data['Year']))
+ claim = pywikibot.Claim(repo, 'P577')
+ claim.setTarget(pywikibot.WbTime(year=int(pub_year), precision='year'))
+ item.addClaim(claim, bot=True, summary=transcmt)
+
+ # Get the author list
+ author_cnt = 0
+ for author_name in isbn_data['Authors']:
+ author_name = author_name.strip()
+ if author_name != '':
+ author_cnt += 1
+ author_list = list(get_item_list(author_name, propreqinst['P50']))
+
+ if len(author_list) == 1:
+ add_author = True
+ if 'P50' in item.claims:
+ for seq in item.claims['P50']:
+ if seq.getTarget().getID() in author_list:
+ add_author = False
+ break
+
+ if add_author:
+ logger.warning('Add author %d (P50): %s (%s)' % (author_cnt, author_name, author_list[0]))
+ claim = pywikibot.Claim(repo, 'P50')
+ claim.setTarget(pywikibot.ItemPage(repo, author_list[0]))
+ item.addClaim(claim, bot=True, summary=transcmt)
+
+ qualifier = pywikibot.Claim(repo, 'P1545')
+ qualifier.setTarget(str(author_cnt))
+ claim.addQualifier(qualifier, summary=transcmt)
+ elif len(author_list) == 0:
+ logger.warning('Unknown author: %s' % author_name)
+ else:
+ logger.warning('Ambiguous author: %s' % author_name)
+
+ # Get the publisher
+ publisher_name = isbn_data['Publisher'].strip()
+ if publisher_name != '':
+ publisher_list = list(get_item_list(publisher_name, propreqinst['P123']))
+
+ if len(publisher_list) == 1:
+ if 'P123' not in item.claims:
+ logger.warning('Add publisher (P123): %s (%s)' % (publisher_name, publisher_list[0]))
+ claim = pywikibot.Claim(repo, 'P123')
+ claim.setTarget(pywikibot.ItemPage(repo, publisher_list[0]))
+ item.addClaim(claim, bot=True, summary=transcmt)
+ elif len(publisher_list) == 0:
+ logger.warning('Unknown publisher: %s' % publisher_name)
+ else:
+ logger.warning('Ambiguous publisher: %s' % publisher_name)
+
+ # Get addional data from the digital library
+ isbn_cover = cover(isbn_number)
+ isbn_editions = editions(isbn_number, service='merge')
+ isbn_doi = doi(isbn_number)
+ isbn_info = info(isbn_number)
+
+ if verbose:
+ print()
+ print(isbn_info)
+ print(isbn_doi)
+ print(isbn_editions)
+
+ # Book cover images
+ for i in isbn_cover:
+ print('%s:\t%s' % (i, isbn_cover[i]))
+
+ # Handle ISBN classification
+ isbn_classify = classify(isbn_number)
+ if debug:
+ for i in isbn_classify:
+ print('%s:\t%s' % (i, isbn_classify[i]), file=sys.stderr)
+
+ # ./create_isbn_edition.py '978-3-8376-5645-9' - de P407 Q188
+ # Q113460204
+ # {'owi': '11103651812', 'oclc': '1260160983', 'lcc': 'TK5105.8882', 'ddc': '300', 'fast': {'1175035': 'Wikis (Computer science)', '1795979': 'Wikipedia', '1122877': 'Social sciences'}}
+
+ # Set the OCLC ID
+ if 'oclc' in isbn_classify and 'P243' not in item.claims:
+ logger.warning('Add OCLC ID (P243): %s' % (isbn_classify['oclc']))
+ claim = pywikibot.Claim(repo, 'P243')
+ claim.setTarget(isbn_classify['oclc'])
+ item.addClaim(claim, bot=True, summary=transcmt)
+
+ # OCLC ID and OCLC work ID should not be both assigned
+ if 'P243' in item.claims and 'P5331' in item.claims:
+ if 'P629' in item.claims:
+ oclcwork = item.claims['P5331'][0] # OCLC Work should be unique
+ oclcworkid = oclcwork.getTarget() # Get the OCLC Work ID from the edition
+ work = item.claims['P629'][0].getTarget() # Edition should belong to only one single work
+ logger.warning('Move OCLC Work ID %s to work %s' % (oclcworkid, work.getID())) # There doesn't exist a moveClaim method?
+ if 'P5331' not in work.claims: # Keep current OCLC Work ID if present
+ claim = pywikibot.Claim(repo, 'P5331')
+ claim.setTarget(oclcworkid)
+ work.addClaim(claim, bot=True, summary=transcmt)
+ item.removeClaims(oclcwork, bot=True, summary=transcmt) # OCLC Work ID does not belong to edition
+ else:
+ logger.error('OCLC Work ID %s conflicts with OCLC ID %s and no work available' % (item.claims['P5331'][0].getTarget(), item.claims['P243'][0].getTarget()))
+
+ # OCLC work ID should not be registered for editions, only for works
+ if 'owi' not in isbn_classify:
+ pass
+ elif 'P629' in item.claims: # Get the work related to the edition
+ work = item.claims['P629'][0].getTarget() # Edition should only have one single work
+ if 'P5331' not in work.claims: # Assign the OCLC work ID if missing
+ logger.warning('Add OCLC work ID (P5331): %s to work %s' % (isbn_classify['owi'], work.getID()))
+ claim = pywikibot.Claim(repo, 'P5331')
+ claim.setTarget(isbn_classify['owi'])
+ work.addClaim(claim, bot=True, summary=transcmt)
+ elif 'P243' in item.claims:
+ logger.warning('OCLC Work ID %s ignored because of OCLC ID %s' % (isbn_classify['owi'], item.claims['P243'][0].getTarget()))
+ elif 'P5331' not in item.claims: # Assign the OCLC work ID only if there is no work, and no OCLC ID for edition
+ logger.warning('Add OCLC work ID (P5331): %s to edition' % (isbn_classify['owi']))
+ claim = pywikibot.Claim(repo, 'P5331')
+ claim.setTarget(isbn_classify['owi'])
+ item.addClaim(claim, bot=True, summary=transcmt)
+
+ # Reverse logic for moving OCLC ID and P212 (ISBN) from work to edition is more difficult because of 1:M relationship...
+
+ # Same logic as for OCLC (work) ID
+
+ # Goodreads-identificatiecode (P2969)
+
+ # Goodreads-identificatiecode for work (P8383) should not be registered for editions; should rather use P2969
+
+ # Library of Congress Classification (works and editions)
+ if 'lcc' in isbn_classify and 'P8360' not in item.claims:
+ logger.warning('Add Library of Congress Classification for edition (P8360): %s' % (isbn_classify['lcc']))
+ claim = pywikibot.Claim(repo, 'P8360')
+ claim.setTarget(isbn_classify['lcc'])
+ item.addClaim(claim, bot=True, summary=transcmt)
+
+ # Dewey Decimale Classificatie
+ if 'ddc' in isbn_classify and 'P1036' not in item.claims:
+ logger.warning('Add Dewey Decimale Classificatie (P1036): %s' % (isbn_classify['ddc']))
+ claim = pywikibot.Claim(repo, 'P1036')
+ claim.setTarget(isbn_classify['ddc'])
+ item.addClaim(claim, bot=True, summary=transcmt)
+
+ # Register Fast ID using P921 (main subject) through P2163 (Fast ID)
+ # https://www.wikidata.org/wiki/Q3294867
+ # https://nl.wikipedia.org/wiki/Faceted_Application_of_Subject_Terminology
+ # https://www.oclc.org/research/areas/data-science/fast.html
+ # https://www.oclc.org/content/dam/oclc/fast/FAST-quick-start-guide-2022.pdf
+
+ # Authority control identifier from WorldCat's “FAST Linked Data” authority file (external ID P2163)
+ # Corresponding to P921 (Wikidata main subject)
+ if 'fast' in isbn_classify:
+ for fast_id in isbn_classify['fast']:
+
+ # Get the main subject
+ main_subject_query = ("""# Search the main subject
+SELECT ?item WHERE {
+ ?item wdt:P2163 "%s".
+}
+""" % (fast_id))
+
+ logger.info(main_subject_query)
+ generator = pg.WikidataSPARQLPageGenerator(main_subject_query, site=wikidata_site)
+
+ rescnt = 0
+ for main_subject in generator: # Main loop for all DISTINCT items
+ rescnt += 1
+ qmain_subject = main_subject.getID()
+ try:
+ main_subject_label = main_subject.labels[booklang]
+ logger.info('Found main subject %s (%s) for Fast ID %s' % (main_subject_label, qmain_subject, fast_id))
+ except:
+ main_subject_label = ''
+ logger.info('Found main subject (%s) for Fast ID %s' % (qmain_subject, fast_id))
+ logger.error('Missing label for item %s' % qmain_subject)
+
+ # Create or amend P921 statement
+ if rescnt == 0:
+ logger.error('Main subject not found for Fast ID %s' % (fast_id))
+ elif rescnt == 1:
+ add_main_subject = True
+ if 'P921' in item.claims: # Check for duplicates
+ for seq in item.claims['P921']:
+ if seq.getTarget().getID() == qmain_subject:
+ add_main_subject = False
+ break
+
+ if add_main_subject:
+ logger.warning('Add main subject (P921) %s (%s)' % (main_subject_label, qmain_subject))
+ claim = pywikibot.Claim(repo, 'P921')
+ claim.setTarget(main_subject)
+ item.addClaim(claim, bot=True, summary=transcmt)
+ else:
+ logger.info('Skipping main subject %s (%s)' % (main_subject_label, qmain_subject))
+ else:
+ logger.error('Ambiguous main subject for Fast ID %s' % (fast_id))
+
+ # Book description
+ isbn_description = desc(isbn_number)
+ if isbn_description != '':
+ print()
+ print(isbn_description)
+
+ # Currently does not work (service not available)
+ try:
+ logger.warning('BibTex unavailable')
+ return 0
+ bibtex_metadata = doi2tex(isbn_doi)
+ print(bibtex_metadata)
+ except Exception as error:
+ logger.error(error) # Data not available
+
+ return 0
+
+
+# Error logging
+logger = logging.getLogger('create_isbn_edition')
+#logging.basicConfig(level=logging.DEBUG) # Uncomment for debugging
+##logger.setLevel(logging.DEBUG)
+
+pgmnm = sys.argv.pop(0)
+logger.debug('%s %s' % (pgmnm, '2022-08-23 (gvp)'))
+
+# Get optional parameters
+
+# Get the digital library
+if len(sys.argv) > 0:
+ booklib = sys.argv.pop(0)
+ if booklib == '-':
+ booklib = 'goob'
+
+# Get the native language
+# The language code is only required when P/Q parameters are added, or different from the LANG code
+if len(sys.argv) > 0:
+ mainlang = sys.argv.pop(0)
+
+# Get additional P/Q parameters
+while len(sys.argv) > 0:
+ inpar = propre.findall(sys.argv.pop(0).upper())[0]
+ target[inpar] = qsuffre.findall(sys.argv.pop(0).upper())[0]
+
+# Validate P/Q list
+proptyx={}
+targetx={}
+
+# Validate the property/instance pair
+for propty in target:
+ if propty not in proptyx:
+ proptyx[propty] = pywikibot.PropertyPage(repo, propty)
+ targetx[propty] = pywikibot.ItemPage(repo, target[propty])
+ targetx[propty].get(get_redirect=True)
+ if propty in propreqinst and ('P31' not in targetx[propty].claims or not is_in_list(targetx[propty].claims['P31'], propreqinst[propty])):
+ logger.critical('%s (%s) is not a language' % (targetx[propty].labels[mainlang], target[propty]))
+ sys.exit(12)
+
+# Get list of item numbers
+inputfile = sys.stdin.read() # Typically the Appendix list of references of e.g. a Wikipedia page containing ISBN numbers
+itemlist = sorted(set(isbnre.findall(inputfile))) # Extract all ISBN numbers
+
+for isbn_number in itemlist: # Process the next edition
+ amend_isbn_edition(isbn_number)
+
+# Einde van de miserie
+"""
+Notes:
+
+
+"""
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/826631
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ic88ab7125b764e8d296121d7a3d47fb0e53877ab
Gerrit-Change-Number: 826631
Gerrit-PatchSet: 3
Gerrit-Owner: Gerrit Patch Uploader <gerritpatchuploader(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Geertivp <geertivp(a)gmail.com>
Gerrit-MessageType: merged
Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/826820 )
Change subject: [doc] Add license information to some init files
......................................................................
[doc] Add license information to some init files
Change-Id: Id3e6cc0949d778dcf467050b1bc844ed423bfa63
---
M pywikibot/comms/__init__.py
M pywikibot/data/__init__.py
M pywikibot/families/__init__.py
M pywikibot/userinterfaces/__init__.py
4 files changed, 20 insertions(+), 0 deletions(-)
Approvals:
Xqt: Verified; Looks good to me, approved
diff --git a/pywikibot/comms/__init__.py b/pywikibot/comms/__init__.py
index c1c5b60..e76cb24 100644
--- a/pywikibot/comms/__init__.py
+++ b/pywikibot/comms/__init__.py
@@ -1 +1,6 @@
"""Communication layer."""
+#
+# (C) Pywikibot team, 2008-2022
+#
+# Distributed under the terms of the MIT license.
+#
diff --git a/pywikibot/data/__init__.py b/pywikibot/data/__init__.py
index 13a975c..7db53e3 100644
--- a/pywikibot/data/__init__.py
+++ b/pywikibot/data/__init__.py
@@ -1 +1,6 @@
"""Module providing several layers of data access to the wiki."""
+#
+# (C) Pywikibot team, 2007-2022
+#
+# Distributed under the terms of the MIT license.
+#
diff --git a/pywikibot/families/__init__.py b/pywikibot/families/__init__.py
index b4afbad..50e52e1 100644
--- a/pywikibot/families/__init__.py
+++ b/pywikibot/families/__init__.py
@@ -1 +1,6 @@
"""Families package."""
+#
+# (C) Pywikibot team, 2008-2022
+#
+# Distributed under the terms of the MIT license.
+#
diff --git a/pywikibot/userinterfaces/__init__.py b/pywikibot/userinterfaces/__init__.py
index 0487523..b4f0f90 100644
--- a/pywikibot/userinterfaces/__init__.py
+++ b/pywikibot/userinterfaces/__init__.py
@@ -14,3 +14,8 @@
all of which are documented in the abstract class
:class:`userinterfaces._interface_base.ABUIC`.
"""
+#
+# (C) Pywikibot team, 2008-2022
+#
+# Distributed under the terms of the MIT license.
+#
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/826820
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Id3e6cc0949d778dcf467050b1bc844ed423bfa63
Gerrit-Change-Number: 826820
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-MessageType: merged