jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/517168 )
Change subject: [IMPR] Make coordinate_import.py work on a set of Wikidata items
......................................................................
[IMPR] Make coordinate_import.py work on a set of Wikidata items
Bug: T220806
Change-Id: I2ab5605031ba00155cc80b3d88110b2379171d23
---
M scripts/coordinate_import.py
1 file changed, 56 insertions(+), 15 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/coordinate_import.py b/scripts/coordinate_import.py
index e323891..010e770 100755
--- a/scripts/coordinate_import.py
+++ b/scripts/coordinate_import.py
@@ -21,6 +21,15 @@
python pwb.py coordinate_import -lang:it -family:wikipedia \
-namespace:0 -transcludes:Infobox_stazione_ferroviaria
+You can also run over a set of items on the repo without coordinates and
+try to import them from any connected page. To do this, you need to
+explictly provide the repo as the site using -lang a -family arguments.
+Example:
+
+ python pwb.py coordinate_import -lang:wikidata -family:wikidata \
+ -namespace:0 -querypage:Deadendpages
+
+
The following command line parameters are supported:
-create Create items for pages without one.
@@ -46,6 +55,8 @@
"""A bot to import coordinates to Wikidata."""
+ use_from_page = None
+
def __init__(self, generator, **kwargs):
"""
Initializer.
@@ -68,46 +79,74 @@
@return: the first property for which self.prop
is used as qualifier, or None if any
@return: unicode or None
-
"""
for prop in claims:
for claim in claims[prop]:
if self.prop in claim.qualifiers:
return prop
+ return None
- def treat_page_and_item(self, page, item):
- """Treat page/item."""
- coordinate = page.coordinates(primary_only=True)
+ def item_has_coordinates(self, item):
+ """
+ Check if the item has coordinates.
- if not coordinate:
- return
-
+ @return: whether the item has coordinates
+ @rtype: bool
+ """
claims = item.get().get('claims')
if self.prop in claims:
pywikibot.output('Item {} already contains coordinates ({})'
.format(item.title(), self.prop))
- return
+ return True
prop = self.has_coord_qualifier(claims)
if prop:
pywikibot.output('Item {} already contains coordinates'
' ({}) as qualifier for {}'
.format(item.title(), self.prop, prop))
+ return True
+ return False
+
+ def treat_page_and_item(self, page, item):
+ """Treat page/item."""
+ if self.item_has_coordinates(item):
return
+ if page is None:
+ # running over items, search in linked pages
+ for page in item.iterlinks():
+ if self.try_import_coordinates_from_page(page, item):
+ break
+ return
+
+ self.try_import_coordinates_from_page(page, item)
+
+ def try_import_coordinates_from_page(self, page, item):
+ """
+ Try import coordinate from the given page to the given item.
+
+ @return: whether any coordinates were found and the import
+ was successful
+ @rtype: bool
+ """
+ coordinate = page.coordinates(primary_only=True)
+ if not coordinate:
+ return False
newclaim = pywikibot.Claim(self.repo, self.prop)
newclaim.setTarget(coordinate)
- pywikibot.output('Adding {}, {} to {}'.format(coordinate.lat,
- coordinate.lon,
- item.title()))
+ source = self.getSource(page.site)
+ if source:
+ newclaim.addSource(source)
+ pywikibot.output('Adding {}, {} to {}'.format(
+ coordinate.lat, coordinate.lon, item.title()))
+ # todo: handle exceptions using self.user_add_claim
try:
item.addClaim(newclaim)
-
- source = self.getSource(page.site)
- if source:
- newclaim.addSource(source, bot=True)
except CoordinateGlobeUnknownException as e:
pywikibot.output('Skipping unsupported globe: {}'.format(e.args))
+ return False
+ else:
+ return True
def main(*args):
@@ -130,6 +169,8 @@
if arg == '-create':
create_new = True
+ # xxx: this preloading preloads neither coordinates nor Wikibase items
+ # but preloads wikitext which we don't need
generator = generator_factory.getCombinedGenerator(preload=True)
if generator:
--
To view, visit
https://gerrit.wikimedia.org/r/517168
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I2ab5605031ba00155cc80b3d88110b2379171d23
Gerrit-Change-Number: 517168
Gerrit-PatchSet: 3
Gerrit-Owner: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Lokal Profil <andre.costa(a)wikimedia.se>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: Multichill <maarten(a)mdammers.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot (75)