jenkins-bot has submitted this change and it was merged.
Change subject: (bug 62011) Script to import coordinates from Wikipedia to Wikidata
......................................................................
(bug 62011) Script to import coordinates from Wikipedia to Wikidata
The bot uses the data for the GeoData extension available through
page.coordinates().
Some related bugs need to be fixed before this script becomes fully
functional:
* bug 62105: globecoordinate serialization fails on some precision values
* bug 62119: Bot crashes on malformed coordinates api output
Change-Id: I966dc95354b5f08ff3d077a0a8e0e606f1ba8e76
---
A scripts/coordinate_import.py
1 file changed, 109 insertions(+), 0 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/coordinate_import.py b/scripts/coordinate_import.py
new file mode 100644
index 0000000..f89f55f
--- /dev/null
+++ b/scripts/coordinate_import.py
@@ -0,0 +1,109 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Usage:
+
+python coordinate_import.py -lang:en -family:wikipedia -cat:Category:Coordinates_not_on_Wikidata
+
+This will work on all pages in the category "coordinates not on Wikidata" and will import the coordinates on these pages to Wikidata.
+
+The data from the "GeoData" extension (https://www.mediawiki.org/wiki/Extension:GeoData) is used so that extension has to be setup properly.
+You can look at the [[Special:Nearby]] page on your local Wiki to see if it's populated.
+
+You can use any typical pagegenerator to provide with a list of pages:
+
+python coordinate_import.py -lang:it -family:wikipedia -transcludes:Infobox_stazione_ferroviaria -namespace:0
+
+&params;
+"""
+#
+# (C) Multichill 2014
+# (C) Pywikibot team, 2013
+#
+# Distributed under the terms of MIT License.
+#
+__version__ = '$Id$'
+#
+import json
+import pywikibot
+from pywikibot import pagegenerators
+
+
+class coordImportRobot:
+ """
+ A bot to import coordinates to Wikidata
+ """
+ def __init__(self, generator):
+ """
+ Arguments:
+ * generator - A generator that yields Page objects.
+
+ """
+ self.generator = pagegenerators.PreloadingGenerator(generator)
+ self.site = pywikibot.Site()
+ self.repo = pywikibot.Site().data_repository()
+ self.cacheSources()
+
+ def getSource(self, lang):
+ """
+ Get the source for the specified language,
+ if possible
+ """
+ if lang in self.source_values:
+ source = pywikibot.Claim(self.repo, 'p143')
+ source.setTarget(self.source_values.get(lang))
+ return source
+
+ def cacheSources(self):
+ """
+ Fetches the sources from the onwiki list
+ and stores it internally
+ """
+ page = pywikibot.Page(self.repo, u'Wikidata:List of wikis/python')
+ self.source_values = json.loads(page.get())
+ self.source_values = self.source_values['wikipedia']
+ for source_lang in self.source_values:
+ self.source_values[source_lang] = pywikibot.ItemPage(self.repo,
+ self.source_values[source_lang])
+
+ def run(self):
+ """
+ Starts the robot.
+ """
+ for page in self.generator:
+ pywikibot.output(u'Working on %s' % page.title())
+ item = pywikibot.ItemPage.fromPage(page)
+
+ if item.exists():
+ pywikibot.output(u'Found %s' % item.title())
+ coordinate = page.coordinates(primary_only=True)
+
+ if coordinate:
+ claims = item.get().get('claims')
+ if u'P625' in claims:
+ pywikibot.output(u'Item %s already contains coordinates (P625)' % item.title())
+ else:
+ newclaim = pywikibot.Claim(self.repo, u'P625')
+ newclaim.setTarget(coordinate)
+ pywikibot.output(u'Adding %s, %s to %s' % (coordinate.lat, coordinate.lon, item.title()))
+ item.addClaim(newclaim)
+
+ source = self.getSource(page.site.language())
+ if source:
+ newclaim.addSource(source, bot=True)
+
+
+def main():
+ gen = pagegenerators.GeneratorFactory()
+
+ for arg in pywikibot.handleArgs():
+ if gen.handleArg(arg):
+ continue
+
+ generator = gen.getCombinedGenerator()
+
+ coordbot = coordImportRobot(generator)
+ coordbot.run()
+
+if __name__ == "__main__":
+ main()
--
To view, visit https://gerrit.wikimedia.org/r/116285
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I966dc95354b5f08ff3d077a0a8e0e606f1ba8e76
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Multichill <maarten(a)mdammers.nl>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Multichill <maarten(a)mdammers.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Revert "Make obsolete site object can be created"
......................................................................
Revert "Make obsolete site object can be created"
Note to self: actually run *all* tests. Apparently we have a duplicate set of must_be tests >_<
See https://travis-ci.org/wikimedia/pywikibot-core/jobs/21351801
This reverts commit b548a1afb74ba26d1acd508e8b6ed14f03a368ee.
Change-Id: Icd6ddbc18d0515ec4f3f786dd7eb990e064bab8c
---
M pywikibot/page.py
M pywikibot/site.py
M tests/site_tests.py
3 files changed, 15 insertions(+), 49 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 6c0178d..f95b39b 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -997,40 +997,31 @@
# ignore any links with invalid contents
continue
- def langlinks(self, include_obsolete=False):
+ def langlinks(self):
"""Return a list of all interlanguage Links on this page.
-
- @param include_obsolete: if true, return even Link objects whose site
- is obsolete
"""
# Data might have been preloaded
if not hasattr(self, '_langlinks'):
- self._langlinks = list(self.iterlanglinks(include_obsolete=True))
+ self._langlinks = list(self.iterlanglinks())
- if include_obsolete:
- return self._langlinks
- else:
- return filter(lambda i: not i.site.obsolete, self._langlinks)
+ return self._langlinks
- def iterlanglinks(self, step=None, total=None, include_obsolete=False):
+ def iterlanglinks(self, step=None, total=None):
"""Iterate all interlanguage links on this page.
@param step: limit each API call to this number of pages
@param total: iterate no more than this number of pages in total
- @param include_obsolete: if true, yield even Link object whose site
- is obsolete
@return: a generator that yields Link objects.
"""
if hasattr(self, '_langlinks'):
- return iter(self.langlinks(include_obsolete=include_obsolete))
+ return iter(self._langlinks)
# XXX We might want to fill _langlinks when the Site
# method is called. If we do this, we'll have to think
# about what will happen if the generator is not completely
# iterated upon.
- return self.site.pagelanglinks(self, step=step, total=total,
- include_obsolete=include_obsolete)
+ return self.site.pagelanglinks(self, step=step, total=total)
def data_item(self):
"""
diff --git a/pywikibot/site.py b/pywikibot/site.py
index 14b916a..0d1e92d 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -129,15 +129,15 @@
else:
self.__family = fam
- self.obsolete = False
# if we got an outdated language code, use the new one instead.
if self.__code in self.__family.obsolete:
if self.__family.obsolete[self.__code] is not None:
self.__code = self.__family.obsolete[self.__code]
else:
# no such language anymore
- self.obsolete = True
- elif self.__code not in self.languages():
+ raise NoSuchSite("Language %s in family %s is obsolete"
+ % (self.__code, self.__family.name))
+ if self.__code not in self.languages():
if self.__family.name in list(self.__family.langs.keys()) and \
len(self.__family.langs) == 1:
oldcode = self.__code
@@ -707,15 +707,10 @@
@param right: the rights the logged in user should have
not supported yet and thus ignored.
@returns: a decorator to make sure the requirement is statisfied when
- the decorated function is called. The function can be called
- with as_group='sysop' to override the group set in the
- decorator.
+ the decorated function is called.
"""
def decorator(fn):
def callee(self, *args, **kwargs):
- if self.obsolete:
- raise NoSuchSite("Language %s in family %s is obsolete"
- % (self.code, self.family.name))
grp = kwargs.pop('as_group', group)
if grp == 'user':
self.login(False)
@@ -1964,14 +1959,8 @@
# No such function in the API (this method isn't called anywhere)
raise NotImplementedError
- def pagelanglinks(self, page, step=None, total=None,
- include_obsolete=False):
- """Iterate all interlanguage links on page, yielding Link objects.
-
- @param include_obsolete: if true, yield even Link objects whose
- site is obsolete
-
- """
+ def pagelanglinks(self, page, step=None, total=None):
+ """Iterate all interlanguage links on page, yielding Link objects."""
lltitle = page.title(withSection=False)
llquery = self._generator(api.PropertyGenerator,
type_arg="langlinks",
@@ -1985,13 +1974,9 @@
if 'langlinks' not in pageitem:
continue
for linkdata in pageitem['langlinks']:
- link = pywikibot.Link.langlinkUnsafe(linkdata['lang'],
- linkdata['*'],
- source=self)
- if link.site.obsolete and not include_obsolete:
- continue
- else:
- yield link
+ yield pywikibot.Link.langlinkUnsafe(linkdata['lang'],
+ linkdata['*'],
+ source=self)
def page_extlinks(self, page, step=None, total=None):
"""Iterate all external links on page, yielding URL strings."""
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 10e5bdf..79db3e2 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -1113,11 +1113,7 @@
# Implemented without setUpClass(cls) and global variables as objects
# were not completely disposed and recreated but retained 'memory'
def setUp(self):
- self.code = 'test'
- self.family = lambda: None
- self.family.name = 'test'
self._logged_in_as = None
- self.obsolete = False
def login(self, sysop):
# mock call
@@ -1161,12 +1157,6 @@
self.assertEqual(retval[0], args)
self.assertEqual(retval[1], kwargs)
self.assertEqual(self._logged_in_as, 'sysop')
-
- def testObsoleteSite(self):
- self.obsolete = True
- args = (1, 2, 'a', 'b')
- kwargs = {'i': 'j', 'k': 'l'}
- self.assertRaises(pywikibot.NoSuchSite, self.call_this_user_req_function, args, kwargs)
if __name__ == '__main__':
try:
--
To view, visit https://gerrit.wikimedia.org/r/120364
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Icd6ddbc18d0515ec4f3f786dd7eb990e064bab8c
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot <>