jenkins-bot has submitted this change and it was merged.
Change subject: Added script to upload Freebase identifiers
......................................................................
Added script to upload Freebase identifiers
Change-Id: I9d1f459d58da0eecd1631e8b863129d9d1aa3a85
---
A scripts/freebasemappingupload.py
1 file changed, 103 insertions(+), 0 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/freebasemappingupload.py b/scripts/freebasemappingupload.py
new file mode 100644
index 0000000..0bd2bba
--- /dev/null
+++ b/scripts/freebasemappingupload.py
@@ -0,0 +1,103 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Script to upload the mappings of Freebase to Wikidata
+Can be easily adapted to upload other String identifiers as well
+
+This bot needs the dump from
+https://developers.google.com/freebase/data#freebase-wikidata-mappings
+
+The script takes a single parameter:
+
+-filename: the filename to read the freebase-wikidata mappings from;
+ default: fb2w.nt.gz
+"""
+
+#
+# (C) Denny Vrandecic, 2013
+# (C) Pywikibot team, 2013
+# Distributed under the terms of the MIT license.
+
+import gzip
+import os
+import sys
+
+import pywikibot
+
+
+class FreebaseMapperRobot:
+
+ def __init__(self, filename):
+ self.repo = pywikibot.Site('wikidata', 'wikidata').data_repository()
+ self.filename = filename
+ if not os.path.exists(self.filename):
+ pywikibot.output('Cannot find %s. Try providing the absolute path.' % self.filename)
+ sys.exit(1)
+
+ def run(self):
+ # Set up some items we will use a lot.
+ self.claim = pywikibot.Claim(self.repo, 'P646') # freebase mapping
+ # And sources!
+ self.statedin = pywikibot.Claim(self.repo, 'P248') # stated in
+ freebasedumpitem = pywikibot.ItemPage(self.repo, 'Q15241312') # Freebase data dump
+ self.statedin.setTarget(freebasedumpitem)
+ self.dateofpub = pywikibot.Claim(self.repo, 'P577') # date of publication
+ oct28 = pywikibot.WbTime(year=2013, month=10, day=28, precision='day')
+ self.dateofpub.setTarget(oct28)
+
+ for line in gzip.open(self.filename):
+ self.processLine(line.strip())
+
+ def processLine(self, line):
+ if not line or line.startswith('#'):
+ return
+ mid, sameas, qid, dot = line.split()
+ if sameas != '<http://www.w3.org/2002/07/owl#sameAs>':
+ return
+ if dot != '.':
+ return
+ if not mid.startswith('<http://rdf.freebase.com/ns/m'):
+ return
+ mid = '/m/' + mid[30:-1]
+ if not qid.startswith('<http://www.wikidata.org/entity/Q'):
+ return
+ qid = 'Q' + qid[33:-1]
+ data = pywikibot.ItemPage(self.repo, qid)
+ data.get()
+ if not data.labels:
+ label = ''
+ elif 'en' in data.labels:
+ label = data.labels['en']
+ else:
+ # Just pick up the first label
+ label = data.labels.values()[0]
+ pywikibot.output('Parsed: %s <--> %s' % (qid, mid))
+ pywikibot.output('%s is %s' % (data.getID(), label))
+ if data.claims and 'P646' in data.claims:
+ # We assume that there is only one claim.
+ # If there are multiple ones, our logs might be wrong
+ # but the constraint value reports will catch them
+ if mid != data.claims['P646'][0].getTarget():
+ pywikibot.output('Mismatch: expected %s, has %s instead'
+ % (mid, data.claims['P646'][0].getTarget()))
+ else:
+ pywikibot.output('Already has mid set, is consistent.')
+ else:
+ # No claim set, lets add it.
+ pywikibot.output('Going to add a new claim.')
+ self.claim.setTarget(mid)
+ data.addClaim(self.claim)
+ self.claim.addSources([self.statedin, self.dateofpub])
+ pywikibot.output('Claim added!')
+
+
+def main():
+ filename = 'fb2w.nt.gz' # Default filename
+ for arg in pywikibot.handleArgs():
+ if arg.startswith('-filename'):
+ filename = arg[11:]
+ bot = FreebaseMapperRobot(filename)
+ bot.run()
+
+if __name__ == '__main__':
+ main()
--
To view, visit
https://gerrit.wikimedia.org/r/99180
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9d1f459d58da0eecd1631e8b863129d9d1aa3a85
Gerrit-PatchSet: 6
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Denny Vrandecic <vrandecic(a)gmail.com>
Gerrit-Reviewer: Denny Vrandecic <vrandecic(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot