http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11249
Revision: 11249 Author: drtrigon Date: 2013-03-24 11:07:22 +0000 (Sun, 24 Mar 2013) Log Message: ----------- improvement; label detection speed-up improvement; code and syntax clean up (with work-a-round for "r11070 issue")
Modified Paths: -------------- trunk/pywikipedia/subster.py
Modified: trunk/pywikipedia/subster.py =================================================================== --- trunk/pywikipedia/subster.py 2013-03-24 08:10:49 UTC (rev 11248) +++ trunk/pywikipedia/subster.py 2013-03-24 11:07:22 UTC (rev 11249) @@ -33,8 +33,7 @@ Run bot on another site language than configured as default. E.g. 'en'.
python subster.py -family:meta -lang: - python subster.py -family:wikidata -lang:repo - python subster.py -family:wikidata -lang:en + python subster.py -family:wikidata -lang:wikidata Run bot on another site family and language than configured as default. E.g. 'meta' or 'wikidata'.
@@ -177,12 +176,8 @@ self._userListPage = pywikibot.Page(self.site, bot_config['TemplateName']) self._ConfCSSpostprocPage = pywikibot.Page(self.site, bot_config['ConfCSSpostproc']) self._ConfCSSconfigPage = pywikibot.Page(self.site, bot_config['ConfCSSconfig']) - self.pagegen = pagegenerators.ReferringPageGenerator(self._userListPage, onlyTemplateInclusion=True) - if (self.site.family.name == 'wikidata'): # DRTRIGON-130 - # http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11070 - # http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11071 - self.site = self.site.data_repository() - self._code = self._ConfCSSpostprocPage.get() + self.pagegen = pagegenerators.ReferringPageGenerator(self._userListPage, onlyTemplateInclusion=True) + self._code = self._ConfCSSpostprocPage.get() pywikibot.output(u'Imported postproc %s rev %s from %s' %\ ((self._ConfCSSpostprocPage.title(asLink=True),) + self._ConfCSSpostprocPage.getVersionHistory(revCount=1)[0][:2]) ) self._flagenable = {} @@ -220,31 +215,6 @@ # output result to page or return directly if sim: return substed_content - elif self.site.is_data_repository(): # DRTRIGON-130 - # convert talk page result to wikidata(base) - data = self.WD_convertContent(substed_content) - datapage = pywikibot.DataPage(self.site, page.title()) - for item in data: - for element in datapage.searchentities(u'%s:%s' %\ - (pywikibot.config.usernames[self.site.family.name][self.site.lang], item)): - dataoutpage = pywikibot.DataPage(self.site, element['id']) - #dataoutpage = page.toggleTalkPage() - - pywikibot.output(u'%s <--- %s = %s' %\ - (dataoutpage.title(asLink=True), item, data[item])) - - # check for changes and then write/change/set values - summary = u'Bot: update data because of configuration on %s.' % page.title(asLink=True) - #if not self.WD_save(dataoutpage, dic[u'claims'], {u'p32': data}, summary): - buf = dataoutpage.get() - propid = 217 # just a cheat to start with ... - claim = [ claim for claim in buf[u'claims'] if (claim['m'][1] == propid) ] - #if buf.strip().splitlines()[-1].split(u'/')[-1].strip() != data[item]: - if (not claim) or (claim[0]['m'][3] != data[item]): - #dataoutpage.put(buf + u'\n' + out, comment=summary) - dataoutpage.editclaim(u'p%i' % propid, data[item], comment=summary) - else: - pywikibot.output(u'NOTHING TO DO!') else: # if changed, write! if (substed_content != content): @@ -263,6 +233,13 @@ self.save( page, substed_content, (head + u' ' + msg) % {'tags':", ".join(substed_tags)}, **flags ) + + # DRTRIGON-130: data repository (wikidata) output to items + if self.site.is_data_repository() or\ + (self.site.family.name == 'wikidata'): # (work-a-round) + data = self.WD_convertContent(substed_content) + #print self.WD_save(dataoutpage, dic[u'claims'], {u'p32': data}, summary) + self.WD_save(page, data) else: pywikibot.output(u'NOTHING TO DO!')
@@ -547,64 +524,100 @@
return res
- def WD_save(self, outpage, dic, data, comment=None): +# def WD_save(self, outpage, dic, data, comment=None): +# """Stores the content to Wikidata. +# +# @param dic: Original content. +# @type dic: dict +# @param data: New content. +# @type data: dict +# +# Returns nothing, but stores the changed content. +# """ +# # DRTRIGON-130: check for changes and then write/change/set values +# changed = False +# for prop in data: +# pywikibot.output(u'Checking claim with %i values' % len(data[prop])) +# for i, item in enumerate(data[prop]): +# if (i < len(dic[prop])) and \ +# (dic[prop][i][u'mainsnak'][u'datavalue'][u'value'] == item): +# pass # same value; nothing to do +# else: +# # changes; update or create claim +# changed = True +# if (i < len(dic[prop])): +# #print item, dic[prop][i][u'mainsnak'][u'datavalue'][u'value'] +# pywikibot.output(u'Updating claim with value: %s' % item) +# outpage.setclaimvalue(dic[prop][i][u'id'], item, comment=comment) +# else: +# pywikibot.output(u'Creating new claim with value: %s' % item) +# outpage.createclaim(prop, item, comment=comment) +# # search linked items and update them too +# # VERY HACKY, HAS TO BE CONCEPTIONALLY IMPROVED: +# # link any "key = value" pair to any other item by adding "key" +# # to the items 'aliases' (could also use 'description' or even +# # a redirect) +# (key, value) = map(string.strip, item.split('=')) +# for linked in outpage.searchentities(key): +# outpage = pywikibot.DataPage(self.site, linked[u'id']) +# #attr = outpage.getentities() +# attr = linked +# if (u'aliases' in attr) and (key in attr[u'aliases']): +# pywikibot.output(u'Item %s linked to key %s ...' % (outpage.title(asLink=True), key)) +# data = outpage.getentities() +# if u'claims' in data: +# if (data[u'claims'][u'p32'][0][u'mainsnak'][u'datavalue'][u'value'].strip() == value): +# pywikibot.output(u'... ok') +# continue +# changed = True +# pywikibot.output(u'... updating claim with value: %s' % value) +# outpage.setclaimvalue(data[u'claims'][u'p32'][0][u'id'], value, comment=comment) +# else: +# changed = True +# pywikibot.output(u'... creating new claim with value: %s' % value) +# outpage.createclaim(prop, value, comment=comment) +# # speed-up by setting everything at once (in one single write attempt) +# #outpage.editentity(data = {u'claims': data}) +# #outpage.setitem() +# +# return changed + + def WD_save(self, page, data): """Stores the content to Wikidata.
- @param dic: Original content. - @type dic: dict + @param page: Page containing template. + @type page: page object @param data: New content. @type data: dict
- Returns nothing, but stores the changed content. + Returns nothing, but stores the changed content to linked labels. """ # DRTRIGON-130: check for changes and then write/change/set values - changed = False - for prop in data: - pywikibot.output(u'Checking claim with %i values' % len(data[prop])) - for i, item in enumerate(data[prop]): - if (i < len(dic[prop])) and \ - (dic[prop][i][u'mainsnak'][u'datavalue'][u'value'] == item): - pass # same value; nothing to do - else: - # changes; update or create claim - changed = True - if (i < len(dic[prop])): - #print item, dic[prop][i][u'mainsnak'][u'datavalue'][u'value'] - pywikibot.output(u'Updating claim with value: %s' % item) - outpage.setclaimvalue(dic[prop][i][u'id'], item, comment=comment) - else: - pywikibot.output(u'Creating new claim with value: %s' % item) - outpage.createclaim(prop, item, comment=comment) - # search linked items and update them too - # VERY HACKY, HAS TO BE CONCEPTIONALLY IMPROVED: - # link any "key = value" pair to any other item by adding "key" - # to the items 'aliases' (could also use 'description' or even - # a redirect) - (key, value) = map(string.strip, item.split('=')) - for linked in outpage.searchentities(key): - outpage = pywikibot.DataPage(self.site, linked[u'id']) - #attr = outpage.getentities() - attr = linked - if (u'aliases' in attr) and (key in attr[u'aliases']): - pywikibot.output(u'Item %s linked to key %s ...' % (outpage.title(asLink=True), key)) - data = outpage.getentities() - if u'claims' in data: - if (data[u'claims'][u'p32'][0][u'mainsnak'][u'datavalue'][u'value'].strip() == value): - pywikibot.output(u'... ok') - continue - changed = True - pywikibot.output(u'... updating claim with value: %s' % value) - outpage.setclaimvalue(data[u'claims'][u'p32'][0][u'id'], value, comment=comment) - else: - changed = True - pywikibot.output(u'... creating new claim with value: %s' % value) - outpage.createclaim(prop, value, comment=comment) - # speed-up by setting everything at once (in one single write attempt) - #outpage.editentity(data = {u'claims': data}) - #outpage.setitem() + datapage = pywikibot.DataPage(self.site, page.title()) + links = datapage.searchentities(u'%s:%s' % (pywikibot.config.usernames[self.site.family.name][self.site.lang], datapage.title().split(u':')[1])) + for element in links: + item = element[u'aliases'][0].split(u':')[2] + if item not in data: + pywikibot.output(u'Value "%s" not found.' % (item,)) + continue
- return changed + dataoutpage = pywikibot.DataPage(self.site, element['id']) + #dataoutpage.createclaim(u'p38', u'{"entity-type":"quantity", "numeric-id":1}') + #dataoutpage = page.toggleTalkPage()
+ # check for changes and then write/change/set values + summary = u'Bot: update data because of configuration on %s.' % page.title(asLink=True) + buf = dataoutpage.get() + propid = 217 # just a cheat to start with ... + claim = [ claim for claim in buf[u'claims'] if (claim['m'][1] == propid) ] + #if buf.strip().splitlines()[-1].split(u'/')[-1].strip() != data[item]: + if (not claim) or (claim[0]['m'][3] != data[item]): + pywikibot.output(u'%s in %s <--- %s = %s' %\ + (element[u'aliases'][0], dataoutpage.title(asLink=True), item, data[item])) + + #dataoutpage.put(buf + u'\n' + out, comment=summary) + dataoutpage.editclaim(u'p%i' % propid, data[item], comment=summary) + def get_var_regex(self, var, cont='.*?'): """Get regex used/needed to find the tags to replace.