Pywikibot-commits

pywikibot-commits@lists.wikimedia.org

1 participant
13224 discussions

[Gerrit] [SYNC] Synchronize with core, code improvements - change (pywikibot/compat)

by jenkins-bot (Code Review)

jenkins-bot has submitted this change and it was merged. Change subject: [SYNC] Synchronize with core, code improvements ...................................................................... [SYNC] Synchronize with core, code improvements - remove obsolete config, sys and string import - remove unused variables Change-Id: I88a29b5e3654eb70df412c3f1d65182ccd02811b --- M template.py 1 file changed, 14 insertions(+), 12 deletions(-) Approvals: Xqt: Looks good to me, approved Huji: Checked; Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/template.py b/template.py index 33c7ef0..ac1c2a1 100644 --- a/template.py +++ b/template.py @@ -1,3 +1,4 @@ +#!/usr/bin/python # -*- coding: utf-8 -*- """ Very simple script to replace a template with another one, @@ -105,15 +106,14 @@ # __version__ = '$Id$' # + import re -import sys -import string import wikipedia as pywikibot from pywikibot import i18n -import config -import catlib import pagegenerators as pg +import xmlreader import replace +import catlib def UserEditFilterGenerator(generator, username, timestamp=None, skip=False): @@ -159,13 +159,11 @@ * xmlfilename - The dump's path, either absolute or relative """ - self.templates = templates self.xmlfilename = xmlfilename def __iter__(self): """Yield page objects until the entire XML dump has been read.""" - import xmlreader mysite = pywikibot.getSite() dump = xmlreader.XmlDump(self.xmlfilename) # regular expression to find the original template. 
@@ -176,11 +174,14 @@ for template in self.templates: templatePattern = template.title(withNamespace=False) if not pywikibot.getSite().nocapitalize: - templatePattern = '[' + templatePattern[0].upper() + templatePattern[0].lower() + ']' + templatePattern[1:] + templatePattern = '[%s%s]%s' % (templatePattern[0].upper(), + templatePattern[0].lower(), + templatePattern[1:]) templatePattern = re.sub(' ', '[_ ]', templatePattern) templatePatterns.append(templatePattern) - templateRegex = re.compile(r'\{\{ *([mM][sS][gG]:)?(?:%s) *(?P<parameters>\|[^}]+|) *}}' % '|'.join(templatePatterns)) - + templateRegex = re.compile( + r'\{\{ *([mM][sS][gG]:)?(?:%s) *(?P<parameters>\|[^}]+|) *}}' + % '|'.join(templatePatterns)) for entry in dump.parse(): if templateRegex.search(entry.text): page = pywikibot.Page(mysite, entry.title) @@ -219,7 +220,7 @@ self.addedCat = catlib.Category( site, u'%s:%s' % (site.namespace(14), self.addedCat)) - comma = self.summary = site.mediawiki_message('comma-separator') + comma = site.mediawiki_message('comma-separator') # get edit summary message if it's empty if not self.editSummary: @@ -326,7 +327,8 @@ else: if not genFactory.handleArg(arg): templateNames.append( - pywikibot.Page(pywikibot.getSite(), arg, defaultNamespace=10 + pywikibot.Page(pywikibot.getSite(), arg, + defaultNamespace=10 ).title(withNamespace=False)) if subst ^ remove: @@ -342,7 +344,6 @@ return oldTemplates = [] - ns = pywikibot.getSite().template_namespace() for templateName in templates.keys(): oldTemplate = pywikibot.Page(pywikibot.getSite(), templateName, defaultNamespace=10) @@ -361,6 +362,7 @@ gen = pg.DuplicateFilterPageGenerator(gen) preloadingGen = pg.PreloadingGenerator(gen) + bot = TemplateRobot(preloadingGen, templates, subst, remove, editSummary, acceptAll, addedCat) bot.run() -- To view, visit https://gerrit.wikimedia.org/r/102127 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: 
I88a29b5e3654eb70df412c3f1d65182ccd02811b Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/compat Gerrit-Branch: master Gerrit-Owner: Xqt <info(a)gno.de> Gerrit-Reviewer: Huji <huji.huji(a)gmail.com> Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com> Gerrit-Reviewer: Siebrand <siebrand(a)wikimedia.org> Gerrit-Reviewer: Xqt <info(a)gno.de> Gerrit-Reviewer: jenkins-bot

10 years, 4 months

[Gerrit] [PEP8] changes, code improvements - change (pywikibot/compat)

by Alex S.H. Lin (Code Review)

Alex S.H. Lin has submitted this change and it was merged. Change subject: [PEP8] changes, code improvements ...................................................................... [PEP8] changes, code improvements Change-Id: I6c7030e61030f4f5450603becc09eedf0b84428e --- M deledpimage.py 1 file changed, 67 insertions(+), 58 deletions(-) Approvals: Alex S.H. Lin: Verified; Looks good to me, approved diff --git a/deledpimage.py b/deledpimage.py index a78d364..ae727ad 100644 --- a/deledpimage.py +++ b/deledpimage.py @@ -9,24 +9,23 @@ * [[:Image:logo.jpg]] pass * Image:logo.jpg in gallery --> [[:Image:logo.jpg]] in gallery end * logo.jpg(like used in template) --> hide(used ) + +commond: +python deledpimage.py + """ # -# (C) Shizhao, 2008 +# (c) Shizhao, 2008 +# (c) pywikibot team, 2009-2013 # # Distributed under the terms of the MIT license. # __version__ = '$Id$' # -""" -commond: -python deledpimage.py - -删除显示在用户页上的合理使用图像 - -""" -import re, time -import wikipedia as pywikibot +import re +import time +import pywikibot import catlib site = pywikibot.getSite() @@ -35,90 +34,100 @@ 'ar': u'تصنيف:وسوم حقوق نسخ الصور غير الحرة', 'en': u'Category:Non-free image copyright tags', 'zh': u'Category:合理使用图像模板', - } +} content = { 'ar': u'هذه الصورة غير الحرة غير مستخدمة في نطاق المقالات، انظر [[Wikipedia:Non-free content#Policy]]', 'en': u'This Non-free image NOT used in non-article namespaces, see[[Wikipedia:Non-free content#Policy]]', 'zh': u'不是使用在条目中的非自由版权图像，根据[[Wikipedia:合理使用]]，不能在非条目名字空间展示：\n', - } +} msg = { 'ar': u'روبوت: إصلاح استخدام صورة EDP: [[%s]]', 'en': u'Robot: Fix EDP image use: [[%s]]', 'zh': u'Bot修正EDP图像用法：[[%s]]', - } +} -lcontent=pywikibot.translate(site, content) -category=pywikibot.translate(site, cat) -putmsg=pywikibot.translate(site, msg) +lcontent = pywikibot.translate(site, content) +category = pywikibot.translate(site, cat) +putmsg = pywikibot.translate(site, msg) -#from non-free copyright tag category get all EDPtemplate 
-templatecat=catlib.Category(site, category) +# from non-free copyright tag category get all EDPtemplate +templatecat = catlib.Category(site, category) templatelist = templatecat.articlesList() -#from References of EDP template get all non-free images +# from References of EDP template get all non-free images for tempalte in templatelist: images = [page for page in tempalte.getReferences() if page.isImage()] - for image in images : - imagetitle=image.title() - imagepage=pywikibot.ImagePage(site,imagetitle) + for image in images: + imagetitle = image.title() + imagepage = pywikibot.ImagePage(site, imagetitle) -#from imagepage get all usingPages of non-articles - pimages=[puseimage for puseimage in imagepage.usingPages() if puseimage.namespace()<>0] + # from imagepage get all usingPages of non-articles + pimages = [puseimage for puseimage in imagepage.usingPages() + if puseimage.namespace() != 0] for pimage in pimages: - ns=pimage.namespace() - pimagetitle=pimage.title() + ns = pimage.namespace() + pimagetitle = pimage.title() c = u'\nfond an used the image [[%s]] in [[%s]]: ' \ % (imagetitle, pimagetitle) - text=pimage.get() + text = pimage.get() try: - re.search('',text,re.I).group(0) + re.search('', + text, re.I).group(0) except: try: - # imagetext=re.search('\[\['+imagetitle+'(.*?)\]\]',text,re.I).group(0) if imagetitle not in text: - -# Not [[Image:]] namespace + # Not [[Image:]] namespace if imagetitle[6:] in text: - imagetext = re.search(imagetitle[6:]+'(.*?)(|)',text,re.I).group(0) - text = re.sub(imagetitle[6:]+'(.*?)(|)', '', text, re.I) - pywikibot.output(c+u'remove!!!\nSleep 10 s......') + imagetext = re.search(imagetitle[6:] + '(.*?)(|)', + text, re.I).group(0) + text = re.sub(imagetitle[6:] + '(.*?)(|)', + '', + text, re.I) + pywikibot.output(c + u'remove!!!\nSleeping 10 s...') pimage.put(text, putmsg % imagetitle) time.sleep(10) -#used [[Image:wiki.png]] image + #used [[Image:wiki.png]] image else: - if '[['+imagetitle in text: + if '[[' + imagetitle 
in text: -#Image in userpage, imagepage,and all talkpage , [[Image:wiki.png]] --> [[:Image:wiki.png]] - if ns==1 or ns==6 or ns==2 or ns==3 or ns==5 or ns==7 or ns==9 or ns==11 or ns==13 or ns==15 or ns==17 or ns==101: + #Image in userpage, imagepage, and all talkpage + #[[Image:wiki.png]] --> [[:Image:wiki.png]] - text = re.sub('\[\['+imagetitle+'(.*?)\]\]', ''+'[['+':'+imagetitle+']]',text, re.I) - pywikibot.output(c+u'FIX!\nSleep 10 s......') - pimage.put(text, putmsg % imagetitle) - time.sleep(10) + if ns in (1, 2, 3, 5, 6, 7, 9, 11, 13, 15, 17, 101): + text = re.sub('\[\[' + imagetitle + '(.*?)\]\]', + '' + + '[[' + ':' + imagetitle + ']]', + text, re.I) + pywikibot.output(c + u'FIX!\nSleeping 10 s...') + pimage.put(text, putmsg % imagetitle) + time.sleep(10) -#Image in template, categorypage, remove - elif ns==10 or ns==14: - text = re.sub('\[\['+imagetitle+'(.*?)(|)\]\]', '',text, re.I) - pywikibot.output(c+u'Remove!!!\nSleep 10 s......') - pimage.put(text, putmsg % imagetitle) - time.sleep(10) -# elif '[[:'+imagetitle in text: -# pywikibot.output(c+u'EDP is OK!') - -#Image in <gallery></gallery> + #Image in template, categorypage, remove + elif ns in (10, 14): + text = re.sub( + '\[\[' + imagetitle + '(.*?)(|)\]\]', + '', + text, re.I) + pywikibot.output( + c + u'Remove!!!\nSleeping 10 s...') + pimage.put(text, putmsg % imagetitle) + time.sleep(10) + #Image in <gallery></gallery> else: -# try: -# imagetext=re.search(imagetitle+'(.*?)\n',text,re.I).group(0) - text = re.sub(imagetitle+'(.*?)', '',text, re.I) - text=re.sub('</gallery>\n', '</gallery>\n'+'\n'+'[[:'+imagetitle+']]\n',text, re.I) - pywikibot.output(c+u'FIX <gallery>!\nSleep 10 s......') + text = re.sub(imagetitle + '(.*?)', '', text, re.I) + text = re.sub('</gallery>\n', + '</gallery>\n' + '\n' + + '[[:' + imagetitle + ']]\n', + text, re.I) + pywikibot.output( + c + u'FIX <gallery>!\nSleeping 10 s...') pimage.put(text, putmsg % imagetitle) time.sleep(10) except: print 'Error' pass - -- To view, 
visit https://gerrit.wikimedia.org/r/103255 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I6c7030e61030f4f5450603becc09eedf0b84428e Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/compat Gerrit-Branch: master Gerrit-Owner: Xqt <info(a)gno.de> Gerrit-Reviewer: Alex S.H. Lin <alexsh(a)mail2000.com.tw> Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com> Gerrit-Reviewer: Meno25 <meno25mail(a)gmail.com> Gerrit-Reviewer: Siebrand <siebrand(a)wikimedia.org> Gerrit-Reviewer: jenkins-bot

10 years, 4 months

[Gerrit] [PEP8] editarticle.py - change (pywikibot/compat)

by jenkins-bot (Code Review)

jenkins-bot has submitted this change and it was merged. Change subject: [PEP8] editarticle.py ...................................................................... [PEP8] editarticle.py Change-Id: I5331abeb7848352a7554cc4766415a25d25fce2b --- M editarticle.py 1 file changed, 13 insertions(+), 12 deletions(-) Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified diff --git a/editarticle.py b/editarticle.py index da73797..ab6ce12 100644 --- a/editarticle.py +++ b/editarticle.py @@ -37,7 +37,7 @@ def __init__(self): pass - def command(self, tempFilename, text, jumpIndex = None): + def command(self, tempFilename, text, jumpIndex=None): command = config.editor if jumpIndex: # Some editors make it possible to mark occurences of substrings, @@ -53,25 +53,25 @@ if config.editor.startswith('kate'): command += " -l %i -c %i" % (line + 1, column + 1) elif config.editor.startswith('gedit'): - command += " +%i" % (line + 1) # seems not to support columns + command += " +%i" % (line + 1) # seems not to support columns elif config.editor.startswith('emacs'): - command += " +%i" % (line + 1) # seems not to support columns + command += " +%i" % (line + 1) # seems not to support columns elif config.editor.startswith('jedit'): - command += " +line:%i" % (line + 1) # seems not to support columns + command += " +line:%i" % (line + 1) # seems not to support columns elif config.editor.startswith('vim'): - command += " +%i" % (line + 1) # seems not to support columns + command += " +%i" % (line + 1) # seems not to support columns elif config.editor.startswith('nano'): command += " +%i,%i" % (line + 1, column + 1) # Windows editors elif config.editor.lower().endswith('notepad++.exe'): - command += " -n%i" % (line + 1) # seems not to support columns + command += " -n%i" % (line + 1) # seems not to support columns command += ' %s' % tempFilename #print command return command def convertLinebreaks(self, text): - if sys.platform=='win32': + if sys.platform == 'win32': return 
text.replace('\r\n', '\n') # TODO: Mac OS handling return text @@ -79,12 +79,12 @@ def restoreLinebreaks(self, text): if text is None: return None - if sys.platform=='win32': + if sys.platform == 'win32': return text.replace('\n', '\r\n') # TODO: Mac OS handling return text - def edit(self, text, jumpIndex = None, highlight = None): + def edit(self, text, jumpIndex=None, highlight=None): """ Calls the editor and thus allows the user to change the text. Returns the modified text. Halts the thread's operation until the editor @@ -113,13 +113,14 @@ return None else: newcontent = open(tempFilename).read().decode( - config.editor_encoding) + config.editor_encoding) os.unlink(tempFilename) return self.restoreLinebreaks(newcontent) else: return self.restoreLinebreaks( pywikibot.ui.editText(text, jumpIndex=jumpIndex, highlight=highlight)) + class ArticleEditor: # join lines if line starts with this ones @@ -169,7 +170,7 @@ def run(self): try: - old = self.page.get(get_redirect = self.options.edit_redirect) + old = self.page.get(get_redirect=self.options.edit_redirect) except pywikibot.NoPage: old = "" textEditor = TextEditor() @@ -187,6 +188,7 @@ else: pywikibot.output(u"Nothing changed") + def main(*args): app = ArticleEditor(*args) app.run() @@ -196,4 +198,3 @@ main() finally: pywikibot.stopme() - -- To view, visit https://gerrit.wikimedia.org/r/103329 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I5331abeb7848352a7554cc4766415a25d25fce2b Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/compat Gerrit-Branch: master Gerrit-Owner: Ladsgroup <ladsgroup(a)gmail.com> Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl> Gerrit-Reviewer: Xqt <info(a)gno.de> Gerrit-Reviewer: jenkins-bot

10 years, 4 months

[Gerrit] [PEP8] changes - change (pywikibot/compat)

by jenkins-bot (Code Review)

jenkins-bot has submitted this change and it was merged. Change subject: [PEP8] changes ...................................................................... [PEP8] changes Change-Id: Icf144501fa5939ccb12ae7278929175551573248 --- M extract_wikilinks.py M fixing_redirects.py M followlive.py M get.py 4 files changed, 23 insertions(+), 19 deletions(-) Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified diff --git a/extract_wikilinks.py b/extract_wikilinks.py index 6f5e7d1..601cea2 100644 --- a/extract_wikilinks.py +++ b/extract_wikilinks.py @@ -19,9 +19,10 @@ # # Distributed under the terms of the MIT license. # -__version__='$Id$' +__version__ = '$Id$' # -import sys,re +import sys +import re import codecs import wikipedia as pywikibot # This bot does not contact the Wiki, so no need to get it on the list @@ -38,7 +39,7 @@ elif arg.startswith("-bare"): complete = False elif fn: - print "Ignoring argument %s"%arg + print "Ignoring argument %s" % arg else: fn = arg @@ -47,14 +48,14 @@ sys.exit(1) mysite = pywikibot.getSite() -f=open(fn,'r') -text=f.read() +f = open(fn, 'r') +text = f.read() f.close() for hit in R.findall(text): if complete: list.append(mysite.linkto(hit)) else: - list.append("[[%s]]"%hit) + list.append("[[%s]]" % hit) if sorted: list.sort() for page in list: diff --git a/fixing_redirects.py b/fixing_redirects.py index 49627dd..d35badf 100644 --- a/fixing_redirects.py +++ b/fixing_redirects.py @@ -20,9 +20,10 @@ # # Distributed under the terms of the MIT license. # -__version__='$Id$' +__version__ = '$Id$' # -import re, sys +import re +import sys import wikipedia as pywikibot import pagegenerators from pywikibot import i18n @@ -72,7 +73,7 @@ curpos = 0 # This loop will run until we have finished the current page while True: - m = linkR.search(text, pos = curpos) + m = linkR.search(text, pos=curpos) if not m: break # Make sure that next time around we will not find this same hit. 
@@ -102,7 +103,7 @@ if not link_text: # or like this: [[page_title]]trailing_chars link_text = page_title - if m.group('section') == None: + if m.group('section') is None: section = '' else: section = m.group('section') @@ -117,16 +118,16 @@ replaceit = choice in "rR" # remove preleading ":" - if link_text[0]==':': + if link_text[0] == ':': link_text = link_text[1:] if link_text[0].isupper(): new_page_title = targetPage.title() else: new_page_title = targetPage.title()[0].lower() + \ - targetPage.title()[1:] + targetPage.title()[1:] # remove preleading ":" - if new_page_title[0]==':': + if new_page_title[0] == ':': new_page_title = new_page_title[1:] if replaceit and trailing_chars: @@ -149,6 +150,7 @@ pageCache = [] + def workon(page): mysite = pywikibot.getSite() try: @@ -163,7 +165,7 @@ % page.title()) links = page.linkedPages() if len(links): - pywikibot.getall(mysite,links) + pywikibot.getall(mysite, links) else: pywikibot.output('Nothing left to do.') return diff --git a/followlive.py b/followlive.py index a8987a2..ba7a822 100644 --- a/followlive.py +++ b/followlive.py @@ -13,10 +13,11 @@ # # Distributed under the terms of the MIT license. # -__version__='$Id$' +__version__ = '$Id$' import sys -import datetime, time +import datetime +import time import traceback import wikipedia as pywikibot import editarticle diff --git a/get.py b/get.py index 82343c6..4b97cdf 100644 --- a/get.py +++ b/get.py @@ -13,9 +13,10 @@ # # Distributed under the terms of the MIT license. 
-__version__='$Id$' +__version__ = '$Id$' import wikipedia as pywikibot + def main(): singlePageTitleParts = [] @@ -26,11 +27,10 @@ page = pywikibot.Page(pywikibot.getSite(), pageTitle) # TODO: catch exceptions - pywikibot.output(page.get(), toStdout = True) + pywikibot.output(page.get(), toStdout=True) if __name__ == "__main__": try: main() finally: pywikibot.stopme() - -- To view, visit https://gerrit.wikimedia.org/r/103333 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Icf144501fa5939ccb12ae7278929175551573248 Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/compat Gerrit-Branch: master Gerrit-Owner: Ladsgroup <ladsgroup(a)gmail.com> Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl> Gerrit-Reviewer: Xqt <info(a)gno.de> Gerrit-Reviewer: jenkins-bot

10 years, 4 months

[Gerrit] Added script to upload Freebase identifiers - change (pywikibot/core)

by jenkins-bot (Code Review)

jenkins-bot has submitted this change and it was merged. Change subject: Added script to upload Freebase identifiers ...................................................................... Added script to upload Freebase identifiers Change-Id: I9d1f459d58da0eecd1631e8b863129d9d1aa3a85 --- A scripts/freebasemappingupload.py 1 file changed, 103 insertions(+), 0 deletions(-) Approvals: Merlijn van Deen: Looks good to me, approved jenkins-bot: Verified diff --git a/scripts/freebasemappingupload.py b/scripts/freebasemappingupload.py new file mode 100644 index 0000000..0bd2bba --- /dev/null +++ b/scripts/freebasemappingupload.py @@ -0,0 +1,103 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +Script to upload the mappings of Freebase to Wikidata +Can be easily adapted to upload other String identifiers as well + +This bot needs the dump from +https://developers.google.com/freebase/data#freebase-wikidata-mappings + +The script takes a single parameter: + +-filename: the filename to read the freebase-wikidata mappings from; + default: fb2w.nt.gz +""" + +# +# (C) Denny Vrandecic, 2013 +# (C) Pywikibot team, 2013 +# Distributed under the terms of the MIT license. + +import gzip +import os +import sys + +import pywikibot + + +class FreebaseMapperRobot: + + def __init__(self, filename): + self.repo = pywikibot.Site('wikidata', 'wikidata').data_repository() + self.filename = filename + if not os.path.exists(self.filename): + pywikibot.output('Cannot find %s. Try providing the absolute path.' % self.filename) + sys.exit(1) + + def run(self): + # Set up some items we will use a lot. + self.claim = pywikibot.Claim(self.repo, 'P646') # freebase mapping + # And sources! 
+ self.statedin = pywikibot.Claim(self.repo, 'P248') # stated in + freebasedumpitem = pywikibot.ItemPage(self.repo, 'Q15241312') # Freebase data dump + self.statedin.setTarget(freebasedumpitem) + self.dateofpub = pywikibot.Claim(self.repo, 'P577') # date of publication + oct28 = pywikibot.WbTime(year=2013, month=10, day=28, precision='day') + self.dateofpub.setTarget(oct28) + + for line in gzip.open(self.filename): + self.processLine(line.strip()) + + def processLine(self, line): + if not line or line.startswith('#'): + return + mid, sameas, qid, dot = line.split() + if sameas != '<http://www.w3.org/2002/07/owl#sameAs>': + return + if dot != '.': + return + if not mid.startswith('<http://rdf.freebase.com/ns/m'): + return + mid = '/m/' + mid[30:-1] + if not qid.startswith('<http://www.wikidata.org/entity/Q'): + return + qid = 'Q' + qid[33:-1] + data = pywikibot.ItemPage(self.repo, qid) + data.get() + if not data.labels: + label = '' + elif 'en' in data.labels: + label = data.labels['en'] + else: + # Just pick up the first label + label = data.labels.values()[0] + pywikibot.output('Parsed: %s <--> %s' % (qid, mid)) + pywikibot.output('%s is %s' % (data.getID(), label)) + if data.claims and 'P646' in data.claims: + # We assume that there is only one claim. + # If there are multiple ones, our logs might be wrong + # but the constraint value reports will catch them + if mid != data.claims['P646'][0].getTarget(): + pywikibot.output('Mismatch: expected %s, has %s instead' + % (mid, data.claims['P646'][0].getTarget())) + else: + pywikibot.output('Already has mid set, is consistent.') + else: + # No claim set, lets add it. 
+ pywikibot.output('Going to add a new claim.') + self.claim.setTarget(mid) + data.addClaim(self.claim) + self.claim.addSources([self.statedin, self.dateofpub]) + pywikibot.output('Claim added!') + + +def main(): + filename = 'fb2w.nt.gz' # Default filename + for arg in pywikibot.handleArgs(): + if arg.startswith('-filename'): + filename = arg[11:] + bot = FreebaseMapperRobot(filename) + bot.run() + +if __name__ == '__main__': + main() -- To view, visit https://gerrit.wikimedia.org/r/99180 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I9d1f459d58da0eecd1631e8b863129d9d1aa3a85 Gerrit-PatchSet: 6 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Denny Vrandecic <vrandecic(a)gmail.com> Gerrit-Reviewer: Denny Vrandecic <vrandecic(a)gmail.com> Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com> Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com> Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl> Gerrit-Reviewer: Xqt <info(a)gno.de> Gerrit-Reviewer: jenkins-bot

10 years, 4 months

[Gerrit] casechecker.py: ported to core - change (pywikibot/core)

by jenkins-bot (Code Review)

jenkins-bot has submitted this change and it was merged. Change subject: casechecker.py: ported to core ...................................................................... casechecker.py: ported to core Ported casechecker.py from pywikibot compat to pywikibot core Removed pywikibot.config.verbose_output, updated i18n scripts directory Change-Id: Iccb0720e09e2fa484800a779804fc49fff2ba3ec --- A scripts/casechecker.py 1 file changed, 829 insertions(+), 0 deletions(-) Approvals: Merlijn van Deen: Looks good to me, approved jenkins-bot: Verified diff --git a/scripts/casechecker.py b/scripts/casechecker.py new file mode 100644 index 0000000..98532bd --- /dev/null +++ b/scripts/casechecker.py @@ -0,0 +1,829 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" Script to enumerate all pages on the wiki and find all titles +with mixed latin and cyrilic alphabets. +""" +# +# (C) Pywikibot team, 2006-2013 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' + +import os +import sys +import re +import codecs +import pywikibot +from pywikibot import i18n +from pywikibot.data import api + + +# +# Permutations code was taken from +# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/190465 +# +def xuniqueCombinations(items, n): + if n == 0: + yield [] + else: + for i in xrange(len(items)): + for cc in xuniqueCombinations(items[i + 1:], n - 1): + yield [items[i]] + cc +# End of permutation code +# + +# +# Windows Concole colors +# This code makes this script Windows ONLY!!! +# Feel free to adapt it to another platform +# +# Adapted from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/496901 +# +STD_OUTPUT_HANDLE = -11 + +FOREGROUND_BLUE = 0x01 # text color contains blue. +FOREGROUND_GREEN = 0x02 # text color contains green. +FOREGROUND_RED = 0x04 # text color contains red. +FOREGROUND_INTENSITY = 0x08 # text color is intensified. +BACKGROUND_BLUE = 0x10 # background color contains blue. 
+BACKGROUND_GREEN = 0x20 # background color contains green. +BACKGROUND_RED = 0x40 # background color contains red. +BACKGROUND_INTENSITY = 0x80 # background color is intensified. + +FOREGROUND_WHITE = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED + +try: + import ctypes + std_out_handle = ctypes.windll.kernel32.GetStdHandle(STD_OUTPUT_HANDLE) +except: + std_out_handle = None + + +def SetColor(color): + if std_out_handle: + try: + return ctypes.windll.kernel32.SetConsoleTextAttribute( + std_out_handle, color) + except: + pass + + if color == FOREGROUND_BLUE: + print '(b:', + if color == FOREGROUND_GREEN: + print '(g:', + if color == FOREGROUND_RED: + print '(r:', + +# end of console code + + +class CaseChecker(object): + # These words are always in one language, even though they could be typed + # in both + alwaysInLocal = [u'СССР', u'Как', u'как'] + alwaysInLatin = [u'II', u'III'] + + localUpperLtr = u'ЁІЇЎАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯҐ' + localLowerLtr = u'ёіїўабвгдежзийклмнопрстуфхцчшщъыьэюяґ' + localLtr = localUpperLtr + localLowerLtr + + localSuspects = u'АВЕКМНОРСТХІЁЇаеорсухіёї' + latinSuspects = u'ABEKMHOPCTXIËÏaeopcyxiëï' + + # possibly try to fix one character mistypes in an alternative keyboard + # layout + localKeyboard = u'йцукенгшщзфывапролдячсмить' + latinKeyboard = u'qwertyuiopasdfghjklzxcvbnm' + + romanNumChars = u'IVXLMC' + # all letters that may be used as suffixes after roman numbers: "Iый" + romannumSuffixes = localLowerLtr + romanNumSfxPtrn = re.compile( + u'^[' + romanNumChars + ']+[' + localLowerLtr + ']+$') + + whitelists = { + 'ru': u'ВП:КЛ/Проверенные', + } + + latLtr = u'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + + lclClrFnt = u'' + latClrFnt = u'' + suffixClr = u'' + + wordBreaker = re.compile(u'[ _\-/\|#[\]():]') + stripChars = u' \t,' + + titles = True + links = False + aplimit = None + apfrom = u'' + title = None + replace = False + stopAfter = -1 + wikilog = None + wikilogfile = 'wikilog.txt' + failedTitles = 
'failedTitles.txt' + nosuggestions = 'nosuggestions.txt' + doFailed = False + titleList = None + autonomous = False + namespaces = [] + filterredir = 'nonredirects' + + def __init__(self): + + for arg in pywikibot.handleArgs(): + if arg.startswith('-from'): + if arg.startswith('-from:'): + self.apfrom = arg[6:] + else: + self.apfrom = pywikibot.input(u'Which page to start from: ') + elif arg.startswith('-reqsize:'): + self.aplimit = int(arg[9:]) + elif arg == '-links': + self.links = True + elif arg == '-linksonly': + self.links = True + self.titles = False + elif arg == '-replace': + self.replace = True + elif arg == '-redir': + self.filterredir = 'all' + elif arg == '-redironly': + self.filterredir = 'redirects' + elif arg.startswith('-limit:'): + self.stopAfter = int(arg[7:]) + elif arg == '-autonomous' or arg == '-a': + self.autonomous = True + elif arg.startswith('-ns:'): + self.namespaces.append(int(arg[4:])) + elif arg.startswith('-wikilog:'): + self.wikilogfile = arg[9:] + elif arg.startswith('-failedlog:'): + self.failedTitles = arg[11:] + elif arg == '-failed': + self.doFailed = True + else: + pywikibot.output(u'Unknown argument %s.' 
% arg) + pywikibot.showHelp() + sys.exit() + + if self.namespaces == [] and not self.doFailed: + if self.apfrom == u'': + # 0 should be after templates ns + self.namespaces = [14, 10, 12, 0] + else: + self.namespaces = [0] + + if self.aplimit is None: + self.aplimit = 200 if self.links else 'max' + + if not self.doFailed: + self.queryParams = {'action': 'query', + 'generator': 'allpages', + 'gaplimit': self.aplimit, + 'gapfilterredir': self.filterredir} + else: + self.queryParams = {'action': 'query'} + if self.apfrom != u'': + pywikibot.output(u'Argument "-from" is ignored with "-failed"') + + propParam = 'info' + if self.links: + propParam += '|links|categories' + self.queryParams['pllimit'] = 'max' + self.queryParams['cllimit'] = 'max' + + self.queryParams['prop'] = propParam + + self.site = pywikibot.getSite() + + if len(self.localSuspects) != len(self.latinSuspects): + raise ValueError(u'Suspects must be the same size') + if len(self.localKeyboard) != len(self.latinKeyboard): + raise ValueError(u'Keyboard info must be the same size') + + if not os.path.isabs(self.wikilogfile): + self.wikilogfile = pywikibot.config.datafilepath(self.wikilogfile) + self.wikilog = self.OpenLogFile(self.wikilogfile) + + if not os.path.isabs(self.failedTitles): + self.failedTitles = pywikibot.config.datafilepath(self.failedTitles) + + if self.doFailed: + with codecs.open(self.failedTitles, 'r', 'utf-8') as f: + self.titleList = [self.Page(t) for t in f] + self.failedTitles += '.failed' + + self.lclToLatDict = dict([(ord(self.localSuspects[i]), + self.latinSuspects[i]) + for i in xrange(len(self.localSuspects))]) + self.latToLclDict = dict([(ord(self.latinSuspects[i]), + self.localSuspects[i]) + for i in xrange(len(self.localSuspects))]) + + if self.localKeyboard is not None: + self.lclToLatKeybDict = dict( + [(ord(self.localKeyboard[i]), + self.latinKeyboard[i]) + for i in xrange(len(self.localKeyboard))]) + self.latToLclKeybDict = dict( + [(ord(self.latinKeyboard[i]), + 
self.localKeyboard[i]) + for i in xrange(len(self.localKeyboard))]) + else: + self.lclToLatKeybDict = {} + self.latToLclKeybDict = {} + + badPtrnStr = u'([%s][%s]|[%s][%s])' \ + % (self.latLtr, self.localLtr, self.localLtr, self.latLtr) + self.badWordPtrn = re.compile(u'[%s%s]*%s[%s%s]*' + % (self.latLtr, self.localLtr, + badPtrnStr, self.latLtr, + self.localLtr)) + + # Get whitelist + self.knownWords = set() + self.seenUnresolvedLinks = set() + + # TODO: handle "continue" + if self.site.lang in self.whitelists: + wlpage = self.whitelists[self.site.lang] + pywikibot.output(u'Loading whitelist from %s' % wlpage) + wlparams = { + 'action': 'query', + 'prop': 'links', + 'titles': wlpage, + 'redirects': '', + 'indexpageids': '', + 'pllimit': 'max', + } + + req = api.Request(**wlparams) + data = req.submit() + if len(data['query']['pageids']) == 1: + pageid = data['query']['pageids'][0] + links = data['query']['pages'][pageid]['links'] + + allWords = [nn for n in links + for nn in self.FindBadWords(n['title'])] + + self.knownWords = set(allWords) +## kw = set() +## for w in allWords: +## if len(self.ProcessTitle(w)[1]) > 0: +## kw.add(w) +## self.knownWords = kw + + else: + raise ValueError(u'The number of pageids is not 1') + + pywikibot.output(u'Loaded whitelist with %i items' + % len(self.knownWords)) + if len(self.knownWords) > 0: + pywikibot.log(u'Whitelist: %s' + % u', '.join([self.MakeLink(i, False) + for i in self.knownWords])) + else: + pywikibot.output(u'Whitelist is not known for language %s' + % self.site.lang) + + def RunQuery(self, params): + while True: + # Get data + req = api.Request(**params) + data = req.submit() + + # Process received data + yield data + + # Clear any continuations first + if 'clcontinue' in params: + del params['clcontinue'] + if 'plcontinue' in params: + del params['plcontinue'] + + if 'query-continue' not in data: + if 'gapcontinue' in params: + del params['gapcontinue'] + break + + qc = data['query-continue'] + # First continue 
properties only, once done, continue with allpages + if 'categories' in qc or 'links' in qc: + if 'categories' in qc: + params.update(qc['categories']) + if 'links' in qc: + params.update(qc['links']) + elif 'allpages' in qc: + params.update(qc['allpages']) + else: + raise ValueError(u'Unexpected query-continue values: %s' % qc) + continue + + def Run(self): + try: + self.lastLetter = '' + + if not self.doFailed: + for namespace in self.namespaces: + self.currentTitle = None + self.queryParams['gapnamespace'] = namespace + self.queryParams['gapfrom'] = self.apfrom + for data in self.RunQuery(self.queryParams): + self.ProcessDataBlock(data) + else: + self.currentTitle = None + batchSize = 10 + for batchStart in xrange(0, len(self.titleList), batchSize): + self.queryParams['titles'] = self.titleList[ + batchStart:batchStart + batchSize] + for data in self.RunQuery(self.queryParams): + self.ProcessDataBlock(data) + print "*" * 29, "Done" + except: + pywikibot.output(u'Exception at Title = %s, Next = %s' + % (self.currentTitle, self.apfrom)) + try: + import traceback + pywikibot.output(traceback.format_exc()) + except: + pywikibot.output(u'Unable to print exception info') + raise + + def ProcessDataBlock(self, data): + if 'query' not in data or 'pages' not in data['query']: + return + + firstItem = True + for pageID, page in data['query']['pages'].iteritems(): + printed = False + title = page['title'] + self.currentTitle = title + if 'missing' in page: + continue + if firstItem: + if self.lastLetter != title[0]: + pywikibot.ui.output('Processing %s\n' % title) + self.lastLetter = title[0] + firstItem = False + if self.titles: + err = self.ProcessTitle(title) + if err: + changed = False + if self.replace: + if len(err[1]) == 1: + newTitle = err[1][0] +## choice = pywikibot.inputChoice(u'Move %s to %s?' 
+## % (title, newTitle), +## ['Yes', 'No'], +## ['y', 'n']) + editSummary = i18n.twtranslate(self.site, + "casechecker-rename") + dst = self.Page(newTitle) + + if 'redirect' in page: + src = self.Page(title) + redir = src.getRedirectTarget() + redirTitle = redir.title(asLink=True, + textlink=True) + + if not dst.exists(): + src.move(newTitle, editSummary, + movesubpages=True) + changed = True + + replErrors = False + for p in src.getReferences( + follow_redirects=False): + if p.namespace() == 2: + continue + oldText = p.get(get_redirect=True) + newText = self.ReplaceLink(oldText, title, + newTitle) + if not self.PutNewPage( + p, newText, [ + self.MakeMoveSummary(title, + newTitle)]): + replErrors = True + if not replErrors: + editSummary = i18n.twtranslate( + self.site, "casechecker-delete-summary") + newText = i18n.twtranslate( + self.site, + "casechecker-delete-reason", redirTitle, + fallback=False) + if newText: + src.put(newText, editSummary, + minorEdit=False) + changed = True + + elif not dst.exists(): + src = self.Page(title) + if page['ns'] == 14: + import category + dst = self.Page(newTitle) + bot = category.CategoryMoveRobot( + src.title(withNamespace=False), + dst.title(withNamespace=False), + self.autonomous, + editSummary + u' ' + + self.MakeMoveSummary(title, newTitle), + True) + bot.run() + else: + src.move(newTitle, editSummary, + movesubpages=True) + changed = True + + if not changed: + if len(err[1]) > 0: + self.AppendLineToLog(self.failedTitles, title) + else: + self.AddNoSuggestionTitle(title) + + self.WikiLog(u"* " + err[0]) + printed = True + + if self.links: + allLinks = None + if 'links' in page: + allLinks = page['links'] + if 'categories' in page: + if allLinks: + allLinks = allLinks + page['categories'] + else: + allLinks = page['categories'] + + if allLinks: + pageObj = None + pageTxt = None + msg = [] + foundSuggestions = False + + for l in allLinks: + ltxt = l['title'] + err = self.ProcessTitle(ltxt) + if err: + if len(err[1]) > 0: + 
foundSuggestions = True + elif self.AddNoSuggestionTitle(ltxt): + continue + + newTitle = None + if self.replace: + newTitle = self.PickTarget(title, ltxt, err[1]) + if newTitle: + if pageObj is None: + pageObj = self.Page(title) + pageTxt = pageObj.get() + + msg.append(self.MakeMoveSummary(ltxt, + newTitle)) + + pageTxt = self.ReplaceLink(pageTxt, ltxt, + newTitle) + if not newTitle: + if not printed: + self.WikiLog(u"* %s: link to %s" + % (self.MakeLink(title, False), + err[0])) + printed = True + else: + self.WikiLog(u"** link to %s" % err[0]) + if pageObj is not None: + if self.PutNewPage(pageObj, pageTxt, msg): + # done, no need to log anything + foundSuggestions = False + + if foundSuggestions: + self.AppendLineToLog(self.failedTitles, title) + if self.stopAfter > 0: + self.stopAfter -= 1 + if self.stopAfter == 0: + raise ValueError(u'Stopping because we are done') + + def WikiLog(self, text): + pywikibot.output(text) + self.wikilog.write(text + u'\n') + self.wikilog.flush() + + def FindBadWords(self, title): + for m in self.badWordPtrn.finditer(title): + yield title[m.span()[0]:m.span()[1]] + + def ProcessTitle(self, title): + badWords = list(self.FindBadWords(title)) + if len(badWords) > 0: + # Allow known words, allow any roman numerals with local suffixes + badWords = set([i for i in badWords + if i not in self.knownWords and + self.romanNumSfxPtrn.match(i) is not None]) + + if len(badWords) == 0 or self.Page(title).isImage(): + return + count = 0 + ambigBadWords = set() + ambigBadWordsCount = 0 + mapLcl = {} + mapLat = {} + + for badWord in badWords: + # See if it would make sense to treat the whole word as either + # cyrilic or latin + mightBeLat = mightBeLcl = True + for l in badWord: + if l in self.localLtr: + if mightBeLat and l not in self.localSuspects: + mightBeLat = False + else: + if mightBeLcl and l not in self.latinSuspects: + mightBeLcl = False + if l not in self.latLtr: + raise ValueError(u'Assert failed') + + # Some words are well known and 
frequently mixed-typed + if mightBeLcl and mightBeLat: + if badWord in self.alwaysInLocal: + mightBeLat = False + elif badWord in self.alwaysInLatin: + mightBeLoc = False + + if mightBeLcl: + mapLcl[badWord] = badWord.translate(self.latToLclDict) + if mightBeLat: + mapLat[badWord] = badWord.translate(self.lclToLatDict) + if mightBeLcl and mightBeLat: + ambigBadWords.add(badWord) + # Cannot do len(ambigBadWords) because they might be duplicates + ambigBadWordsCount += 1 + if not mightBeLcl and not mightBeLat: + # try to match one of the knownWords + bwLen = len(badWord) + kw = [w for w in self.knownWords if len(w) == bwLen] + for p in xrange(bwLen): + if len(kw) == 0: + break + c = badWord[p] + co = ord(c) + if co in self.latToLclDict: + c2 = self.latToLclDict[co] + elif co in self.lclToLatDict: + c2 = self.lclToLatDict[co] + else: + c2 = None + kw = [w for w in kw if p < len(w) and + (w[p] == c or (c2 is not None and w[p] == c2))] + if len(kw) > 1: + pywikibot.output(u"Word '%s' could be treated as more than " + u"one known words" % badWord) + elif len(kw) == 1: + mapLcl[badWord] = kw[0] + count += 1 + + infoText = self.MakeLink(title) + possibleAlternatives = [] + + if len(mapLcl) + len(mapLat) - ambigBadWordsCount < count: + # We cannot auto-translate - offer a list of suggested words + suggestions = mapLcl.values() + mapLat.values() + if len(suggestions) > 0: + infoText += u", word suggestions: " + u', '.join( + [self.ColorCodeWord(t) for t in suggestions]) + else: + infoText += u", no suggestions" + else: + + # Replace all unambiguous bad words + for k, v in mapLat.items() + mapLcl.items(): + if k not in ambigBadWords: + title = title.replace(k, v) + if len(ambigBadWords) == 0: + # There are no ambiguity, we can safelly convert + possibleAlternatives.append(title) + infoText += u", convert to " + self.MakeLink(title) + else: + # Try to pick 0, 1, 2, ..., len(ambiguous words) unique + # combinations from the bad words list, and convert just the + # picked words 
to cyrilic, whereas making all other words as + # latin character. + for itemCntToPick in xrange(0, len(ambigBadWords) + 1): + title2 = title + for uc in xuniqueCombinations(list(ambigBadWords), + itemCntToPick): + wordsToLat = ambigBadWords.copy() + for bw in uc: + title2 = title2.replace(bw, mapLcl[bw]) + wordsToLat.remove(bw) + for bw in wordsToLat: + title2 = title2.replace(bw, mapLat[bw]) + possibleAlternatives.append(title2) + + if len(possibleAlternatives) > 0: + infoText += u", can be converted to " + u', '.join( + [self.MakeLink(t) for t in possibleAlternatives]) + else: + infoText += u", no suggestions" + return (infoText, possibleAlternatives) + + def PickTarget(self, title, original, candidates): + if len(candidates) == 0: + return + if len(candidates) == 1: + return candidates[0] + + pagesDontExist = [] + pagesRedir = {} + pagesExist = [] + + for newTitle in candidates: + dst = self.Page(newTitle) + if not dst.exists(): + pagesDontExist.append(newTitle) + elif dst.isRedirectPage(): + pagesRedir[newTitle] = dst.getRedirectTarget().title() + else: + pagesExist.append(newTitle) + if len(pagesExist) == 1: + return pagesExist[0] + elif len(pagesExist) == 0 and len(pagesRedir) > 0: + if len(pagesRedir) == 1: + return pagesRedir.keys()[0] + t = None + for k, v in pagesRedir.iteritems(): + if not t: + t = v # first item + elif t != v: + break + else: + # all redirects point to the same target + # pick the first one, doesn't matter what it is + return pagesRedir.keys()[0] + + if not self.autonomous: + pywikibot.output(u'Could not auto-decide for page %s. Which link ' + u'should be chosen?' 
% self.MakeLink(title, False)) + pywikibot.output(u'Original title: ', newline=False) + self.ColorCodeWord(original + "\n", True) + count = 1 + for t in candidates: + if t in pagesDontExist: + msg = u'missing' + elif t in pagesRedir: + msg = u'Redirect to ' + pagesRedir[t] + else: + msg = u'page exists' + self.ColorCodeWord(u' %d: %s (%s)\n' % (count, t, msg), True) + count += 1 + answers = [str(i) for i in xrange(0, count)] + choice = int(pywikibot.inputChoice( + u'Which link to choose? (0 to skip)', + answers, [a[0] for a in answers])) + if choice > 0: + return candidates[choice - 1] + + def ColorCodeWord(self, word, toScreen=False): + if not toScreen: + res = u"" + lastIsCyr = word[0] in self.localLtr + if lastIsCyr: + if toScreen: + SetColor(FOREGROUND_GREEN) + else: + res += self.lclClrFnt + else: + if toScreen: + SetColor(FOREGROUND_RED) + else: + res += self.latClrFnt + + for l in word: + if l in self.localLtr: + if not lastIsCyr: + if toScreen: + SetColor(FOREGROUND_GREEN) + else: + res += self.suffixClr + self.lclClrFnt + lastIsCyr = True + elif l in self.latLtr: + if lastIsCyr: + if toScreen: + SetColor(FOREGROUND_RED) + else: + res += self.suffixClr + self.latClrFnt + lastIsCyr = False + if toScreen: + pywikibot.output(l, newline=False) + else: + res += l + + if toScreen: + SetColor(FOREGROUND_WHITE) + else: + return res + self.suffixClr + u"" + + def AddNoSuggestionTitle(self, title): + if title in self.seenUnresolvedLinks: + return True + self.seenUnresolvedLinks.add(title) + + params = { + 'action': 'query', + 'list': 'backlinks', + 'bltitle': title, + 'bllimit': '50', + } + + req = api.Request(**params) + data = req.submit() + cl = 0 + redirs = 0 + if 'backlinks' in data['query']: + bl = data['query']['backlinks'] + cl = len(bl) + redirs = len([i for i in bl if 'redirect' in i]) + + if cl > 0 and 'query-continue' in data: + count = '50+' + else: + count = str(cl if cl > 0 else 'no backlinks') + + self.AppendLineToLog(self.nosuggestions, u'* %s 
(%s%s)' + % (self.MakeLink(title), count, u', %d redirects' + % redirs if redirs > 0 else u'')) + return False + + def PutNewPage(self, pageObj, pageTxt, msg): + title = pageObj.title(asLink=True, textlink=True) + coloredMsg = u', '.join([self.ColorCodeWord(m) for m in msg]) + if pageObj.get(get_redirect=True) == pageTxt: + self.WikiLog(u"* Error: Text replacement failed in %s (%s)" + % (self.MakeLink(title, False), coloredMsg)) + else: + pywikibot.output(u'Case Replacements: %s' % u', '.join(msg)) + try: + pageObj.put( + pageTxt, + u'%s: %s' + % (i18n.twtranslate( + self.site, + "casechecker-replacement-summary"), + self.site.mediawiki_message(u"Comma-separator").join(msg))) + return True + except KeyboardInterrupt: + raise + except: + self.WikiLog(u"* Error: Could not save updated page %s (%s)" + % (self.MakeLink(title, False), coloredMsg)) + return False + + def MakeMoveSummary(self, fromTitle, toTitle): + return i18n.twtranslate(self.site, "casechecker-replacement-linklist") % {'source': fromTitle, 'target': toTitle} + + def MakeLink(self, title, colorcode=True): + prf = u'' if self.Page(title).namespace() == 0 else u':' + cc = u'|««« %s »»»' % self.ColorCodeWord(title) if colorcode else u'' + return u"[[%s%s%s]]" % (prf, title, cc) + + def OpenLogFile(self, filename): + try: + return codecs.open(filename, 'a', 'utf-8') + except IOError: + return codecs.open(filename, 'w', 'utf-8') + + def AppendLineToLog(self, filename, text): + with self.OpenLogFile(filename) as f: + f.write(text + u'\n') + + def Page(self, title): + return pywikibot.Page(self.site, title) + + def ReplaceLink(self, text, oldtxt, newtxt): + + frmParts = [s.strip(self.stripChars) + for d in self.wordBreaker.split(oldtxt)] + toParts = [s.strip(self.stripChars) + for s in self.wordBreaker.split(newtxt)] + + if len(frmParts) != len(toParts): + raise ValueError(u'Splitting parts do not match counts') + for i in xrange(0, len(frmParts)): + if len(frmParts[i]) != len(toParts[i]): + raise 
ValueError(u'Splitting parts do not match word length') + if len(frmParts[i]) > 0: + text = text.replace(frmParts[i][0].lower() + frmParts[i][1:], + toParts[i][0].lower() + toParts[i][1:]) + text = text.replace(frmParts[i][0].upper() + frmParts[i][1:], + toParts[i][0].upper() + toParts[i][1:]) + + return text + + +if __name__ == "__main__": + try: + bot = CaseChecker() + bot.Run() + finally: + pywikibot.stopme() -- To view, visit https://gerrit.wikimedia.org/r/101609 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Iccb0720e09e2fa484800a779804fc49fff2ba3ec Gerrit-PatchSet: 9 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Jcf2000 <jaredflores2000(a)gmail.com> Gerrit-Reviewer: Jcf2000 <jaredflores2000(a)gmail.com> Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com> Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl> Gerrit-Reviewer: Strainu <wiki(a)strainu.ro> Gerrit-Reviewer: Xqt <info(a)gno.de> Gerrit-Reviewer: Yurik <yurik(a)wikimedia.org> Gerrit-Reviewer: jenkins-bot

10 years, 4 months

[Gerrit] Port disambredir.py to core - change (pywikibot/core)

by jenkins-bot (Code Review)

jenkins-bot has submitted this change and it was merged. Change subject: Port disambredir.py to core ...................................................................... Port disambredir.py to core Change-Id: If6cb976cd47675ff780f49029f5fa5277b9fd95c --- A scripts/disambredir.py 1 file changed, 184 insertions(+), 0 deletions(-) Approvals: Merlijn van Deen: Looks good to me, approved jenkins-bot: Verified diff --git a/scripts/disambredir.py b/scripts/disambredir.py new file mode 100644 index 0000000..3af5c9f --- /dev/null +++ b/scripts/disambredir.py @@ -0,0 +1,184 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +Goes through the disambiguation pages, checks their links, and asks for +each link that goes to a redirect page whether it should be replaced. +""" +# +# (C) André Engels and others, 2006-2009 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' +# +import pywikibot +from pywikibot import pagegenerators +import re +import sys +from pywikibot import catlib + +msg = { + 'ar': u'تغيير التحويلات في صفحة توضيح', + 'be-x-old': u'Замена перанакіраваньняў на старонку неадназначнасьцяў', + 'en': u'Changing redirects on a disambiguation page', + 'he': u'משנה קישורים להפניות בדף פירושונים', + 'fa': u'اصلاح تغییرمسیرها در یک صفحه ابهام‌زدایی', + 'ja': u'ロボットによる: 曖昧さ回避ページのリダイレクト修正', + 'nl': u'Verandering van redirects op een doorverwijspagina', + 'pl': u'Zmiana przekierowań na stronie ujednoznaczającej', + 'pt': u'Arrumando redirects na página de desambiguação', + 'ru': u'Изменение перенаправлений на странице неоднозначности', + 'uk': u'Зміна перенаправлень на сторінці багатозначності', + 'zh': u'機器人: 修改消歧義頁中的重定向連結', +} + + +def firstcap(string): + return string[0].upper() + string[1:] + + +def treat(text, linkedPage, targetPage): + """ + Based on the method of the same name in solve_disambiguation.py. 
+ """ + # make a backup of the original text so we can show the changes later + linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')') + curpos = 0 + # This loop will run until we have finished the current page + while True: + m = linkR.search(text, pos=curpos) + if not m: + break + # Make sure that next time around we will not find this same hit. + curpos = m.start() + 1 + # ignore interwiki links and links to sections of the same page + if m.group('title') == '' or mysite.isInterwikiLink(m.group('title')): + continue + else: + actualLinkPage = pywikibot.Page(page.site, m.group('title')) + # Check whether the link found is to page. + if actualLinkPage != linkedPage: + continue + + # how many bytes should be displayed around the current link + context = 30 + # at the beginning of the link, start red color. + # at the end of the link, reset the color to default + pywikibot.output(text[max(0, m.start() - context): m.start()] + + '\03{lightred}' + text[m.start(): m.end()] + + '\03{default}' + text[m.end(): m.end() + context]) + while True: + choice = pywikibot.input( + u"Option (N=do not change, y=change link to \03{lightpurple}%s\03{default}, r=change and replace text, u=unlink)" % targetPage.title()) + try: + choice = choice[0] + except: + choice = 'N' + if choice in 'nNyYrRuU': + break + if choice in "nN": + continue + + # The link looks like this: + # [[page_title|link_text]]trailing_chars + page_title = m.group('title') + link_text = m.group('label') + if not link_text: + # or like this: [[page_title]]trailing_chars + link_text = page_title + if m.group('section') is None: + section = '' + else: + section = m.group('section') + trailing_chars = m.group('linktrail') + if trailing_chars: + link_text += trailing_chars + + if choice in "uU": + # unlink - we remove the section if there's any + text = text[:m.start()] + link_text + text[m.end():] + continue + replaceit = choice in "rR" + + if 
link_text[0].isupper(): + new_page_title = targetPage.title() + else: + new_page_title = targetPage.title()[0].lower() + \ + targetPage.title()[1:] + if replaceit and trailing_chars: + newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars) + elif replaceit or (new_page_title == link_text and not section): + newlink = "[[%s]]" % new_page_title + # check if we can create a link with trailing characters instead of a + # pipelink + elif len(new_page_title) <= len(link_text) and \ + firstcap(link_text[:len(new_page_title)]) == \ + firstcap(new_page_title) and \ + re.sub(re.compile(linktrail), '', link_text[len(new_page_title):]) == '' and not section: + newlink = "[[%s]]%s" % (link_text[:len(new_page_title)], + link_text[len(new_page_title):]) + else: + newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text) + text = text[:m.start()] + newlink + text[m.end():] + continue + return text + + +def workon(page, links): + text = page.get() + # Show the title of the page we're working on. + # Highlight the title in purple. + pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" + % page.title()) + for page2 in links: + try: + target = page2.getRedirectTarget() + except (pywikibot.Error, pywikibot.SectionError): + continue + text = treat(text, page2, target) + if text != page.get(): + comment = pywikibot.translate(mysite, msg) + page.put(text, comment) + + +def main(): + global mysite, linktrail, page + start = [] + for arg in pywikibot.handleArgs(): + start.append(arg) + if start: + start = " ".join(start) + else: + start = "!" 
+ mysite = pywikibot.getSite() + linktrail = mysite.linktrail() + try: + generator = pagegenerators.CategorizedPageGenerator( + mysite.disambcategory(), start=start) + except pywikibot.NoPage: + pywikibot.output( + "The bot does not know the disambiguation category for your wiki.") + raise + # only work on articles + generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0]) + generator = pagegenerators.PreloadingGenerator(generator) + pagestodo = [] + pagestoload = [] + for page in generator: + if page.isRedirectPage(): + continue + linked = page.linkedPages() + pagestodo.append((page, linked)) + pagestoload += linked + if len(pagestoload) > 49: + pagestoload = pagegenerators.PreloadingGenerator(pagestoload) + for page, links in pagestodo: + workon(page, links) + pagestoload = [] + pagestodo = [] + +if __name__ == "__main__": + try: + main() + finally: + pywikibot.stopme() -- To view, visit https://gerrit.wikimedia.org/r/102912 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: If6cb976cd47675ff780f49029f5fa5277b9fd95c Gerrit-PatchSet: 7 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Vldandrew <vldandrew(a)gmail.com> Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com> Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl> Gerrit-Reviewer: Strainu <wiki(a)strainu.ro> Gerrit-Reviewer: Vldandrew <vldandrew(a)gmail.com> Gerrit-Reviewer: jenkins-bot

10 years, 4 months

[Gerrit] [L10N] remove wrong templates which does not exist on target... - change (pywikibot/i18n)

by jenkins-bot (Code Review)

jenkins-bot has submitted this change and it was merged. Change subject: [L10N] remove wrong templates which does not exist on target wikipedia ...................................................................... [L10N] remove wrong templates which does not exist on target wikipedia Change-Id: Ifab9ee9dac3f2558fc9e813f9d33e1febbe5adb7 --- M redirect.py 1 file changed, 1 insertion(+), 14 deletions(-) Approvals: Merlijn van Deen: Looks good to me, approved jenkins-bot: Verified diff --git a/redirect.py b/redirect.py index ca9c5bc..cfe3dd2 100644 --- a/redirect.py +++ b/redirect.py @@ -20,7 +20,7 @@ 'redirect-fix-broken-moved': u'Edit summary when the bot fixes a broken redirect to a moved page whose origin has been deleted.\nParameters:\n* <code>%(to)s</code>: the new redirect target, as a wiki link.', 'redirect-fix-loop': u'Edit summary when the bot fixes redirect loops. <code>%(to)s</code> displays the new redirect target as a wiki link.', 'redirect-remove-loop': u'Edit summary when the bot tags a redirect loop for speedy deletion. The internal links are to pages on the English Wikipedia, [http://en.wikipedia.org/wiki/Wikipedia:CSD#G8 here] and [http://en.wikipedia.org/wiki/Wikipedia:Redirect here]. They won\'t work anywhere except on the English Wikipedia, as they stand.', - 'redirect-broken-redirect-template': u'Template for speedy deletion of broken redirect or redirect loops which the bot tags onto the redirect page. 
This message may contain additional informations like template parameters or reasons for the deletion request.\n\nNOTE: If this system message is not given for a language code, speedy deletion request by a bot is not supported on your site except there is a bot with sysop flag.\n\n{{doc-important|Only use your deletion template like <code><nowiki>{{delete}}</nowiki></code> which exist on your local project.}}', + 'redirect-broken-redirect-template': u'NOTE TO TRANSLATOR: This should only be translated by someone on the Wikipedia of your language code. Thank you.\n\nTemplate for speedy deletion of broken redirect or redirect loops which the bot tags onto the redirect page. This message may contain additional informations like template parameters or reasons for the deletion request.\n\nNOTE: If this system message is not given for a language code, speedy deletion request by a bot is not supported on your site except there is a bot with sysop flag.\n\n{{doc-important|Only use your deletion template like <code><nowiki>{{delete}}</nowiki></code> which exist on your local project.}}', }, # Author: Csisc 'aeb': { @@ -28,7 +28,6 @@ 'redirect-remove-broken': u'تحويلة إلى صفحة محذوفة أو غير موجودة', 'redirect-fix-loop': u'روبوت: تعديل حلقة إعادة التوجيه إلى %(to)s', 'redirect-remove-loop': u'هدف التحويلة يصنع عقدة تحويل: Robot', - 'redirect-broken-redirect-template': u'{{شطب|تحويلة مكسورة}}', }, # Author: Naudefj # Author: Xqt @@ -37,7 +36,6 @@ 'redirect-remove-broken': u'Robot: Aanstuur na \'n geskrapte of nie-bestaande bladsy', 'redirect-fix-loop': u'Robot: sirkulêre aanstuur na %(to)s reggemaak', 'redirect-remove-loop': u'Robot: Aanstuur vorm \'n sirkulêre lus', - 'redirect-broken-redirect-template': u'{{db-r1}}', }, # Author: Als-Holder # Author: Xqt @@ -84,7 +82,6 @@ 'redirect-fix-broken-moved': u'Bot: İşləməyən yönləndirilmənin yeri dəyişdirilmiş hədəf səhifəyə %(to)s düzəldilməsi', 'redirect-fix-loop': u'Bot: Sonsuz yönləndirilmənin %(to)s düzəldilməsi', 
'redirect-remove-loop': u'Bot: Yönləndirilmə sonsuz yönləndirilmə formalaşdırır', - 'redirect-broken-redirect-template': u'{{db-r1}}', }, # Author: Amir a57 # Author: E THP @@ -93,7 +90,6 @@ 'redirect-remove-broken': u'[[ویکی‌پئدییا:سیل#یستیقامتلندیرمه|وپ:سیل]]: سیلینئن یا دا وار اولمایان صحیفه‌یه اولان ایستیقامیلندیرمه', 'redirect-fix-loop': u'روبوت: فیخینگ اوزوک اولان%(to)s یؤنلن‌دیرن', 'redirect-remove-loop': u'بوت: ایستیقامتلندیرمه هدفی بیر ایستیقامتلندیرمه دؤورو تشکیل ائدیر', - 'redirect-broken-redirect-template': u'{{سیل|y1}}', }, # Author: Haqmar # Author: Sagan @@ -102,7 +98,6 @@ 'redirect-remove-broken': u'Робот: булмаған йәки юйылған биткә йүнәлтеү', 'redirect-fix-loop': u'Робот: %(to)s битенә йүнәлтеүҙе төҙәтеү', 'redirect-remove-loop': u'Робот: бер ҡайҙа ла йүнәлтелмәгән', - 'redirect-broken-redirect-template': u'{{db-r1}}', }, # Author: Mucalexx # Author: Xqt @@ -114,10 +109,6 @@ }, 'bat-smg': { 'redirect-fix-double': u'Robots: Taisuoms dvėgobs paradresavėms → %(to)s', - }, - # Author: Stephensuleeman - 'bbc-latn': { - 'redirect-broken-redirect-template': u'{{db-r1}}', }, # Author: EugeneZelenko # Author: Jim-by @@ -140,7 +131,6 @@ 'redirect-fix-double': u'Robot: Pamasangan paugahan ganda ka %(to)s', 'redirect-remove-broken': u'[[WP:CSD#G8|G8]]: [[Wikipedia:Redirect|Paalihan]] ka tungkaran nang dihapus atawa kada ada', 'redirect-remove-loop': u'[[WP:CSD#G8|G8]]: Bidikan [[Wikipedia:Redirect|paalihan]] mahasilakan paalihan siklik', - 'redirect-broken-redirect-template': u'{{db-r1}}', }, # Author: Wikitanvir 'bn': { @@ -156,7 +146,6 @@ 'redirect-fix-broken-moved': u'Robot : O reizhañ an adkasoù torret war-zu ar bajenn bal %(to)s', 'redirect-fix-loop': u'Robot : O kempenn al lagadenn adkas war-zu %(to)s', 'redirect-remove-loop': u'Robot: Stumm ur c\'helc\'h-tro born zo gant an [[Wikipedia:Redirect|adkas]]', - 'redirect-broken-redirect-template': u'{{db-r1}}', }, # Author: CERminator # Author: Edinwiki @@ -208,7 +197,6 @@ 'redirect-fix-double': u'Bot: Yn 
trwsio ailgyfeiriad dwbl i %(to)s', 'redirect-remove-broken': u'Bot: Yn ailgyfeirio i dudalen a ddilëwyd neu nad yw ar gael', 'redirect-remove-loop': u'Bot: Mae nod yr ailgyfeiriad yn ffurfio dolen ailgyfeirio', - 'redirect-broken-redirect-template': u'{{db-r1}}', }, # Author: Christian List # Author: Kaare @@ -389,7 +377,6 @@ 'redirect-fix-broken-moved': u'機械人：修復損壞个重定向頁到移動目標頁面 %(to)s', 'redirect-fix-loop': u'機械人：修復重定向迴圈至%(to)s', 'redirect-remove-loop': u'機械人：重定向目標構成循環', - 'redirect-broken-redirect-template': u'{{db-r1}}', }, # Author: Amire80 # Author: YaronSh -- To view, visit https://gerrit.wikimedia.org/r/102071 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ifab9ee9dac3f2558fc9e813f9d33e1febbe5adb7 Gerrit-PatchSet: 2 Gerrit-Project: pywikibot/i18n Gerrit-Branch: master Gerrit-Owner: Xqt <info(a)gno.de> Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl> Gerrit-Reviewer: Siebrand <siebrand(a)wikimedia.org> Gerrit-Reviewer: jenkins-bot

10 years, 4 months

[Gerrit] [PEP8] changes, code improvements, insert __version__ string - change (pywikibot/compat)

by jenkins-bot (Code Review)

jenkins-bot has submitted this change and it was merged. Change subject: [PEP8] changes, code improvements, insert __version__ string ...................................................................... [PEP8] changes, code improvements, insert __version__ string Change-Id: Icff281c4d659d40a527eeecac12de17afaef8201 --- M data_ingestion.py 1 file changed, 80 insertions(+), 51 deletions(-) Approvals: Merlijn van Deen: Looks good to me, approved jenkins-bot: Verified diff --git a/data_ingestion.py b/data_ingestion.py index f5c8f3d..4098399 100644 --- a/data_ingestion.py +++ b/data_ingestion.py @@ -1,69 +1,85 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -''' +""" A generic bot to do data ingestion (batch uploading) to Commons -''' -import pywikibot, upload -import posixpath, urlparse +""" +# +# (C) Pywikibot team, 2011-2013 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' +# + +import posixpath +import urlparse import urllib -import hashlib, base64 +import hashlib +import base64 import StringIO try: import json except ImportError: import simplejson as json +import pywikibot +import upload + class Photo(object): - ''' + """ Represents a Photo (or other file), with metadata, to upload to Commons. The constructor takes two parameters: URL (string) and metadata (dict with str:str key:value pairs) that can be referred to from the title & template generation. - - ''' + """ def __init__(self, URL, metadata): self.URL = URL self.metadata = metadata self.metadata["_url"] = URL - self.metadata["_filename"] = filename = posixpath.split(urlparse.urlparse(URL)[2])[1] + self.metadata["_filename"] = filename = posixpath.split( + urlparse.urlparse(URL)[2])[1] self.metadata["_ext"] = ext = filename.split(".")[-1] if ext == filename: self.metadata["_ext"] = ext = None self.contents = None def downloadPhoto(self): - ''' + """ Download the photo and store it in a StringIO.StringIO object. 
TODO: Add exception handling - ''' + + """ if not self.contents: - imageFile=urllib.urlopen(self.URL).read() + imageFile = urllib.urlopen(self.URL).read() self.contents = StringIO.StringIO(imageFile) return self.contents - def findDuplicateImages(self, site = pywikibot.getSite(u'commons', u'commons')): - ''' - Takes the photo, calculates the SHA1 hash and asks the mediawiki api for a list of duplicates. + def findDuplicateImages(self, + site=pywikibot.getSite(u'commons', u'commons')): + """ + Takes the photo, calculates the SHA1 hash and asks the mediawiki api + for a list of duplicates. TODO: Add exception handling, fix site thing - ''' + + """ hashObject = hashlib.sha1() hashObject.update(self.downloadPhoto().getvalue()) return site.getFilesFromAnHash(base64.b16encode(hashObject.digest())) def getTitle(self, fmt): """ - Given a format string with %(name)s entries, returns the string formatted with metadata + Given a format string with %(name)s entries, returns the string + formatted with metadata + """ return fmt % self.metadata def getDescription(self, template, extraparams={}): - ''' - Generate a description for a file - ''' + """ Generate a description for a file """ params = {} params.update(self.metadata) @@ -72,13 +88,15 @@ for key in sorted(params.keys()): value = params[key] if not key.startswith("_"): - description = description + (u'|%s=%s' % (key, self._safeTemplateValue(value))) + "\n" - description = description + u'}}' + description += (u'|%s=%s' + % (key, self._safeTemplateValue(value))) + "\n" + description += u'}}' return description def _safeTemplateValue(self, value): return value.replace("|", "{{!}}") + def CSVReader(fileobj, urlcolumn, *args, **kwargs): import csv @@ -88,30 +106,35 @@ yield Photo(line[urlcolumn], line) -def JSONReader(baseurl, start=0, end=100, JSONBase=None, metadataFunction=None, fileurl=u'fileurl'): - ''' +def JSONReader(baseurl, start=0, end=100, JSONBase=None, metadataFunction=None, + fileurl=u'fileurl'): + """ Loops 
over a bunch of json page and process them with processJSONPage(). Will yield Photo objects with metadata - ''' + + """ if baseurl: - for i in range(start , end): + for i in range(start, end): url = baseurl % (i,) - photo = processJSONPage(url, JSONBase=JSONBase, metadataFunction=metadataFunction, fileurl=u'fileurl') + photo = processJSONPage(url, JSONBase=JSONBase, + metadataFunction=metadataFunction, + fileurl=u'fileurl') if photo: yield photo - -def processJSONPage(url, JSONBase=None, metadataFunction=None, fileurl=u'fileurl'): - ''' +def processJSONPage(url, JSONBase=None, metadataFunction=None, + fileurl=u'fileurl'): + """ Process a single JSON page. For the JSON page you can rebase it to not get all the crap You can apply a custom metadata function to do some modification on the metadata and checking By default the field 'fileurl' is expected in the metadata to contain the file. You can change this. Will a return Photo object with metadata or None if something is wrong - ''' + + """ JSONPage = urllib.urlopen(url) JSONData = json.load(JSONPage) JSONPage.close() @@ -130,17 +153,20 @@ if metadataFunction: metadata = metadataFunction(metadata) - # If the metadataFunction didn't return none (something was wrong). Return the photo + # If the metadataFunction didn't return none (something was wrong). + # Return the photo if metadata: return Photo(metadata.get(fileurl), metadata) - return False + def JSONRebase(JSONData, JSONBase): - ''' + """ Moves the base of the JSON object to the part you're intrested in. - JSONBase is a list to crawl the tree. If one of the steps is not found, return None - ''' + JSONBase is a list to crawl the tree. 
If one of the steps is not found, + return None + + """ for step in JSONBase: if JSONData: if type(JSONData) == dict: @@ -148,21 +174,20 @@ elif type(JSONData) == list: # FIXME: Needs error, length etc checking JSONData = JSONData[step] - return JSONData def JSONTree(metadata, fieldlist, record): - ''' + """ metadata: Dict with end result key: The key we encountered record: Record to work on - ''' + """ if type(record) == list: for r in record: metadata = JSONTree(metadata, fieldlist, r) elif type(record) == dict: - for k,v in record.items(): + for k, v in record.items(): metadata = JSONTree(metadata, fieldlist + [k], v) elif type(record) == unicode: key = u'_'.join(fieldlist) @@ -172,11 +197,13 @@ newkey = key + u'_2' if not newkey in metadata: metadata[newkey] = record - return metadata + class DataIngestionBot: - def __init__(self, reader, titlefmt, pagefmt, site=pywikibot.getSite(u'commons', u'commons')): + + def __init__(self, reader, titlefmt, pagefmt, + site=pywikibot.getSite(u'commons', u'commons')): self.reader = reader self.titlefmt = titlefmt self.pagefmt = pagefmt @@ -190,17 +217,16 @@ title = photo.getTitle(self.titlefmt) description = photo.getDescription(self.pagefmt) - bot = upload.UploadRobot(url = photo.URL, - description = description, - useFilename = title, - keepFilename = True, - verifyDescription = False, + bot = upload.UploadRobot(url=photo.URL, + description=description, + useFilename=title, + keepFilename=True, + verifyDescription=False, ignoreWarning=True, - targetSite = self.site) + targetSite=self.site) bot._contents = photo.downloadPhoto().getvalue() bot._retrieved = True bot.run() - return title def doSingle(self): @@ -210,9 +236,12 @@ for photo in self.reader: self._doUpload(photo) -if __name__=="__main__": + +if __name__ == "__main__": reader = CSVReader(open('tests/data/csv_ingestion.csv'), 'url') - bot = DataIngestionBot(reader, "%(name)s - %(set)s.%(_ext)s", ":user:valhallasw/test_template", pywikibot.getSite('test', 'test')) + 
bot = DataIngestionBot(reader, "%(name)s - %(set)s.%(_ext)s", + ":user:valhallasw/test_template", + pywikibot.getSite('test', 'test')) bot.run() """ -- To view, visit https://gerrit.wikimedia.org/r/103252 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Icff281c4d659d40a527eeecac12de17afaef8201 Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/compat Gerrit-Branch: master Gerrit-Owner: Xqt <info(a)gno.de> Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com> Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl> Gerrit-Reviewer: Multichill <maarten(a)mdammers.nl> Gerrit-Reviewer: jenkins-bot Gerrit-Reviewer: saper <saper(a)saper.info>

10 years, 4 months

[Gerrit] [PEP8] changes - change (pywikibot/compat)

by jenkins-bot (Code Review)

jenkins-bot has submitted this change and it was merged. Change subject: [PEP8] changes ...................................................................... [PEP8] changes Change-Id: I0562f6b814e4d83f5094d4cd851354cb86aee493 --- M daemonize.py 1 file changed, 4 insertions(+), 2 deletions(-) Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified diff --git a/daemonize.py b/daemonize.py index 121d3a3..0eb9ffb 100644 --- a/daemonize.py +++ b/daemonize.py @@ -1,16 +1,18 @@ # -*- coding: utf-8 -*- # -# (C) Pywikipedia bot team, 2007-2008, 2010 +# (C) Pywikibot team, 2007-2013 # # Distributed under the terms of the MIT license. # __version__ = '$Id$' # -import sys, os +import sys +import os is_daemon = False + def daemonize(close_fd=True, chdir=True, write_pid=False, redirect_std=None): """ Daemonize the current process. Only works on POSIX compatible operating systems. The process will fork to the background and return control to -- To view, visit https://gerrit.wikimedia.org/r/103250 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I0562f6b814e4d83f5094d4cd851354cb86aee493 Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/compat Gerrit-Branch: master Gerrit-Owner: Xqt <info(a)gno.de> Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com> Gerrit-Reviewer: Xqt <info(a)gno.de> Gerrit-Reviewer: jenkins-bot

10 years, 4 months

Jump to page:

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

Pywikibot-commits