jenkins-bot has submitted this change and it was merged.
Change subject: Port archivebot.py from compat
......................................................................
Port archivebot.py from compat
Ported archivebot.py from the compat branch.
Command line options are made single-dashed, and
some of them are removed as duplicating the standard
options.
Change-Id: I1f3d7f5ed19c8f52ac371600218e531b4e80028c
---
A scripts/archivebot.py
1 file changed, 654 insertions(+), 0 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
new file mode 100644
index 0000000..fe40a96
--- /dev/null
+++ b/scripts/archivebot.py
@@ -0,0 +1,654 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+archivebot.py - discussion page archiving bot.
+
+usage:
+
+ python pwb.py archivebot [OPTIONS] TEMPLATE_PAGE
+
+Bot examines backlinks (Special:WhatLinksHere) to TEMPLATE_PAGE.
+Then goes through all pages (unless a specific page is specified using options)
+and archives old discussions. This is done by breaking a page into threads,
+then scanning each thread for timestamps. Threads older than a specified
+threshold are then moved to another page (the archive), which can be named
+either based on the thread's name, or the name can contain a counter which
+will be incremented when the archive reaches a certain size.
+
+Transcluded template may contain the following parameters:
+
+{{TEMPLATE_PAGE
+|archive =
+|algo =
+|counter =
+|maxarchivesize =
+|minthreadsleft =
+|minthreadstoarchive =
+|archiveheader =
+|key =
+}}
+
+Meanings of parameters are:
+
+archive Name of the page to which archived threads will be put.
+ Must be a subpage of the current page. Variables are
+ supported.
+algo specifies the maximum age of a thread. Must be in the form
+ old(<delay>) where <delay> specifies the age in hours or
+ days like 24h or 5d.
+ Default is old(24h)
+counter The current value of a counter which could be assigned as
+ variable. Will be updated by the bot. Initial value is 1.
+maxarchivesize The maximum archive size before incrementing the counter.
+ Value can be given with appending letter like K or M which
+ indicates KByte or MByte. Default value is 1000M.
+minthreadsleft Minimum number of threads that should be left on a page.
+ Default value is 5.
+minthreadstoarchive The minimum number of threads to archive at once. Default
+ value is 2.
+archiveheader Content that will be put on new archive pages as the
+ header. This parameter supports the use of variables.
+ Default value is {{talkarchive}}
+key A secret key that (if valid) allows archives to not be
+ subpages of the page being archived.
+
+
+Options (may be omitted):
+ -help show this help message and exit
+ -calc:PAGE calculate key for PAGE and exit
+ -file:FILE load list of pages from FILE
+ -force override security options
+ -locale:LOCALE switch to locale LOCALE
+ -namespace:NS only archive pages from a given namespace
+ -page:PAGE archive a single PAGE, default ns is a user talk page
+ -salt:SALT specify salt
+"""
+#
+# (C) Misza13, 2006-2010
+# (C) xqt, 2009-2012
+# (C) Pywikibot team, 2007-2013
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+import os
+import re
+import time
+import locale
+import traceback
+import string
+import urllib
+import unicodedata
+try: # Get a constructor for the MD5 hash object
+ import hashlib
+ new_hash = hashlib.md5
+except ImportError: # Old python?
+ import md5
+ new_hash = md5.md5
+
+import pywikibot
+from pywikibot import i18n, pagegenerators
+
+
+Site = pywikibot.getSite()
+language = Site.language()
+
+
+def message(key, lang=Site.language()):
+ return i18n.twtranslate(lang, key)
+
+
+class MalformedConfigError(pywikibot.Error):
+ """There is an error in the configuration template."""
+
+
+class MissingConfigError(pywikibot.Error):
+ """The config is missing in the header (either it's in one of the threads
+ or transcluded from another page).
+
+ """
+
+
+class AlgorithmError(MalformedConfigError):
+ """Invalid specification of archiving algorithm."""
+
+
+class ArchiveSecurityError(pywikibot.Error):
+ """Archive is not a subpage of page being archived and key not specified
+ (or incorrect).
+
+ """
+
+
+def str2time(str):
+ """Accepts a string defining a time period:
+ 7d - 7 days
+ 36h - 36 hours
+ Returns the corresponding time, measured in seconds.
+
+ """
+ if str[-1] == 'd':
+ return int(str[:-1]) * 24 * 3600
+ elif str[-1] == 'h':
+ return int(str[:-1]) * 3600
+ else:
+ return int(str)
+
+
+def str2size(str):
+ """Accepts a string defining a size:
+ 1337 - 1337 bytes
+ 150K - 150 kilobytes
+ 2M - 2 megabytes
+ Returns a tuple (size,unit), where size is an integer and unit is
+ 'B' (bytes) or 'T' (threads).
+
+ """
+ if str[-1] in string.digits: # TODO: de-uglify
+ return (int(str), 'B')
+ elif str[-1] in ['K', 'k']:
+ return (int(str[:-1]) * 1024, 'B')
+ elif str[-1] == 'M':
+ return (int(str[:-1]) * 1024 * 1024, 'B')
+ elif str[-1] == 'T':
+ return (int(str[:-1]), 'T')
+ else:
+ return (int(str[:-1]) * 1024, 'B')
+
+
+def int2month(num):
+ """Returns the locale's full name of month 'num' (1-12)."""
+ if hasattr(locale, 'nl_langinfo'):
+ return locale.nl_langinfo(locale.MON_1 + num - 1).decode('utf-8')
+ Months = ['january', 'february', 'march', 'april', 'may_long', 'june',
+ 'july', 'august', 'september', 'october', 'november', 'december']
+ return Site.mediawiki_message(Months[num - 1])
+
+
+def int2month_short(num):
+ """Returns the locale's abbreviated name of month 'num' (1-12)."""
+ if hasattr(locale, 'nl_langinfo'):
+ #filter out non-alpha characters
+ return ''.join([c for c in
+ locale.nl_langinfo(
+ locale.ABMON_1 + num - 1).decode('utf-8')
+ if c.isalpha()])
+ Months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
+ 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
+ return Site.mediawiki_message(Months[num - 1])
+
+
+def txt2timestamp(txt, format):
+ """Attempts to convert the timestamp 'txt' according to given 'format'.
+ On success, returns the time tuple; on failure, returns None.
+
+ """
+## print txt, format
+ try:
+ return time.strptime(txt, format)
+ except ValueError:
+ try:
+ return time.strptime(txt.encode('utf8'), format)
+ except:
+ pass
+
+
+def generateTransclusions(Site, template, namespaces=[]):
+ pywikibot.output(u'Fetching template transclusions...')
+ transclusionPage = pywikibot.Page(Site, template, ns=10)
+ gen = pagegenerators.ReferringPageGenerator(transclusionPage,
+ onlyTemplateInclusion=True)
+ if namespaces:
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces, Site)
+ for page in gen:
+ yield page
+
+
+class DiscussionThread(object):
+ """An object representing a discussion thread on a page, that is something
+ of the form:
+
+ == Title of thread ==
+
+ Thread content here. ~~~~
+ :Reply, etc. ~~~~
+
+ """
+
+ def __init__(self, title):
+ self.title = title
+ self.content = ""
+ self.timestamp = None
+
+ def __repr__(self):
+ return '%s("%s",%d bytes)' \
+ % (self.__class__.__name__, self.title, len(self.content))
+
+ def feedLine(self, line):
+ if not self.content and not line:
+ return
+ self.content += line + '\n'
+ #Update timestamp
+# nnwiki:
+# 19:42, 25 mars 2008 (CET)
+# enwiki
+# 16:36, 30 March 2008 (UTC)
+# huwiki
+# 2007. december 8., 13:42 (CET)
+ TM = re.search(r'(\d\d):(\d\d), (\d\d?) (\S+) (\d\d\d\d) \(.*?\)', line)
+ if not TM:
+ TM = re.search(r'(\d\d):(\d\d), (\S+) (\d\d?), (\d\d\d\d) \(.*?\)',
+ line)
+ if not TM:
+ TM = re.search(r'(\d{4})\. (\S+) (\d\d?)\., (\d\d:\d\d) \(.*?\)',
+ line)
+# 18. apr 2006 kl.18:39 (UTC)
+# 4. nov 2006 kl. 20:46 (CET)
+ if not TM:
+ TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kl\.\W*(\d\d):(\d\d) \(.*?\)',
+ line)
+#3. joulukuuta 2008 kello 16.26 (EET)
+ if not TM:
+ TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kello \W*(\d\d).(\d\d) \(.*?\)',
+ line)
+ if not TM:
+# 14:23, 12. Jan. 2009 (UTC)
+ pat = re.compile(r'(\d\d):(\d\d), (\d\d?)\. (\S+)\.? (\d\d\d\d) \((?:UTC|CES?T)\)')
+ TM = pat.search(line)
+# ro.wiki: 4 august 2012 13:01 (EEST)
+ if not TM:
+ TM = re.search(r'(\d\d?) (\S+) (\d\d\d\d) (\d\d):(\d\d) \(.*?\)',
+ line)
+# Japanese: 2012年8月4日 (日) 13:01 (UTC)
+ if not TM:
+ TM = re.search(re.compile(u'(\d\d\d\d)年(\d\d?)月(\d\d?)日 \(.\) (\d\d):(\d\d) \(.*?\)'),
+ line)
+ if TM:
+ # Strip away all diacritics in the Mn ('Mark, non-spacing') category
+ # NFD decomposition splits combined characters (e.g. 'ä',
+ # LATIN SMALL LETTER A WITH DIAERESIS) into two entities:
+ # LATIN SMALL LETTER A and COMBINING DIAERESIS. The latter falls
+ # in the Mn category and is filtered out, resulting in 'a'.
+ _TM = ''.join(c for c in unicodedata.normalize('NFD', TM.group(0))
+ if unicodedata.category(c) != 'Mn')
+
+ TIME = txt2timestamp(_TM, "%d. %b %Y kl. %H:%M (%Z)")
+ if not TIME:
+ TIME = txt2timestamp(_TM, "%Y. %B %d., %H:%M (%Z)")
+ if not TIME:
+ TIME = txt2timestamp(_TM, "%d. %b %Y kl.%H:%M (%Z)")
+ if not TIME:
+ TIME = txt2timestamp(re.sub(' *\([^ ]+\) *', '', _TM),
+ "%H:%M, %d %B %Y")
+ if not TIME:
+ TIME = txt2timestamp(_TM, "%H:%M, %d %b %Y (%Z)")
+ if not TIME:
+ TIME = txt2timestamp(re.sub(' *\([^ ]+\) *', '', _TM),
+ "%H:%M, %d %b %Y")
+ if not TIME:
+ TIME = txt2timestamp(_TM, "%H:%M, %b %d %Y (%Z)")
+ if not TIME:
+ TIME = txt2timestamp(_TM, "%H:%M, %B %d %Y (%Z)")
+ if not TIME:
+ TIME = txt2timestamp(_TM, "%H:%M, %b %d, %Y (%Z)")
+ if not TIME:
+ TIME = txt2timestamp(_TM, "%H:%M, %B %d, %Y (%Z)")
+ if not TIME:
+ TIME = txt2timestamp(_TM, "%d. %Bta %Y kello %H.%M (%Z)")
+ if not TIME:
+ TIME = txt2timestamp(_TM, "%d %B %Y %H:%M (%Z)")
+ if not TIME:
+ TIME = txt2timestamp(_TM, "%Y年%B%d日 (%a) %H:%M (%Z)")
+ if not TIME:
+ TIME = txt2timestamp(re.sub(' *\([^ ]+\) *', '', _TM),
+ "%H:%M, %d. %b. %Y")
+ if TIME:
+ self.timestamp = max(self.timestamp, time.mktime(TIME))
+## pywikibot.output(u'Time to be parsed: %s' % TM.group(0))
+## pywikibot.output(u'Parsed time: %s' % TIME)
+## pywikibot.output(u'Newest timestamp in thread: %s' % TIME)
+
+ def size(self):
+ return len(self.title) + len(self.content) + 12
+
+ def toText(self):
+ return "== " + self.title + ' ==\n\n' + self.content
+
+ def shouldBeArchived(self, Archiver):
+ algo = Archiver.get('algo')
+ reT = re.search(r'^old\((.*)\)$', algo)
+ if reT:
+ if not self.timestamp:
+ return ''
+ #TODO: handle this:
+ #return 'unsigned'
+ maxage = str2time(reT.group(1))
+ if self.timestamp + maxage < time.time():
+ return message('archivebot-older-than') + ' ' + reT.group(1)
+ return ''
+
+
+class DiscussionPage(pywikibot.Page):
+ """A class that represents a single discussion page as well as an archive
+ page. Feed threads to it and run an update() afterwards.
+
+ """
+
+ def __init__(self, title, archiver, vars=None):
+ pywikibot.Page.__init__(self, Site, title)
+ self.threads = []
+ self.full = False
+ self.archiver = archiver
+ self.vars = vars
+ try:
+ self.loadPage()
+ except pywikibot.NoPage:
+ self.header = archiver.get('archiveheader',
+ message('archivebot-archiveheader'))
+ if self.vars:
+ self.header = self.header % self.vars
+
+ def loadPage(self):
+ """Loads the page to be archived and breaks it up into threads."""
+ self.header = ''
+ self.threads = []
+ self.archives = {}
+ self.archivedThreads = 0
+ lines = self.get().split('\n')
+ found = False # Reading header
+ curThread = None
+ for line in lines:
+ threadHeader = re.search('^== *([^=].*?) *== *$', line)
+ if threadHeader:
+ found = True # Reading threads now
+ if curThread:
+ self.threads.append(curThread)
+ curThread = DiscussionThread(threadHeader.group(1))
+ else:
+ if found:
+ curThread.feedLine(line)
+ else:
+ self.header += line + '\n'
+ if curThread:
+ self.threads.append(curThread)
+ pywikibot.output(u'%d Threads found on %s' % (len(self.threads), self))
+
+ def feedThread(self, thread, maxArchiveSize=(250 * 1024, 'B')):
+ self.threads.append(thread)
+ self.archivedThreads += 1
+ if maxArchiveSize[1] == 'B':
+ if self.size() >= maxArchiveSize[0]:
+ self.full = True
+ elif maxArchiveSize[1] == 'T':
+ if len(self.threads) >= maxArchiveSize[0]:
+ self.full = True
+ return self.full
+
+ def size(self):
+ return len(self.header) + sum([t.size() for t in self.threads])
+
+ def update(self, summary, sortThreads=False):
+ if sortThreads:
+ pywikibot.output(u'Sorting threads...')
+ self.threads.sort(key=lambda t: t.timestamp)
+ newtext = re.sub('\n*$', '\n\n', self.header) # Fix trailing newlines
+ for t in self.threads:
+ newtext += t.toText()
+ if self.full:
+ summary += ' ' + message('archivebot-archive-full')
+ self.put(newtext, comment=summary)
+
+
+class PageArchiver(object):
+ """A class that encapsulates all archiving methods.
+ __init__ expects a pywikibot.Page object.
+ Execute by running the .run() method."""
+
+ algo = 'none'
+
+ def __init__(self, Page, tpl, salt, force=False):
+ self.attributes = {
+ 'algo': ['old(24h)', False],
+ 'archive': ['', False],
+ 'maxarchivesize': ['1000M', False],
+ 'counter': ['1', False],
+ 'key': ['', False],
+ }
+ self.tpl = tpl
+ self.salt = salt
+ self.force = force
+ self.Page = DiscussionPage(Page.title(), self)
+ self.loadConfig()
+ self.commentParams = {
+ 'from': self.Page.title(),
+ }
+ self.archives = {}
+ self.archivedThreads = 0
+
+ def get(self, attr, default=''):
+ return self.attributes.get(attr, [default])[0]
+
+ def set(self, attr, value, out=True):
+ if attr == 'archive':
+ value = value.replace('_', ' ')
+ self.attributes[attr] = [value, out]
+
+ def saveables(self):
+ return [a for a in self.attributes if self.attributes[a][1]
+ and a != 'maxage']
+
+ def attr2text(self):
+ return '{{%s\n%s\n}}' % (self.tpl,
+ '\n'.join(['|%s = %s '
+ % (a, self.get(a))
+ for a in self.saveables()]))
+
+ def key_ok(self):
+ s = new_hash()
+ s.update(self.salt + '\n')
+ s.update(self.Page.title().encode('utf8') + '\n')
+ return self.get('key') == s.hexdigest()
+
+ def loadConfig(self):
+ pywikibot.output(u'Looking for: {{%s}} in %s' % (self.tpl, self.Page))
+ found = False
+ for tpl in self.Page.templatesWithParams():
+ if tpl[0].title() == self.tpl:
+ for param in tpl[1]:
+ item, value = param.split('=', 1)
+ self.set(item.strip(), value.strip())
+ found = True
+ break
+ if not found:
+ raise MissingConfigError(u'Missing or malformed template')
+ if not self.get('algo', ''):
+ raise MissingConfigError(u'Missing algo')
+
+ def feedArchive(self, archive, thread, maxArchiveSize, vars=None):
+ """Feed the thread to one of the archives.
+ If it doesn't exist yet, create it.
+ If archive name is an empty string (or None),
+ discard the thread (/dev/null).
+ Also checks for security violations."""
+ if not archive:
+ return
+ if not self.force \
+ and not self.Page.title() + '/' == archive[
+ :len(self.Page.title()) + 1] \
+ and not self.key_ok():
+ raise ArchiveSecurityError
+ if not archive in self.archives:
+ self.archives[archive] = DiscussionPage(archive, self, vars)
+ return self.archives[archive].feedThread(thread, maxArchiveSize)
+
+ def analyzePage(self):
+ maxArchSize = str2size(self.get('maxarchivesize'))
+ archCounter = int(self.get('counter', '1'))
+ oldthreads = self.Page.threads
+ self.Page.threads = []
+ T = time.mktime(time.gmtime())
+ whys = []
+ pywikibot.output(u'Processing %d threads' % len(oldthreads))
+ for t in oldthreads:
+ if len(oldthreads) - self.archivedThreads \
+ <= int(self.get('minthreadsleft', 5)):
+ self.Page.threads.append(t)
+ continue # Because there's too little threads left.
+ # TODO: Make an option so that unstamped (unsigned) posts get
+ # archived.
+ why = t.shouldBeArchived(self)
+ if why:
+ archive = self.get('archive')
+ TStuple = time.gmtime(t.timestamp)
+ vars = {
+ 'counter': archCounter,
+ 'year': TStuple[0],
+ 'month': TStuple[1],
+ 'monthname': int2month(TStuple[1]),
+ 'monthnameshort': int2month_short(TStuple[1]),
+ 'week': int(time.strftime('%W', TStuple)),
+ }
+ archive = pywikibot.Page(Site, archive % vars).title()
+ if self.feedArchive(archive, t, maxArchSize, vars):
+ archCounter += 1
+ self.set('counter', str(archCounter))
+ whys.append(why)
+ self.archivedThreads += 1
+ else:
+ self.Page.threads.append(t)
+ return set(whys)
+
+ def run(self):
+ if not self.Page.botMayEdit():
+ return
+ whys = self.analyzePage()
+ if self.archivedThreads < int(self.get('minthreadstoarchive', 2)):
+ # We might not want to archive a measly few threads
+ # (lowers edit frequency)
+ pywikibot.output(u'There are only %d Threads. Skipping'
+ % self.archivedThreads)
+ return
+ if whys:
+ pywikibot.output(u'Archiving %d thread(s).' % self.archivedThreads)
+ # Save the archives first (so that bugs don't cause a loss of data)
+ for a in sorted(self.archives.keys()):
+ self.commentParams['count'] = self.archives[a].archivedThreads
+ comment = i18n.twntranslate(language,
+ 'archivebot-archive-summary',
+ self.commentParams)
+ self.archives[a].update(comment)
+
+ #Save the page itself
+ rx = re.compile('{{%s\n.*?\n}}' % self.tpl, re.DOTALL)
+ self.Page.header = rx.sub(self.attr2text(), self.Page.header)
+ self.commentParams['count'] = self.archivedThreads
+ self.commentParams['archives'] = ', '.join(
+ ['[[%s]]' % a.title() for a in self.archives.values()])
+ if not self.commentParams['archives']:
+ self.commentParams['archives'] = '/dev/null'
+ self.commentParams['why'] = ', '.join(whys)
+ comment = i18n.twntranslate(language,
+ 'archivebot-page-summary',
+ self.commentParams)
+ self.Page.update(comment)
+
+
+def main():
+ global Site, language
+
+ import sys
+
+ filename = None
+ pagename = None
+ namespace = None
+ salt = None
+ force = False
+ calc = None
+ args = []
+
+ def if_arg_value(arg, name):
+ if arg.startswith(name):
+ yield arg[len(name) + 1:]
+
+ for arg in pywikibot.handleArgs(*sys.argv):
+ for v in if_arg_value(arg, '-file'):
+ filename = v
+ for v in if_arg_value(arg, '-locale'):
+ #Required for english month names
+ locale.setlocale(locale.LC_TIME, v.encode('utf8'))
+ for v in if_arg_value(arg, '-timezone'):
+ os.environ['TZ'] = v.timezone
+ #Or use the preset value
+ if hasattr(time, 'tzset'):
+ time.tzset()
+ for v in if_arg_value(arg, '-calc'):
+ calc = v
+ for v in if_arg_value(arg, '-salt'):
+ salt = v
+ for v in if_arg_value(arg, '-force'):
+ force = True
+ for v in if_arg_value(arg, '-filename'):
+ filename = v
+ for v in if_arg_value(arg, '-pagename'):
+ pagename = v
+ for v in if_arg_value(arg, '-namespace'):
+ namespace = v
+ if not arg.startswith('-'):
+ args.append(arg)
+
+ if calc:
+ if not salt:
+ parser.error('Note: you must specify a salt to calculate a key')
+ s = new_hash()
+ s.update(salt + '\n')
+ s.update(calc + '\n')
+ pywikibot.output(u'key = ' + s.hexdigest())
+ return
+
+ if not salt:
+ salt = ''
+
+ Site = pywikibot.getSite()
+ language = Site.language()
+
+ if not args or len(args) <= 1:
+ pywikibot.output(u'NOTE: you must specify a template to run the bot')
+ pywikibot.showHelp('archivebot')
+ return
+
+ for a in args[1:]:
+ pagelist = []
+ a = a.decode('utf8')
+ if not filename and not pagename:
+ if namespace is not None:
+ ns = [str(namespace)]
+ else:
+ ns = []
+ for pg in generateTransclusions(Site, a, ns):
+ pagelist.append(pg)
+ if filename:
+ for pg in file(filename, 'r').readlines():
+ pagelist.append(pywikibot.Page(Site, pg, ns=10))
+ if pagename:
+ pagelist.append(pywikibot.Page(Site, pagename,
+ ns=3))
+ pagelist = sorted(pagelist)
+ for pg in iter(pagelist):
+ pywikibot.output(u'Processing %s' % pg)
+ # Catching exceptions, so that errors in one page do not bail out
+ # the entire process
+ try:
+ Archiver = PageArchiver(pg, a, salt, force)
+ Archiver.run()
+ time.sleep(10)
+ except:
+ pywikibot.output(u'Error occured while processing page %s' % pg)
+ traceback.print_exc()
+
+
+if __name__ == '__main__':
+ try:
+ main()
+ finally:
+ pywikibot.stopme()
--
To view, visit https://gerrit.wikimedia.org/r/101477
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I1f3d7f5ed19c8f52ac371600218e531b4e80028c
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Whym <whym(a)whym.org>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Whym <whym(a)whym.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Alex S.H. Lin has submitted this change and it was merged.
Change subject: (bug 58505) pass 'textmissing' error and raise serverError
......................................................................
(bug 58505) pass 'textmissing' error and raise serverError
Sometimes the text is missing and we get a "textmissing" entry
via the API. Now we ignore that entry and raise a serverError
in the next step because textareaFound remains False.
Change-Id: I63bfea933a00243c7b38d8c71f5733b2b6d092af
---
M wikipedia.py
1 file changed, 5 insertions(+), 1 deletion(-)
Approvals:
Alex S.H. Lin: Verified; Looks good to me, approved
jenkins-bot: Verified
diff --git a/wikipedia.py b/wikipedia.py
index 7c6feec..18c2b2c 100644
--- a/wikipedia.py
+++ b/wikipedia.py
@@ -852,7 +852,11 @@
raise BadTitle('BadTitle: %s' % self)
elif 'revisions' in pageInfo: # valid Title
lastRev = pageInfo['revisions'][0]
- if isinstance(lastRev['*'], basestring):
+ if 'textmissing' in lastRev:
+ # Maybe we could use a new error exception.
+ # Now we just pass and got a server error
+ pass
+ elif isinstance(lastRev['*'], basestring):
textareaFound = True
# I got page date with 'revisions' in pageInfo but
# lastRev['*'] = False instead of the content. The Page itself was
--
To view, visit https://gerrit.wikimedia.org/r/101646
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I63bfea933a00243c7b38d8c71f5733b2b6d092af
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Alex S.H. Lin <alexsh(a)mail2000.com.tw>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: Enables retrieving broken redirects from special page and try to fix them
......................................................................
Enables retrieving broken redirects from special page and try to fix them
There is a new behavior for broken redirects. They are no longer
just deleted but bot tries to fix broken redirects:
- Now we can read broken redirects from special page
- First check we can solve the problem. We look at the deleted
target page and check whether that has been moved. If yes, we
have found the new target and we repair the broken redirect with
the already existing message 'redirect-fix-broken-moved'.
- Otherwise we delete the broken redirect if the new -delete option
is given and the bot has sysop rights. Without sysop rights we
place a speedy deletion template on the page if one is available.
- Documentation updated.
Change-Id: I5a0f50d4145f1510bd2a430d3f5db0dbb081ef7c
---
M scripts/redirect.py
1 file changed, 88 insertions(+), 12 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/redirect.py b/scripts/redirect.py
index 8ece506..0487f2a 100755
--- a/scripts/redirect.py
+++ b/scripts/redirect.py
@@ -12,7 +12,11 @@
where action can be one of these:
double Fix redirects which point to other redirects
-broken Delete redirects where targets don\'t exist. Requires adminship.
+broken Tries to fix broken redirects to the last moved target of the
+ destination page. If this fails and the -delete option is given,
+ it deletes redirects whose targets don't exist if the bot has
+ admin rights; otherwise it marks the page with a speedy deletion
+ template if available.
both Both of the above.
and arguments can be:
@@ -44,8 +48,12 @@
-until:title The possible last page title in each namespace. Page needs not
exist.
+-step:n The number of entries retrieved at once via the API
+
-total:n The maximum count of redirects to work upon. If omitted, there
is no limit.
+
+-delete Enables deletion of broken redirects.
-always Don't prompt you for each replacement.
@@ -53,7 +61,7 @@
#
# (C) Daniel Herding, 2004.
# (C) Purodha Blissenbach, 2009.
-# (C) xqt, 2009-2012
+# (C) xqt, 2009-2013
# (C) Pywikibot team, 2004-2013
#
# Distributed under the terms of the MIT license.
@@ -261,6 +269,12 @@
count += 1
if count >= self.api_number:
break
+ elif not self.xmlFilename:
+ # retrieve information from broken redirect special page
+ pywikibot.output(u'Retrieving special page...')
+ for redir_name in self.site.broken_redirects():
+ yield redir_name.title()
+
# TODO: add XML dump support
## elif self.xmlFilename == None:
## # retrieve information from the live wiki's maintenance page
@@ -356,12 +370,14 @@
class RedirectRobot:
- def __init__(self, action, generator, always=False, number=None):
+ def __init__(self, action, generator, always=False, number=None,
+ delete=False):
self.site = pywikibot.getSite()
self.action = action
self.generator = generator
self.always = always
self.number = number
+ self.delete = delete
self.exiting = False
def prompt(self, question):
@@ -380,13 +396,21 @@
def delete_broken_redirects(self):
# get reason for deletion text
- reason = i18n.twtranslate(self.site, 'redirect-remove-broken')
for redir_name in self.generator.retrieve_broken_redirects():
- self.delete_1_broken_redirect(redir_name, reason)
+ self.delete_1_broken_redirect(redir_name)
if self.exiting:
break
- def delete_1_broken_redirect(self, redir_name, reason):
+ def moved_page(self, source):
+ gen = iter(self.site.logevents(logtype='move', page=source, total=1))
+ try:
+ lastmove = next(gen)
+ except StopIteration:
+ return None
+ else:
+ return lastmove.new_title()
+
+ def delete_1_broken_redirect(self, redir_name):
redir_page = pywikibot.Page(self.site, redir_name)
# Show the title of the page we're working on.
# Highlight the title in purple.
@@ -401,11 +425,57 @@
else:
try:
targetPage.get()
+ except pywikibot.BadTitle:
+ pywikibot.warning(
+ u'Redirect target %s is not a valid page title.'
+ % str(e)[10:])
+ pass
except pywikibot.NoPage:
- if self.prompt(u'Redirect target %s does not exist. '
- u'Do you want to delete %s?'
- % (targetPage.title(asLink=True),
- redir_page.title(asLink=True))):
+ movedTarget = self.moved_page(targetPage)
+ if movedTarget:
+ if not movedTarget.exists():
+ ### FIXME: Test to another move
+ pywikibot.output(u'Target page %s does not exist'
+ % (movedTarget))
+ elif redir_name == movedTarget.title():
+ pywikibot.output(u'Target page forms a redirect loop')
+ else:
+ pywikibot.output(u'%s has been moved to %s'
+ % (redir_page, movedTarget))
+ reason = i18n.twtranslate(self.site,
+ 'redirect-fix-broken-moved',
+ {'to': movedTarget.title(
+ asLink=True)})
+ content = redir_page.get(get_redirect=True)
+ text = self.site.redirectRegex().sub(
+ '#%s %s' % (self.site.redirect(),
+ movedTarget.title(asLink=True,
+ textlink=True)),
+ content)
+ pywikibot.showDiff(content, text)
+ pywikibot.output(u'Summary - %s' % reason)
+ if self.prompt(
+ u'Redirect target %s has been moved to %s.\n'
+ u'Do you want to fix %s?'
+ % (targetPage, movedTarget, redir_page)):
+ try:
+ redir_page.put(text, reason)
+ except pywikibot.NoUsername:
+ pywikibot.output(u"Page [[%s]] not saved; "
+ u"sysop privileges required."
+ % redir.title())
+ pass
+ except pywikibot.LockedPage:
+ pywikibot.output(u'%s is locked.'
+ % redir.title())
+ pass
+ elif self.delete and self.prompt(
+ u'Redirect target %s does not exist.\n'
+ u'Do you want to delete %s?'
+ % (targetPage.title(asLink=True),
+ redir_page.title(asLink=True))):
+ reason = i18n.twtranslate(self.site,
+ 'redirect-remove-broken')
try:
redir_page.delete(reason, prompt=False)
except pywikibot.NoUsername:
@@ -413,7 +483,7 @@
targetPage.site.lang,
'redirect-broken-redirect-template') and
i18n.twhas_key(targetPage.site.lang,
- 'redirect-remove-broken')) or \
+ 'redirect-remove-broken')) or
targetPage.site.lang == '-'):
pywikibot.output(u"No sysop in user-config.py, "
u"put page to speedy deletion.")
@@ -425,6 +495,9 @@
'redirect-broken-redirect-template'
) + "\n" + content
redir_page.put(content, reason)
+ else:
+ pywikibot.output(
+ u'No speedy deletion template availlable')
except pywikibot.IsRedirectPage:
pywikibot.output(u"Redirect target %s is also a redirect! "
u"Won't delete anything."
@@ -663,6 +736,7 @@
number = None
step = None
always = False
+ delete = False
for arg in pywikibot.handleArgs(*args):
if arg == 'double' or arg == 'do':
action = 'double'
@@ -706,6 +780,8 @@
step = int(arg[6:])
elif arg == '-always':
always = True
+ elif arg == '-delete':
+ delete = True
else:
pywikibot.output(u'Unknown argument: %s' % arg)
@@ -718,7 +794,7 @@
else:
gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages,
fullscan, start, until, number, step)
- bot = RedirectRobot(action, gen, always, number)
+ bot = RedirectRobot(action, gen, always, number, delete)
bot.run()
if __name__ == '__main__':
--
To view, visit https://gerrit.wikimedia.org/r/102151
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5a0f50d4145f1510bd2a430d3f5db0dbb081ef7c
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot