Revision: 7346 Author: siebrand Date: 2009-10-02 09:22:46 +0000 (Fri, 02 Oct 2009)
Log Message: ----------- Preparing for 'rewrite branch readiness' changes
Added Paths: ----------- branches/rewrite/scripts/blockpageschecker.py
Copied: branches/rewrite/scripts/blockpageschecker.py (from rev 7345, trunk/pywikipedia/blockpageschecker.py) =================================================================== --- branches/rewrite/scripts/blockpageschecker.py (rev 0) +++ branches/rewrite/scripts/blockpageschecker.py 2009-10-02 09:22:46 UTC (rev 7346) @@ -0,0 +1,439 @@ +# -*- coding: utf-8 -*- +""" +This is a script originally written by Wikihermit and then rewritten by Filnik, +to delete the templates used to warn in the pages that a page is blocked, +when the page isn't blocked at all. Indeed, very often sysops block the pages +for a setted time but then the forget to delete the warning! This script is useful +if you want to delete those useless warning left in these pages. + +Parameters: + +These command line parameters can be used to specify which pages to work on: + +¶ms; + +-xml Retrieve information from a local XML dump (pages-articles + or pages-meta-current, see http://download.wikimedia.org). + Argument can also be given as "-xml:filename". + +-page Only edit a specific page. + Argument can also be given as "-page:pagetitle". You can + give this parameter multiple times to edit multiple pages. + +-protectedpages: Check all the blocked pages (useful when you have not categories + or when you have problems with them. (add the namespace after ":" where + you want to check - default checks all protected pages) + +-moveprotected: Same as -protectedpages, for moveprotected pages + +Furthermore, the following command line parameters are supported: + +-always Doesn't ask every time if the bot should make the change or not, do it always. + +-debug When the bot can't delete the template from the page (wrong regex or something like that) + it will ask you if it should open the page on your browser. + (attention: pages included may give false positives..) + +-move The bot will check if the page is blocked also for the move option, not only for edit + +--- Warning! --- +You have to edit this script in order to add your preferences +otherwise the script won't work! + +If you have problems, ask on botwiki ( http://botwiki.sno.cc ) +or on IRC (#pywikipediabot) + +--- Example of how to use the script --- + +python blockpageschecker.py -always + +python blockpageschecker.py -cat:Geography -always + +python blockpageschecker.py -debug -protectedpages:4 + +""" +# +# (C) Monobi a.k.a. Wikihermit, 2007 +# (C) Filnik, 2007-2008-2009 +# (C) NicDumZ, 2008 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id: blockpageschecker.py,v 1.5 2008/04/24 19.40.00 filnik Exp$' +# + +import re, webbrowser +import wikipedia, catlib, pagegenerators, config + +# This is required for the text that is shown when you run this script +# with the parameter -help. +docuReplacements = { + '¶ms;': pagegenerators.parameterHelp, +} + +####################################################### +#--------------------- PREFERENCES -------------------# +################### -- Edit below! -- ################# + +templateSemiProtection = { + 'en': None, + 'it':[r'{{(?:[Tt]emplate:|)[Aa]vvisobloccoparziale(?:|[ _]scad|.*?||.*?)}}', + r'{{(?:[Tt]emplate:|)[Aa]bp(?:|[ _]scad|(?:.*?))}}'], + 'fr': [ur'{{(?:[Tt]emplate:|[Mm]odèle:|)[Ss]emi[- ]?protection(|[^}]*)}}'], + 'ja':[ur'(?<!<nowiki>){{(?:[Tt]emplate:|)半保護(?:[Ss]|)(?:|.+|)}}(?!</nowiki>)\s*(?:\r\n|)*'], + 'zh':[ur'{{(?:[Tt]emplate:|)Protected|(?:[Ss]|[Ss]emi|半)(?:|.+|)}}(\n+?|)',ur'{{(?:[Tt]emplate:|)Mini-protected|(?:[Ss]|[Ss]emi|半)(?:|.+|)}}(\n+?|)',ur'{{(?:[Tt]emplate:|)Protected-logo|(?:[Ss]|[Ss]emi|半)(?:|.+|)}}(\n+?|)'], + } +# Regex to get the total-protection template +templateTotalProtection = { + 'en': None, + 'it':[r'{{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad|(?:.*?)|minaccia|cancellata)}}', + r'{{(?:[Tt]emplate:|)(?:[Cc][Tt]|[Cc]anc fatte|[Cc][Ee])}}', r'<div class="toccolours[ _]itwiki[ _]template[ _]avviso">(?:\s|\n)*?[Qq]uesta pagina'], + 'fr':[ur'{{(?:[Tt]emplate:|[Mm]odèle:|)[Pp]rotection(|[^}]*)}}', + ur'{{(?:[Tt]emplate:|[Mm]odèle:|)(?:[Pp]age|[Aa]rchive|[Mm]odèle) protégée?(|[^}]*)}}'], + 'ja':[ur'(?<!<nowiki>){{(?:[Tt]emplate:|)保護(?:性急|)(?:[Ss]|)(?:|.+|)}}(?!</nowiki>)\s*(?:\r\n|)*'], + 'zh':[r'{{(?:[Tt]emplate:|)Protected|(?:[Nn]|[Nn]ormal)(?:|.+|)}}(\n+?|)',r'{{(?:[Tt]emplate:|)Mini-protected|(?:[Nn]|[Nn]ormal)(?:|.+|)}}(\n+?|)',r'{{(?:[Tt]emplate:|)Protected-logo|(?:[Nn]|[Nn]ormal)(?:|.+|)}}(\n+?|)'], + } +# Regex to get the semi-protection move template +templateSemiMoveProtection = { + 'en': None, + 'it':[r'{{(?:[Tt]emplate:|)[Aa]vvisobloccospostamento(?:|[ _]scad|.*?||.*?)}}'], + 'ja':[ur'(?<!<nowiki>){{(?:[Tt]emplate:|)移動半保護(?:[Ss]|)(?:|.+|)}}(?!</nowiki>)\s*(?:\r\n|)*'], + 'zh':[r'{{(?:[Tt]emplate:|)Protected|(?:MS|ms)(?:|.+|)}}(\n+?|)',r'{{(?:[Tt]emplate:|)Mini-protected|(?:MS|ms)(?:|.+|)}}(\n+?|)',r'{{(?:[Tt]emplate:|)Protected-logo|(?:MS|ms)(?:|.+|)}}(\n+?|)'], + } +# Regex to get the total-protection move template +templateTotalMoveProtection = { + 'en': None, + 'it':[r'{{(?:[Tt]emplate:|)[Aa]vvisobloccospostamento(?:|[ _]scad|.*?||.*?)}}'], + 'ja':[ur'(?<!<nowiki>){{(?:[Tt]emplate:|)移動保護(?:[Ss]|)(?:|.+|)}}(?!</nowiki>)\s*(?:\r\n|)*'], + 'zh':[ur'{{(?:[Tt]emplate:|)Protected|(?:[Mm]|[Mm]ove|移[動动])(?:|.+|)}}(\n+?|)',ur'{{(?:[Tt]emplate:|)Mini-protected|(?:[Mm]|[Mm]ove|移[動动])(?:|.+|)}}(\n+?|)',ur'{{(?:[Tt]emplate:|)Protected-logo|(?:[Mm]|[Mm]ove|移[動动])(?:|.+|)}}(\n+?|)'], + } + +# If you use only one template for all the type of protection, put it here. +# You may use only one template or an unique template and some other "old" template that the +# script should still check (as on it.wikipedia) +templateUnique = { + 'en': None, + 'it': [r'{{(?:[Tt]emplate:|)[Pp]rotetta}}'], +} + +# Array: 0 => Semi-block, 1 => Total Block, 2 => Semi-Move, 3 => Total-Move, 4 => template-unique +templateNoRegex = { + 'it':['{{Avvisobloccoparziale}}', '{{Avvisoblocco}}', None, None, '{{Protetta}}'], + 'fr':['{{Semi-protection}}', '{{Protection}}', None, None, None], + 'ja':[u'{{半保護}}', u'{{保護}}', u'{{移動半保護}}', u'{{移動保護}}', None], + 'zh':[u'{{Protected/semi}}',u'{{Protected}}',u'{{Protected/ms}}',u'{{Protected/move}}', None], + } + +# Category where the bot will check +categoryToCheck = { + 'en':[u'Category:Protected'], + 'ar':[u'تصنيف:محتويات محمية'], + 'fr':[u'Category:Page semi-protégée', u'Category:Page protégée', u'Catégorie:Article protégé'], + 'he':[u'קטגוריה:ויקיפדיה: דפים מוגנים', u'קטגוריה:ויקיפדיה: דפים מוגנים חלקית'], + 'it':[u'Categoria:Pagine protette - scadute', u'Categoria:Pagine semiprotette', u'Categoria:Voci protette'], + 'ja':[u'Category:編集保護中の記事',u'Category:編集半保護中の記事', + u'Category:移動保護中の記事',], + 'pt':[u'Category:!Páginas protegidas', u'Category:!Páginas semiprotegidas'], + 'zh':[u'Category:被保护的页面',u'Category:被保護的模板',u'Category:暂时不能移动的页面', + u'Category:被半保护的页面',], + } +# Comment used when the Bot edits +comment = { + 'en':u'Bot: Deleting out-dated template', + 'ar':u'بوت: حذف قالب قديم', + 'fr':u'Robot: Mise à jour des bandeaux de protection', + 'he':u'בוט: מסיר תבנית שעבר זמנה', + 'it':u'Bot: Tolgo o sistemo template di avviso blocco', + 'ja':u'ロボットによる: 保護テンプレート除去', + 'pt':u'Bot: Retirando predefinição de proteção', + 'zh':u'機器人: 移除過期的保護模板', + } +# Check list to block the users that haven't set their preferences +project_inserted = ['en', 'fr', 'it', 'ja', 'pt', 'zh'] + +####################################################### +#------------------ END PREFERENCES ------------------# +################## -- Edit above! -- ################## + +def understandBlock(text, TTP, TSP, TSMP, TTMP, TU): + """ Understand if the page is blocked and if it has the right template """ + if TTP != None: + for catchRegex in TTP: # TTP = templateTotalProtection + resultCatch = re.findall(catchRegex, text) + if resultCatch: + return ('sysop-total', catchRegex) + if TSP != None: + for catchRegex in TSP: + resultCatch = re.findall(catchRegex, text) + if resultCatch: + return ('autoconfirmed-total', catchRegex) + if TU != None: + for catchRegex in TU: + resultCatch = re.findall(catchRegex, text) + if resultCatch: + return ('unique', catchRegex) + if TSMP != None and TTMP != None and TTP != TTMP and TSP != TSMP: + for catchRegex in TTMP: + resultCatch = re.findall(catchRegex, text) + if resultCatch: + return ('sysop-move', catchRegex) + for catchRegex in TSMP: + resultCatch = re.findall(catchRegex, text) + if resultCatch: + return ('autoconfirmed-move', catchRegex) + return ('editable', r'\A\n') # If editable means that we have no regex, won't change anything with this regex + +def debugQuest(site, page): + quest = wikipedia.input(u'Do you want to open the page on your [b]rowser, [g]ui or [n]othing?') + pathWiki = site.family.nicepath(site.lang) + url = 'http://%s%s%s?&redirect=no' % (wikipedia.getSite().hostname(), pathWiki, page.urlname()) + while 1: + if quest.lower() in ['b', 'B']: + webbrowser.open(url) + break + elif quest.lower() in ['g', 'G']: + import editarticle + editor = editarticle.TextEditor() + text = editor.edit(page.get()) + break + elif quest.lower() in ['n', 'N']: + break + else: + wikipedia.output(u'wrong entry, type "b", "g" or "n"') + continue + +def main(): + """ Main Function """ + # Loading the comments + global categoryToCheck; global comment; global project_inserted + if config.mylang not in project_inserted: + wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!") + return + # always, define a generator to understand if the user sets one, defining what's genFactory + always = False; generator = False; debug = False + moveBlockCheck = False; genFactory = pagegenerators.GeneratorFactory() + # To prevent Infinite loops + errorCount = 0 + # Load the right site + site = wikipedia.getSite() + # Loading the default options. + for arg in wikipedia.handleArgs(): + if arg == '-always': + always = True + elif arg == '-move': + moveBlockCheck = True + elif arg == '-debug': + debug = True + elif arg.startswith('-protectedpages'): + if len(arg) == 15: + generator = site.protectedpages(namespace = 0) + else: + generator = site.protectedpages(namespace = int(arg[16:])) + elif arg.startswith('-moveprotected'): + if len(arg) == 14: + generator = site.protectedpages(namespace = 0, type = 'move') + else: + generator = site.protectedpages(namespace = int(arg[16:]), + type = 'move') + elif arg.startswith('-page'): + if len(arg) == 5: + generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))] + else: + generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])] + else: + genFactory.handleArg(arg) + + # Take the right templates to use, the category and the comment + TSP = wikipedia.translate(site, templateSemiProtection) + TTP = wikipedia.translate(site, templateTotalProtection) + TSMP = wikipedia.translate(site, templateSemiMoveProtection) + TTMP = wikipedia.translate(site, templateTotalMoveProtection) + TNR = wikipedia.translate(site, templateNoRegex) + TU = wikipedia.translate(site, templateUnique) + + category = wikipedia.translate(site, categoryToCheck) + commentUsed = wikipedia.translate(site, comment) + if not generator: + gen = genFactory.getCombinedGenerator() + if not generator: + generator = list() + wikipedia.output(u'Loading categories...') + # Define the category if no other generator has been setted + for CAT in category: + cat = catlib.Category(site, CAT) + # Define the generator + gen = pagegenerators.CategorizedPageGenerator(cat) + for pageCat in gen: + generator.append(pageCat) + wikipedia.output(u'Categories loaded, start!') + # Main Loop + preloadingGen = pagegenerators.PreloadingGenerator(generator, pageNumber = 60) + for page in preloadingGen: + pagename = page.aslink() + wikipedia.output('Loading %s...' % pagename) + try: + text = page.get() + restrictions = page.getRestrictions() + except wikipedia.NoPage: + wikipedia.output("%s doesn't exist! Skipping..." % pagename) + continue + except wikipedia.IsRedirectPage: + wikipedia.output("%s is a redirect! Skipping..." % pagename) + if debug: + debugQuest(site, page) + continue + """ + # This check does not work : + # PreloadingGenerator cannot set correctly page.editRestriction + # (see bug #1949476 ) + if not page.canBeEdited(): + wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename) + continue + """ + editRestr = restrictions['edit'] + if editRestr and editRestr[0] == 'sysop': + try: + config.sysopnames[site.family.name][site.lang] + except: + wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename) + continue + + # Understand, according to the template in the page, what should be the protection + # and compare it with what there really is. + TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP, TU) + # Only to see if the text is the same or not... + oldtext = text + # keep track of the changes for each step (edit then move) + changes = -1 + + if not editRestr: + # page is not edit-protected + # Deleting the template because the page doesn't need it. + if TU != None: + replaceToPerform = u'|'.join(TTP + TSP + TU) + else: + replaceToPerform = u'|'.join(TTP + TSP) + text, changes = re.subn('<noinclude>(%s)</noinclude>' % replaceToPerform, '', text) + if changes == 0: + text, changes = re.subn('(%s)' % replaceToPerform, '', text) + wikipedia.output(u'The page is editable for all, deleting the template...') + + elif editRestr[0] == 'sysop': + # total edit protection + if (TemplateInThePage[0] == 'sysop-total' and TTP != None) or (TemplateInThePage[0] == 'unique' and TU != None): + msg = 'The page is protected to the sysop' + if not moveBlockCheck: + msg += ', skipping...' + wikipedia.output(msg) + else: + wikipedia.output(u'The page is protected to the sysop, but the template seems not correct. Fixing...') + if TU != None: + text, changes = re.subn(TemplateInThePage[1], TNR[4], text) + else: + text, changes = re.subn(TemplateInThePage[1], TNR[1], text) + + elif TSP != None or TU != None: + # implicitely editRestr[0] = 'autoconfirmed', edit-Semi-protection + if TemplateInThePage[0] == 'autoconfirmed-total' or TemplateInThePage[0] == 'unique': + msg = 'The page is editable only for the autoconfirmed users' + if not moveBlockCheck: + msg += ', skipping...' + wikipedia.output(msg) + else: + wikipedia.output(u'The page is editable only for the autoconfirmed users, but the template seems not correct. Fixing...') + if TU != None: + text, changes = re.subn(TemplateInThePage[1], TNR[4], text) + else: + text, changes = re.subn(TemplateInThePage[1], TNR[0], text) + + if changes == 0: + # We tried to fix edit-protection templates, but it did not work. + wikipedia.output('Warning : No edit-protection template could be found') + + if moveBlockCheck: + # checking move protection now + moveRestr = restrictions['move'] + changes = -1 + + if not moveRestr: + wikipedia.output(u'The page is movable for all, deleting the template...') + # Deleting the template because the page doesn't need it. + if TU != None: + replaceToPerform = u'|'.join(TSMP + TTMP + TU) + else: + replaceToPerform = u'|'.join(TSMP + TTMP) + text, changes = re.subn('<noinclude>(%s)</noinclude>' % replaceToPerform, '', text) + if changes == 0: + text, changes = re.subn('(%s)' % replaceToPerform, '', text) + elif moveRestr[0] == 'sysop': + # move-total-protection + if (TemplateInThePage[0] == 'sysop-move' and TTMP != None) or (TemplateInThePage[0] == 'unique' and TU != None): + wikipedia.output(u'The page is protected from moving to the sysop, skipping...') + else: + wikipedia.output(u'The page is protected from moving to the sysop, but the template seems not correct. Fixing...') + if TU != None: + text, changes = re.subn(TemplateInThePage[1], TNR[4], text) + else: + text, changes = re.subn(TemplateInThePage[1], TNR[3], text) + + elif TSMP != None or TU != None: + # implicitely moveRestr[0] = 'autoconfirmed', move-semi-protection + if TemplateInThePage[0] == 'autoconfirmed-move' or TemplateInThePage[0] == 'unique': + wikipedia.output(u'The page is movable only for the autoconfirmed users, skipping...') + else: + wikipedia.output(u'The page is movable only for the autoconfirmed users, but the template seems not correct. Fixing...') + if TU != None: + text, changes = re.subn(TemplateInThePage[1], TNR[4], text) + else: + text, changes = re.subn(TemplateInThePage[1], TNR[2], text) + + if changes == 0: + # We tried to fix move-protection templates, but it did not work. + wikipedia.output('Warning : No move-protection template could be found') + + + if oldtext != text: + # Ok, asking if the change has to be performed and do it if yes. + wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title()) + wikipedia.showDiff(oldtext, text) + if not always: + choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N') + if choice == 'a': + always = True + if always or choice == 'y': + while 1: + try: + page.put(text, commentUsed, force=True) + except wikipedia.EditConflict: + wikipedia.output(u'Edit conflict! skip!') + break + except wikipedia.ServerError: + # Sometimes there is this error that's quite annoying because + # can block the whole process for nothing. + errorCount += 1 + if errorCount < 5: + wikipedia.output(u'Server Error! Wait..') + time.sleep(3) + continue + else: + # Prevent Infinite Loops + raise wikipedia.ServerError(u'Fifth Server Error!') + except wikipedia.SpamfilterError, e: + wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url)) + break + except wikipedia.PageNotSaved, error: + wikipedia.output(u'Error putting page: %s' % (error.args,)) + break + except wikipedia.LockedPage: + wikipedia.output(u'The page is still protected. Skipping...') + break + else: + # Break only if the errors are one after the other + errorCount = 0 + break + +if __name__ == "__main__": + try: + main() + finally: + wikipedia.stopme()
pywikipedia-svn@lists.wikimedia.org