Revision: 5142 Author: filnik Date: 2008-03-18 19:31:35 +0000 (Tue, 18 Mar 2008)
Log Message: ----------- Adding some of NicDumZ's fixes (and some that are not his :P :-)) and changing some of the fr settings (those also by NicDumZ)
Modified Paths: -------------- trunk/pywikipedia/blockpageschecker.py
Modified: trunk/pywikipedia/blockpageschecker.py =================================================================== --- trunk/pywikipedia/blockpageschecker.py 2008-03-18 18:51:52 UTC (rev 5141) +++ trunk/pywikipedia/blockpageschecker.py 2008-03-18 19:31:35 UTC (rev 5142) @@ -22,7 +22,7 @@
-protectedpages: Check all the blocked pages (useful when you have not categories or when you have problems with them. (add the namespace after ":" where - you want to check - default: 0) + you want to check - default checks all protected pages)
Furthermore, the following command line parameters are supported:
@@ -84,7 +84,9 @@ 'it':[r'{{(?:[Tt]emplate:|)[Aa]vvisobloccoparziale(?:|[ _]scad|.*?||.*?)}}', r'{{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad|(?:.*?))}}', r'{{(?:[Tt]emplate:|)[Aa]bp(?:|[ _]scad|(?:.*?))}}', - r'{{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad|(?:.*?)|minaccia|cancellata)}}',], + r'{{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad|(?:.*?)|minaccia|cancellata)}}', + r'{{(?:[Tt]emplate:|)(?:[Cc][Tt]|[Cc]anc fatte|[Cc][Ee].*?)}}', + r'<div class="toccolours[ _]itwiki[ _]template[ _]avviso">(?:\s|\n)*?[Qq]uesta pagina'], 'ja':[ur'{{(?:[Tt]emplate:|)(?:半|移動|移動半|)保護(?:性急|)(?:[Ss]|)(?:|.+|)?}}(\n+?|)'], 'pt':[r'{{(?:[Tt]emplate:|)[Pp]rotegido(?:|*)}}', r'{{(?:[Tt]emplate:|)(?:[Ss]emi-|)[Pp]rotegid[ao](?:IP|[- _]ip|PP|)}}'], @@ -99,13 +101,18 @@ 'en': None, 'it':[r'{{(?:[Tt]emplate:|)[Aa]vvisobloccoparziale(?:|[ _]scad|.*?||.*?)}}', r'{{(?:[Tt]emplate:|)[Aa]bp(?:|[ _]scad|(?:.*?))}}'], + 'fr': [r'{{(?:[Tt]emplate:|[Mm]odèle:|)[Ss]emi[- ]?protection(|[^}]*)}}'], 'ja':[ur'{{(?:[Tt]emplate:|)半保護(?:[Ss]|)(?:|.+|)}}(\n+?|)'], 'zh':[ur'{{(?:[Tt]emplate:|)Protected|(?:[Ss]|[Ss]emi|半)(?:|.+|)}}(\n+?|)',ur'{{(?:[Tt]emplate:|)Mini-protected|(?:[Ss]|[Ss]emi|半)(?:|.+|)}}(\n+?|)',ur'{{(?:[Tt]emplate:|)Protected-logo|(?:[Ss]|[Ss]emi|半)(?:|.+|)}}(\n+?|)'], } # Regex to get the total-protection template templateTotalProtection = { 'en': None, - 'it':[r'{{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad|(?:.*?)|minaccia|cancellata)}}'], + 'it':[r'{{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad|(?:.*?)|minaccia|cancellata)}}', + r'{{(?:[Tt]emplate:|)(?:[Cc][Tt]|[Cc]anc fatte|[Cc][Ee].*?)}}', r'<div class="toccolours[ _]itwiki[ _]template[ _]avviso">(?:\s|\n)*?[Qq]uesta pagina'], + 'fr':[r'{{(?:[Tt]emplate:|[Mm]odèle:|)[Pp]rotection(|[^}]*)}}', + + r'{{(?:[Tt]emplate:|[Mm]odèle:|)(?:[Pp]age|[Aa]rchive|[Mm]odèle) protégée?(|[^}]*)}}'], 'ja':[ur'{{(?:[Tt]emplate:|)保護(?:[Ss]|)(?:|.+|)}}(\n+?|)'], 
'zh':[r'{{(?:[Tt]emplate:|)Protected|(?:[Nn]|[Nn]ormal)(?:|.+|)}}(\n+?|)',r'{{(?:[Tt]emplate:|)Mini-protected|(?:[Nn]|[Nn]ormal)(?:|.+|)}}(\n+?|)',r'{{(?:[Tt]emplate:|)Protected-logo|(?:[Nn]|[Nn]ormal)(?:|.+|)}}(\n+?|)'], } @@ -126,6 +133,7 @@ # Array: 0 => Semi-block, 1 => Total Block, 2 => Semi-Move, 3 => Total-Move templateNoRegex = { 'it':['{{Avvisobloccoparziale}}', '{{Avvisoblocco}}', None, None], + 'fr':['{{Semi-protection}}', '{{Protection}}', None, None], 'ja':[u'{{半保護}}', u'{{保護}}', u'{{移動半保護}}',u'{{移動保護}}'], 'zh':[u'{{Protected/semi}}',u'{{Protected}}',u'{{Protected/ms}}',u'{{Protected/move}}'], } @@ -145,7 +153,7 @@ # Comment used when the Bot edits comment = { 'en':u'Bot: Deleting out-dated template', - 'fr':u'Robot: Retrait du bandeau protection/semi-protection d'une page qui ne l'es plus', + 'fr':u'Robot: Mise à jour des bandeaux de protection', 'he':u'בוט: מסיר תבנית שעבר זמנה', 'it':u'Bot: Tolgo o sistemo template di avviso blocco', 'ja':u'ロボットによる: 保護テンプレート除去', @@ -161,16 +169,14 @@
def understandBlock(text, TTP, TSP, TSMP, TTMP): """ Understand if the page is blocked and if it has the right template """ - if TTP: - for catchRegex in TTP: # TTP = templateTotalProtection - resultCatch = re.findall(catchRegex, text) - if resultCatch != []: - return ('sysop-total', catchRegex) - if TSP: - for catchRegex in TSP: - resultCatch = re.findall(catchRegex, text) - if resultCatch != []: - return ('autoconfirmed-total', catchRegex) + for catchRegex in TTP: # TTP = templateTotalProtection + resultCatch = re.findall(catchRegex, text) + if resultCatch != []: + return ('sysop-total', catchRegex) + for catchRegex in TSP: + resultCatch = re.findall(catchRegex, text) + if resultCatch != []: + return ('autoconfirmed-total', catchRegex) if TSMP != None and TTMP != None and TTP != TTMP and TSP != TSMP: for catchRegex in TSMP: resultCatch = re.findall(catchRegex, text) @@ -180,12 +186,15 @@ resultCatch = re.findall(catchRegex, text) if resultCatch != []: return ('autoconfirmed-move', catchRegex) - return ('editable', r'\A\n') + return ('editable', r'\A\n') # If editable means that we have no regex, won't change anything with this regex
-def ProtectedPagesData(namespace = 0): +def ProtectedPagesData(namespace = None): """ Yield all the pages blocked, using Special:ProtectedPages """ # Avoid problems of encoding and stuff like that, let it divided please - url = '/w/index.php?title=Speciale%3AProtectedPages' + '&namespace=%s&type=edit&level=0&size=' % namespace + url = '/w/index.php?title=Special:ProtectedPages&type=edit&level=0' + if namespace != None: # /!\ if namespace seems simpler, but returns false when ns=0 + + url += '&namespace=%s' % namespace site = wikipedia.getSite() parser_text = site.getUrl(url) while 1: @@ -277,7 +286,7 @@ debug = True elif arg.startswith('-protectedpages'): if len(arg) == 15: - generator = ProtectedPages(0) + generator = ProtectedPages() else: generator = ProtectedPages(int(arg[16:])) elif arg.startswith('-page'): @@ -313,7 +322,7 @@ # Main Loop preloadingGen = pagegenerators.PreloadingGenerator(generator, pageNumber = 60) for page in preloadingGen: - pagename = page.title() + pagename = page.aslink() wikipedia.output('Loading %s...' % pagename) try: text = page.get() @@ -326,6 +335,11 @@ if debug: debugQuest(site, page) continue + if not page.canBeEdited(): + + wikipedia.output("%s is protected : this account can't edit it! Skipping..." % pagename) + + continue # Understand, according to the template in the page, what should be the protection # and compare it with what there really is. TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP) @@ -407,10 +421,9 @@ errorCount = 0 break else: - wikipedia.output(u'No changes! Strange! Check the regex!') + wikipedia.output(u'No changes! Strange, try to check the regex used!') if debug == True: debugQuest(site, page) -
if __name__ == "__main__": try: