Revision: 5246 Author: nicdumz Date: 2008-04-21 23:14:21 +0000 (Mon, 21 Apr 2008)
Log Message: -----------
* Moving blockpageschecker.getRestrictions() to the Page class.
* Rewriting getRestrictions() so that it returns a protection dictionary, which is much handier than a string. Also, move protections and edit protections are now reported separately.
== Rewrite of blockpageschecker ==
* Fixing a fr: regex (it had no u'' prefix, and contained a mis-encoded 'è', i.e. the raw UTF-8 bytes \195\168).
* Getting rid of the ugly templateToRemove regexes. These were only concatenations of the other template regex lists. There was no need to have both: it required the user to copy and paste them, whereas we can simply reuse the other regexes.
=== Erm... If... what ? ===
* While trying to understand the tangled if structure in main(), I noticed that nothing was being done when -move was provided and a page without move protection had a move-protection template. Fixing that.
* Reordering these big ifs using the new getRestrictions() to make the logic clearer.
=== Better output messages ===
* Do not say that we are skipping the page after a successful edit-protection check if we are about to check the same page for move-protection problems.
* Removing "No changes! Strange, try to check the regex used!" and replacing it with a helpful warning, because some pages may actually be in protection categories without being protected and without having a protection template.
I tested it on fr:, it seems OK. Let me know if there are any problems.
Modified Paths: -------------- trunk/pywikipedia/blockpageschecker.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/blockpageschecker.py =================================================================== --- trunk/pywikipedia/blockpageschecker.py 2008-04-21 20:58:45 UTC (rev 5245) +++ trunk/pywikipedia/blockpageschecker.py 2008-04-21 23:14:21 UTC (rev 5246) @@ -72,28 +72,6 @@ #--------------------- PREFERENCES -------------------# ################### -- Edit below! -- #################
-# Use only regex! - Regex to delete the template -templateToRemove = { - 'en':[r'{{(?:[Tt]emplate:|)[Pp]p-protected}}', r'{{([Tt]emplate:|)[Pp]p-dispute}}', - r'{{(?:[Tt]emplate:|)[Pp]p-template}}', r'{{([Tt]emplate:|)[Pp]p-usertalk}}'], - 'fr':[r'{{(?:[Tt]emplate:|[Mm]odèle:|)[Pp]rotection(|[^}]*)}}', - r'{{(?:[Tt]emplate:|[Mm]odèle:|)(?:[Pp]age|[Aa]rchive|[Mm]odèle) protégée?}}', - r'{{(?:[Tt]emplate:|[Mm]odèle:|)[Ss]emi[- ]?protection}}' - ], - 'he':[ur'{{(?:[Tt]emplate:|תבנית:|)מוגן(?: חלקית)?(?:|?.*)}}'], - 'it':[r'{{(?:[Tt]emplate:|)[Aa]vvisobloccoparziale(?:|[ _]scad|.*?||.*?)}}', - r'{{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad|(?:.*?))}}', - r'{{(?:[Tt]emplate:|)[Aa]bp(?:|[ _]scad|(?:.*?))}}', - r'{{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad|(?:.*?)|minaccia|cancellata)}}', - r'{{(?:[Tt]emplate:|)(?:[Cc][Tt]|[Cc]anc fatte|[Cc][Ee].*?)}}', - r'<div class="toccolours[ _]itwiki[ _]template[ _]avviso">(?:\s|\n)*?[Qq]uesta pagina'], - 'ja':[ur'{{(?:[Tt]emplate:|)(?:半|移動|移動半|)保護(?:性急|)(?:[Ss]|)(?:|.+|)?}}(\n+?|)'], - 'pt':[r'{{(?:[Tt]emplate:|)[Pp]rotegido(?:|*)}}', - r'{{(?:[Tt]emplate:|)(?:[Ss]emi-|)[Pp]rotegid[ao](?:IP|[- _]ip|PP|)}}'], - 'zh':[r'{{(?:[Tt]emplate:|)Protected(?:|*)}}',r'{{(?:[Tt]emplate:|)Mini-protected(?:|*)}}', - r'{{(?:[Tt]emplate:|)Protected logo(?:|*)}}'], - } - # Added a new feature! 
Please update and add the settings in order # to improve the intelligence of this script ;-) # Regex to get the semi-protection template @@ -101,7 +79,7 @@ 'en': None, 'it':[r'{{(?:[Tt]emplate:|)[Aa]vvisobloccoparziale(?:|[ _]scad|.*?||.*?)}}', r'{{(?:[Tt]emplate:|)[Aa]bp(?:|[ _]scad|(?:.*?))}}'], - 'fr': [r'{{(?:[Tt]emplate:|[Mm]odèle:|)[Ss]emi[- ]?protection(|[^}]*)}}'], + 'fr': [ur'{{(?:[Tt]emplate:|[Mm]odèle:|)[Ss]emi[- ]?protection(|[^}]*)}}'], 'ja':[ur'{{(?:[Tt]emplate:|)半保護(?:[Ss]|)(?:|.+|)}}(\n+?|)'], 'zh':[ur'{{(?:[Tt]emplate:|)Protected|(?:[Ss]|[Ss]emi|半)(?:|.+|)}}(\n+?|)',ur'{{(?:[Tt]emplate:|)Mini-protected|(?:[Ss]|[Ss]emi|半)(?:|.+|)}}(\n+?|)',ur'{{(?:[Tt]emplate:|)Protected-logo|(?:[Ss]|[Ss]emi|半)(?:|.+|)}}(\n+?|)'], } @@ -110,8 +88,7 @@ 'en': None, 'it':[r'{{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad|(?:.*?)|minaccia|cancellata)}}', r'{{(?:[Tt]emplate:|)(?:[Cc][Tt]|[Cc]anc fatte|[Cc][Ee].*?)}}', r'<div class="toccolours[ _]itwiki[ _]template[ _]avviso">(?:\s|\n)*?[Qq]uesta pagina'], - 'fr':[r'{{(?:[Tt]emplate:|[Mm]odèle:|)[Pp]rotection(|[^}]*)}}', - + 'fr':[r'{{(?:[Tt]emplate:|[Mm]odèle:|)[Pp]rotection(|[^}]*)}}', r'{{(?:[Tt]emplate:|[Mm]odèle:|)(?:[Pp]age|[Aa]rchive|[Mm]odèle) protégée?(|[^}]*)}}'], 'ja':[ur'{{(?:[Tt]emplate:|)保護(?:[Ss]|)(?:|.+|)}}(\n+?|)'], 'zh':[r'{{(?:[Tt]emplate:|)Protected|(?:[Nn]|[Nn]ormal)(?:|.+|)}}(\n+?|)',r'{{(?:[Tt]emplate:|)Mini-protected|(?:[Nn]|[Nn]ormal)(?:|.+|)}}(\n+?|)',r'{{(?:[Tt]emplate:|)Protected-logo|(?:[Nn]|[Nn]ormal)(?:|.+|)}}(\n+?|)'], @@ -171,20 +148,20 @@ """ Understand if the page is blocked and if it has the right template """ for catchRegex in TTP: # TTP = templateTotalProtection resultCatch = re.findall(catchRegex, text) - if resultCatch != []: + if resultCatch: return ('sysop-total', catchRegex) for catchRegex in TSP: resultCatch = re.findall(catchRegex, text) - if resultCatch != []: + if resultCatch: return ('autoconfirmed-total', catchRegex) if TSMP != None and TTMP != None and TTP != TTMP and TSP 
!= TSMP: for catchRegex in TSMP: resultCatch = re.findall(catchRegex, text) - if resultCatch != []: + if resultCatch: return ('sysop-move', catchRegex) for catchRegex in TTMP: resultCatch = re.findall(catchRegex, text) - if resultCatch != []: + if resultCatch: return ('autoconfirmed-move', catchRegex) return ('editable', r'\A\n') # If editable means that we have no regex, won't change anything with this regex
@@ -211,34 +188,6 @@ continue else: break - -def getRestrictions(page): - api_url = '/w/api.php?action=query&prop=info&inprop=protection&format=xml&titles=%s' % page.urlname() - text = wikipedia.getSite().getUrl(api_url) - if not 'pageid="' in text: # Avoid errors when you can't reach the APIs - raise wikipedia.Error("API problem, can't reach the APIs!") - match = re.findall(r'<protection>(.*?)</protection>', text) - status = 'editable' - if match != []: - text = match[0] # If there's the block "protection" take the settings inside it. - api_found = re.compile(r'<pr type="(.*?)" level="(.*?)" expiry="(.*?)" />') - results = api_found.findall(text) - if results != []: - if len(results) < 2: - result = results[0] - type_of_protection = result[0]; level = result[1]; expiry = result[2] - if type_of_protection == 'move': - status = '%s-%s' % (level, type_of_protection) - else: - status = '%s' % level - else: - for result in results: - # If blocked both move and edit, select edit. - if result[0] == 'move': - continue - type_of_protection = result[0]; level = result[1]; expiry = result[2] - status = '%s' % level - return status
def ProtectedPages(namespace = 0): """ Return only the wiki page object and not the tuple with all the data as above """ @@ -267,7 +216,7 @@ def main(): """ Main Function """ # Loading the comments - global templateToRemove; global categoryToCheck; global comment; global project_inserted + global categoryToCheck; global comment; global project_inserted if config.mylang not in project_inserted: wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!") wikipedia.stopme() @@ -299,7 +248,6 @@ # Load the right site site = wikipedia.getSite() # Take the right templates to use, the category and the comment - TTR = wikipedia.translate(site, templateToRemove) TSP = wikipedia.translate(site, templateSemiProtection) TTP = wikipedia.translate(site, templateTotalProtection) TSMP = wikipedia.translate(site, templateSemiMoveProtection) @@ -326,7 +274,7 @@ wikipedia.output('Loading %s...' % pagename) try: text = page.get() - editRestriction = getRestrictions(page) + restrictions = page.getRestrictions() except wikipedia.NoPage: wikipedia.output("%s doesn't exist! Skipping..." % pagename) continue @@ -335,49 +283,69 @@ if debug: debugQuest(site, page) continue - if not page.canBeEdited(): - - wikipedia.output("%s is protected : this account can't edit it! Skipping..." % pagename) - + if not page.canBeEdited(): + wikipedia.output("%s is protected : this account can't edit it! Skipping..." % pagename) continue # Understand, according to the template in the page, what should be the protection # and compare it with what there really is. TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP) # Only to see if the text is the same or not... oldtext = text - if editRestriction == 'sysop': + editRestr = restrictions['edit'] + + if not editRestr: + # Deleting the template because the page doesn't need it. 
+ replaceToPerform = '|'.join(TTP + TSP) + text = re.sub('(?:<noinclude>|)(%s)(?:</noinclude>|)' % replaceToPerform, '', text) + if text != oldtext: + wikipedia.output(u'The page is editable for all, deleting the template...') + elif not moveBlockCheck: + wikipedia.output('Warning : This page is in a protection category, and is not edition-protected; yet no edit-protection templates could be found') + + elif editRestr[0] == 'sysop': if TemplateInThePage[0] == 'sysop-total' and TTP != None: - wikipedia.output(u'The page is protected to the sysop, skipping...') - continue + msg = 'The page is protected to the sysop' + if not moveBlockCheck: + msg += ', skipping...' + wikipedia.output(msg) else: wikipedia.output(u'The page is protected to the sysop, but the template seems not correct. Fixing...') text = re.sub(TemplateInThePage[1], TNR[1], text) - elif moveBlockCheck and editRestriction == 'sysop-move': - if TemplateInThePage[0] == 'sysop-move' and TTMP != None: - wikipedia.output(u'The page is protected from moving to the sysop, skipping...') - continue - else: - wikipedia.output(u'The page is protected from moving to the sysop, but the template seems not correct. Fixing...') - text = re.sub(TemplateInThePage[1], TNR[3], text) - elif editRestriction == 'autoconfirmed' and TSP != None: + + elif TSP != None: # implicitely editRestr[0] = 'autoconfirmed' if TemplateInThePage[0] == 'autoconfirmed-total': - wikipedia.output(u'The page is editable only for the autoconfirmed users, skipping...') - continue + msg = 'The page is editable only for the autoconfirmed users' + if not moveBlockCheck: + msg += ', skipping...' + wikipedia.output(msg) else: wikipedia.output(u'The page is editable only for the autoconfirmed users, but the template seems not correct. 
Fixing...') text = re.sub(TemplateInThePage[1], TNR[0], text) - elif moveBlockCheck == True and editRestriction == 'autoconfirmed-move' and TSMP != None: - if TemplateInThePage[0] == 'autoconfirmed-move': - wikipedia.output(u'The page is movable only for the autoconfirmed users, skipping...') - continue - else: - wikipedia.output(u'The page is movable only for the autoconfirmed users, but the template seems not correct. Fixing...') - text = re.sub(TemplateInThePage[1], TNR[2], text) - else: - wikipedia.output(u'The page is editable for all, deleting the template...') - # Deleting the template because the page doesn't need it. - for replaceToPerform in TTR: - text = re.sub('(?:<noinclude>|)%s(?:</noinclude>|)' % replaceToPerform, '', text) + + + if moveBlockCheck: + moveRestr = restrictions['move'] + if not moveRestr: + wikipedia.output(u'The page is movable for all, deleting the template...') + # Deleting the template because the page doesn't need it. + replaceToPerform = '|'.join(TSMP + TTMP) + text = re.sub('(?:<noinclude>|)(%s)(?:</noinclude>|)' % replaceToPerform, '', text) + + elif moveRestr[0] == 'sysop': + if TemplateInThePage[0] == 'sysop-move' and TTMP != None: + wikipedia.output(u'The page is protected from moving to the sysop, skipping...') + else: + wikipedia.output(u'The page is protected from moving to the sysop, but the template seems not correct. Fixing...') + text = re.sub(TemplateInThePage[1], TNR[3], text) + + elif TSMP != None: #implicitely moveRestr[0] = 'autoconfirmed' + if TemplateInThePage[0] == 'autoconfirmed-move': + wikipedia.output(u'The page is movable only for the autoconfirmed users, skipping...') + else: + wikipedia.output(u'The page is movable only for the autoconfirmed users, but the template seems not correct. Fixing...') + text = re.sub(TemplateInThePage[1], TNR[2], text) + + if oldtext != text: # Ok, asking if the change has to be performed and do it if yes. 
wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title()) @@ -420,10 +388,6 @@ # Break only if the errors are one after the other errorCount = 0 break - else: - wikipedia.output(u'No changes! Strange, try to check the regex used!') - if debug == True: - debugQuest(site, page)
if __name__ == "__main__": try:
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2008-04-21 20:58:45 UTC (rev 5245) +++ trunk/pywikipedia/wikipedia.py 2008-04-21 23:14:21 UTC (rev 5246) @@ -265,6 +265,7 @@ previousRevision (*) : The revision id of the previous version permalink (*) : The url of the permalink of the current version getOldVersion(id) (*) : The text of a previous version of the page + getRestrictions : Returns a protection dictionary getVersionHistory : Load the version history information from wiki getVersionHistoryTable: Create a wiki table from the history data fullVersionHistory : Return all past versions including wikitext @@ -1129,6 +1130,30 @@
return sysop
+ def getRestrictions(self): + """ + Get the protections on the page. + * Returns a restrictions dictionary. Keys are 'edit' and 'move', + Values are None (no restriction for that action) or [level, expiry] : + * level is the level of auth needed to perform that action + ('autoconfirmed' or 'sysop') + * expiry is the expiration time of the restriction + """ + api_url = '/w/api.php?action=query&prop=info&inprop=protection&format=xml&titles=%s' % self.urlname() + text = self.site().getUrl(api_url) + if not 'pageid="' in text: # Avoid errors when you can't reach the API + raise Error("API problem, can't reach the API!") + match = re.findall(r'<protection>(.*?)</protection>', text) + restrictions = { 'edit': None, 'move': None } + + if match: + text = match[0] # If there's the block "protection" take the settings inside it. + api_found = re.compile(r'<pr type="(.*?)" level="(.*?)" expiry="(.*?)" />') + for entry in api_found.findall(text): + restrictions[ entry[0] ] = [ entry[1], entry[2] ] + + return restrictions + def put_async(self, newtext, comment=None, watchArticle=None, minorEdit=True, force=False, callback=None):
pywikipedia-l@lists.wikimedia.org