http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11285
Revision: 11285
Author: xqt
Date: 2013-03-28 17:03:30 +0000 (Thu, 28 Mar 2013)
Log Message:
-----------
remove obsolete interwiki message on top of iw links; update from trunk r11284

Modified Paths:
--------------
branches/rewrite/scripts/cosmetic_changes.py
Modified: branches/rewrite/scripts/cosmetic_changes.py =================================================================== --- branches/rewrite/scripts/cosmetic_changes.py 2013-03-28 16:25:40 UTC (rev 11284) +++ branches/rewrite/scripts/cosmetic_changes.py 2013-03-28 17:03:30 UTC (rev 11285) @@ -64,14 +64,15 @@ cosmetic_changes_deny_script += ['your_script_name_1', 'your_script_name_2'] """ # -# (C) xqt, 2009-2012 -# (C) Pywikipedia bot team, 2006-2012 +# (C) xqt, 2009-2013 +# (C) Pywikipedia bot team, 2006-2013 # # Distributed under the terms of the MIT license. # __version__ = '$Id$' # -import sys, re +import sys +import re import pywikibot import isbn from pywikibot import pagegenerators @@ -89,41 +90,34 @@ '&warning;': warning, }
-# Interwiki message on top of iw links -# 2nd line is a regex if needed -msg_interwiki = { - 'fr' : u'<!-- Autres langues -->', - 'nn' : (u'<!--interwiki (no, sv, da first; then other languages alphabetically by name)-->', - u'(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)') -} - # This is from interwiki.py; # move it to family file and implement global instances moved_links = { - 'ca' : (u'ús de la plantilla', u'/ús'), - 'cs' : (u'dokumentace', u'/doc'), - 'de' : (u'dokumentation', u'/Meta'), - 'en' : ([u'documentation', - u'template documentation', - u'template doc', - u'doc', - u'documentation, template'], u'/doc'), - 'es' : ([u'documentación', u'documentación de plantilla'], u'/doc'), - 'fa' : ([u'documentation',u'توضیحات',u'توضیحات الگو',u'doc'], u'/توضیحات'), - 'fr' : (u'/documentation', u'/Documentation'), - 'hu' : (u'sablondokumentáció', u'/doc'), - 'id' : (u'template doc', u'/doc'), - 'ja' : (u'documentation', u'/doc'), - 'ka' : (u'თარგის ინფო', u'/ინფო'), - 'ko' : (u'documentation', u'/설명문서'), - 'ms' : (u'documentation', u'/doc'), - 'pl' : (u'dokumentacja', u'/opis'), - 'pt' : ([u'documentação', u'/doc'], u'/doc'), - 'ro' : (u'documentaţie', u'/doc'), - 'ru' : (u'doc', u'/doc'), - 'sv' : (u'dokumentation', u'/dok'), - 'vi' : (u'documentation', u'/doc'), - 'zh' : ([u'documentation', u'doc'], u'/doc'), + 'ca': (u'ús de la plantilla', u'/ús'), + 'cs': (u'dokumentace', u'/doc'), + 'de': (u'dokumentation', u'/Meta'), + 'en': ([u'documentation', + u'template documentation', + u'template doc', + u'doc', + u'documentation, template'], u'/doc'), + 'es': ([u'documentación', u'documentación de plantilla'], u'/doc'), + 'fa': ([u'documentation', u'توضیحات', u'توضیحات الگو', + u'doc'], u'/توضیحات'), + 'fr': (u'/documentation', u'/Documentation'), + 'hu': (u'sablondokumentáció', u'/doc'), + 'id': (u'template doc', u'/doc'), + 'ja': (u'documentation', u'/doc'), + 'ka': (u'თარგის ინფო', u'/ინფო'), + 'ko': 
(u'documentation', u'/설명문서'), + 'ms': (u'documentation', u'/doc'), + 'pl': (u'dokumentacja', u'/opis'), + 'pt': ([u'documentação', u'/doc'], u'/doc'), + 'ro': (u'documentaţie', u'/doc'), + 'ru': (u'doc', u'/doc'), + 'sv': (u'dokumentation', u'/dok'), + 'vi': (u'documentation', u'/doc'), + 'zh': ([u'documentation', u'doc'], u'/doc'), }
# Template which should be replaced or removed. @@ -171,7 +165,7 @@ Given a wiki source code text, return the cleaned up version. """ oldText = text - if self.site.sitename()== u'commons:commons' and self.namespace == 6: + if self.site.sitename() == u'commons:commons' and self.namespace == 6: text = self.commonsfiledesc(text) text = self.fixSelfInterwiki(text) text = self.standardizePageFooter(text) @@ -207,7 +201,7 @@ Interwiki links to the site itself are displayed like local links. Remove their language code prefix. """ - if not self.talkpage and pywikibot.calledModuleName() <> 'interwiki': + if not self.talkpage and pywikibot.calledModuleName() != 'interwiki': interwikiR = re.compile(r'[[%s\s?:([^[]\n]*)]]' % self.site.lang) text = interwikiR.sub(r'[[\1]]', text) @@ -224,6 +218,7 @@ 3. additional information depending on local site policy 4. stars templates for featured and good articles 5. interwiki links + """ starsList = [ u'bueno', @@ -257,7 +252,6 @@ categories = None interwikiLinks = None allstars = [] - hasCommentLine = False
# The PyWikipediaBot is no longer allowed to touch categories on the # German Wikipedia. See @@ -266,9 +260,9 @@ if not self.template and not '{{Personendaten' in text and \ not '{{SORTIERUNG' in text and not '{{DEFAULTSORT' in text and \ not self.site.lang in ('et', 'it', 'bg', 'ru'): - categories = pywikibot.getCategoryLinks(text, site = self.site) + categories = pywikibot.getCategoryLinks(text, site=self.site)
- if not self.talkpage:# and pywikibot.calledModuleName() <> 'interwiki': + if not self.talkpage: # and pywikibot.calledModuleName() <> 'interwiki': subpage = False if self.template: loc = None @@ -277,13 +271,13 @@ del tmpl except KeyError: pass - if loc != None and loc in self.title: + if loc is not None and loc in self.title: subpage = True interwikiLinks = pywikibot.getLanguageLinks( text, insite=self.site, template_subpage=subpage)
# Removing the interwiki - text = pywikibot.removeLanguageLinks(text, site = self.site) + text = pywikibot.removeLanguageLinks(text, site=self.site) # Removing the stars' issue starstext = pywikibot.removeDisabledParts(text) for star in starsList: @@ -294,21 +288,6 @@ text = regex.sub('', text) allstars += found
- # nn got a message between the categories and the iw's - # and they want to keep it there, first remove it - if self.site.lang in msg_interwiki: - iw_msg = msg_interwiki[self.site.lang] - if isinstance(iw_msg, tuple): - iw_reg = iw_msg[1] - iw_msg = iw_msg[0] - else: - iw_reg = u'(%s)' % iw_msg - regex = re.compile(iw_reg) - found = regex.findall(text) - if found: - hasCommentLine = True - text = regex.sub('', text) - # Adding categories if categories: ##Sorting categories in alphabetic order. beta test only on Persian Wikipedia, TODO fix bug for sorting @@ -321,16 +300,9 @@ # categories.insert(0, name) text = pywikibot.replaceCategoryLinks(text, categories, site=self.site) - # Put the iw message back - if not self.talkpage and \ - ((interwikiLinks or hasCommentLine) and - self.site.language() == 'nn' or - (interwikiLinks and hasCommentLine) and - self.site.language() == 'fr'): - text += config.line_separator * 2 + iw_msg # Adding stars templates if allstars: - text = text.strip()+self.site.family.interwiki_text_separator + text = text.strip() + self.site.family.interwiki_text_separator allstars.sort() for element in allstars: text += '%s%s' % (element.strip(), config.line_separator) @@ -398,13 +370,16 @@ # arz uses english stylish codes if self.site.lang not in ['arz', 'ru']: exceptions = ['nowiki', 'comment', 'math', 'pre'] - for magicWord in ['img_thumbnail', 'img_left', 'img_center', 'img_right', 'img_none', - 'img_framed', 'img_frameless', 'img_border', 'img_upright',]: + for magicWord in ['img_thumbnail', 'img_left', 'img_center', + 'img_right', 'img_none', 'img_framed', + 'img_frameless', 'img_border', 'img_upright', ]: aliases = self.site.getmagicwords(magicWord) if not aliases: continue - text = pywikibot.replaceExcept(text, r'[[(?P<left>.+?:.+?..+?|) *(' + '|'.join(aliases) +') *(?P<right>(|.*?)?]])', - r'[[\g<left>' + aliases[0] + '\g<right>', - exceptions) + text = pywikibot.replaceExcept( + text, + r'[[(?P<left>.+?:.+?..+?|) *(' + '|'.join(aliases) + 
\ + ') *(?P<right>(|.*?)?]])', + r'[[\g<left>' + aliases[0] + '\g<right>', exceptions) return text
def cleanUpLinks(self, text): @@ -524,7 +499,7 @@ # group <linktrail> is the link trail after ]] which are part of the word. # note that the definition of 'letter' varies from language to language. linkR = re.compile( - r'(?P<newline>[\n]*)[[(?P<titleWithSection>[^]|]+)(|(?P<label>[^]|]*))?]](?P<linktrail>' + \ + r'(?P<newline>[\n]*)[[(?P<titleWithSection>[^]|]+)(|(?P<label>[^]|]*))?]](?P<linktrail>' + self.site.linktrail() + ')')
text = pywikibot.replaceExcept(text, linkR, handleOneLink, @@ -534,24 +509,24 @@
def resolveHtmlEntities(self, text): ignore = [ - 38, # Ampersand (&) - 39, # Bugzilla 24093 - 60, # Less than (<) - 62, # Great than (>) - 91, # Opening bracket - sometimes used intentionally inside links - 93, # Closing bracket - sometimes used intentionally inside links - 124, # Vertical bar (??) - used intentionally in navigation bar templates on de: - 160, # Non-breaking space ( ) - not supported by Firefox textareas - 173, # Soft-hypen (­) - enable editing - 8206, # left-to-right mark (<r;) - 8207, # right-to-left mark (&rtl;) + 38, # Ampersand (&) + 39, # Bugzilla 24093 + 60, # Less than (<) + 62, # Great than (>) + 91, # Opening bracket - sometimes used intentionally inside links + 93, # Closing bracket - sometimes used intentionally inside links + 124, # Vertical bar (??) - used intentionally in navigation bar templates on de: + 160, # Non-breaking space ( ) - not supported by Firefox textareas + 173, # Soft-hypen (­) - enable editing + 8206, # left-to-right mark (<r;) + 8207, # right-to-left mark (&rtl;) ] # ignore ' see http://eo.wikipedia.org/w/index.php?title=Liberec&diff=next&oldid=23... #if self.site.lang == 'eo': # ignore += [39] if self.template: ignore += [58] - text = pywikibot.html2unicode(text, ignore = ignore) + text = pywikibot.html2unicode(text, ignore=ignore) return text
def validXhtml(self, text): @@ -617,17 +592,21 @@
def replaceDeprecatedTemplates(self, text): exceptions = ['comment', 'math', 'nowiki', 'pre'] - if self.site.family.name in deprecatedTemplates and self.site.lang in deprecatedTemplates[self.site.family.name]: + if self.site.family.name in deprecatedTemplates and \ + self.site.lang in deprecatedTemplates[self.site.family.name]: for template in deprecatedTemplates[self.site.family.name][self.site.lang]: old = template[0] new = template[1] - if new == None: + if new is None: new = '' else: - new = '{{'+new+'}}' + new = '{{%s}}' % new if not self.site.nocapitalize: old = '[' + old[0].upper() + old[0].lower() + ']' + old[1:] - text = pywikibot.replaceExcept(text, r'{{([mM][sS][gG]:)?' + old + '(?P<parameters>|[^}]+|)}}', new, exceptions) + text = pywikibot.replaceExcept( + text, + r'{{([mM][sS][gG]:)?' + old + '(?P<parameters>|[^}]+|)}}', + new, exceptions) return text
#from fixes.py @@ -666,54 +645,76 @@ def fixHtml(self, text): # Everything case-insensitive (?i) # Keep in mind that MediaWiki automatically converts <br> to <br /> - exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace'] - text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>', r"'''\1'''" , exceptions) - text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>', r"'''\1'''" , exceptions) - text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>', r"''\1''" , exceptions) - text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>', r"''\1''" , exceptions) + exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', + 'startspace'] + text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>', r"'''\1'''", + exceptions) + text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>', + r"'''\1'''", exceptions) + text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>', r"''\1''", + exceptions) + text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>', r"''\1''", + exceptions) # horizontal line without attributes in a single line - text = pywikibot.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])', r'\1----\2', exceptions) + text = pywikibot.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])', + r'\1----\2', exceptions) # horizontal line with attributes; can't be done with wiki syntax # so we only make it XHTML compliant - text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>', r'<hr \1 />', exceptions) + text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>', + r'<hr \1 />', + exceptions) # a header where only spaces are in the same line for level in range(1, 7): - equals = '\1%s \2 %s\3' % ("="*level, "="*level) - text = pywikibot.replaceExcept(text, - r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])'%(level, level), - r'%s'%equals, exceptions) + equals = '\1%s \2 %s\3' % ("=" * level, "=" * level) + text = pywikibot.replaceExcept( + text, + r'(?i)([\r\n]) *<h%d> *([^<]+?) 
*</h%d> *([\r\n])' + % (level, level), + r'%s' % equals, + exceptions) # TODO: maybe we can make the bot replace <p> tags with \r\n's. return text
def fixReferences(self, text): #http://en.wikipedia.org/wiki/User:AnomieBOT/source/tasks/OrphanReferenceFixe... - exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace'] + exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', + 'startspace']
# it should be name = " or name=" NOT name =" text = re.sub(r'(?i)<ref +name(= *| *=)"', r'<ref name="', text) #remove empty <ref/>-tag - text = pywikibot.replaceExcept(text, r'(?i)(<ref\s*/>|<ref *>\s*</ref>)', r'', exceptions) - text = pywikibot.replaceExcept(text, r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>', r'<ref \1/>', exceptions) + text = pywikibot.replaceExcept(text, r'(?i)(<ref\s*/>|<ref *>\s*</ref>)', + r'', exceptions) + text = pywikibot.replaceExcept(text, r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>', + r'<ref \1/>', exceptions) return text
def fixStyle(self, text): - exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace'] + exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', + 'startspace'] # convert prettytable to wikitable class if self.site.language in ('de', 'en'): - text = pywikibot.replaceExcept(text, ur'(class="[^"]*)prettytable([^"]*")', ur'\1wikitable\2', exceptions) + text = pywikibot.replaceExcept(text, + ur'(class="[^"]*)prettytable([^"]*")', + ur'\1wikitable\2', exceptions) return text
def fixTypo(self, text): - exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace', 'gallery', 'hyperlink', 'interwiki', 'link'] + exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', + 'startspace', 'gallery', 'hyperlink', 'interwiki', 'link'] # change <number> ccm -> <number> cm³ - text = pywikibot.replaceExcept(text, ur'(\d)\s* ccm', ur'\1 cm³', exceptions) - text = pywikibot.replaceExcept(text, ur'(\d)\s*ccm', ur'\1 cm³', exceptions) + text = pywikibot.replaceExcept(text, ur'(\d)\s* ccm', + ur'\1 cm³', exceptions) + text = pywikibot.replaceExcept(text, ur'(\d)\s*ccm', ur'\1 cm³', + exceptions) # Solve wrong Nº sign with °C or °F # additional exception requested on fr-wiki for this stuff pattern = re.compile(u'«.*?»', re.UNICODE) exceptions.append(pattern) - text = pywikibot.replaceExcept(text, ur'(\d)\s* [º°]([CF])', ur'\1 °\2', exceptions) - text = pywikibot.replaceExcept(text, ur'(\d)\s*[º°]([CF])', ur'\1 °\2', exceptions) + text = pywikibot.replaceExcept(text, ur'(\d)\s* [º°]([CF])', + ur'\1 °\2', exceptions) + text = pywikibot.replaceExcept(text, ur'(\d)\s*[º°]([CF])', + ur'\1 °\2', exceptions) text = pywikibot.replaceExcept(text, ur'º([CF])', ur'°\1', exceptions) return text
@@ -735,9 +736,10 @@ ] # valid digits digits = { - 'ckb' : u'٠١٢٣٤٥٦٧٨٩', - 'fa' : u'۰۱۲۳۴۵۶۷۸۹' + 'ckb': u'٠١٢٣٤٥٦٧٨٩', + 'fa': u'۰۱۲۳۴۵۶۷۸۹', } + faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa'] new = digits.pop(self.site.lang) # This only works if there are only two items in digits dict old = digits[digits.keys()[0]] @@ -745,9 +747,12 @@ namespaces = list(self.site.namespace(6, all=True)) pattern = re.compile(u'[[(' + '|'.join(namespaces) + '):.+?.\w+? *(|(([[.*?]])|.)*)?]]', re.UNICODE) + #not to let bot edits in latin content + exceptions.append(re.compile(u"[^%(fa)s] *?"*? *?, *?[^%(fa)s]" + % {'fa': faChrs})) exceptions.append(pattern) text = pywikibot.replaceExcept(text, u',', u'،', exceptions) - if self.site.lang=='ckb': + if self.site.lang == 'ckb': text = pywikibot.replaceExcept(text, ur'ه([.،_<]\s])', ur'ە\1', exceptions) @@ -766,7 +771,7 @@ # do not change digits inside html-tags pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE) exceptions.append(pattern) - exceptions.append('table') #exclude tables for now + exceptions.append('table') # exclude tables for now # replace digits for i in xrange(0, 10): text = pywikibot.replaceExcept(text, str(i), new[i], exceptions) @@ -846,7 +851,7 @@ if not self.acceptall: choice = pywikibot.inputChoice( u'Do you want to accept these changes?', - ['Yes', 'No', 'All', 'Quit'], ['y', 'N', 'a', 'q'], 'N') + ['Yes', 'No', 'All', 'Quit'], ['y', 'n', 'a', 'q'], 'n') if choice == 'a': self.acceptall = True elif choice == 'q': @@ -918,7 +923,7 @@ if not always: answer = pywikibot.inputChoice( warning + '\nDo you really want to continue?', - ['yes', 'no'], ['y', 'N'], 'N') + ['yes', 'no'], ['y', 'n'], 'n') if answer == 'y': preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = CosmeticChangesBot(preloadingGen, acceptall=always,
pywikipedia-svn@lists.wikimedia.org