http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11174
Revision: 11174
Author: xqt
Date: 2013-03-03 17:32:47 +0000 (Sun, 03 Mar 2013)
Log Message:
-----------
some PEP8 changes

Modified Paths:
--------------
trunk/pywikipedia/cosmetic_changes.py
Modified: trunk/pywikipedia/cosmetic_changes.py =================================================================== --- trunk/pywikipedia/cosmetic_changes.py 2013-03-03 16:18:14 UTC (rev 11173) +++ trunk/pywikipedia/cosmetic_changes.py 2013-03-03 17:32:47 UTC (rev 11174) @@ -59,7 +59,8 @@ # __version__ = '$Id$' # -import sys, re +import sys +import re import wikipedia as pywikibot import isbn import pagegenerators @@ -79,38 +80,39 @@ # Interwiki message on top of iw links # 2nd line is a regex if needed msg_interwiki = { - 'fr' : u'<!-- Autres langues -->', - 'nn' : (u'<!--interwiki (no, sv, da first; then other languages alphabetically by name)-->', - u'(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)') + 'fr': u'<!-- Autres langues -->', + 'nn': (u'<!--interwiki (no, sv, da first; then other languages alphabetically by name)-->', + u'(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)') }
# This is from interwiki.py; # move it to family file and implement global instances moved_links = { - 'ca' : (u'ús de la plantilla', u'/ús'), - 'cs' : (u'dokumentace', u'/doc'), - 'de' : (u'dokumentation', u'/Meta'), - 'en' : ([u'documentation', - u'template documentation', - u'template doc', - u'doc', - u'documentation, template'], u'/doc'), - 'es' : ([u'documentación', u'documentación de plantilla'], u'/doc'), - 'fa' : ([u'documentation',u'توضیحات',u'توضیحات الگو',u'doc'], u'/توضیحات'), - 'fr' : (u'/documentation', u'/Documentation'), - 'hu' : (u'sablondokumentáció', u'/doc'), - 'id' : (u'template doc', u'/doc'), - 'ja' : (u'documentation', u'/doc'), - 'ka' : (u'თარგის ინფო', u'/ინფო'), - 'ko' : (u'documentation', u'/설명문서'), - 'ms' : (u'documentation', u'/doc'), - 'pl' : (u'dokumentacja', u'/opis'), - 'pt' : ([u'documentação', u'/doc'], u'/doc'), - 'ro' : (u'documentaţie', u'/doc'), - 'ru' : (u'doc', u'/doc'), - 'sv' : (u'dokumentation', u'/dok'), - 'vi' : (u'documentation', u'/doc'), - 'zh' : ([u'documentation', u'doc'], u'/doc'), + 'ca': (u'ús de la plantilla', u'/ús'), + 'cs': (u'dokumentace', u'/doc'), + 'de': (u'dokumentation', u'/Meta'), + 'en': ([u'documentation', + u'template documentation', + u'template doc', + u'doc', + u'documentation, template'], u'/doc'), + 'es': ([u'documentación', u'documentación de plantilla'], u'/doc'), + 'fa': ([u'documentation', u'توضیحات', u'توضیحات الگو', + u'doc'], u'/توضیحات'), + 'fr': (u'/documentation', u'/Documentation'), + 'hu': (u'sablondokumentáció', u'/doc'), + 'id': (u'template doc', u'/doc'), + 'ja': (u'documentation', u'/doc'), + 'ka': (u'თარგის ინფო', u'/ინფო'), + 'ko': (u'documentation', u'/설명문서'), + 'ms': (u'documentation', u'/doc'), + 'pl': (u'dokumentacja', u'/opis'), + 'pt': ([u'documentação', u'/doc'], u'/doc'), + 'ro': (u'documentaţie', u'/doc'), + 'ru': (u'doc', u'/doc'), + 'sv': (u'dokumentation', u'/dok'), + 'vi': (u'documentation', u'/doc'), + 'zh': ([u'documentation', u'doc'], u'/doc'), }
# Template which should be replaced or removed. @@ -158,7 +160,7 @@ Given a wiki source code text, return the cleaned up version. """ oldText = text - if self.site.sitename()== u'commons:commons' and self.namespace == 6: + if self.site.sitename() == u'commons:commons' and self.namespace == 6: text = self.commonsfiledesc(text) text = self.fixSelfInterwiki(text) text = self.standardizePageFooter(text) @@ -195,7 +197,7 @@ Interwiki links to the site itself are displayed like local links. Remove their language code prefix. """ - if not self.talkpage and pywikibot.calledModuleName() <> 'interwiki': + if not self.talkpage and pywikibot.calledModuleName() != 'interwiki': interwikiR = re.compile(r'[[%s\s?:([^[]\n]*)]]' % self.site.lang) text = interwikiR.sub(r'[[\1]]', text) @@ -212,6 +214,7 @@ 3. additional information depending on local site policy 4. stars templates for featured and good articles 5. interwiki links + """ starsList = [ u'bueno', @@ -254,9 +257,13 @@ if not self.template and not '{{Personendaten' in text and \ not '{{SORTIERUNG' in text and not '{{DEFAULTSORT' in text and \ not self.site.lang in ('et', 'it', 'bg', 'ru'): - categories = pywikibot.getCategoryLinks(text, site = self.site) + try: + categories = pywikibot.getCategoryLinks(text, site=self.site) + # there are categories like [[categoy:Foo {{#time:Y...}}]] + except InvalidTitle: + pass
- if not self.talkpage:# and pywikibot.calledModuleName() <> 'interwiki': + if not self.talkpage: # and pywikibot.calledModuleName() <> 'interwiki': subpage = False if self.template: loc = None @@ -265,13 +272,13 @@ del tmpl except KeyError: pass - if loc != None and loc in self.title: + if loc is not None and loc in self.title: subpage = True interwikiLinks = pywikibot.getLanguageLinks( text, insite=self.site, template_subpage=subpage)
# Removing the interwiki - text = pywikibot.removeLanguageLinks(text, site = self.site) + text = pywikibot.removeLanguageLinks(text, site=self.site) # Removing the stars' issue starstext = pywikibot.removeDisabledParts(text) for star in starsList: @@ -314,16 +321,16 @@ ((interwikiLinks or hasCommentLine) and self.site.language() == 'nn' or (interwikiLinks and hasCommentLine) and - self.site.language() == 'fr'): + self.site.language() == 'fr'): text += '\r\n\r\n' + iw_msg # Adding stars templates if allstars: - text = text.strip()+self.site.family.interwiki_text_separator + text = text.strip() + self.site.family.interwiki_text_separator allstars.sort() for element in allstars: text += '%s\r\n' % element.strip() if pywikibot.verbose: - pywikibot.output(u'%s' %element.strip()) + pywikibot.output(u'%s' % element.strip()) # Adding the interwiki if interwikiLinks: text = pywikibot.replaceLanguageLinks(text, interwikiLinks, @@ -370,8 +377,8 @@ if thisNs and namespaces: text = pywikibot.replaceExcept( text, - r'[[\s*(' + '|'.join(namespaces) + \ - ') *:(?P<nameAndLabel>.*?)]]', r'[[' + thisNs + \ + r'[[\s*(' + '|'.join(namespaces) + + ') *:(?P<nameAndLabel>.*?)]]', r'[[' + thisNs + ':\g<nameAndLabel>]]', exceptions) return text
@@ -383,13 +390,15 @@ # arz uses english stylish codes if self.site.lang not in ['arz', 'ru']: exceptions = ['nowiki', 'comment', 'math', 'pre'] - for magicWord in ['img_thumbnail', 'img_left', 'img_center', 'img_right', 'img_none', - 'img_framed', 'img_frameless', 'img_border', 'img_upright',]: + for magicWord in ['img_thumbnail', 'img_left', 'img_center', + 'img_right', 'img_none', 'img_framed', + 'img_frameless', 'img_border', 'img_upright', ]: aliases = self.site.siteinfo('magicwords').get(magicWord) if not aliases: continue - text = pywikibot.replaceExcept(text, r'[[(?P<left>.+?:.+?..+?|) *(' + '|'.join(aliases) +') *(?P<right>(|.*?)?]])', - r'[[\g<left>' + aliases[0] + '\g<right>', - exceptions) + text = pywikibot.replaceExcept( + text, + r'[[(?P<left>.+?:.+?..+?|) *(' + '|'.join(aliases) + ') *(?P<right>(|.*?)?]])', + r'[[\g<left>' + aliases[0] + '\g<right>', exceptions) return text
def cleanUpLinks(self, text): @@ -409,7 +418,7 @@ try: page = pywikibot.Page(self.site, titleWithSection) #except pywikibot.InvalidTitle: - except: #empty self link occures + except: # empty self link occures return match.group() if page.namespace() == 0: # Replace underlines by spaces, also multiple underlines @@ -469,7 +478,8 @@ newLink = "[[%s]]" % label # Check if we can create a link with trailing characters # instead of a pipelink - elif self.site.sitename() != 'wikipedia:fa' and len(titleWithSection) <= len(label) and \ + elif self.site.sitename() != 'wikipedia:fa' and \ + len(titleWithSection) <= len(label) and \ label[:len(titleWithSection)] == titleWithSection and \ re.sub(trailR, '', label[len(titleWithSection):]) == '': @@ -509,7 +519,7 @@ # group <linktrail> is the link trail after ]] which are part of the word. # note that the definition of 'letter' varies from language to language. linkR = re.compile( - r'(?P<newline>[\n]*)[[(?P<titleWithSection>[^]|]+)(|(?P<label>[^]|]*))?]](?P<linktrail>' + \ + r'(?P<newline>[\n]*)[[(?P<titleWithSection>[^]|]+)(|(?P<label>[^]|]*))?]](?P<linktrail>' + self.site.linktrail() + ')')
text = pywikibot.replaceExcept(text, linkR, handleOneLink, @@ -526,24 +536,24 @@
def resolveHtmlEntities(self, text): ignore = [ - 38, # Ampersand (&amp;) - 39, # Bugzilla 24093 - 60, # Less than (&lt;) - 62, # Great than (&gt;) - 91, # Opening bracket - sometimes used intentionally inside links - 93, # Closing bracket - sometimes used intentionally inside links - 124, # Vertical bar (??) - used intentionally in navigation bar templates on de: - 160, # Non-breaking space (&nbsp;) - not supported by Firefox textareas - 173, # Soft-hypen (&shy;) - enable editing - 8206, # left-to-right mark (&ltr;) - 8207, # right-to-left mark (&rtl;) + 38, # Ampersand (&amp;) + 39, # Bugzilla 24093 + 60, # Less than (&lt;) + 62, # Great than (&gt;) + 91, # Opening bracket - sometimes used intentionally inside links + 93, # Closing bracket - sometimes used intentionally inside links + 124, # Vertical bar (??) - used intentionally in navigation bar templates on de: + 160, # Non-breaking space (&nbsp;) - not supported by Firefox textareas + 173, # Soft-hypen (&shy;) - enable editing + 8206, # left-to-right mark (&ltr;) + 8207, # right-to-left mark (&rtl;) ] # ignore ' see http://eo.wikipedia.org/w/index.php?title=Liberec&diff=next&oldid=23... #if self.site.lang == 'eo': # ignore += [39] if self.template: ignore += [58] - text = pywikibot.html2unicode(text, ignore = ignore) + text = pywikibot.html2unicode(text, ignore=ignore) return text
def validXhtml(self, text): @@ -608,17 +618,21 @@
def replaceDeprecatedTemplates(self, text): exceptions = ['comment', 'math', 'nowiki', 'pre'] - if self.site.family.name in deprecatedTemplates and self.site.lang in deprecatedTemplates[self.site.family.name]: + if self.site.family.name in deprecatedTemplates and \ + self.site.lang in deprecatedTemplates[self.site.family.name]: for template in deprecatedTemplates[self.site.family.name][self.site.lang]: old = template[0] new = template[1] - if new == None: + if new is None: new = '' else: - new = '{{'+new+'}}' + new = '{{%s}}' % new if not self.site.nocapitalize: old = '[' + old[0].upper() + old[0].lower() + ']' + old[1:] - text = pywikibot.replaceExcept(text, r'{{([mM][sS][gG]:)?' + old + '(?P<parameters>|[^}]+|)}}', new, exceptions) + text = pywikibot.replaceExcept( + text, + r'{{([mM][sS][gG]:)?%s(?P<parameters>|[^}]+|)}}' % old, + new, exceptions) return text
#from fixes.py @@ -657,22 +671,31 @@ def fixHtml(self, text): # Everything case-insensitive (?i) # Keep in mind that MediaWiki automatically converts <br> to <br /> - exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace'] - text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>', r"'''\1'''" , exceptions) - text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>', r"'''\1'''" , exceptions) - text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>', r"''\1''" , exceptions) - text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>', r"''\1''" , exceptions) + exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', + 'startspace'] + text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>', r"'''\1'''", + exceptions) + text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>', + r"'''\1'''", exceptions) + text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>', r"''\1''", + exceptions) + text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>', r"''\1''", + exceptions) # horizontal line without attributes in a single line - text = pywikibot.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])', r'\1----\2', exceptions) + text = pywikibot.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])', + r'\1----\2', exceptions) # horizontal line with attributes; can't be done with wiki syntax # so we only make it XHTML compliant - text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>', r'<hr \1 />', exceptions) + text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>', r'<hr \1 />', + exceptions) # a header where only spaces are in the same line for level in range(1, 7): - equals = '\1%s \2 %s\3' % ("="*level, "="*level) - text = pywikibot.replaceExcept(text, - r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])'%(level, level), - r'%s'%equals, exceptions) + equals = '\1%s \2 %s\3' % ("=" * level, "=" * level) + text = pywikibot.replaceExcept( + text, + r'(?i)([\r\n]) *<h%d> *([^<]+?) 
*</h%d> *([\r\n])' % (level, level), + r'%s' % equals, + exceptions) # TODO: maybe we can make the bot replace <p> tags with \r\n's. return text
@@ -683,15 +706,19 @@ # it should be name = " or name=" NOT name =" text = re.sub(r'(?i)<ref +name(= *| *=)"', r'<ref name="', text) #remove empty <ref/>-tag - text = pywikibot.replaceExcept(text, r'(?i)(<ref\s*/>|<ref *>\s*</ref>)', r'', exceptions) - text = pywikibot.replaceExcept(text, r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>', r'<ref \1/>', exceptions) + text = pywikibot.replaceExcept(text, r'(?i)(<ref\s*/>|<ref *>\s*</ref>)', + r'', exceptions) + text = pywikibot.replaceExcept(text, r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>', + r'<ref \1/>', exceptions) return text
def fixStyle(self, text): exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace'] # convert prettytable to wikitable class if self.site.language in ('de', 'en'): - text = pywikibot.replaceExcept(text, ur'(class="[^"]*)prettytable([^"]*")', ur'\1wikitable\2', exceptions) + text = pywikibot.replaceExcept(text, + ur'(class="[^"]*)prettytable([^"]*")', + ur'\1wikitable\2', exceptions) return text
def fixTypo(self, text): @@ -726,22 +753,23 @@ ] # valid digits digits = { - 'ckb' : u'٠١٢٣٤٥٦٧٨٩', - 'fa' : u'۰۱۲۳۴۵۶۷۸۹' + 'ckb': u'٠١٢٣٤٥٦٧٨٩', + 'fa': u'۰۱۲۳۴۵۶۷۸۹', } new = digits.pop(self.site.lang) # This only works if there are only two items in digits dict old = digits[digits.keys()[0]] - faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك'+u'۱۲۳۴۵۶۷۸۹۰' + faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa'] # do not change inside file links namespaces = list(self.site.namespace(6, all=True)) pattern = re.compile(u'[[(' + '|'.join(namespaces) + '):.+?.\w+? *(|(([[.*?]])|.)*)?]]', re.UNICODE) #not to let bot edits in latin content - exceptions.append(re.compile(u"[^"+faChrs+u"] *?"*? *?, *?[^"+faChrs+u"]")) + exceptions.append(re.compile(u"[^%(fa)s] *?"*? *?, *?[^%(fa)s]" + % {'fa': faChrs})) exceptions.append(pattern) text = pywikibot.replaceExcept(text, u',', u'،', exceptions) - if self.site.lang=='ckb': + if self.site.lang == 'ckb': text = pywikibot.replaceExcept(text, ur'ه([.،_<]\s])', ur'ە\1', exceptions) @@ -760,7 +788,7 @@ # do not change digits inside html-tags pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE) exceptions.append(pattern) - exceptions.append('table') #exclude tables for now + exceptions.append('table') # exclude tables for now # replace digits for i in xrange(0, 10): text = pywikibot.replaceExcept(text, str(i), new[i], exceptions)
pywikipedia-svn@lists.wikimedia.org