http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11175
Revision: 11175
Author: xqt
Date: 2013-03-03 17:45:06 +0000 (Sun, 03 Mar 2013)
Log Message:
-----------
some PEP8 changes, update from trunk r11169
Modified Paths:
--------------
branches/rewrite/scripts/interwiki.py
Modified: branches/rewrite/scripts/interwiki.py
===================================================================
--- branches/rewrite/scripts/interwiki.py 2013-03-03 17:32:47 UTC (rev 11174)
+++ branches/rewrite/scripts/interwiki.py 2013-03-03 17:45:06 UTC (rev 11175)
@@ -163,6 +163,8 @@
for the format, one can for example give "en:something" or
"20:" as hint.
+ -repository Include data repository
+
-same looks over all 'serious' languages for the same title.
-same is equivalent to -hint:all:
(note: without ending colon)
@@ -497,6 +499,7 @@
restoreAll = False
async = False
summary = u''
+ repository = False
def readOptions(self, arg):
""" Read all commandline parameters for the global container """
@@ -524,6 +527,8 @@
self.same = True
elif arg == '-wiktionary':
self.same = 'wiktionary'
+ elif arg == '-repository':
+ self.repository = True
elif arg == '-untranslated':
self.untranslated = True
elif arg == '-untranslatedonly':
@@ -831,6 +836,7 @@
# Remember the "origin page"
self.originPage = originPage
+ self.repoPage = None
# todo is a list of all pages that still need to be analyzed.
# Mark the origin page as todo.
self.todo = PageTree()
@@ -1578,15 +1584,15 @@
else:
#TODO: allow answer to repeat previous or go back after a mistake
answer = pywikibot.inputChoice(u'What should be done?', ['accept', 'reject', 'give up', 'accept all'], ['a', 'r', 'g', 'l'], 'a')
- if answer == 'l': # accept all
+ if answer == 'l': # accept all
acceptall = True
answer = 'a'
- if answer == 'a': # accept this one
+ if answer == 'a': # accept this one
result[site] = pages[0]
break
- elif answer == 'g': # give up
+ elif answer == 'g': # give up
return None
- elif answer == 'r': # reject
+ elif answer == 'r': # reject
# None acceptable
break
return result
@@ -1609,7 +1615,7 @@
t2 = (((int(time2[0:4]) * 12 + int(time2[4:6])) * 30 +
int(time2[6:8])) * 24 + int(time2[8:10])) * 60 + \
int(time2[10:12])
- return abs(t2-t1)
+ return abs(t2 - t1)
if not self.isDone():
raise "Bugcheck: finish called before done"
@@ -1637,7 +1643,7 @@
pywikibot.output(u"======Post-processing %s======" % self.originPage)
# Assemble list of accepted interwiki links
new = self.assemble()
- if new is None: # User said give up
+ if new is None: # User said give up
pywikibot.output(u"======Aborted processing %s======"
% self.originPage)
return
@@ -1662,7 +1668,7 @@
frgnSiteDone = False
for siteCode in lclSite.family.languages_by_size:
- site = pywikibot.getSite(code = siteCode)
+ site = pywikibot.getSite(siteCode)
if (not lclSiteDone and site == lclSite) or \
(not frgnSiteDone and site != lclSite and site in new):
if site == lclSite:
@@ -1672,14 +1678,14 @@
if self.replaceLinks(new[site], new):
updatedSites.append(site)
if site != lclSite:
- frgnSiteDone = True
+ frgnSiteDone = True
except SaveError:
notUpdatedSites.append(site)
except GiveUpOnPage:
break
elif not globalvar.strictlimittwo and site in new \
and site != lclSite:
- old={}
+ old = {}
try:
for link in new[site].iterlanglinks():
page = pywikibot.Page(link)
@@ -1689,12 +1695,12 @@
% new[site])
continue
mods, mcomment, adding, removing, modifying \
- = compareLanguages(old, new, insite = lclSite)
+ = compareLanguages(old, new, insite=lclSite)
if (len(removing) > 0 and not globalvar.autonomous) or \
(len(modifying) > 0 and self.problemfound) or \
len(old) == 0 or \
- (globalvar.needlimit and \
- len(adding) + len(modifying) >= globalvar.needlimit +1):
+ (globalvar.needlimit and
+ len(adding) + len(modifying) >= globalvar.needlimit + 1):
try:
if self.replaceLinks(new[site], new):
updatedSites.append(site)
@@ -1721,7 +1727,7 @@
(page.site.sitename() == 'wikipedia:is' or
page.site.sitename() == 'wikipedia:zh' and
page.namespace() == 10):
- old={}
+ old = {}
try:
for mypage in new[page.site].interwiki():
old[mypage.site] = mypage
@@ -1747,7 +1753,7 @@
diff = minutesDiff(page.editTime(),
time.strftime("%Y%m%d%H%M%S",
time.gmtime()))
- if diff > 30*24*60:
+ if diff > 30 * 24 * 60:
smallWikiAllowed = True
else:
pywikibot.output(
@@ -1869,7 +1875,7 @@
del new[stmp]
# Put interwiki links into a map
- old={}
+ old = {}
for page2 in interwikis:
old[page2.site] = page2
@@ -2079,6 +2085,7 @@
except (socket.error, IOError):
pywikibot.output(u'ERROR: could not report backlinks')
+
class InterwikiBot(object):
"""A class keeping track of a list of subjects, controlling which pages
are queried from which languages when."""
@@ -2095,29 +2102,31 @@
self.pageGenerator = None
self.generated = 0
- def add(self, page, hints = None):
+ def add(self, page, hints=None):
"""Add a single subject to the list"""
- subj = Subject(page, hints = hints)
+ subj = Subject(page, hints=hints)
self.subjects.append(subj)
for site, count in subj.openSites():
# Keep correct counters
self.plus(site, count)
- def setPageGenerator(self, pageGenerator, number = None, until = None):
+ def setPageGenerator(self, pageGenerator, number=None, until=None):
"""Add a generator of subjects. Once the list of subjects gets
too small, this generator is called to produce more Pages"""
self.pageGenerator = pageGenerator
self.generateNumber = number
self.generateUntil = until
- def dump(self, append = True):
+ def dump(self, append=True):
site = pywikibot.getSite()
dumpfn = pywikibot.config.datafilepath(
'data',
'interwiki-dumps',
'%s-%s.pickle' % (site.family.name, site.lang))
- if append: mode = 'appended'
- else: mode = 'written'
+ if append:
+ mode = 'appended'
+ else:
+ mode = 'written'
f = open(dumpfn, mode[0])
titles = [s.originPage.title() for s in self.subjects]
pickle.dump(titles, f)
@@ -2177,10 +2186,10 @@
if self.generateUntil:
until = self.generateUntil
if page.site.lang not in page.site.family.nocapitalize:
- until = until[0].upper()+until[1:]
+ until = until[0].upper() + until[1:]
if page.title(withNamespace=False) > until:
raise StopIteration
- self.add(page, hints = globalvar.hints)
+ self.add(page, hints=globalvar.hints)
self.generated += 1
if self.generateNumber:
if self.generated >= self.generateNumber:
@@ -2229,12 +2238,14 @@
# Can we make more home-language queries by adding subjects?
if self.pageGenerator and mycount < globalvar.maxquerysize:
timeout = 60
- while timeout<3600:
+ while timeout < 3600:
try:
self.generateMore(globalvar.maxquerysize - mycount)
except pywikibot.ServerError:
# Could not extract allpages special page?
- pywikibot.output(u'ERROR: could not retrieve more pages. Will try again in %d seconds'%timeout)
+ pywikibot.output(
+ u'ERROR: could not retrieve more pages. Will try again in %d seconds'
+ % timeout)
time.sleep(timeout)
timeout *= 2
else:
@@ -2292,7 +2303,7 @@
def queryStep(self):
self.oneQuery()
# Delete the ones that are done now.
- for i in xrange(len(self.subjects)-1, -1, -1):
+ for i in xrange(len(self.subjects) - 1, -1, -1):
subj = self.subjects[i]
if subj.isDone():
subj.finish()
@@ -2322,6 +2333,7 @@
def __len__(self):
return len(self.subjects)
+
def compareLanguages(old, new, insite):
oldiw = set(old)
@@ -2393,16 +2405,20 @@
return False
return True
+
def readWarnfile(filename, bot):
import warnfile
reader = warnfile.WarnfileReader(filename)
# we won't use removeHints
(hints, removeHints) = reader.getHints()
for page, pagelist in hints.iteritems():
- # The WarnfileReader gives us a list of pagelinks, but titletranslate.py expects a list of strings, so we convert it back.
- # TODO: This is a quite ugly hack, in the future we should maybe make titletranslate expect a list of pagelinks.
- hintStrings = ['%s:%s' % (hintedPage.site.language(), hintedPage.title()) for hintedPage in pagelist]
- bot.add(page, hints = hintStrings)
+ # The WarnfileReader gives us a list of pagelinks, but titletranslate.py
+ # expects a list of strings, so we convert it back.
+ # TODO: This is a quite ugly hack, in the future we should maybe make
+ # titletranslate expect a list of pagelinks.
+ hintStrings = ['%s:%s' % (hintedPage.site.language(),
+ hintedPage.title()) for hintedPage in pagelist]
+ bot.add(page, hints=hintStrings)
def main():
singlePageTitle = []
@@ -2419,7 +2435,7 @@
optContinue = False
optRestore = False
restoredFiles = []
- File2Restore = []
+ File2Restore = []
dumpFileName = ''
append = True
newPages = None
@@ -2507,7 +2523,8 @@
namespaces = []
else:
ns = 'all'
- hintlessPageGen = pagegenerators.NewpagesPageGenerator(newPages, namespace=ns)
+ hintlessPageGen = pagegenerators.NewpagesPageGenerator(newPages,
+ namespace=ns)
elif optRestore or optContinue or globalvar.restoreAll:
dumpFileName = pywikibot.config.datafilepath(
@@ -2543,7 +2560,7 @@
if len(namespaces) > 0:
hintlessPageGen = pagegenerators.NamespaceFilterPageGenerator(hintlessPageGen, namespaces)
# we'll use iter() to create make a next() function available.
- bot.setPageGenerator(iter(hintlessPageGen), number = number, until=until)
+ bot.setPageGenerator(iter(hintlessPageGen), number=number, until=until)
elif warnfile:
# TODO: filter namespaces if -namespace parameter was used
readWarnfile(warnfile, bot)
@@ -2555,7 +2572,7 @@
singlePage = pywikibot.Page(pywikibot.getSite(), singlePageTitle)
else:
singlePage = None
- bot.add(singlePage, hints = globalvar.hints)
+ bot.add(singlePage, hints=globalvar.hints)
try:
append = not (optRestore or optContinue or globalvar.restoreAll)
@@ -2581,7 +2598,7 @@
pass
#===========
-globalvar=Global()
+globalvar = Global()
if __name__ == "__main__":
try:
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11174
Revision: 11174
Author: xqt
Date: 2013-03-03 17:32:47 +0000 (Sun, 03 Mar 2013)
Log Message:
-----------
some PEP8 changes
Modified Paths:
--------------
trunk/pywikipedia/cosmetic_changes.py
Modified: trunk/pywikipedia/cosmetic_changes.py
===================================================================
--- trunk/pywikipedia/cosmetic_changes.py 2013-03-03 16:18:14 UTC (rev 11173)
+++ trunk/pywikipedia/cosmetic_changes.py 2013-03-03 17:32:47 UTC (rev 11174)
@@ -59,7 +59,8 @@
#
__version__ = '$Id$'
#
-import sys, re
+import sys
+import re
import wikipedia as pywikibot
import isbn
import pagegenerators
@@ -79,38 +80,39 @@
# Interwiki message on top of iw links
# 2nd line is a regex if needed
msg_interwiki = {
- 'fr' : u'<!-- Autres langues -->',
- 'nn' : (u'<!--interwiki (no, sv, da first; then other languages alphabetically by name)-->',
- u'(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)')
+ 'fr': u'<!-- Autres langues -->',
+ 'nn': (u'<!--interwiki (no, sv, da first; then other languages alphabetically by name)-->',
+ u'(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)')
}
# This is from interwiki.py;
# move it to family file and implement global instances
moved_links = {
- 'ca' : (u'ús de la plantilla', u'/ús'),
- 'cs' : (u'dokumentace', u'/doc'),
- 'de' : (u'dokumentation', u'/Meta'),
- 'en' : ([u'documentation',
- u'template documentation',
- u'template doc',
- u'doc',
- u'documentation, template'], u'/doc'),
- 'es' : ([u'documentación', u'documentación de plantilla'], u'/doc'),
- 'fa' : ([u'documentation',u'توضیحات',u'توضیحات الگو',u'doc'], u'/توضیحات'),
- 'fr' : (u'/documentation', u'/Documentation'),
- 'hu' : (u'sablondokumentáció', u'/doc'),
- 'id' : (u'template doc', u'/doc'),
- 'ja' : (u'documentation', u'/doc'),
- 'ka' : (u'თარგის ინფო', u'/ინფო'),
- 'ko' : (u'documentation', u'/설명문서'),
- 'ms' : (u'documentation', u'/doc'),
- 'pl' : (u'dokumentacja', u'/opis'),
- 'pt' : ([u'documentação', u'/doc'], u'/doc'),
- 'ro' : (u'documentaţie', u'/doc'),
- 'ru' : (u'doc', u'/doc'),
- 'sv' : (u'dokumentation', u'/dok'),
- 'vi' : (u'documentation', u'/doc'),
- 'zh' : ([u'documentation', u'doc'], u'/doc'),
+ 'ca': (u'ús de la plantilla', u'/ús'),
+ 'cs': (u'dokumentace', u'/doc'),
+ 'de': (u'dokumentation', u'/Meta'),
+ 'en': ([u'documentation',
+ u'template documentation',
+ u'template doc',
+ u'doc',
+ u'documentation, template'], u'/doc'),
+ 'es': ([u'documentación', u'documentación de plantilla'], u'/doc'),
+ 'fa': ([u'documentation', u'توضیحات', u'توضیحات الگو',
+ u'doc'], u'/توضیحات'),
+ 'fr': (u'/documentation', u'/Documentation'),
+ 'hu': (u'sablondokumentáció', u'/doc'),
+ 'id': (u'template doc', u'/doc'),
+ 'ja': (u'documentation', u'/doc'),
+ 'ka': (u'თარგის ინფო', u'/ინფო'),
+ 'ko': (u'documentation', u'/설명문서'),
+ 'ms': (u'documentation', u'/doc'),
+ 'pl': (u'dokumentacja', u'/opis'),
+ 'pt': ([u'documentação', u'/doc'], u'/doc'),
+ 'ro': (u'documentaţie', u'/doc'),
+ 'ru': (u'doc', u'/doc'),
+ 'sv': (u'dokumentation', u'/dok'),
+ 'vi': (u'documentation', u'/doc'),
+ 'zh': ([u'documentation', u'doc'], u'/doc'),
}
# Template which should be replaced or removed.
@@ -158,7 +160,7 @@
Given a wiki source code text, return the cleaned up version.
"""
oldText = text
- if self.site.sitename()== u'commons:commons' and self.namespace == 6:
+ if self.site.sitename() == u'commons:commons' and self.namespace == 6:
text = self.commonsfiledesc(text)
text = self.fixSelfInterwiki(text)
text = self.standardizePageFooter(text)
@@ -195,7 +197,7 @@
Interwiki links to the site itself are displayed like local links.
Remove their language code prefix.
"""
- if not self.talkpage and pywikibot.calledModuleName() <> 'interwiki':
+ if not self.talkpage and pywikibot.calledModuleName() != 'interwiki':
interwikiR = re.compile(r'\[\[%s\s?:([^\[\]\n]*)\]\]'
% self.site.lang)
text = interwikiR.sub(r'[[\1]]', text)
@@ -212,6 +214,7 @@
3. additional information depending on local site policy
4. stars templates for featured and good articles
5. interwiki links
+
"""
starsList = [
u'bueno',
@@ -254,9 +257,13 @@
if not self.template and not '{{Personendaten' in text and \
not '{{SORTIERUNG' in text and not '{{DEFAULTSORT' in text and \
not self.site.lang in ('et', 'it', 'bg', 'ru'):
- categories = pywikibot.getCategoryLinks(text, site = self.site)
+ try:
+ categories = pywikibot.getCategoryLinks(text, site=self.site)
+ # there are categories like [[categoy:Foo {{#time:Y...}}]]
+ except InvalidTitle:
+ pass
- if not self.talkpage:# and pywikibot.calledModuleName() <> 'interwiki':
+ if not self.talkpage: # and pywikibot.calledModuleName() <> 'interwiki':
subpage = False
if self.template:
loc = None
@@ -265,13 +272,13 @@
del tmpl
except KeyError:
pass
- if loc != None and loc in self.title:
+ if loc is not None and loc in self.title:
subpage = True
interwikiLinks = pywikibot.getLanguageLinks(
text, insite=self.site, template_subpage=subpage)
# Removing the interwiki
- text = pywikibot.removeLanguageLinks(text, site = self.site)
+ text = pywikibot.removeLanguageLinks(text, site=self.site)
# Removing the stars' issue
starstext = pywikibot.removeDisabledParts(text)
for star in starsList:
@@ -314,16 +321,16 @@
((interwikiLinks or hasCommentLine) and
self.site.language() == 'nn' or
(interwikiLinks and hasCommentLine) and
- self.site.language() == 'fr'):
+ self.site.language() == 'fr'):
text += '\r\n\r\n' + iw_msg
# Adding stars templates
if allstars:
- text = text.strip()+self.site.family.interwiki_text_separator
+ text = text.strip() + self.site.family.interwiki_text_separator
allstars.sort()
for element in allstars:
text += '%s\r\n' % element.strip()
if pywikibot.verbose:
- pywikibot.output(u'%s' %element.strip())
+ pywikibot.output(u'%s' % element.strip())
# Adding the interwiki
if interwikiLinks:
text = pywikibot.replaceLanguageLinks(text, interwikiLinks,
@@ -370,8 +377,8 @@
if thisNs and namespaces:
text = pywikibot.replaceExcept(
text,
- r'\[\[\s*(' + '|'.join(namespaces) + \
- ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs + \
+ r'\[\[\s*(' + '|'.join(namespaces) +
+ ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs +
':\g<nameAndLabel>]]', exceptions)
return text
@@ -383,13 +390,15 @@
# arz uses english stylish codes
if self.site.lang not in ['arz', 'ru']:
exceptions = ['nowiki', 'comment', 'math', 'pre']
- for magicWord in ['img_thumbnail', 'img_left', 'img_center', 'img_right', 'img_none',
- 'img_framed', 'img_frameless', 'img_border', 'img_upright',]:
+ for magicWord in ['img_thumbnail', 'img_left', 'img_center',
+ 'img_right', 'img_none', 'img_framed',
+ 'img_frameless', 'img_border', 'img_upright', ]:
aliases = self.site.siteinfo('magicwords').get(magicWord)
if not aliases: continue
- text = pywikibot.replaceExcept(text, r'\[\[(?P<left>.+?:.+?\..+?\|) *(' + '|'.join(aliases) +') *(?P<right>(\|.*?)?\]\])',
- r'[[\g<left>' + aliases[0] + '\g<right>',
- exceptions)
+ text = pywikibot.replaceExcept(
+ text,
+ r'\[\[(?P<left>.+?:.+?\..+?\|) *(' + '|'.join(aliases) + ') *(?P<right>(\|.*?)?\]\])',
+ r'[[\g<left>' + aliases[0] + '\g<right>', exceptions)
return text
def cleanUpLinks(self, text):
@@ -409,7 +418,7 @@
try:
page = pywikibot.Page(self.site, titleWithSection)
#except pywikibot.InvalidTitle:
- except: #empty self link occures
+ except: # empty self link occures
return match.group()
if page.namespace() == 0:
# Replace underlines by spaces, also multiple underlines
@@ -469,7 +478,8 @@
newLink = "[[%s]]" % label
# Check if we can create a link with trailing characters
# instead of a pipelink
- elif self.site.sitename() != 'wikipedia:fa' and len(titleWithSection) <= len(label) and \
+ elif self.site.sitename() != 'wikipedia:fa' and \
+ len(titleWithSection) <= len(label) and \
label[:len(titleWithSection)] == titleWithSection and \
re.sub(trailR, '',
label[len(titleWithSection):]) == '':
@@ -509,7 +519,7 @@
# group <linktrail> is the link trail after ]] which are part of the word.
# note that the definition of 'letter' varies from language to language.
linkR = re.compile(
- r'(?P<newline>[\n]*)\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>' + \
+ r'(?P<newline>[\n]*)\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>' +
self.site.linktrail() + ')')
text = pywikibot.replaceExcept(text, linkR, handleOneLink,
@@ -526,24 +536,24 @@
def resolveHtmlEntities(self, text):
ignore = [
- 38, # Ampersand (&amp;)
- 39, # Bugzilla 24093
- 60, # Less than (&lt;)
- 62, # Great than (&gt;)
- 91, # Opening bracket - sometimes used intentionally inside links
- 93, # Closing bracket - sometimes used intentionally inside links
- 124, # Vertical bar (??) - used intentionally in navigation bar templates on de:
- 160, # Non-breaking space (&nbsp;) - not supported by Firefox textareas
- 173, # Soft-hypen (&shy;) - enable editing
- 8206, # left-to-right mark (&lrm;)
- 8207, # right-to-left mark (&rtl;)
+ 38, # Ampersand (&amp;)
+ 39, # Bugzilla 24093
+ 60, # Less than (&lt;)
+ 62, # Great than (&gt;)
+ 91, # Opening bracket - sometimes used intentionally inside links
+ 93, # Closing bracket - sometimes used intentionally inside links
+ 124, # Vertical bar (??) - used intentionally in navigation bar templates on de:
+ 160, # Non-breaking space (&nbsp;) - not supported by Firefox textareas
+ 173, # Soft-hypen (&shy;) - enable editing
+ 8206, # left-to-right mark (&lrm;)
+ 8207, # right-to-left mark (&rtl;)
]
# ignore ' see http://eo.wikipedia.org/w/index.php?title=Liberec&diff=next&oldid=2320801
#if self.site.lang == 'eo':
# ignore += [39]
if self.template:
ignore += [58]
- text = pywikibot.html2unicode(text, ignore = ignore)
+ text = pywikibot.html2unicode(text, ignore=ignore)
return text
def validXhtml(self, text):
@@ -608,17 +618,21 @@
def replaceDeprecatedTemplates(self, text):
exceptions = ['comment', 'math', 'nowiki', 'pre']
- if self.site.family.name in deprecatedTemplates and self.site.lang in deprecatedTemplates[self.site.family.name]:
+ if self.site.family.name in deprecatedTemplates and \
+ self.site.lang in deprecatedTemplates[self.site.family.name]:
for template in deprecatedTemplates[self.site.family.name][self.site.lang]:
old = template[0]
new = template[1]
- if new == None:
+ if new is None:
new = ''
else:
- new = '{{'+new+'}}'
+ new = '{{%s}}' % new
if not self.site.nocapitalize:
old = '[' + old[0].upper() + old[0].lower() + ']' + old[1:]
- text = pywikibot.replaceExcept(text, r'\{\{([mM][sS][gG]:)?' + old + '(?P<parameters>\|[^}]+|)}}', new, exceptions)
+ text = pywikibot.replaceExcept(
+ text,
+ r'\{\{([mM][sS][gG]:)?%s(?P<parameters>\|[^}]+|)}}' % old,
+ new, exceptions)
return text
#from fixes.py
@@ -657,22 +671,31 @@
def fixHtml(self, text):
# Everything case-insensitive (?i)
# Keep in mind that MediaWiki automatically converts <br> to <br />
- exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace']
- text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>', r"'''\1'''" , exceptions)
- text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>', r"'''\1'''" , exceptions)
- text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>', r"''\1''" , exceptions)
- text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>', r"''\1''" , exceptions)
+ exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
+ 'startspace']
+ text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>', r"'''\1'''",
+ exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>',
+ r"'''\1'''", exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>', r"''\1''",
+ exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>', r"''\1''",
+ exceptions)
# horizontal line without attributes in a single line
- text = pywikibot.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])', r'\1----\2', exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])',
+ r'\1----\2', exceptions)
# horizontal line with attributes; can't be done with wiki syntax
# so we only make it XHTML compliant
- text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>', r'<hr \1 />', exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>', r'<hr \1 />',
+ exceptions)
# a header where only spaces are in the same line
for level in range(1, 7):
- equals = '\\1%s \\2 %s\\3' % ("="*level, "="*level)
- text = pywikibot.replaceExcept(text,
- r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])'%(level, level),
- r'%s'%equals, exceptions)
+ equals = '\\1%s \\2 %s\\3' % ("=" * level, "=" * level)
+ text = pywikibot.replaceExcept(
+ text,
+ r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])' % (level, level),
+ r'%s' % equals,
+ exceptions)
# TODO: maybe we can make the bot replace <p> tags with \r\n's.
return text
@@ -683,15 +706,19 @@
# it should be name = " or name=" NOT name ="
text = re.sub(r'(?i)<ref +name(= *| *=)"', r'<ref name="', text)
#remove empty <ref/>-tag
- text = pywikibot.replaceExcept(text, r'(?i)(<ref\s*/>|<ref *>\s*</ref>)', r'', exceptions)
- text = pywikibot.replaceExcept(text, r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>', r'<ref \1/>', exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)(<ref\s*/>|<ref *>\s*</ref>)',
+ r'', exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>',
+ r'<ref \1/>', exceptions)
return text
def fixStyle(self, text):
exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace']
# convert prettytable to wikitable class
if self.site.language in ('de', 'en'):
- text = pywikibot.replaceExcept(text, ur'(class="[^"]*)prettytable([^"]*")', ur'\1wikitable\2', exceptions)
+ text = pywikibot.replaceExcept(text,
+ ur'(class="[^"]*)prettytable([^"]*")',
+ ur'\1wikitable\2', exceptions)
return text
def fixTypo(self, text):
@@ -726,22 +753,23 @@
]
# valid digits
digits = {
- 'ckb' : u'٠١٢٣٤٥٦٧٨٩',
- 'fa' : u'۰۱۲۳۴۵۶۷۸۹'
+ 'ckb': u'٠١٢٣٤٥٦٧٨٩',
+ 'fa': u'۰۱۲۳۴۵۶۷۸۹',
}
new = digits.pop(self.site.lang)
# This only works if there are only two items in digits dict
old = digits[digits.keys()[0]]
- faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك'+u'۱۲۳۴۵۶۷۸۹۰'
+ faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa']
# do not change inside file links
namespaces = list(self.site.namespace(6, all=True))
pattern = re.compile(u'\[\[(' + '|'.join(namespaces) + '):.+?\.\w+? *(\|((\[\[.*?\]\])|.)*)?\]\]',
re.UNICODE)
#not to let bot edits in latin content
- exceptions.append(re.compile(u"[^"+faChrs+u"] *?\"*? *?, *?[^"+faChrs+u"]"))
+ exceptions.append(re.compile(u"[^%(fa)s] *?\"*? *?, *?[^%(fa)s]"
+ % {'fa': faChrs}))
exceptions.append(pattern)
text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
- if self.site.lang=='ckb':
+ if self.site.lang == 'ckb':
text = pywikibot.replaceExcept(text,
ur'ه([.،_<\]\s])',
ur'ە\1', exceptions)
@@ -760,7 +788,7 @@
# do not change digits inside html-tags
pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
exceptions.append(pattern)
- exceptions.append('table') #exclude tables for now
+ exceptions.append('table') # exclude tables for now
# replace digits
for i in xrange(0, 10):
text = pywikibot.replaceExcept(text, str(i), new[i], exceptions)