jenkins-bot has submitted this change and it was merged.
Change subject: Fix anomalous escape (\)
......................................................................
Fix anomalous escape (\)
Also fix checkimages to use given site, and add tests.
Change-Id: I8bcea1d5db3b5d0c2c9ada19382ada01f7339044
---
M .travis.yml
M pywikibot/__init__.py
M pywikibot/botirc.py
M pywikibot/i18n.py
M pywikibot/page.py
M pywikibot/pagegenerators.py
M pywikibot/textlib.py
M scripts/archivebot.py
M scripts/casechecker.py
M scripts/category.py
M scripts/checkimages.py
M scripts/cosmetic_changes.py
M scripts/image.py
M scripts/imagerecat.py
M scripts/interwiki.py
M scripts/maintenance/compat2core.py
M scripts/reflinks.py
M scripts/replace.py
M scripts/revertbot.py
M scripts/script_wui.py
M scripts/selflink.py
M scripts/solve_disambiguation.py
M scripts/template.py
M scripts/weblinkchecker.py
A tests/checkimages_tests.py
25 files changed, 140 insertions(+), 99 deletions(-)
Approvals:
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/.travis.yml b/.travis.yml
index addcc33..f0c970b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -38,6 +38,7 @@
- if [[ -n "$USER_PASSWORD" && -n "$PYWIKIBOT2_USERNAME" ]]; then echo "usernames['wikipedia']['en'] = '$PYWIKIBOT2_USERNAME'" >> ~/.pywikibot/user-config.py; fi
- if [[ -n "$USER_PASSWORD" && -n "$PYWIKIBOT2_USERNAME" ]]; then echo "usernames['wikipedia']['test'] = '$PYWIKIBOT2_USERNAME'" >> ~/.pywikibot/user-config.py; fi
- if [[ -n "$USER_PASSWORD" && -n "$PYWIKIBOT2_USERNAME" ]]; then echo "usernames['wikidata']['test'] = '$PYWIKIBOT2_USERNAME'" >> ~/.pywikibot/user-config.py; fi
+ - if [[ -n "$USER_PASSWORD" && -n "$PYWIKIBOT2_USERNAME" ]]; then echo "usernames['commons']['commons'] = '$PYWIKIBOT2_USERNAME'" >> ~/.pywikibot/user-config.py; fi
- if [[ -n "$USER_PASSWORD" && -n "$PYWIKIBOT2_USERNAME" ]]; then echo "('$PYWIKIBOT2_USERNAME', '$USER_PASSWORD')" > ~/.pywikibot/passwordfile; fi
diff --git a/pywikibot/__init__.py b/pywikibot/__init__.py
index b93f53f..b7161d8 100644
--- a/pywikibot/__init__.py
+++ b/pywikibot/__init__.py
@@ -398,7 +398,7 @@
@classmethod
def fromTimestr(cls, datetimestr, precision=14, before=0, after=0,
timezone=0, calendarmodel=None, site=None):
- match = re.match('([-+]?\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z',
+ match = re.match(r'([-+]?\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z',
datetimestr)
if not match:
raise ValueError(u"Invalid format: '%s'" % datetimestr)
diff --git a/pywikibot/botirc.py b/pywikibot/botirc.py
index ce5e1d3..e4868a1 100644
--- a/pywikibot/botirc.py
+++ b/pywikibot/botirc.py
@@ -50,8 +50,9 @@
self.channel = channel
self.site = site
self.other_ns = re.compile(
- u'14\[\[07(' + u'|'.join([item[0] for item in
- list(site.namespaces().values()) if item[0]]) +
u')')
+ u'\x0314\\[\\[\x0307(%s)'
+ % u'|'.join(item.custom_name for item in site.namespaces().values()
+ if item != 0))
self.api_url = self.site.apipath()
self.api_url +=
'?action=query&meta=siteinfo&siprop=statistics&format=xml'
self.api_found = re.compile(r'articles="(.*?)"')
diff --git a/pywikibot/i18n.py b/pywikibot/i18n.py
index bd9b418..f0bdb23 100644
--- a/pywikibot/i18n.py
+++ b/pywikibot/i18n.py
@@ -28,7 +28,7 @@
if sys.version_info[0] > 2:
basestring = (str, )
-PLURAL_PATTERN = '{{PLURAL:(?:%\()?([^\)]*?)(?:\)d)?\|(.*?)}}'
+PLURAL_PATTERN = r'{{PLURAL:(?:%\()?([^\)]*?)(?:\)d)?\|(.*?)}}'
# Package name for the translation messages
messages_package_name = 'scripts.i18n'
diff --git a/pywikibot/page.py b/pywikibot/page.py
index ea260c7..4a65bef 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -3296,7 +3296,7 @@
# to the namespace case=first-letter.
# Validate the title is 'Q' and a positive integer.
- if not re.match('^Q[1-9]\d*$', self._link.title):
+ if not re.match(r'^Q[1-9]\d*$', self._link.title):
raise pywikibot.InvalidTitle(
u"'%s' is not a valid item page title"
% self._link.title)
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 1a73e8f..9a66176 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -730,7 +730,7 @@
p = re.compile(r'(?<!\\),') # Match "," only if there no "\" before
temp = [] # Array to store split argument
for arg in p.split(claim):
- temp.append(arg.replace('\,', ',').split('='))
+ temp.append(arg.replace(r'\,', ',').split('='))
self.claimfilter_list.append((temp[0][0], temp[0][1],
dict(temp[1:]), ifnot))
return True
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 145ea5f..aa40e10 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -32,7 +32,8 @@
from pywikibot.tools import OrderedDict
TEMP_REGEX = re.compile(
- '{{(?:msg:)?(?P<name>[^{\|]+?)(?:\|(?P<params>[^{]+?(?:{[^{]+?}[^{]*?)?))?}}')
+ r'{{(?:msg:)?(?P<name>[^{\|]+?)(?:\|(?P<params>[^{]+?(?:{[^{]+?}[^{]*?)?))?}}')
+
NON_LATIN_DIGITS = {
'ckb': u'٠١٢٣٤٥٦٧٨٩',
'fa': u'۰۱۲۳۴۵۶۷۸۹',
@@ -174,8 +175,8 @@
marker1 = findmarker(text)
marker2 = findmarker(text, u'##', u'#')
Rvalue = re.compile('{{{.+?}}}')
- Rmarker1 = re.compile('%(mark)s(\d+)%(mark)s' % {'mark':
marker1})
- Rmarker2 = re.compile('%(mark)s(\d+)%(mark)s' % {'mark':
marker2})
+ Rmarker1 = re.compile(r'%(mark)s(\d+)%(mark)s' % {'mark':
marker1})
+ Rmarker2 = re.compile(r'%(mark)s(\d+)%(mark)s' % {'mark':
marker2})
# hide the flat template marker
dontTouchRegexes.append(Rmarker1)
origin = text
@@ -600,9 +601,9 @@
# Do we have a noinclude at the end of the template?
parts = s2.split(includeOff)
lastpart = parts[-1]
- if re.match('\s*%s' % marker, lastpart):
+ if re.match(r'\s*%s' % marker, lastpart):
# Put the langlinks back into the noinclude's
- regexp = re.compile('%s\s*%s' % (includeOff, marker))
+ regexp = re.compile(r'%s\s*%s' % (includeOff, marker))
newtext = regexp.sub(s + includeOff, s2)
else:
# Put the langlinks at the end, inside noinclude's
@@ -734,7 +735,7 @@
site=site)
if marker:
# avoid having multiple linefeeds at the end of the text
- text = re.sub('\s*%s' % re.escape(marker), config.LS + marker,
+ text = re.sub(r'\s*%s' % re.escape(marker), config.LS + marker,
text.strip())
return text.strip()
@@ -920,12 +921,12 @@
# Note: While allowing dots inside URLs, MediaWiki will regard
# dots at the end of the URL as not part of that URL.
# The same applies to comma, colon and some other characters.
- notAtEnd = '\]\s\.:;,<>"\|\)'
+ notAtEnd = r'\]\s\.:;,<>"\|\)'
# So characters inside the URL can be anything except whitespace,
# closing squared brackets, quotation marks, greater than and less
# than, and the last character also can't be parenthesis or another
# character disallowed by MediaWiki.
- notInside = '\]\s<>"'
+ notInside = r'\]\s<>"'
# The first half of this regular expression is required because '' is
# not allowed inside links. For example, in this wiki text:
# ''Please see https://www.example.org.''
@@ -1216,7 +1217,7 @@
"""
# match preceding colon for text links
- section = re.sub(r'\\\[\\\[(\\:)?', '\[\[\:?', re.escape(section))
+ section = re.sub(r'\\\[\\\[(\\:)?', r'\[\[\:?', re.escape(section))
# match underscores and white spaces
section = re.sub(r'\\?[ _]', '[ _]', section)
m = re.search("=+[ ']*%s[ ']*=+" % section, pagetext)
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index 588e3be..993c454 100644
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -186,7 +186,7 @@
'B' (bytes) or 'T' (threads).
"""
- r = re.search('(\d+) *([BkKMT]?)', string)
+ r = re.search(r'(\d+) *([BkKMT]?)', string)
val, unit = (int(r.group(1)), r.group(2))
if unit == 'M':
val *= 1024
diff --git a/scripts/casechecker.py b/scripts/casechecker.py
index e77e856..4a9a684 100644
--- a/scripts/casechecker.py
+++ b/scripts/casechecker.py
@@ -117,7 +117,7 @@
latClrFnt = u'<font color=brown>'
suffixClr = u'</font>'
- wordBreaker = re.compile(u'[ _\-/\|#[\]():]')
+ wordBreaker = re.compile(r'[ _\-/\|#[\]():]')
stripChars = u' \t,'
titles = True
diff --git a/scripts/category.py b/scripts/category.py
index 2236f11..4e2fcc2 100755
--- a/scripts/category.py
+++ b/scripts/category.py
@@ -303,7 +303,7 @@
site = pagelink.site
# regular expression that matches a name followed by a space and
# disambiguation brackets. Group 1 is the name without the rest.
- bracketsR = re.compile('(.*) \(.+?\)')
+ bracketsR = re.compile(r'(.*) \(.+?\)')
match_object = bracketsR.match(page_name)
if match_object:
page_name = match_object.group(1)
diff --git a/scripts/checkimages.py b/scripts/checkimages.py
index 7b94bdb..91cc078 100644
--- a/scripts/checkimages.py
+++ b/scripts/checkimages.py
@@ -561,6 +561,17 @@
# END OF CONFIGURATION.
+SETTINGS_REGEX = re.compile(r"""
+<-------\ ------->\n
+\*[Nn]ame\ ?=\ ?['"](.*?)['"]\n
+\*([Ff]ind|[Ff]indonly)\ ?=\ ?(.*?)\n
+\*[Ii]magechanges\ ?=\ ?(.*?)\n
+\*[Ss]ummary\ ?=\ ?['"](.*?)['"]\n
+\*[Hh]ead\ ?=\ ?['"](.*?)['"]\n
+\*[Tt]ext\ ?=\ ?['"](.*?)['"]\n
+\*[Mm]ex\ ?=\ ?['"]?([^\n]*?)['"]?\n
+""", re.UNICODE | re.DOTALL | re.VERBOSE)
+
class LogIsFull(pywikibot.Error):
@@ -603,7 +614,7 @@
self.comment = i18n.translate(self.site, msg_comm, fallback=True)
# Adding the bot's nickname at the notification text if needed.
botolist = i18n.translate(self.site, bot_list)
- project = pywikibot.Site().family.name
+ project = site.family.name
self.project = project
bot = config.usernames[project]
try:
@@ -1159,19 +1170,9 @@
self.settingsData = list()
try:
testo = wikiPage.get()
- r = re.compile(
- r"<------- ------->\n"
- "\*[Nn]ame ?= ?['\"](.*?)['\"]\n"
- "\*([Ff]ind|[Ff]indonly)=(.*?)\n"
- "\*[Ii]magechanges=(.*?)\n"
- "\*[Ss]ummary=['\"](.*?)['\"]\n"
- "\*[Hh]ead=['\"](.*?)['\"]\n"
- "\*[Tt]ext ?= ?['\"](.*?)['\"]\n"
- "\*[Mm]ex ?= ?['\"]?([^\n]*?)['\"]?\n",
- re.UNICODE | re.DOTALL)
number = 1
- for m in r.finditer(testo):
+ for m in SETTINGS_REGEX.finditer(testo):
name = str(m.group(1))
find_tipe = str(m.group(2))
find = str(m.group(3))
@@ -1185,7 +1186,7 @@
self.settingsData += [tupla]
number += 1
- if self.settingsData == list():
+ if not self.settingsData:
pywikibot.output(
u"You've set wrongly your settings, please take a "
u"look to the relative page. (run without them)")
diff --git a/scripts/cosmetic_changes.py b/scripts/cosmetic_changes.py
index 3d5f5b2..04a19e6 100755
--- a/scripts/cosmetic_changes.py
+++ b/scripts/cosmetic_changes.py
@@ -143,10 +143,10 @@
deprecatedTemplates = {
'wikipedia': {
'de': [
- (u'Belege', u'Belege fehlen\g<parameters>'),
- (u'Quelle', u'Belege fehlen\g<parameters>'),
- (u'Quellen', u'Belege fehlen\g<parameters>'),
- (u'Quellen fehlen', u'Belege fehlen\g<parameters>'),
+ (u'Belege', u'Belege fehlen\\g<parameters>'),
+ (u'Quelle', u'Belege fehlen\\g<parameters>'),
+ (u'Quellen', u'Belege fehlen\\g<parameters>'),
+ (u'Quellen fehlen', u'Belege fehlen\\g<parameters>'),
],
}
}
@@ -337,7 +337,7 @@
# Removing the stars' issue
starstext = textlib.removeDisabledParts(text)
for star in starsList:
- regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
+ regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
% star, re.I)
found = regex.findall(starstext)
if found != []:
@@ -431,8 +431,8 @@
text = textlib.replaceExcept(
text,
r'\[\[(?P<left>.+?:.+?\..+?\|) *(' +
'|'.join(aliases) +
- ') *(?P<right>(\|.*?)?\]\])',
- r'[[\g<left>' + aliases[0] + '\g<right>',
exceptions)
+ r') *(?P<right>(\|.*?)?\]\])',
+ r'[[\g<left>' + aliases[0] +
r'\g<right>', exceptions)
return text
def cleanUpLinks(self, text):
@@ -637,7 +637,7 @@
text = textlib.replaceExcept(
text,
r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
- '\g<bullet> \g<char>',
+ r'\g<bullet> \g<char>',
exceptions)
return text
@@ -809,8 +809,8 @@
# do not change inside file links
namespaces = list(self.site.namespace(6, all=True))
pattern = re.compile(
- u'\[\[(' + '|'.join(namespaces) +
- '):.+?\.\w+? *(\|((\[\[.*?\]\])|.)*)?\]\]',
+ u'\\[\\[(%s):.+?\\.\\w+? *(\\|((\\[\\[.*?\\]\\])|.)*)?\\]\\]'
+ % u'|'.join(namespaces),
re.UNICODE)
# not to let bot edits in latin content
exceptions.append(re.compile(u"[^%(fa)s] *?\"*? *?, *?[^%(fa)s]"
@@ -834,7 +834,7 @@
for i in range(0, 10):
text = textlib.replaceExcept(text, old[i], new[i], exceptions)
# do not change digits in class, style and table params
- pattern = re.compile(u'\w+=(".+?"|\d+)', re.UNICODE)
+ pattern = re.compile(r'\w+=(".+?"|\d+)', re.UNICODE)
exceptions.append(pattern)
# do not change digits inside html-tags
pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
diff --git a/scripts/image.py b/scripts/image.py
index 00758e4..fb26ad2 100644
--- a/scripts/image.py
+++ b/scripts/image.py
@@ -155,7 +155,10 @@
if self.new_image:
if not self.getOption('loose'):
- replacements.append((image_regex, '[[' +
self.site.image_namespace() + ':' + self.new_image +
'\g<parameters>]]'))
+ replacements.append((image_regex,
+ u'[[%s:%s\\g<parameters>]]'
+ % (self.site.image_namespace(),
+ self.new_image)))
else:
replacements.append((image_regex, self.new_image))
else:
diff --git a/scripts/imagerecat.py b/scripts/imagerecat.py
index ab8d03f..b911148 100644
--- a/scripts/imagerecat.py
+++ b/scripts/imagerecat.py
@@ -275,7 +275,7 @@
project = ''
article = ''
usageRe = re.compile(
-
'^(?P<lang>([\w-]+))\.(?P<project>([\w]+))\.org:(?P<articles>\s(.*))')
+
r'^(?P<lang>([\w-]+))\.(?P<project>([\w]+))\.org:(?P<articles>\s(.*))')
matches = usageRe.search(use)
if matches:
if matches.group('lang'):
@@ -377,7 +377,7 @@
toFilter = toFilter + "[[Category:" + cat + "]]\n"
parameters = urlencode({'source': toFilter.encode('utf-8'),
'bot': '1'})
- filterCategoriesRe = re.compile('\[\[Category:([^\]]*)\]\]')
+ filterCategoriesRe = re.compile(r'\[\[Category:([^\]]*)\]\]')
try:
filterCategoriesPage = urlopen(
"https://toolserver.org/~multichill/filtercats.php?%s" %
parameters)
@@ -416,10 +416,11 @@
def removeTemplates(oldtext=u''):
"""Remove {{Uncategorized}} and {{Check categories}}
templates."""
result = re.sub(
- u'\{\{\s*([Uu]ncat(egori[sz]ed( image)?)?|[Nn]ocat|[Nn]eedscategory)[^}]*\}\}', u'', oldtext)
+ r'{{\s*([Uu]ncat(egori[sz]ed( image)?)?|[Nn]ocat|[Nn]eedscategory)[^}]*}}',
+ u'', oldtext)
result = re.sub(u'<!-- Remove this line once you have added categories -->',
u'', result)
- result = re.sub(u'\{\{\s*[Cc]heck categories[^}]*\}\}', u'', result)
+ result = re.sub(r'\{\{\s*[Cc]heck categories[^}]*\}\}', u'', result)
return result
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index 93cb2f8..f17bc0c 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -1850,7 +1850,7 @@
interwikis = [pywikibot.Page(l) for l in page.iterlanglinks()]
# remove interwiki links to ignore
- for iw in re.finditer('<!-- *\[\[(.*?:.*?)\]\] *-->', pagetext):
+ for iw in re.finditer(r'<!-- *\[\[(.*?:.*?)\]\] *-->', pagetext):
try:
ignorepage = pywikibot.Page(page.site, iw.groups()[0])
if (new[ignorepage.site] == ignorepage) and \
diff --git a/scripts/maintenance/compat2core.py b/scripts/maintenance/compat2core.py
index a8b2c58..386d509 100644
--- a/scripts/maintenance/compat2core.py
+++ b/scripts/maintenance/compat2core.py
@@ -52,32 +52,33 @@
('import catlib\r?\n', ''),
('import userlib\r?\n', ''),
# change wikipedia to pywikibot, exclude URLs
- ('(?<!\.)wikipedia\.', u'pywikibot.'),
+ (r'(?<!\.)wikipedia\.', u'pywikibot.'),
# site instance call
- ('pywikibot\.getSite\s*\(\s*', 'pywikibot.Site('),
+ (r'pywikibot\.getSite\s*\(\s*', 'pywikibot.Site('),
# lang is different from code. We should use code in core
- ('([Ss])ite.lang(?:uage\(\))?', r'\1ite.code'),
+ (r'([Ss])ite.lang(?:uage\(\))?', r'\1ite.code'),
# change compat library classes to pywikibot intrinsic classes
- ('catlib\.Category\s*\(\s*', 'pywikibot.Category('),
-
('catlib\.change_category\s*\((\s*)(?P<article>.+?),\s*(?P<oldcat>.+?),',
+ (r'catlib\.Category\s*\(\s*', 'pywikibot.Category('),
+
(r'catlib\.change_category\s*\((\s*)(?P<article>.+?),\s*(?P<oldcat>.+?),',
r'\g<article>.change_category(\1\g<oldcat>,'),
- ('userlib\.User\s*\(\s*', 'pywikibot.User('),
+ (r'userlib\.User\s*\(\s*', 'pywikibot.User('),
# change ImagePage to FilePage
- ('pywikibot\.ImagePage\s*\(\s*', 'pywikibot.FilePage('),
+ (r'pywikibot\.ImagePage\s*\(\s*', 'pywikibot.FilePage('),
# deprecated title methods
- ('\.urlname\s*\(\s*\)', '.title(asUrl=True)'),
- ('\.urlname\s*\(\s*(?:withNamespace\s*=\s*)?(True|False)+\s*\)',
+ (r'\.urlname\s*\(\s*\)', '.title(asUrl=True)'),
+ (r'\.urlname\s*\(\s*(?:withNamespace\s*=\s*)?(True|False)+\s*\)',
r'.title(asUrl=True, withNamespace=\1)'),
- ('\.titleWithoutNamespace\s*\(\s*\)',
'.title(withNamespace=False)'),
- ('\.sectionFreeTitle\s*\(\s*\)', '.title(withSection=False)'),
- ('\.aslink\s*\(\s*\)', '.title(asLink=True)'),
+ (r'\.titleWithoutNamespace\s*\(\s*\)',
'.title(withNamespace=False)'),
+ (r'\.sectionFreeTitle\s*\(\s*\)', '.title(withSection=False)'),
+ (r'\.aslink\s*\(\s*\)', '.title(asLink=True)'),
# other deprecated methods
- ('(?<!site)\.encoding\s*\(\s*\)', '.site.encoding()'),
- ('\.newimages\s*\(', '.newfiles('),
+ (r'(?<!site)\.encoding\s*\(\s*\)', '.site.encoding()'),
+ (r'\.newimages\s*\(', '.newfiles('),
# new core methods
- ('\.get\s*\(\s*get_redirect\s*=\s*True\s*\)', '.text'),
+ (r'\.get\s*\(\s*get_redirect\s*=\s*True\s*\)', '.text'),
# stopme() is done by the framework itself
-
('(\s*)try\:\s*\r?\n\s+main\(\)\s*\r?\n\s*finally\:\s*\r?\n\s+pywikibot\.stopme\(\)',
+ (r'(\s*)try\:\s*\r?\n\s+main\(\)\s*\r?\n\s*finally\:\s*\r?\n'
+ r'\s+pywikibot\.stopme\(\)',
r'\1main()'),
)
diff --git a/scripts/reflinks.py b/scripts/reflinks.py
index f50083b..f7badef 100644
--- a/scripts/reflinks.py
+++ b/scripts/reflinks.py
@@ -122,7 +122,7 @@
# Extracts the domain name
domain = re.compile(r'^(\w+)://(?:www.|)([^/]+)')
-globalbadtitles = """
+globalbadtitles = r"""
# is
(test|
# starts with
@@ -304,11 +304,11 @@
def __init__(self):
# Match references
self.REFS = re.compile(
-
u'(?i)<ref(?P<params>[^>/]*)>(?P<content>.*?)</ref>')
+
r'(?i)<ref(?P<params>[^>/]*)>(?P<content>.*?)</ref>')
self.NAMES = re.compile(
-
u'(?i).*name\s*=\s*(?P<quote>"?)\s*(?P<name>.+)\s*(?P=quote).*')
+
r'(?i).*name\s*=\s*(?P<quote>"?)\s*(?P<name>.+)\s*(?P=quote).*')
self.GROUPS = re.compile(
-
u'(?i).*group\s*=\s*(?P<quote>"?)\s*(?P<group>.+)\s*(?P=quote).*')
+
r'(?i).*group\s*=\s*(?P<quote>"?)\s*(?P<group>.+)\s*(?P=quote).*')
self.autogen = i18n.twtranslate(pywikibot.Site(), 'reflinks-autogen')
def process(self, text):
@@ -401,7 +401,7 @@
if v[1]:
name = u'"%s"' % name
text = re.sub(
- u'<ref
name\s*=\s*(?P<quote>"?)\s*%s\s*(?P=quote)\s*/>' % k,
+ u'<ref
name\\s*=\\s*(?P<quote>"?)\\s*%s\\s*(?P=quote)\\s*/>' % k,
u'<ref name=%s />' % name, text)
return text
@@ -662,7 +662,7 @@
s = self.CHARSET.search(tag)
if s:
tmp = s.group('enc').strip("\"' ").lower()
- naked = re.sub('[ _\-]', '', tmp)
+ naked = re.sub(r'[ _\-]', '', tmp)
# Convert to python correct encoding names
if naked == "gb2312":
enc.append("gbk")
diff --git a/scripts/replace.py b/scripts/replace.py
index 95fe144..cbc9a43 100755
--- a/scripts/replace.py
+++ b/scripts/replace.py
@@ -627,9 +627,9 @@
def prepareRegexForMySQL(pattern):
"""Convert regex to MySQL syntax."""
- pattern = pattern.replace('\s', '[:space:]')
- pattern = pattern.replace('\d', '[:digit:]')
- pattern = pattern.replace('\w', '[:alnum:]')
+ pattern = pattern.replace(r'\s', '[:space:]')
+ pattern = pattern.replace(r'\d', '[:digit:]')
+ pattern = pattern.replace(r'\w', '[:alnum:]')
pattern = pattern.replace("'", "\\" + "'")
# pattern = pattern.replace('\\', '\\\\')
diff --git a/scripts/revertbot.py b/scripts/revertbot.py
index 161263e..4f99c73 100644
--- a/scripts/revertbot.py
+++ b/scripts/revertbot.py
@@ -131,7 +131,7 @@
if 'top' in item:
page = pywikibot.Page(self.site, item['title'])
text = page.get(get_redirect=True)
- pattern = re.compile(u'\[\[.+?:.+?\..+?\]\]', re.UNICODE)
+ pattern = re.compile(r'\[\[.+?:.+?\..+?\]\]', re.UNICODE)
return pattern.search(text) >= 0
return False
diff --git a/scripts/script_wui.py b/scripts/script_wui.py
index 7250589..fd6e792 100755
--- a/scripts/script_wui.py
+++ b/scripts/script_wui.py
@@ -280,7 +280,7 @@
# (might be a problem here for TS and SGE, output string has another encoding)
if False:
buffer = buffer.decode(pywikibot.config.console_encoding)
- buffer = re.sub("\03\{(.*?)\}(.*?)\03\{default\}", "\g<2>",
buffer)
+ buffer = re.sub(r'\03\{(.*?)\}(.*?)\03\{default\}', r'\g<2>',
buffer)
if rev is None:
rev = page.latestRevision()
link = page.permalink(oldid=rev)
diff --git a/scripts/selflink.py b/scripts/selflink.py
index 1687c97..734cf39 100644
--- a/scripts/selflink.py
+++ b/scripts/selflink.py
@@ -53,7 +53,7 @@
self.linkR = re.compile(
r'\[\[(?P<title>[^\]\|#]*)'
r'(?P<section>#[^\]\|]*)?'
- '(\|(?P<label>[^\]]*))?\]\]'
+ r'(\|(?P<label>[^\]]*))?\]\]'
r'(?P<linktrail>' + linktrail + ')')
def handleNextLink(self, page, match, context=100):
diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py
index f25f617..f36b28e 100644
--- a/scripts/solve_disambiguation.py
+++ b/scripts/solve_disambiguation.py
@@ -202,7 +202,7 @@
'en': [
u'Wikipedia:Links to disambiguating pages',
u'Wikipedia:Disambiguation pages with links',
- u'Wikipedia:Multiple-place names \([A-Z]\)',
+ u'Wikipedia:Multiple-place names \\([A-Z]\\)',
u'Wikipedia:Non-unique personal name',
u"User:Jerzy/Disambiguation Pages i've Editted",
u'User:Gareth Owen/inprogress',
@@ -252,7 +252,7 @@
# hu:Wikipédia:Kocsmafal (egyéb)#Hol nem kell egyértelműsíteni?
# 2012-02-08
u'Wikipédia:(?!Sportműhely/Eddigi cikkeink).*',
- u'.*\(egyértelműsítő lap\)$',
+ u'.*\\(egyértelműsítő lap\\)$',
u'.*[Vv]ita:.*',
u'Szerkesztő:[^/]+$',
],
@@ -293,7 +293,7 @@
u'Overleg Wikipedia:Logboek.*',
u'Wikipedia:Logboek.*',
u'Overleg gebruiker:Sybren/test.*',
- u'Overleg
gebruiker:[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?',
+ u'Overleg gebruiker:([0-9][0-9]?[0-9]?\\.){3}[0-9][0-9]?[0-9]?',
u'Overleg:Lage Landen (staatkunde)',
u'Wikipedia:.*[aA]rchief.*',
u'Wikipedia:Doorverwijspagina',
@@ -757,7 +757,7 @@
search_text = text[m.end():m.end() + context]
# figure out where the link (and sentance) ends, put note
# there
- end_of_word_match = re.search("\s", search_text)
+ end_of_word_match = re.search(r'\s', search_text)
if end_of_word_match:
position_split = end_of_word_match.start(0)
else:
diff --git a/scripts/template.py b/scripts/template.py
index de98b32..25aad55 100755
--- a/scripts/template.py
+++ b/scripts/template.py
@@ -265,11 +265,11 @@
if self.getOption('subst') and self.getOption('remove'):
replacements.append((templateRegex,
- '{{subst:%s\g<parameters>}}' % new))
+ r'{{subst:%s\g<parameters>}}' % new))
exceptions['inside-tags'] = ['ref', 'gallery']
elif self.getOption('subst'):
replacements.append((templateRegex,
- '{{subst:%s\g<parameters>}}' % old))
+ r'{{subst:%s\g<parameters>}}' % old))
exceptions['inside-tags'] = ['ref', 'gallery']
elif self.getOption('remove'):
replacements.append((templateRegex, ''))
@@ -281,7 +281,7 @@
default=False, automatic_quit=False):
continue
replacements.append((templateRegex,
- '{{%s\g<parameters>}}' % new))
+ r'{{%s\g<parameters>}}' % new))
replaceBot = replace.ReplaceRobot(self.generator, replacements,
exceptions,
acceptall=self.getOption('always'),
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index 4c36069..968d763 100644
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -128,26 +128,30 @@
# Officially reserved for testing, documentation, etc. in
# https://tools.ietf.org/html/rfc2606#page-2
# top-level domains:
- re.compile('.*[\./@]test(/.*)?'),
- re.compile('.*[\./@]example(/.*)?'),
- re.compile('.*[\./@]invalid(/.*)?'),
- re.compile('.*[\./@]localhost(/.*)?'),
+ re.compile(r'.*[\./@]test(/.*)?'),
+ re.compile(r'.*[\./@]example(/.*)?'),
+ re.compile(r'.*[\./@]invalid(/.*)?'),
+ re.compile(r'.*[\./@]localhost(/.*)?'),
# second-level domains:
- re.compile('.*[\./@]example\.com(/.*)?'),
- re.compile('.*[\./@]example\.net(/.*)?'),
- re.compile('.*[\./@]example\.org(/.*)?'),
+ re.compile(r'.*[\./@]example\.com(/.*)?'),
+ re.compile(r'.*[\./@]example\.net(/.*)?'),
+ re.compile(r'.*[\./@]example\.org(/.*)?'),
# Other special cases
- re.compile('.*[\./@]gso\.gbv\.de(/.*)?'), # bot somehow can't handle their redirects
- re.compile('.*[\./@]berlinonline\.de(/.*)?'),
+ # bot somehow can't handle their redirects:
+ re.compile(r'.*[\./@]gso\.gbv\.de(/.*)?'),
+ re.compile(r'.*[\./@]berlinonline\.de(/.*)?'),
# above entry to be manually fixed per request at [[de:Benutzer:BLueFiSH.as/BZ]]
- re.compile('.*[\./@]bodo\.kommune\.no(/.*)?'), # bot can't handle their redirects
- re.compile('.*[\./@]jpl\.nasa\.gov(/.*)?'), # bot rejected on the site
- re.compile('.*[\./@]itis\.gov(/.*)?'), # bot rejected on the site
- re.compile('.*[\./@]cev\.lu(/.*)?'), # bot rejected on the site
- re.compile('.*[\./@]science\.ksc\.nasa\.gov(/.*)?'), # very slow response resulting in bot error
- re.compile('.*[\./@]britannica\.com(/.*)?'), # HTTP redirect loop
- re.compile('.*[\./@]quickfacts\.census\.gov(/.*)?'), # bot rejected on the site
+ # bot can't handle their redirects:
+ re.compile(r'.*[\./@]bodo\.kommune\.no(/.*)?'),
+ re.compile(r'.*[\./@]jpl\.nasa\.gov(/.*)?'), # bot rejected on the site
+ re.compile(r'.*[\./@]itis\.gov(/.*)?'), # bot rejected on the site
+ re.compile(r'.*[\./@]cev\.lu(/.*)?'), # bot rejected on the site
+ # very slow response resulting in bot error:
+ re.compile(r'.*[\./@]science\.ksc\.nasa\.gov(/.*)?'),
+ re.compile(r'.*[\./@]britannica\.com(/.*)?'), # HTTP redirect loop
+ # bot rejected on the site:
+ re.compile(r'.*[\./@]quickfacts\.census\.gov(/.*)?'),
]
diff --git a/tests/checkimages_tests.py b/tests/checkimages_tests.py
new file mode 100644
index 0000000..4c0b10e
--- /dev/null
+++ b/tests/checkimages_tests.py
@@ -0,0 +1,28 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""Unit tests for checkimages script."""
+
+from scripts import checkimages
+
+from tests.aspects import unittest, TestCase
+
+
+class TestSettings(TestCase):
+
+ """Test checkimages settings."""
+
+ family = 'commons'
+ code = 'commons'
+ user = True
+
+ def test_load(self):
+ """Test loading settings."""
+ b = checkimages.checkImagesBot(self.get_site())
+ rv = b.takesettings()
+ item1 = rv[0]
+ self.assertEqual(item1[0], 1)
+ self.assertEqual(item1[1], 'a deprecated template')
+
+
+if __name__ == "__main__":
+ unittest.main()
--
To view, visit
https://gerrit.wikimedia.org/r/189914
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I8bcea1d5db3b5d0c2c9ada19382ada01f7339044
Gerrit-PatchSet: 7
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Siebrand <siebrand(a)kitano.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>