jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/342324 )
Change subject: Use raw strings in regular expressions
......................................................................
Use raw strings in regular expressions
Change-Id: I0b781837f74e79513a507123d8078da6acc82957
---
M pywikibot/proofreadpage.py
M pywikibot/userinterfaces/terminal_interface_base.py
M pywikibot/userinterfaces/terminal_interface_unix.py
M scripts/checkimages.py
M scripts/commonscat.py
M scripts/flickrripper.py
M scripts/imagecopy.py
M scripts/imagecopy_self.py
M scripts/imagerecat.py
M scripts/panoramiopicker.py
M scripts/table2wiki.py
M tests/site_tests.py
12 files changed, 158 insertions(+), 158 deletions(-)
Approvals:
  jenkins-bot: Verified
  Xqt: Looks good to me, approved
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py index 84460db..4034cb5 100644 --- a/pywikibot/proofreadpage.py +++ b/pywikibot/proofreadpage.py @@ -507,7 +507,7 @@
def _parse_redlink(self, href): """Parse page title when link in Index is a redlink.""" - p_href = re.compile('/w/index.php?title=(.+?)&action=edit&redlink=1') + p_href = re.compile(r'/w/index.php?title=(.+?)&action=edit&redlink=1') title = p_href.search(href) if title: return title.group(1) diff --git a/pywikibot/userinterfaces/terminal_interface_base.py b/pywikibot/userinterfaces/terminal_interface_base.py index f0c20cb..c37cc9e 100755 --- a/pywikibot/userinterfaces/terminal_interface_base.py +++ b/pywikibot/userinterfaces/terminal_interface_base.py @@ -60,7 +60,7 @@
"""Base for terminal user interfaces."""
- split_col_pat = re.compile('(\w+);?(\w+)?') + split_col_pat = re.compile(r'(\w+);?(\w+)?')
def __init__(self): """ diff --git a/pywikibot/userinterfaces/terminal_interface_unix.py b/pywikibot/userinterfaces/terminal_interface_unix.py index 5f94858..95290b1 100755 --- a/pywikibot/userinterfaces/terminal_interface_unix.py +++ b/pywikibot/userinterfaces/terminal_interface_unix.py @@ -45,7 +45,7 @@
def make_unix_bg_color(self, color): """Obtain background color from foreground color.""" - code = re.search('(?<=[)\d+', color).group() + code = re.search(r'(?<=[)\d+', color).group() return chr(27) + '[' + str(int(code) + 10) + 'm'
def encounter_color(self, color, target_stream): diff --git a/scripts/checkimages.py b/scripts/checkimages.py index a56bd56..a8917d3 100755 --- a/scripts/checkimages.py +++ b/scripts/checkimages.py @@ -1451,7 +1451,7 @@ summary = tupla[5] head_2 = tupla[6] if head_2.count('==') == 2: - head_2 = re.findall('\s*== *(.+?) *==\s*', head_2)[0] + head_2 = re.findall(r'\s*== *(.+?) *==\s*', head_2)[0] text = tupla[7] % self.imageName mexCatched = tupla[8] for k in find_list: diff --git a/scripts/commonscat.py b/scripts/commonscat.py index e43aa44..44a62d7 100755 --- a/scripts/commonscat.py +++ b/scripts/commonscat.py @@ -352,17 +352,17 @@ oldcat.lower() in page.title().lower()): linktitle = oldcat if linktitle and newcat != page.title(withNamespace=False): - newtext = re.sub(u'(?i){{%s|?[^{}]*(?:{{.*}})?}}' + newtext = re.sub(r'(?i){{%s|?[^{}]*(?:{{.*}})?}}' % oldtemplate, u'{{%s|%s|%s}}' % (newtemplate, newcat, linktitle), page.get()) elif newcat == page.title(withNamespace=False): - newtext = re.sub(u'(?i){{%s|?[^{}]*(?:{{.*}})?}}' + newtext = re.sub(r'(?i){{%s|?[^{}]*(?:{{.*}})?}}' % oldtemplate, u'{{%s}}' % newtemplate, page.get()) elif oldcat.strip() != newcat: # strip trailing white space - newtext = re.sub(u'(?i){{%s|?[^{}]*(?:{{.*}})?}}' + newtext = re.sub(r'(?i){{%s|?[^{}]*(?:{{.*}})?}}' % oldtemplate, u'{{%s|%s}}' % (newtemplate, newcat), page.get()) diff --git a/scripts/flickrripper.py b/scripts/flickrripper.py index 9782f91..9fd15da 100755 --- a/scripts/flickrripper.py +++ b/scripts/flickrripper.py @@ -224,23 +224,23 @@
""" title = title.strip() - title = re.sub(u"[<{\[]", u"(", title) - title = re.sub(u"[>}\]]", u")", title) - title = re.sub(u"[ _]?\(!\)", u"", title) - title = re.sub(u",:[ _]", u", ", title) - title = re.sub(u"[;:][ _]", u", ", title) - title = re.sub(u"[\t\n ]+", u" ", title) - title = re.sub(u"[\r\n ]+", u" ", title) - title = re.sub(u"[\n]+", u"", title) - title = re.sub(u"[?!]([."]|$)", u"\1", title) - title = re.sub(u"[&#%?!]", u"^", title) - title = re.sub(u"[;]", u",", title) - title = re.sub(u"[/+\\:]", u"-", title) - title = re.sub(u"--+", u"-", title) - title = re.sub(u",,+", u",", title) - title = re.sub(u"[-,^]([.]|$)", u"\1", title) - title = title.replace(u" ", u"_") - title = title.strip(u"_") + title = re.sub(r'[<{[]', '(', title) + title = re.sub(r'[>}]]', ')', title) + title = re.sub(r'[ _]?(!)', '', title) + title = re.sub(',:[ _]', ', ', title) + title = re.sub('[;:][ _]', ', ', title) + title = re.sub(r'[\t\n ]+', ' ', title) + title = re.sub(r'[\r\n ]+', ' ', title) + title = re.sub('[\n]+', '', title) + title = re.sub('[?!]([."]|$)', r'\1', title) + title = re.sub('[&#%?!]', '^', title) + title = re.sub('[;]', ',', title) + title = re.sub(r'[/+\:]', '-', title) + title = re.sub('--+', '-', title) + title = re.sub(',,+', ',', title) + title = re.sub('[-,^]([.]|$)', r'\1', title) + title = title.replace(' ', '_') + title = title.strip('_') return title
diff --git a/scripts/imagecopy.py b/scripts/imagecopy.py index fc5b71e..49ce90b 100644 --- a/scripts/imagecopy.py +++ b/scripts/imagecopy.py @@ -310,8 +310,8 @@ if self.imagePage.site.lang in moveToCommonsTemplate: for moveTemplate in moveToCommonsTemplate[ self.imagePage.site.lang]: - imtxt = re.sub(u'(?i){{' + moveTemplate + u'[^}]*}}', - u'', imtxt) + imtxt = re.sub(r'(?i){{' + moveTemplate + r'[^}]*}}', + '', imtxt)
# add {{NowCommons}} if self.imagePage.site.lang in nowCommonsTemplate: diff --git a/scripts/imagecopy_self.py b/scripts/imagecopy_self.py index 1e5018a..0f2e12f 100644 --- a/scripts/imagecopy_self.py +++ b/scripts/imagecopy_self.py @@ -165,99 +165,99 @@
licenseTemplates = { 'de': [ - ('{{Bild-CC-by-sa/3.0/de}}[\s\r\n]*{{Bild-CC-by-sa/3.0}}' - '[\s\r\n]*{{Bild-GFDL-Neu}}', + (r'{{Bild-CC-by-sa/3.0/de}}[\s\r\n]*{{Bild-CC-by-sa/3.0}}' + r'[\s\r\n]*{{Bild-GFDL-Neu}}', '{{Self|Cc-by-sa-3.0-de|Cc-by-sa-3.0|GFDL|author=' '[[:%(lang)s:User:%(author)s|%(author)s]] at [http://%(lang)s.%' '(family)s.org %(lang)s.%(family)s]}}'), - ('{{Bild-GFDL}}[\s\r\n]*{{Bild-CC-by-sa/(\d.\d)}}', - '{{Self|GFDL|Cc-by-sa-3.0-migrated|Cc-by-sa-\1|author=' + (r'{{Bild-GFDL}}[\s\r\n]*{{Bild-CC-by-sa/(\d.\d)}}', + r'{{Self|GFDL|Cc-by-sa-3.0-migrated|Cc-by-sa-\1|author=' '[[:%(lang)s:User:%(author)s|%(author)s]] at ' '[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), - ('{{Bild-GFDL}}', + (r'{{Bild-GFDL}}', '{{Self|GFDL|Cc-by-sa-3.0-migrated|author=' '[[:%(lang)s:User:%(author)s|%(author)s]] at ' '[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), - ('{{Bild-CC-by-sa/(\d.\d)}}', - '{{Self|Cc-by-sa-\1|author=[[:%(lang)s:User:%(author)s|%(author)s]] ' + (r'{{Bild-CC-by-sa/(\d.\d)}}', + r'{{Self|Cc-by-sa-\1|author=[[:%(lang)s:User:%(author)s|%(author)s]] ' 'at [http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), - ('{{Bild-CC-by-sa/(\d.\d)/de}}', - '{{Self|Cc-by-sa-\1-de|author=' + (r'{{Bild-CC-by-sa/(\d.\d)/de}}', + r'{{Self|Cc-by-sa-\1-de|author=' '[[:%(lang)s:User:%(author)s|%(author)s]] at ' '[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), - ('{{Bild-CC-by/(\d.\d)}}', - '{{Self|Cc-by-\1|author=[[:%(lang)s:User:%(author)s|%(author)s]] at ' + (r'{{Bild-CC-by/(\d.\d)}}', + r'{{Self|Cc-by-\1|author=[[:%(lang)s:User:%(author)s|%(author)s]] at ' '[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), - ('{{Bild-CC-by/(\d.\d)/de}}', - '{{Self|Cc-by-\1-de|author=[[:%(lang)s:User:%(author)s|%(author)s]] ' + (r'{{Bild-CC-by/(\d.\d)/de}}', + r'{{Self|Cc-by-\1-de|author=[[:%(lang)s:User:%(author)s|%(author)s]] ' 'at [http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), ], 'en': [ - ('{{(self|self2)|([^}]+)}}', - 
'{{Self|\2|author=[[:%(lang)s:User:%(author)s|%(author)s]] at ' + (r'{{(self|self2)|([^}]+)}}', + r'{{Self|\2|author=[[:%(lang)s:User:%(author)s|%(author)s]] at ' '[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), - ('{{(GFDL-self|GFDL-self-no-disclaimers)|([^}]+)}}', - '{{Self|GFDL|\2|author=[[:%(lang)s:User:%(author)s|%(author)s]] at ' + (r'{{(GFDL-self|GFDL-self-no-disclaimers)|([^}]+)}}', + r'{{Self|GFDL|\2|author=[[:%(lang)s:User:%(author)s|%(author)s]] at ' '[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), - ('{{GFDL-self-with-disclaimers|([^}]+)}}', - '{{Self|GFDL-with-disclaimers|\1|author=' + (r'{{GFDL-self-with-disclaimers|([^}]+)}}', + r'{{Self|GFDL-with-disclaimers|\1|author=' '[[:%(lang)s:User:%(author)s|%(author)s]] at ' '[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), - ('{{PD-self(|date=[^}]+)?}}', + (r'{{PD-self(|date=[^}]+)?}}', '{{PD-user-w|%(lang)s|%(family)s|%(author)s}}'), - ('{{Multilicense replacing placeholder' - '(|[^}|=]+=[^}|]+)*(?P<migration>|[^}|=]+=[^}|]+)' - '(|[^}|=]+=[^}|]+)*}}', - '{{Self|GFDL|Cc-by-sa-2.5,2.0,1.0\g<migration>|author=' + (r'{{Multilicense replacing placeholder' + r'(|[^}|=]+=[^}|]+)*(?P<migration>|[^}|=]+=[^}|]+)' + r'(|[^}|=]+=[^}|]+)*}}', + r'{{Self|GFDL|Cc-by-sa-2.5,2.0,1.0\g<migration>|author=' '[[:%(lang)s:User:%(author)s|%(author)s]] at ' '[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), - ('{{Multilicense replacing placeholder new(|class=[^}]+)?}}', + (r'{{Multilicense replacing placeholder new(|class=[^}]+)?}}', '{{Self|GFDL|Cc-by-sa-3.0,2.5,2.0,1.0|author=' '[[:%(lang)s:User:%(author)s|%(author)s]] at' '[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), ], 'lb': [ - ('{{(self|self2)|([^}]+)}}', - '{{Self|\2|author=' + (r'{{(self|self2)|([^}]+)}}', + r'{{Self|\2|author=' '[[:%(lang)s:User:%(author)s|%(author)s]] at ' '[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'), ], 'nds-nl': [ - ('{{PD-eigenwark}}', + (r'{{PD-eigenwark}}', 
'{{PD-user-w|%(lang)s|%(family)s|%(author)s}}'), ], 'shared': [ - ('{{(self|self2)|([^}]+)}}', - '{{Self|\2|author=%(author)s at old wikivoyage shared}}'), + (r'{{(self|self2)|([^}]+)}}', + r'{{Self|\2|author=%(author)s at old wikivoyage shared}}'), ], }
sourceGarbage = { - 'de': [u'==\s*Beschreibung,\sQuelle\s*==', - u'==\s*Beschrieving\s*==', - u'==\s*[[Wikipedia:Lizenzvorlagen für Bilder|Lizenz]]\s*==', + 'de': [r'==\s*Beschreibung,\sQuelle\s*==', + r'==\s*Beschrieving\s*==', + r'==\s*[[Wikipedia:Lizenzvorlagen für Bilder|Lizenz]]\s*==', ], - 'en': [u'==\s*Description\s*==', - u'==\s*Summary\s*==', - u'==\s*Licensing:?\s*==', - u'{{' - u'(Copy to Wikimedia Commons|Move to Commons|Move to commons|' - u'Move to Wikimedia Commons|Copy to commons|Mtc|MtC|MTC|CWC|CtWC|' - u'CTWC|Ctwc|Tocommons|Copy to Commons|To Commons|Movetocommons|' - u'Move to Wikimedia commons|Move-to-commons|Commons ok|ToCommons|' - u'To commons|MoveToCommons|Copy to wikimedia commons|' - u'Upload to commons|CopyToCommons|Copytocommons|MITC|MovetoCommons|' - u'Do move to Commons|Orphan image)' - u'(|[^}]+)?}}' + 'en': [r'==\s*Description\s*==', + r'==\s*Summary\s*==', + r'==\s*Licensing:?\s*==', + r'{{' + '(Copy to Wikimedia Commons|Move to Commons|Move to commons|' + 'Move to Wikimedia Commons|Copy to commons|Mtc|MtC|MTC|CWC|CtWC|' + 'CTWC|Ctwc|Tocommons|Copy to Commons|To Commons|Movetocommons|' + 'Move to Wikimedia commons|Move-to-commons|Commons ok|ToCommons|' + 'To commons|MoveToCommons|Copy to wikimedia commons|' + 'Upload to commons|CopyToCommons|Copytocommons|MITC|MovetoCommons|' + 'Do move to Commons|Orphan image)' + r'(|[^}]+)?}}' ], - 'lb': [u'==\s*Résumé\s*==', - u'==\s*Lizenz:\s*==', + 'lb': [r'==\s*Résumé\s*==', + r'==\s*Lizenz:\s*==', ], - 'nds-nl': [u'==\s*Licentie\s*==', - u'{{DEFAULTSORT:{{PAGENAME}}}}', + 'nds-nl': [r'==\s*Licentie\s*==', + r'{{DEFAULTSORT:{{PAGENAME}}}}', ], - 'shared': [u'==\s*Beschreibung,\sQuelle\s*==', - u'==\s*Licensing:?\s*==', + 'shared': [r'==\s*Beschreibung,\sQuelle\s*==', + r'==\s*Licensing:?\s*==', ], }
diff --git a/scripts/imagerecat.py b/scripts/imagerecat.py index cf098df..96d68ff 100755 --- a/scripts/imagerecat.py +++ b/scripts/imagerecat.py @@ -151,10 +151,10 @@ return [], [], []
commonsenseRe = re.compile( - '^#COMMONSENSE(.*)#USAGE(\s)+((?P<usagenum>(\d)+))\s(?P<usage>(.*))\s' - '#KEYWORDS(\s)+((?P<keywords>(\d)+))(.*)' - '#CATEGORIES(\s)+((?P<catnum>(\d)+))\s(?P<cats>(.*))\s' - '#GALLERIES(\s)+((?P<galnum>(\d)+))\s(?P<gals>(.*))\s(.*)#EOF$', + r'^#COMMONSENSE(.*)#USAGE(\s)+((?P<usagenum>(\d)+))\s(?P<usage>(.*))\s' + r'#KEYWORDS(\s)+((?P<keywords>(\d)+))(.*)' + r'#CATEGORIES(\s)+((?P<catnum>(\d)+))\s(?P<cats>(.*))\s' + r'#GALLERIES(\s)+((?P<galnum>(\d)+))\s(?P<gals>(.*))\s(.*)#EOF$', re.MULTILINE + re.DOTALL)
gotInfo = False diff --git a/scripts/panoramiopicker.py b/scripts/panoramiopicker.py index 6108ff3..4c973f1 100644 --- a/scripts/panoramiopicker.py +++ b/scripts/panoramiopicker.py @@ -134,22 +134,22 @@ Otherwise the title of the page might not be allowed by the software. """ title = title.strip() - title = re.sub(u"[<{\[]", u"(", title) - title = re.sub(u"[>}\]]", u")", title) - title = re.sub(u"[ _]?\(!\)", u"", title) - title = re.sub(u",:[ _]", u", ", title) - title = re.sub(u"[;:][ _]", u", ", title) - title = re.sub(u"[\t\n ]+", u" ", title) - title = re.sub(u"[\r\n ]+", u" ", title) - title = re.sub(u"[\n]+", u"", title) - title = re.sub(u"[?!]([."]|$)", u"\1", title) - title = re.sub(u"[&#%?!]", u"^", title) - title = re.sub(u"[;]", u",", title) - title = re.sub(u"[/+\\:]", u"-", title) - title = re.sub(u"--+", u"-", title) - title = re.sub(u",,+", u",", title) - title = re.sub(u"[-,^]([.]|$)", u"\1", title) - title = title.replace(u" ", u"_") + title = re.sub(r'[<{[]', '(', title) + title = re.sub(r'[>}]]', ')', title) + title = re.sub(r'[ _]?(!)', '', title) + title = re.sub(',:[ _]', ', ', title) + title = re.sub('[;:][ _]', ', ', title) + title = re.sub(r'[\t\n ]+', ' ', title) + title = re.sub(r'[\r\n ]+', ' ', title) + title = re.sub('[\n]+', '', title) + title = re.sub('[?!]([."]|$)', r'\1', title) + title = re.sub('[&#%?!]', '^', title) + title = re.sub('[;]', ',', title) + title = re.sub(r'[/+\:]', '-', title) + title = re.sub('--+', '-', title) + title = re.sub(',,+', ',', title) + title = re.sub('[-,^]([.]|$)', r'\1', title) + title = title.replace(' ', '_') return title
diff --git a/scripts/table2wiki.py b/scripts/table2wiki.py index d406699..87aa6dc 100644 --- a/scripts/table2wiki.py +++ b/scripts/table2wiki.py @@ -118,8 +118,8 @@ # bring every <tag> into one single line. num = 1 while num != 0: - newTable, num = re.subn("([^\r\n]{1})(<[tT]{1}[dDhHrR]{1})", - r"\1\r\n\2", newTable) + newTable, num = re.subn(r'([^\r\n]{1})(<[tT]{1}[dDhHrR]{1})', + r'\1\r\n\2', newTable)
################## # every open-tag gets a new line. @@ -128,45 +128,45 @@ # Note that we added the ## characters in markActiveTables(). # <table> tag with attributes, with more text on the same line newTable = re.sub( - "(?i)[\r\n]*?<##table## (?P<attr>[\w\W]*?)>(?P<more>[\w\W]*?)[\r\n ]*", - r"\r\n{| \g<attr>\r\n\g<more>", newTable) + r'(?i)[\r\n]*?<##table## (?P<attr>[\w\W]*?)>(?P<more>[\w\W]*?)[\r\n ]*', + r'\r\n{| \g<attr>\r\n\g<more>', newTable) # <table> tag without attributes, with more text on the same line - newTable = re.sub("(?i)[\r\n]*?<##table##>(?P<more>[\w\W]*?)[\r\n ]*", - r"\r\n{|\n\g<more>\r\n", newTable) + newTable = re.sub(r'(?i)[\r\n]*?<##table##>(?P<more>[\w\W]*?)[\r\n ]*', + r'\r\n{|\n\g<more>\r\n', newTable) # <table> tag with attributes, without more text on the same line - newTable = re.sub("(?i)[\r\n]*?<##table## (?P<attr>[\w\W]*?)>[\r\n ]*", - r"\r\n{| \g<attr>\r\n", newTable) + newTable = re.sub(r'(?i)[\r\n]*?<##table## (?P<attr>[\w\W]*?)>[\r\n ]*', + r'\r\n{| \g<attr>\r\n', newTable) # <table> tag without attributes, without more text on the same line - newTable = re.sub("(?i)[\r\n]*?<##table##>[\r\n ]*", - "\r\n{|\r\n", newTable) + newTable = re.sub(r'(?i)[\r\n]*?<##table##>[\r\n ]*', + '\r\n{|\r\n', newTable) # end </table> - newTable = re.sub("(?i)[\s]*</##table##>", - "\r\n|}", newTable) + newTable = re.sub(r'(?i)[\s]*</##table##>', + '\r\n|}', newTable)
################## # caption with attributes newTable = re.sub( - "(?i)<caption (?P<attr>[\w\W]*?)>(?P<caption>[\w\W]*?)</caption>", - r"\r\n|+\g<attr> | \g<caption>", newTable) + r'(?i)<caption (?P<attr>[\w\W]*?)>(?P<caption>[\w\W]*?)</caption>', + r'\r\n|+\g<attr> | \g<caption>', newTable) # caption without attributes - newTable = re.sub("(?i)<caption>(?P<caption>[\w\W]*?)</caption>", - r"\r\n|+ \g<caption>", newTable) + newTable = re.sub(r'(?i)<caption>(?P<caption>[\w\W]*?)</caption>', + r'\r\n|+ \g<caption>', newTable)
################## # <th> often people don't write them within <tr>, be warned! # <th> with attributes newTable = re.sub( - "(?i)[\r\n]+<th(?P<attr> [^>]*?)>(?P<header>[\w\W]*?)</th>", + r"(?i)[\r\n]+<th(?P<attr> [^>]*?)>(?P<header>[\w\W]*?)</th>", r"\r\n!\g<attr> | \g<header>\r\n", newTable)
# <th> without attributes - newTable = re.sub("(?i)[\r\n]+<th>(?P<header>[\w\W]*?)</th>", - r"\r\n! \g<header>\r\n", newTable) + newTable = re.sub(r"(?i)[\r\n]+<th>(?P<header>[\w\W]*?)</th>", + r'\r\n! \g<header>\r\n', newTable)
# fail save. sometimes people forget </th> # <th> without attributes, without closing </th> - newTable, n = re.subn("(?i)[\r\n]+<th>(?P<header>[\w\W]*?)[\r\n]+", - r"\r\n! \g<header>\r\n", newTable) + newTable, n = re.subn(r'(?i)[\r\n]+<th>(?P<header>[\w\W]*?)[\r\n]+', + r'\r\n! \g<header>\r\n', newTable) if n > 0: warning_messages.append( u'WARNING: found <th> without </th>. (%d occurences)\n' % n) @@ -174,8 +174,8 @@
# <th> with attributes, without closing </th> newTable, n = re.subn( - "(?i)[\r\n]+<th(?P<attr> [^>]*?)>(?P<header>[\w\W]*?)[\r\n]+", - r"\n!\g<attr> | \g<header>\r\n", newTable) + r'(?i)[\r\n]+<th(?P<attr> [^>]*?)>(?P<header>[\w\W]*?)[\r\n]+', + r'\n!\g<attr> | \g<header>\r\n', newTable) if n > 0: warning_messages.append( u'WARNING: found <th ...> without </th>. (%d occurences\n)' % n) @@ -192,14 +192,14 @@
################## # normal <td> without arguments - newTable = re.sub("(?i)[\r\n]+<td>(?P<cell>[\w\W]*?)</td>", - r"\r\n| \g<cell>\r\n", newTable) + newTable = re.sub(r'(?i)[\r\n]+<td>(?P<cell>[\w\W]*?)</td>', + r'\r\n| \g<cell>\r\n', newTable)
################## # normal <td> with arguments newTable = re.sub( - "(?i)[\r\n]+<td(?P<attr> [^>]*?)>(?P<cell>[\w\W]*?)</td>", - r"\r\n|\g<attr> | \g<cell>", newTable) + r'(?i)[\r\n]+<td(?P<attr> [^>]*?)>(?P<cell>[\w\W]*?)</td>', + r'\r\n|\g<attr> | \g<cell>', newTable)
# WARNING: this sub might eat cells of bad HTML, but most likely it # will correct errors @@ -231,25 +231,25 @@
# fail save. sometimes people forget </td> # <td> without arguments, with missing </td> - newTable, n = re.subn("(?i)<td>(?P<cell>[^<]*?)[\r\n]+", - r"\r\n| \g<cell>\r\n", newTable) + newTable, n = re.subn(r'(?i)<td>(?P<cell>[^<]*?)[\r\n]+', + r'\r\n| \g<cell>\r\n', newTable) if n > 0: warning_messages.append(u"NOTE: Found <td> without </td>. This " u"shouldn't cause problems.\n")
# <td> with attributes, with missing </td> newTable, n = re.subn( - "(?i)[\r\n]*<td(?P<attr> [^>]*?)>(?P<cell>[\w\W]*?)[\r\n]+", - r"\r\n|\g<attr> | \g<cell>\r\n", newTable) + r'(?i)[\r\n]*<td(?P<attr> [^>]*?)>(?P<cell>[\w\W]*?)[\r\n]+', + r'\r\n|\g<attr> | \g<cell>\r\n', newTable) if n > 0: warning_messages.append(u"NOTE: Found <td> without </td>. This " u"shouldn't cause problems.\n")
################## # Garbage collecting ;-) - newTable = re.sub("(?i)<td>[\r\n]*</tr>", "", newTable) + newTable = re.sub(r'(?i)<td>[\r\n]*</tr>', '', newTable) # delete closing tags - newTable = re.sub("(?i)[\r\n]*</t[rdh]>", "", newTable) + newTable = re.sub(r'(?i)[\r\n]*</t[rdh]>', '', newTable)
################## # OK, that's only theory but works most times. @@ -265,8 +265,8 @@ ################## # most <th> come with '''title'''. Senseless in my eyes cuz # <th> should be bold anyways. - newTable = re.sub("[\r\n]+!([^'\n\r]*)'''([^'\r\n]*)'''", - r"\r\n!\1\2", newTable) + newTable = re.sub(r"[\r\n]+!([^'\n\r]*)'''([^'\r\n]*)'''", + r'\r\n!\1\2', newTable)
################## # kills indention within tables. Be warned, it might seldom bring @@ -275,29 +275,29 @@ if config.deIndentTables: num = 1 while num != 0: - newTable, num = re.subn("({|[\w\W]*?)\n[ \t]+([\w\W]*?|})", - r"\1\r\n\2", newTable) + newTable, num = re.subn(r'({|[\w\W]*?)\n[ \t]+([\w\W]*?|})', + r'\1\r\n\2', newTable)
################## # kills additional spaces after | or ! or {| # This line was creating problems, so I commented it out --Daniel # newTable = re.sub("[\r\n]+|[\t ]+?[\r\n]+", "\r\n| ", newTable) # kills trailing spaces and tabs - newTable = re.sub("\r\n(.*)[\t\ ]+[\r\n]+", - r"\r\n\1\r\n", newTable) + newTable = re.sub(r'\r\n(.*)[\t\ ]+[\r\n]+', + r'\r\n\1\r\n', newTable) # kill extra new-lines - newTable = re.sub("[\r\n]{4,}(!||)", - r"\r\n\1", newTable) + newTable = re.sub(r'[\r\n]{4,}(!||)', + r'\r\n\1', newTable)
################## # shortening if <table> had no arguments/parameters - newTable = re.sub("[\r\n]+{|[\ ]+| ", "\r\n{| ", newTable) + newTable = re.sub(r'[\r\n]+{|[\ ]+| ', r'\r\n{| ', newTable) # shortening if <td> had no articles - newTable = re.sub("[\r\n]+|[\ ]+| ", "\r\n| ", newTable) + newTable = re.sub(r'[\r\n]+|[\ ]+| ', '\r\n| ', newTable) # shortening if <th> had no articles - newTable = re.sub("\n|+[\ ]+|", "\n|+ ", newTable) + newTable = re.sub(r'\n|+[\ ]+|', '\n|+ ', newTable) # shortening of <caption> had no articles - newTable = re.sub("[\r\n]+![\ ]+| ", "\r\n! ", newTable) + newTable = re.sub(r'[\r\n]+![\ ]+| ', '\r\n! ', newTable)
################## # proper attributes. attribute values need to be in quotation marks. @@ -331,35 +331,35 @@ num = 1 while num != 0: newTable, num = re.subn( - "[\r\n]+(|[^|-}]{1}[^\n\r]{0,35})" + - "[\r\n]+(|[^|-}]{1}[^\r\n]{0,35})[\r\n]+", - r"\r\n\1 |\2\r\n", newTable) + r'[\r\n]+(|[^|-}]{1}[^\n\r]{0,35})' + r'[\r\n]+(|[^|-}]{1}[^\r\n]{0,35})[\r\n]+', + r'\r\n\1 |\2\r\n', newTable) #### # add a new line if first is * or # - newTable = re.sub("[\r\n]+| ([*#]{1})", - r"\r\n|\r\n\1", newTable) + newTable = re.sub(r'[\r\n]+| ([*#]{1})', + r'\r\n|\r\n\1', newTable)
################## # strip <center> from <th> - newTable = re.sub("([\r\n]+![^\r\n]+?)<center>([\w\W]+?)</center>", - r"\1 \2", newTable) + newTable = re.sub(r'([\r\n]+![^\r\n]+?)<center>([\w\W]+?)</center>', + r'\1 \2', newTable) # strip align="center" from <th> because the .css does it # if there are no other attributes than align, we don't need # that | either - newTable = re.sub("([\r\n]+! +)align="center" +|", - r"\1", newTable) + newTable = re.sub(r'([\r\n]+! +)align="center" +|', + r'\1', newTable) # if there are other attributes, simply strip the align="center" newTable = re.sub( - "([\r\n]+![^\r\n|]+?)align="center"([^\n\r|]+?|)", - r"\1 \2", newTable) + r'([\r\n]+![^\r\n|]+?)align="center"([^\n\r|]+?|)', + r'\1 \2', newTable)
################## # kill additional spaces within arguments num = 1 while num != 0: newTable, num = re.subn( - "[\r\n]+(||!)([^|\r\n]*?)[ \t]{2,}([^\r\n]+?)", - r"\r\n\1\2 \3", newTable) + r'[\r\n]+(||!)([^|\r\n]*?)[ \t]{2,}([^\r\n]+?)', + r'\r\n\1\2 \3', newTable)
################## # I hate those long lines because they make a wall of letters @@ -370,8 +370,8 @@ # TODO: how does this work? docu please. # why are only äöüß used, but not other special characters? newTable, num = re.subn( - "(\r\n[A-Z]{1}[^\n\r]{200,}?[a-zäöüß].)\ ([A-ZÄÖÜ]{1}[^\n\r]{200,})", - r"\1\r\n\2", newTable) + r'(\r\n[A-Z]{1}[^\n\r]{200,}?[a-zäöüß].)\ ([A-ZÄÖÜ]{1}[^\n\r]{200,})', + r'\1\r\n\2', newTable) return newTable, warnings, warning_messages
def markActiveTables(self, text): diff --git a/tests/site_tests.py b/tests/site_tests.py index 4b44516..2fc3557 100644 --- a/tests/site_tests.py +++ b/tests/site_tests.py @@ -2200,7 +2200,7 @@ self.assertIsInstance( datetime.strptime(mysite.siteinfo['time'], '%Y-%m-%dT%H:%M:%SZ'), datetime) - self.assertEqual(re.findall("$1", mysite.siteinfo['articlepath']), ["$1"]) + self.assertEqual(re.findall(r'$1', mysite.siteinfo['articlepath']), ['$1'])
def test_properties_with_defaults(self): """Test the siteinfo properties with defaults."""
pywikibot-commits@lists.wikimedia.org