Xqt has submitted this change and it was merged.
Change subject: Add "category" and "file" as exemptions in replaceExcept ......................................................................
Add "category" and "file" as exemptions in replaceExcept
- pep8ified and synchronized with core by xqt
Change-Id: If103b5fa1dc5952628665ee54f1bd72c31a29040 --- M pywikibot/textlib.py 1 file changed, 34 insertions(+), 23 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index d0e69c4..80b55e5 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -22,6 +22,7 @@ TEMP_REGEX = re.compile( '{{(?:msg:)?(?P<name>[^{|]+?)(?:|(?P<params>[^{]+?(?:{[^{]+?}[^{]*?)?))?}}')
+ def unescape(s): """Replace escaped HTML-special characters by their originals""" if '&' not in s: @@ -90,6 +91,10 @@ 'property': re.compile(r'(?i){{\s*#property:\s*p\d+\s*}}'), # Module invocations (currently only Lua) 'invoke': re.compile(r'(?i){{\s*#invoke:.*?}}'), + # categories + 'category': re.compile(ur'[[ *(?:%s)\s*:.*?]]' % ur'|'.join(site.namespace(14, all=True))), + #files + 'file': re.compile(ur'[[ *(?:%s)\s*:.*?]]' % ur'|'.join(site.namespace(6, all=True))),
}
@@ -215,12 +220,12 @@ groupMatch = groupR.search(replacement) if not groupMatch: break - groupID = groupMatch.group('name') or \ - int(groupMatch.group('number')) + groupID = (groupMatch.group('name') or + int(groupMatch.group('number'))) try: - replacement = replacement[:groupMatch.start()] + \ - match.group(groupID) + \ - replacement[groupMatch.end():] + replacement = (replacement[:groupMatch.start()] + + match.group(groupID) + \ + replacement[groupMatch.end():]) except IndexError: print '\nInvalid group reference:', groupID print 'Groups found:\n', match.groups() @@ -353,7 +358,7 @@ lenseparator:firstinseparator]): firstinseparator -= lenseparator striploopcontinue = True - elif text[firstinseparator-1] < ' ': + elif text[firstinseparator - 1] < ' ': firstinseparator -= 1 striploopcontinue = True marker = text[firstinseparator:firstinmarker] + marker @@ -510,10 +515,10 @@ if site.language() in site.family.interwiki_attop or \ u'<!-- interwiki at top -->' in oldtext: #do not add separator if interiki links are on one line - newtext = s + \ - [separator, u''][site.language() in - site.family.interwiki_on_one_line] + \ - s2.replace(marker, '').strip() + newtext = (s + + [separator, u''][site.language() in + site.family.interwiki_on_one_line] + + s2.replace(marker, '').strip()) else: # calculate what was after the language links on the page firstafter = s2.find(marker) @@ -525,8 +530,9 @@ if "</noinclude>" in s2[firstafter:]: if separatorstripped: s = separator + s - newtext = s2[:firstafter].replace(marker, '') + s + \ - s2[firstafter:] + newtext = (s2[:firstafter].replace(marker, '') + + s + + s2[firstafter:]) elif site.language() in site.family.categories_last: cats = getCategoryLinks(s2, site=site) s2 = removeCategoryLinksAndSeparator( @@ -538,8 +544,9 @@ # (not supported by rewrite - no API) elif site.family.name == 'wikitravel': s = separator + s + separator - newtext = s2[:firstafter].replace(marker, '') + s + \ - s2[firstafter:] + newtext = 
(s2[:firstafter].replace(marker, '') + + s + + s2[firstafter:]) else: if template or template_subpage: if template_subpage: @@ -558,8 +565,10 @@ newtext = regexp.sub(s + includeOff, s2) else: # Put the langlinks at the end, inside noinclude's - newtext = s2.replace(marker, '').strip() + separator + \ - u'%s\n%s%s\n' % (includeOn, s, includeOff) + newtext = (s2.replace(marker, '').strip() + + separator + + u'%s\n%s%s\n' % (includeOn, s, includeOff) + ) else: newtext = s2.replace(marker, '').strip() + separator + s else: @@ -646,8 +655,9 @@ r'(?:|(?P<sortKey>.+?))?\s*]]' % catNamespace, re.I) for match in R.finditer(text): - cat = catlib.Category(site, '%s:%s' % (match.group('namespace'), - match.group('catName')), + cat = catlib.Category(site, + '%s:%s' % (match.group('namespace'), + match.group('catName')), sortKey=match.group('sortKey')) result.append(cat) return result @@ -788,8 +798,9 @@ if "</noinclude>" in s2[firstafter:]: if separatorstripped: s = separator + s - newtext = s2[:firstafter].replace(marker, '') + s + \ - s2[firstafter:] + newtext = (s2[:firstafter].replace(marker, '') + + s + + s2[firstafter:]) elif site.language() in site.family.categories_last: newtext = s2.replace(marker, '').strip() + separator + s else: @@ -823,7 +834,7 @@ if categories[0][0] == '[': catLinks = categories else: - catLinks = ['[[Category:'+category+']]' for category in categories] + catLinks = ['[[Category:' + category + ']]' for category in categories] else: catLinks = [category.aslink(noInterwiki=True) for category in categories] @@ -865,8 +876,8 @@ r'(?=[%(notAtEnd)s]*'')|http[s]?://[^%(notInside)s]*' \ r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEnd} regexb = r'(?P<urlb>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \ - r'(?=[%(notAtEnd)s]*'')|http[s]?://[^%(notInside)s]*' \ - r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEndb} + r'(?=[%(notAtEnd)s]*'')|http[s]?://[^%(notInside)s]*' \ + r'[^%(notAtEnd)s])' % {'notInside': notInside, 
'notAtEnd': notAtEndb} if withoutBracketed: regex = r'(?<![)' + regex elif onlyBracketed:
pywikibot-commits@lists.wikimedia.org