[Gerrit] pywikibot/core[master]: CosmeticChangesToolkit.removeEmptySections: Fix skipping of language ... - Pywikibot-commits

24 Aug 2018

jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/454853 )
Change subject: CosmeticChangesToolkit.removeEmptySections: Fix skipping of language links
......................................................................
CosmeticChangesToolkit.removeEmptySections: Fix skipping of language links
The 'interwiki' regex also consumes the trailing newlines after a link, so
removing its matches can break the detection of page sections. (T202629)
Instead of using the 'interwiki' regex, use the textlib.removeLanguageLinks
function. Use marker='\n' argument to avoid the issue mentioned above.
Add a few tests for the fixed bug.
Do minor cleanups in textlib.py:
  - _ignore_case: Do not wrap a character in a character class if it has no distinct upper/lower case form.
  - 'interwiki' regex pattern: replace '[\s]*' with '\s*'.
Bug: T202629
Change-Id: I22b4ad92c7058ca65c6e3e6a680586dc1c2490a0
---
M pywikibot/cosmetic_changes.py
M pywikibot/textlib.py
M tests/cosmetic_changes_tests.py
3 files changed, 20 insertions(+), 8 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified

diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 3130081..ac4d548 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -663,12 +663,8 @@
def removeEmptySections(self, text):
         """Cleanup empty sections."""
-        # comments, categories, and interwikis
-        skippings = ['comment', 'category', 'interwiki']
+        skippings = ['comment', 'category']
         skip_regexes = _get_regexes(skippings, self.site)
-        # we want only interwikis, not interlanguage links
-        skip_regexes[1] = re.compile(
-            skip_regexes[1].pattern.replace(':?', ''))
         # site defined templates
         skip_templates = {
             'cs': ('Pahýl[ _]část',),  # stub section
@@ -681,7 +677,7 @@
         skip_regexes.append(re.compile(r'(?m)^[*#] *$'))
# get stripped sections
-        stripped_text = text
+        stripped_text = textlib.removeLanguageLinks(text, self.site, '\n')
         for reg in skip_regexes:
             stripped_text = reg.sub(r'', stripped_text)
         strip_sections = textlib.extract_sections(
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 074ba72..c9d416b 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -251,7 +251,9 @@
def _ignore_case(string):
     """Return a case-insensitive pattern for the string."""
-    return ''.join('[' + c.upper() + c.lower() + ']' for c in string)
+    return ''.join(
+        '[' + c + s + ']' if c != s else c
+        for s, c in zip(string, string.swapcase()))
def _tag_pattern(tag_name):
@@ -282,7 +284,7 @@
         'hyperlink': compileLinkR(),
         # also finds links to foreign sites with preleading ":"
         'interwiki': (
-            r'[[:?(%s)\s?:[^]]*]][\s]*',
+            r'[[:?(%s)\s?:[^]]*]]\s*',
             lambda site: '|'.join(
                 _ignore_case(i) for i in site.validLanguageLinks()
                 + list(site.family.obsolete.keys()))),
diff --git a/tests/cosmetic_changes_tests.py b/tests/cosmetic_changes_tests.py
index d5354c0..a6024a2 100644
--- a/tests/cosmetic_changes_tests.py
+++ b/tests/cosmetic_changes_tests.py
@@ -332,6 +332,20 @@
             self.cct.removeEmptySections('\n==Bar==\n[[cs:Foo]]'
                                          '\n[[Category:Baz]]'))
+    def test_remove_empty_sections_interlanguage_links(self):
+        """Test removeEmptySections with edge cases of language links."""
+        # When removing language links, do not remove the \n after them,
+        # otherwise the sections won't be detected correctly.
+        text = 'text [[:en:link]]\n=== title1 ===\ncontent1'
+        self.assertEqual(text, self.cct.removeEmptySections(text))
+        self.assertEqual(
+            't [[en:link]]\n=== 1 ===\nc',
+            self.cct.removeEmptySections('t [[en:link]]\n=== 1 ===\nc'))
+        # Treat sections that only contain language links as empty sections.
+        self.assertEqual(
+            't\n[[en:link]]',
+            self.cct.removeEmptySections('t\n=== 1 ===\n[[en:link]]'))
+
     def test_remove_empty_sections_with_heading_comments(self):
         """Test removeEmptySections with comments in the section headings."""
         self.assertEqual(
-- 
To view, visit https://gerrit.wikimedia.org/r/454853
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I22b4ad92c7058ca65c6e3e6a680586dc1c2490a0
Gerrit-Change-Number: 454853
Gerrit-PatchSet: 9
Gerrit-Owner: Dalba dalba.wiki@gmail.com
Gerrit-Reviewer: Dalba dalba.wiki@gmail.com
Gerrit-Reviewer: John Vandenberg jayvdb@gmail.com
Gerrit-Reviewer: Xqt info@gno.de
Gerrit-Reviewer: Zoranzoki21 zorandori4444@gmail.com
Gerrit-Reviewer: jenkins-bot (75)