jenkins-bot merged this change.

View Change

Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
[FEAT] cc: Remove empty sections

Bug: T140570
Change-Id: I0b5be6daaa4a827db6781ae5b90e381f682f31b5
---
M pywikibot/cosmetic_changes.py
M tests/cosmetic_changes_tests.py
2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 0abd016..9c3a541 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -71,7 +71,8 @@
import pywikibot

from pywikibot import config, textlib
-from pywikibot.textlib import _MultiTemplateMatchBuilder, FILE_LINK_REGEX
+from pywikibot.textlib import (_MultiTemplateMatchBuilder, FILE_LINK_REGEX,
+ _get_regexes)
from pywikibot.tools import deprecated_args, first_lower, first_upper
from pywikibot.tools import MediaWikiVersion

@@ -255,6 +256,7 @@
self.replaceDeprecatedTemplates,
# FIXME: fix bugs and re-enable
# self.resolveHtmlEntities,
+ self.removeEmptySections,
self.removeUselessSpaces,
self.removeNonBreakingSpaceBeforePercent,

@@ -639,6 +641,30 @@
text = pywikibot.html2unicode(text, ignore=ignore)
return text

+ def removeEmptySections(self, text):
+ """Cleanup multiple empty sections."""
+ exceptions = ['comment', 'pre', 'source', 'nowiki', 'code',
+ 'startspace']
+
+ skippings = ['comment']
+ skip_regexes = _get_regexes(skippings, self.site)
+ skip_templates = {
+ 'cs': ('Pahýl[ _]část',), # stub section
+ }
+ if self.site.code in skip_templates:
+ for template in skip_templates[self.site.code]:
+ skip_regexes.append(
+ re.compile(r'\{\{\s*' + template + r'\s*\}\}', re.I))
+ skip_regexes.append(re.compile(r'\s*'))
+
+ pattern = re.compile(r'\n(=+) *[^=]+? *\1(?:'
+ + '|'.join(x.pattern for x in skip_regexes)
+ + r')+(?=\1 *[^=]+? *\1)', re.I)
+ text = textlib.replaceExcept(text, pattern, r'\n',
+ exceptions=exceptions,
+ caseInsensitive=True)
+ return text
+
def removeUselessSpaces(self, text):
"""Cleanup multiple or trailing spaces."""
exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace',
diff --git a/tests/cosmetic_changes_tests.py b/tests/cosmetic_changes_tests.py
index 64c6a34..9695aae 100644
--- a/tests/cosmetic_changes_tests.py
+++ b/tests/cosmetic_changes_tests.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""Test cosmetic_changes module."""
#
-# (C) Pywikibot team, 2015-2017
+# (C) Pywikibot team, 2015-2018
#
# Distributed under the terms of the MIT license.
#
@@ -56,6 +56,28 @@
'&# # #0#>#x',
self.cct.resolveHtmlEntities('&# # #0#>#x'))

+ def test_removeEmptySections(self):
+ """Test removeEmptySections method."""
+ # same level
+ self.assertEqual(
+ '\n==Bar==',
+ self.cct.removeEmptySections('\n== Foo ==\n\n==Bar=='))
+ # different level
+ self.assertEqual(
+ '\n===Foo===\n\n==Bar==',
+ self.cct.removeEmptySections('\n===Foo===\n\n==Bar=='))
+ self.assertEqual(
+ '\n==Foo==\n\n===Bar===',
+ self.cct.removeEmptySections('\n==Foo==\n\n===Bar==='))
+ # comment inside
+ self.assertEqual(
+ '\n==Bar==',
+ self.cct.removeEmptySections('\n==Foo==\n<!-- Baz -->\n==Bar=='))
+ # inside comment
+ self.assertEqual(
+ '<!--\n==Foo==\n\n==Bar==\n-->',
+ self.cct.removeEmptySections('<!--\n==Foo==\n\n==Bar==\n-->'))
+
def test_removeUselessSpaces(self):
"""Test removeUselessSpaces method."""
self.assertEqual('Foo bar',

To view, visit change 433914. To unsubscribe, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I0b5be6daaa4a827db6781ae5b90e381f682f31b5
Gerrit-Change-Number: 433914
Gerrit-PatchSet: 7
Gerrit-Owner: Dvorapa <dvorapa@seznam.cz>
Gerrit-Reviewer: Dalba <dalba.wiki@gmail.com>
Gerrit-Reviewer: Dvorapa <dvorapa@seznam.cz>
Gerrit-Reviewer: Framawiki <framawiki@tools.wmflabs.org>
Gerrit-Reviewer: John Vandenberg <jayvdb@gmail.com>
Gerrit-Reviewer: XXN <dan10real@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: Zoranzoki21 <zorandori4444@gmail.com>
Gerrit-Reviewer: Zppix <Megadev44s.mail@gmail.com>
Gerrit-Reviewer: jenkins-bot <>