jenkins-bot merged this change.

View Change

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
[bugfix] Re-enable resolveHtmlEntities

Bug: T57222
Change-Id: Icf2c7e47ffe3c906ea7aab9e653affbadef0955f
---
M pywikibot/cosmetic_changes.py
M pywikibot/page.py
M tests/cosmetic_changes_tests.py
3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 9c3a541..7752809 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -254,8 +254,7 @@
self.translateAndCapitalizeNamespaces,
self.translateMagicWords,
self.replaceDeprecatedTemplates,
- # FIXME: fix bugs and re-enable
- # self.resolveHtmlEntities,
+ self.resolveHtmlEntities,
self.removeEmptySections,
self.removeUselessSpaces,
self.removeNonBreakingSpaceBeforePercent,
@@ -621,9 +620,9 @@
"""Replace HTML entities with unicode."""
ignore = [
38, # Ampersand (&)
- 39, # Single quotation mark (") - bug T26093
+ 39, # Single quotation mark (") per T26093
60, # Less than (<)
- 62, # Great than (>)
+ 62, # Greater than (>)
91, # Opening square bracket ([)
# - sometimes used intentionally inside links
93, # Closing square bracket (])
@@ -637,8 +636,8 @@
8207, # Right-to-left mark (&rtl;)
]
if self.template:
- ignore += [58]
- text = pywikibot.html2unicode(text, ignore=ignore)
+ ignore += [58] # Colon (:)
+ text = pywikibot.html2unicode(text, ignore=ignore, exceptions=['code'])
return text

def removeEmptySections(self, text):
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 570cb6a..a8ab8a6 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -5925,7 +5925,7 @@
# Utility functions for parsing page titles


-def html2unicode(text, ignore=None):
+def html2unicode(text, ignore=None, exceptions=None):
"""
Replace HTML entities with equivalent unicode.

@@ -5977,6 +5977,10 @@
ignore)) | {129, 141, 157})

def handle_entity(match):
+ if textlib.isDisabled(match.string, match.start(), tags=exceptions):
+ # match.string stores original text so we do not need
+ # to pass it to handle_entity, ♥ Python
+ return match.group(0)
if match.group('decimal'):
unicodeCodepoint = int(match.group('decimal'))
elif match.group('hex'):
diff --git a/tests/cosmetic_changes_tests.py b/tests/cosmetic_changes_tests.py
index 9695aae..59779a6 100644
--- a/tests/cosmetic_changes_tests.py
+++ b/tests/cosmetic_changes_tests.py
@@ -55,6 +55,9 @@
self.assertEqual(
'&# # #0#>#x',
self.cct.resolveHtmlEntities('&# # #0#>#x'))
+ self.assertEqual(
+ '<code>&#32;</code>',
+ self.cct.resolveHtmlEntities('<code>&#32;</code>'))

def test_removeEmptySections(self):
"""Test removeEmptySections method."""

To view, visit change 434309. To unsubscribe, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: Icf2c7e47ffe3c906ea7aab9e653affbadef0955f
Gerrit-Change-Number: 434309
Gerrit-PatchSet: 4
Gerrit-Owner: Dvorapa <dvorapa@seznam.cz>
Gerrit-Reviewer: Dalba <dalba.wiki@gmail.com>
Gerrit-Reviewer: Dvorapa <dvorapa@seznam.cz>
Gerrit-Reviewer: Framawiki <framawiki@tools.wmflabs.org>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot <>