jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/434309 )
Change subject: [bugfix] Re-enable resolveHtmlEntities ......................................................................
[bugfix] Re-enable resolveHtmlEntities
Bug: T57222 Change-Id: Icf2c7e47ffe3c906ea7aab9e653affbadef0955f --- M pywikibot/cosmetic_changes.py M pywikibot/page.py M tests/cosmetic_changes_tests.py 3 files changed, 13 insertions(+), 7 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py index 9c3a541..7752809 100755 --- a/pywikibot/cosmetic_changes.py +++ b/pywikibot/cosmetic_changes.py @@ -254,8 +254,7 @@ self.translateAndCapitalizeNamespaces, self.translateMagicWords, self.replaceDeprecatedTemplates, - # FIXME: fix bugs and re-enable - # self.resolveHtmlEntities, + self.resolveHtmlEntities, self.removeEmptySections, self.removeUselessSpaces, self.removeNonBreakingSpaceBeforePercent, @@ -621,9 +620,9 @@ """Replace HTML entities with unicode.""" ignore = [ 38, # Ampersand (&) - 39, # Single quotation mark (") - bug T26093 + 39, # Single quotation mark (") per T26093 60, # Less than (<) - 62, # Great than (>) + 62, # Greater than (>) 91, # Opening square bracket ([) # - sometimes used intentionally inside links 93, # Closing square bracket (]) @@ -637,8 +636,8 @@ 8207, # Right-to-left mark (&rtl;) ] if self.template: - ignore += [58] - text = pywikibot.html2unicode(text, ignore=ignore) + ignore += [58] # Colon (:) + text = pywikibot.html2unicode(text, ignore=ignore, exceptions=['code']) return text
def removeEmptySections(self, text): diff --git a/pywikibot/page.py b/pywikibot/page.py index 570cb6a..a8ab8a6 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -5925,7 +5925,7 @@ # Utility functions for parsing page titles
-def html2unicode(text, ignore=None): +def html2unicode(text, ignore=None, exceptions=None): """ Replace HTML entities with equivalent unicode.
@@ -5977,6 +5977,10 @@ ignore)) | {129, 141, 157})
def handle_entity(match): + if textlib.isDisabled(match.string, match.start(), tags=exceptions): + # match.string stores original text so we do not need + # to pass it to handle_entity, ♥ Python + return match.group(0) if match.group('decimal'): unicodeCodepoint = int(match.group('decimal')) elif match.group('hex'): diff --git a/tests/cosmetic_changes_tests.py b/tests/cosmetic_changes_tests.py index 9695aae..59779a6 100644 --- a/tests/cosmetic_changes_tests.py +++ b/tests/cosmetic_changes_tests.py @@ -55,6 +55,9 @@ self.assertEqual( '&# # #0#>#x', self.cct.resolveHtmlEntities('&# # #0#>#x')) + self.assertEqual( + '<code>&#32;</code>', + self.cct.resolveHtmlEntities('<code>&#32;</code>'))
def test_removeEmptySections(self): """Test removeEmptySections method."""