jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/605962 )
Change subject: [IMPR] Make _GetDataHTML a context manager ......................................................................
[IMPR] Make _GetDataHTML a context manager
- Make _GetDataHTML a context manager and call HTMLParser.close() at exit time. Closing is also done if an exception occurs. - Reorder imports; we have a PY2 section already.
Change-Id: Ib1e7296967293c6102d8416fb14d98da0d2f6d57 --- M pywikibot/textlib.py 1 file changed, 24 insertions(+), 10 deletions(-)
Approvals: Dvorapa: Looks good to me, but someone else must approve Zhuyifei1999: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 9054917..63e9be8 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -13,14 +13,11 @@ # from __future__ import absolute_import, division, unicode_literals
-from collections import OrderedDict, namedtuple -try: - from collections.abc import Sequence -except ImportError: # Python 2.7 - from collections import Sequence import datetime import re
+from collections import OrderedDict, namedtuple + import pywikibot from pywikibot.exceptions import InvalidTitle, SiteDefinitionError from pywikibot.family import Family @@ -35,8 +32,10 @@ )
if not PY2: + from collections.abc import Sequence from html.parser import HTMLParser else: + from collections import Sequence from future_builtins import zip from HTMLParser import HTMLParser
@@ -517,27 +516,42 @@ # thanks to: # https://www.hellboundhackers.org/articles/read-article.php?article_id=841 parser = _GetDataHTML() - parser.keeptags = keeptags - parser.feed(text) - parser.close() + with parser: + parser.keeptags = keeptags + parser.feed(text) return parser.textdata
# thanks to https://docs.python.org/3/library/html.parser.html class _GetDataHTML(HTMLParser): + + """HTML parser which removes html tags except they are listed in keeptags. + + This class is also a context manager which closes itself at exit time. + """ + textdata = '' keeptags = []
+ def __enter__(self): + pass + + def __exit__(self, *exc_info): + self.close() + def handle_data(self, data): + """Add data to text.""" self.textdata += data
def handle_starttag(self, tag, attrs): + """Add start tag to text if tag should be kept.""" if tag in self.keeptags: - self.textdata += '<%s>' % tag + self.textdata += '<{}>'.format(tag)
def handle_endtag(self, tag): + """Add end tag to text if tag should be kept.""" if tag in self.keeptags: - self.textdata += '</%s>' % tag + self.textdata += '</{}>'.format(tag)
def isDisabled(text, index, tags=None):
pywikibot-commits@lists.wikimedia.org