jenkins-bot submitted this change.

View Change

Approvals: Dvorapa: Looks good to me, but someone else must approve; Zhuyifei1999: Looks good to me, approved; jenkins-bot: Verified
[IMPR] Make _GetDataHTML a context manager

- Make _GetDataHTML a context manager and call HTMLParser.close()
at exit time. Closing is also done if an exception occurs.
- Reorder imports; we have a PY2 section already.

Change-Id: Ib1e7296967293c6102d8416fb14d98da0d2f6d57
---
M pywikibot/textlib.py
1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 9054917..63e9be8 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -13,14 +13,11 @@
#
from __future__ import absolute_import, division, unicode_literals

-from collections import OrderedDict, namedtuple
-try:
- from collections.abc import Sequence
-except ImportError: # Python 2.7
- from collections import Sequence
import datetime
import re

+from collections import OrderedDict, namedtuple
+
import pywikibot
from pywikibot.exceptions import InvalidTitle, SiteDefinitionError
from pywikibot.family import Family
@@ -35,8 +32,10 @@
)

if not PY2:
+ from collections.abc import Sequence
from html.parser import HTMLParser
else:
+ from collections import Sequence
from future_builtins import zip
from HTMLParser import HTMLParser

@@ -517,27 +516,42 @@
# thanks to:
# https://www.hellboundhackers.org/articles/read-article.php?article_id=841
parser = _GetDataHTML()
- parser.keeptags = keeptags
- parser.feed(text)
- parser.close()
+ with parser:
+ parser.keeptags = keeptags
+ parser.feed(text)
return parser.textdata


# thanks to https://docs.python.org/3/library/html.parser.html
class _GetDataHTML(HTMLParser):
+
+ """HTML parser which removes html tags except they are listed in keeptags.
+
+ This class is also a context manager which closes itself at exit time.
+ """
+
textdata = ''
keeptags = []

+ def __enter__(self):
+ pass
+
+ def __exit__(self, *exc_info):
+ self.close()
+
def handle_data(self, data):
+ """Add data to text."""
self.textdata += data

def handle_starttag(self, tag, attrs):
+ """Add start tag to text if tag should be kept."""
if tag in self.keeptags:
- self.textdata += '<%s>' % tag
+ self.textdata += '<{}>'.format(tag)

def handle_endtag(self, tag):
+ """Add end tag to text if tag should be kept."""
if tag in self.keeptags:
- self.textdata += '</%s>' % tag
+ self.textdata += '</{}>'.format(tag)


def isDisabled(text, index, tags=None):

To view, visit change 605962. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ib1e7296967293c6102d8416fb14d98da0d2f6d57
Gerrit-Change-Number: 605962
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Dvorapa <dvorapa@seznam.cz>
Gerrit-Reviewer: Zhuyifei1999 <zhuyifei1999@gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged