jenkins-bot submitted this change.

View Change

Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
[IMPR] Remove empty body

- remove needless return statement in DryRequest._write_cache
- drop context manager functionality for textlib._GetDataHTML
  and use contextlib.closing instead in removeHTMLParts()
- update documentation

Change-Id: I38287c83a8e8d1cac45b0a0deb8064e153434b80
---
M pywikibot/textlib.py
M tests/utils.py
2 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 788df3a..60968c7 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -10,7 +10,7 @@
import re
from collections import OrderedDict
from collections.abc import Sequence
-from contextlib import suppress
+from contextlib import closing, suppress
from html.parser import HTMLParser
from typing import NamedTuple

@@ -543,22 +543,26 @@


def removeHTMLParts(text: str, keeptags: list[str] | None = None) -> str:
- """
- Return text without portions where HTML markup is disabled.
+ """Return text without portions where HTML markup is disabled.

- Parts that can/will be removed are --
- * HTML and all wiki tags
+ Parts that can/will be removed are HTML tags and all wiki tags. The
+ exact set of parts which should NOT be removed can be passed as the
+ *keeptags* parameter, which defaults to
+ ``['tt', 'nowiki', 'small', 'sup']``.

- The exact set of parts which should NOT be removed can be passed as the
- 'keeptags' parameter, which defaults to ['tt', 'nowiki', 'small', 'sup'].
+ **Example:**
+
+ >>> removeHTMLParts('<div><b><ref><tt>Hi all!</tt></ref></b></div>')
+ '<tt>Hi all!</tt>'
+
+ .. seealso:: :class:`_GetDataHTML`
"""
- # try to merge with 'removeDisabledParts()' above into one generic function
- # thanks to:
- # https://www.hellboundhackers.org/articles/read-article.php?article_id=841
+ # TODO: try to merge with 'removeDisabledParts()' above into one generic
+ # function
parser = _GetDataHTML()
if keeptags is None:
keeptags = ['tt', 'nowiki', 'small', 'sup']
- with parser:
+ with closing(parser):
parser.keeptags = keeptags
parser.feed(text)
return parser.textdata
@@ -568,20 +572,39 @@

"""HTML parser which removes html tags except they are listed in keeptags.

- This class is also a context manager which closes itself at exit time.
+ The parser is used by :func:`removeHTMLParts` similar to this:

- .. seealso:: :pylib:`html.parser`
+ .. code:: python
+
+ from contextlib import closing
+ from pywikibot.textlib import _GetDataHTML
+ with closing(_GetDataHTML()) as parser:
+ parser.keeptags = ['html']
+ parser.feed('<html><head><title>Test</title></head>'
+ '<body><h1><!-- Parse --> me!</h1></body></html>')
+ print(parser.textdata)
+
+ The result is:
+
+ .. code:: text
+
+ <html>Test me!</html>
+
+ .. versionchanged:: 9.2
+ This class is no longer a context manager;
+ :pylib:`contextlib.closing()<contextlib#contextlib.closing>`
+ should be used instead.
+
+ .. seealso::
+ - :pylib:`html.parser`
+ - :pylib:`contextlib#contextlib.closing`
+
+ :meta public:
"""

textdata = ''
keeptags: list[str] = []

- def __enter__(self) -> None:
- pass
-
- def __exit__(self, *exc_info) -> None:
- self.close()
-
def handle_data(self, data) -> None:
"""Add data to text."""
self.textdata += data
diff --git a/tests/utils.py b/tests/utils.py
index 445075a..7ea74d1 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -322,9 +322,8 @@
"""Never invalidate cached data."""
return False

- def _write_cache(self, data):
- """Never write data."""
- return
+ def _write_cache(self, data) -> None:
+ """Never write data but just do nothing."""

def submit(self):
"""Prevented method."""

To view, visit change 1025444. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I38287c83a8e8d1cac45b0a0deb8064e153434b80
Gerrit-Change-Number: 1025444
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged