jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1025444?usp=email )
Change subject: [IMPR] Remove empty body ......................................................................
[IMPR] Remove empty body
- remove needless return statement in DryRequest._write_cache
- drop context manager functionality for textlib._GetDataHTML and use
  contextlib.closing instead in removeHTMLParts()
- update documentation
Change-Id: I38287c83a8e8d1cac45b0a0deb8064e153434b80
---
M pywikibot/textlib.py
M tests/utils.py
2 files changed, 44 insertions(+), 22 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 788df3a..60968c7 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -10,7 +10,7 @@ import re from collections import OrderedDict from collections.abc import Sequence -from contextlib import suppress +from contextlib import closing, suppress from html.parser import HTMLParser from typing import NamedTuple
@@ -543,22 +543,26 @@
def removeHTMLParts(text: str, keeptags: list[str] | None = None) -> str: - """ - Return text without portions where HTML markup is disabled. + """Return text without portions where HTML markup is disabled.
- Parts that can/will be removed are -- - * HTML and all wiki tags + Parts that can/will be removed are HTML tags and all wiki tags. The + exact set of parts which should NOT be removed can be passed as the + *keeptags* parameter, which defaults to + ``['tt', 'nowiki', 'small', 'sup']``.
- The exact set of parts which should NOT be removed can be passed as the - 'keeptags' parameter, which defaults to ['tt', 'nowiki', 'small', 'sup']. + **Example:** + + >>> removeHTMLParts('<div><b><ref><tt>Hi all!</tt></ref></b></div>') + '<tt>Hi all!</tt>' + + .. seealso:: :class:`_GetDataHTML` """ - # try to merge with 'removeDisabledParts()' above into one generic function - # thanks to: - # https://www.hellboundhackers.org/articles/read-article.php?article_id=841 + # TODO: try to merge with 'removeDisabledParts()' above into one generic + # function parser = _GetDataHTML() if keeptags is None: keeptags = ['tt', 'nowiki', 'small', 'sup'] - with parser: + with closing(parser): parser.keeptags = keeptags parser.feed(text) return parser.textdata @@ -568,20 +572,39 @@
"""HTML parser which removes html tags except they are listed in keeptags.
- This class is also a context manager which closes itself at exit time. + The parser is used by :func:`removeHTMLParts` similar to this:
- .. seealso:: :pylib:`html.parser` + .. code:: python + + from contextlib import closing + from pywikibot.textlib import _GetDataHTML + with closing(_GetDataHTML()) as parser: + parser.keeptags = ['html'] + parser.feed('<html><head><title>Test</title></head>' + '<body><h1><!-- Parse --> me!</h1></body></html>') + print(parser.textdata) + + The result is: + + .. code:: text + + <html>Test me!</html> + + .. versionchanged:: 9.2 + This class is no longer a context manager; + :pylib:`contextlib.closing()<contextlib#contextlib.closing>` + should be used instead. + + .. seealso:: + - :pylib:`html.parser` + - :pylib:`contextlib#contextlib.closing` + + :meta public: """
textdata = '' keeptags: list[str] = []
- def __enter__(self) -> None: - pass - - def __exit__(self, *exc_info) -> None: - self.close() - def handle_data(self, data) -> None: """Add data to text.""" self.textdata += data diff --git a/tests/utils.py b/tests/utils.py index 445075a..7ea74d1 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -322,9 +322,8 @@ """Never invalidate cached data.""" return False
- def _write_cache(self, data): - """Never write data.""" - return + def _write_cache(self, data) -> None: + """Never write data but just do nothing."""
def submit(self): """Prevented method."""