jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/680016 )
Change subject: Revert "[parser] Make mwparserfromhell or wikitextparser mandatory" ......................................................................
Revert "[parser] Make mwparserfromhell or wikitextparser mandatory"
This reverts commit 908abeb94b2eecf6f2210cff2a9280a19db85787.
Reason for revert: CI tests are failing; postponed to a later release until deprecation time has expired
Change-Id: I10ca06e5ca9944aa76c71f6be9269efae4f8e142 --- M README.rst M pwb.py M pywikibot/textlib.py M requirements.txt M setup.py M tests/__init__.py M tests/textlib_tests.py M tox.ini 8 files changed, 165 insertions(+), 100 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/README.rst b/README.rst index 8872f7d..6b1a91f 100644 --- a/README.rst +++ b/README.rst @@ -44,7 +44,6 @@
::
- pip install requests git clone https://gerrit.wikimedia.org/r/pywikibot/core.git cd core git submodule update --init @@ -57,18 +56,6 @@ pip install -U setuptools pip install pywikibot
-In addition a MediaWiki markup parser is required. Please install one of them: - -:: - - pip install mwparserfromhell - -or - -:: - - pip install wikitextparser - Our `installation guide <https://www.mediawiki.org/wiki/Manual:Pywikibot/Installation>`_ has more details for advanced usage. diff --git a/pwb.py b/pwb.py index 95abb09..80bbc7e 100755 --- a/pwb.py +++ b/pwb.py @@ -14,7 +14,7 @@
python pwb.py -lang:de bot_tests -v """ -# (C) Pywikibot team, 2012-2021 +# (C) Pywikibot team, 2012-2020 # # Distributed under the terms of the MIT license. # @@ -182,8 +182,9 @@ try: if not check_modules(): raise RuntimeError('') # no further output needed -except RuntimeError as e: # setup.py may also raise RuntimeError - sys.exit(e) +except RuntimeError as e: + print(e) + sys.exit()
from pathlib import Path # noqa: E402
@@ -212,8 +213,6 @@ # we need to re-start the entire process. Ask the user to do so. print('Now, you have to re-execute the command to start your script.') sys.exit(1) -except ImportError as e: # raised in textlib - sys.exit(e)
def find_alternates(filename, script_paths): diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 8807069..2ee62d1 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -35,18 +35,8 @@ except ImportError: try: import mwparserfromhell as wikitextparser - except ImportError: - # print required because pywikibot is not imported completely - raise ImportError(""" -Pywikibot is missing a MediaWiki markup parser which is necessary. -Please update the required module with either - - pip install "mwparserfromhell>=0.5.0" - -or - - pip install "wikitextparser>=0.47.0" -""") from None + except ImportError as e: + wikitextparser = e
ETPType = List[Tuple[str, OrderedDictType[str, str]]]
@@ -1580,8 +1570,8 @@ # --------------------------------
def extract_templates_and_params(text: str, - remove_disabled_parts: bool = False, - strip: bool = False) -> ETPType: + remove_disabled_parts: Optional[bool] = None, + strip: Optional[bool] = None) -> ETPType: """Return a list of templates found in text.
Return value is a list of tuples. There is one tuple for each use of a @@ -1592,14 +1582,16 @@ parameters, and if this results multiple parameters with the same name only the last value provided will be returned.
- This uses the package L{mwparserfromhell} or L{wikitextparser} as - MediaWiki markup parser. It is mandatory that one of them is - installed. + This uses the package L{mwparserfromhell} (mwpfh) if it is installed. + Otherwise it falls back on a regex based implementation.
There are minor differences between the two implementations.
- The parser packages preserves whitespace in parameter names and - values. + The two implementations return nested templates in a different order. + i.e. for {{a|b={{c}}}}, mwpfh returns [a, c], whereas regex returns [c, a]. + + mwpfh preserves whitespace in parameter names and values. regex excludes + anything between <!-- --> before parsing the text.
If there are multiple numbered parameters in the wikitext for the same position, MediaWiki will only use the last parameter value. @@ -1607,9 +1599,43 @@ To replicate that behaviour, enable both remove_disabled_parts and strip.
@param text: The wikitext from which templates are extracted - @param remove_disabled_parts: If enabled, remove disabled wikitext - such as comments and pre. - @param strip: If enabled, strip arguments and values of templates. + @param remove_disabled_parts: Remove disabled wikitext such as comments + and pre. If None (default), this is enabled when neither + mwparserfromhell nor wikitextparser package is available and + disabled otherwise. + @param strip: if enabled, strip arguments and values of templates. + If None (default), this is enabled when neither mwparserfromhell + nor wikitextparser package is available and disabled otherwise. + @return: list of template name and params + """ + use_regex = isinstance(wikitextparser, ImportError) + + if remove_disabled_parts is None: + remove_disabled_parts = use_regex + if remove_disabled_parts: + text = removeDisabledParts(text) + + if strip is None: + strip = use_regex + + if use_regex: + return extract_templates_and_params_regex(text, False, strip) + return _extract_templates_and_params_parser(text, strip) + + +def _extract_templates_and_params_parser(text: str, + strip: bool = False) -> ETPType: + """ + Extract templates with params using mwparserfromhell. + + This function should not be called directly. + + Use extract_templates_and_params, which will select this parser + implementation if the mwparserfromhell or wikitextparser package is + installed. + + @param text: The wikitext from which templates are extracted + @param strip: if enabled, strip arguments and values of templates @return: list of template name and params """ def explicit(param): @@ -1619,9 +1645,6 @@ attr = not param.positional return attr
- if remove_disabled_parts: - text = removeDisabledParts(text) - parser_name = wikitextparser.__name__ pywikibot.log('Using {!r} wikitext parser'.format(parser_name))
@@ -1660,21 +1683,20 @@ future_warning=True) def extract_templates_and_params_mwpfh(text: str, strip: bool = False) -> ETPType: - """DEPRECATED. Extract templates with params using mwparserfromhell.""" + """Extract templates with params using mwparserfromhell.""" global wikitextparser saved_parser = wikitextparser import mwparserfromhell as wikitextparser - result = extract_templates_and_params(text, strip=strip) + result = _extract_templates_and_params_parser(text, strip) wikitextparser = saved_parser return result
-@deprecated('extract_templates_and_params', since='20210331', - future_warning=True) def extract_templates_and_params_regex(text: str, remove_disabled_parts: bool = True, strip: bool = True) -> ETPType: - """DEPRECATED. Extract templates with params using a regex. + """ + Extract templates with params using a regex with additional processing.
This function should not be called directly.
diff --git a/requirements.txt b/requirements.txt index 2a6cbc4..8b64aec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ # It is organised so that simple requirements # are processed first, and more difficult packages # are last. +# All dependencies other than requests are optional. # # It is good practise to install packages using the system # package manager if it has a packaged version. If you are @@ -17,18 +18,12 @@ # or # $ awk -F '[#>=]' '{print $1}' requirements.txt | xargs apt-cache search
-# mandatory dependencies, others are optional +# mandatory requests>=2.20.1, < 2.26.0; python_version < '3.6' requests>=2.20.1 ; python_version >= '3.6' setuptools>=20.2, !=50.0.0, <50.2.0 ; python_version < '3.6' setuptools>=20.2 ; python_version >= '3.6'
-# MediaWiki markup parser -# mwparserfromhell is default, wikitextparser can be used instead -# mwparserfromhell is still required for commons_information.py and patrol.py -# wikitextparser>=0.47.0 -mwparserfromhell>=0.5.0 - # OAuth support # mwoauth 0.2.4 is needed because it supports getting identity information # about the user @@ -47,6 +42,9 @@ google >= 1.7 sseclient >= 0.0.18,!=0.0.23,!=0.0.24
+# textlib.py, commons_information and patrol.py +mwparserfromhell>=0.5.0 + # The mysql generator in pagegenerators depends on PyMySQL PyMySQL >= 0.6.7, < 1.0.0 ; python_version < '3.6' PyMySQL >= 1.0.0 ; python_version >= '3.6' diff --git a/setup.py b/setup.py index 2ae5ca7..6b34da3 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,6 @@ 'Graphviz': ['pydot>=1.2'], 'Google': ['google>=1.7'], 'mwparserfromhell': ['mwparserfromhell>=0.5.0'], - 'wikitextparser': ['wikitextparser>=0.47.0'], 'Tkinter': [ # vulnerability found in Pillow<8.1.1 'Pillow>=8.1.1;python_version>="3.6"', ], @@ -111,7 +110,7 @@ 'setuptools>=20.2, !=50.0.0, <50.2.0 ; python_version < "3.6"', 'setuptools>=20.2 ; python_version >= "3.6"', ] -# in addition either mwparserfromhell or wikitextparser is required +
# ------- setup tests_require ------- # test_deps = ['mock'] diff --git a/tests/__init__.py b/tests/__init__.py index 596f1ce..04f61c3 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -24,8 +24,7 @@ # Verify that the unit tests have a base working environment: # - requests is mandatory # however if unavailable this will fail on use; see pywikibot/tools.py -# - mwparserfromhell or wikitextparser is mandatory but the dependency -# is checked by textlib already +# - mwparserfromhell is optional, so is only imported in textlib_tests import requests # noqa: F401
import pywikibot.data.api diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index aed174f..dadc2a3 100644 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -23,10 +23,8 @@ from pywikibot import UnknownSite
from tests.aspects import ( - DefaultDrySiteTestCase, - require_modules, - SiteAttributeTestCase, - TestCase, + require_modules, TestCase, DefaultDrySiteTestCase, + PatchingTestCase, SiteAttributeTestCase, ) from tests import mock
@@ -304,7 +302,7 @@ 'Invalid category title extracted: nasty{{{!}}')
-WARNING_MSG = (r'.*extract_templates_and_params_.*' +WARNING_MSG = (r'.*extract_templates_and_params_mwpfh .*' r'is deprecated for .*; use extract_templates_and_params')
@@ -504,7 +502,7 @@ @require_modules('mwparserfromhell') def test_extract_templates_params_parser_stripped(self): """Test using mwparserfromhell with stripping.""" - func = functools.partial(textlib.extract_templates_and_params, + func = functools.partial(textlib._extract_templates_and_params_parser, strip=True)
self._common_results(func) @@ -537,39 +535,37 @@ """Test using many complex regexes.""" func = functools.partial(textlib.extract_templates_and_params_regex, remove_disabled_parts=False, strip=False) - with suppress_warnings(WARNING_MSG, category=FutureWarning): - self._common_results(func) - self._order_differs(func) - self._unstripped(func) - # FIXME: {} is normal text - self.assertEqual(func('{{a|b={} }}'), []) + self._common_results(func) + self._order_differs(func) + self._unstripped(func) + + self.assertEqual(func('{{a|b={} }}'), []) # FIXME: {} is normal text
def test_extract_templates_params_regex_stripped(self): """Test using many complex regexes with stripping.""" func = textlib.extract_templates_and_params_regex - with suppress_warnings(WARNING_MSG, category=FutureWarning): - self._common_results(func) - self._order_differs(func) - self._stripped(func)
- self.assertEqual(func('{{a|b=<!--{{{1}}}-->}}'), - [('a', OrderedDict((('b', ''), )))]) + self._common_results(func) + self._order_differs(func) + self._stripped(func)
- # Identical to mwpfh - self.assertCountEqual(func('{{a|{{c|{{d}}}}}}'), - [('c', OrderedDict((('1', '{{d}}'), ))), - ('a', OrderedDict([('1', '{{c|{{d}}}}')])), - ('d', OrderedDict()) - ]) + self.assertEqual(func('{{a|b=<!--{{{1}}}-->}}'), + [('a', OrderedDict((('b', ''), )))])
- # However fails to correctly handle three levels of balanced - # brackets with empty parameters - self.assertCountEqual(func('{{a|{{c|{{d|}}}}}}'), - [('c', OrderedDict((('1', '{{d|}}}'), ))), - ('d', OrderedDict([('1', '}')])) - ]) + # Identical to mwpfh + self.assertCountEqual(func('{{a|{{c|{{d}}}}}}'), + [('c', OrderedDict((('1', '{{d}}'), ))), + ('a', OrderedDict([('1', '{{c|{{d}}}}')])), + ('d', OrderedDict()) + ])
- @require_modules('mwparserfromhell') + # However fails to correctly handle three levels of balanced brackets + # with empty parameters + self.assertCountEqual(func('{{a|{{c|{{d|}}}}}}'), + [('c', OrderedDict((('1', '{{d|}}}'), ))), + ('d', OrderedDict([('1', '}')])) + ]) + def test_extract_templates_params(self): """Test that the normal entry point works.""" func = functools.partial(textlib.extract_templates_and_params, @@ -703,6 +699,79 @@ self.assertTrue(m.group(0).endswith('foo {{bar}}'))
+class TestGenericTemplateParams(PatchingTestCase): + + """Test whether the generic function forwards the call correctly.""" + + net = False + + @PatchingTestCase.patched(textlib, '_extract_templates_and_params_parser') + def extract_mwpfh(self, text, *args, **kwargs): + """Patched call to extract_templates_and_params_mwpfh.""" + self._text = text + self._args = args + self._mwpfh = True + + @PatchingTestCase.patched(textlib, 'extract_templates_and_params_regex') + def extract_regex(self, text, *args, **kwargs): + """Patched call to extract_templates_and_params_regex.""" + self._text = text + self._args = args + self._mwpfh = False + + def test_removing_disabled_parts_regex(self): + """Test removing disabled parts when using the regex variant.""" + self.patch(textlib, 'wikitextparser', ImportError()) + textlib.extract_templates_and_params('{{a<!-- -->}}', True) + self.assertEqual(self._text, '{{a}}') + self.assertFalse(self._mwpfh) + textlib.extract_templates_and_params('{{a<!-- -->}}', False) + self.assertEqual(self._text, '{{a<!-- -->}}') + self.assertFalse(self._mwpfh) + textlib.extract_templates_and_params('{{a<!-- -->}}') + self.assertEqual(self._text, '{{a}}') + self.assertFalse(self._mwpfh) + + @require_modules('mwparserfromhell') + def test_removing_disabled_parts_mwpfh(self): + """Test removing disabled parts when using the mwpfh variant.""" + textlib.extract_templates_and_params('{{a<!-- -->}}', True) + self.assertEqual(self._text, '{{a}}') + self.assertTrue(self._mwpfh) + textlib.extract_templates_and_params('{{a<!-- -->}}', False) + self.assertEqual(self._text, '{{a<!-- -->}}') + self.assertTrue(self._mwpfh) + textlib.extract_templates_and_params('{{a<!-- -->}}') + self.assertEqual(self._text, '{{a<!-- -->}}') + self.assertTrue(self._mwpfh) + + def test_strip_regex(self): + """Test stripping values when using the regex variant.""" + self.patch(textlib, 'wikitextparser', ImportError()) + textlib.extract_templates_and_params('{{a| foo }}', False, True) + 
self.assertEqual(self._args, (False, True)) + self.assertFalse(self._mwpfh) + textlib.extract_templates_and_params('{{a| foo }}', False, False) + self.assertEqual(self._args, (False, False)) + self.assertFalse(self._mwpfh) + textlib.extract_templates_and_params('{{a| foo }}', False) + self.assertEqual(self._args, (False, True)) + self.assertFalse(self._mwpfh) + + @require_modules('mwparserfromhell') + def test_strip_mwpfh(self): + """Test stripping values when using the mwpfh variant.""" + textlib.extract_templates_and_params('{{a| foo }}', None, True) + self.assertEqual(self._args, (True, )) + self.assertTrue(self._mwpfh) + textlib.extract_templates_and_params('{{a| foo }}', None, False) + self.assertEqual(self._args, (False, )) + self.assertTrue(self._mwpfh) + textlib.extract_templates_and_params('{{a| foo }}') + self.assertEqual(self._args, (False, )) + self.assertTrue(self._mwpfh) + + class TestDisabledParts(DefaultDrySiteTestCase):
"""Test the removeDisabledParts function in textlib.""" diff --git a/tox.ini b/tox.ini index cb76128..d68e21d 100644 --- a/tox.ini +++ b/tox.ini @@ -38,17 +38,10 @@ fasttest: pytest-attrib>=0.1.3 fasttest: pytest-subtests >= 0.3.2 fasttest: mock - fasttest: .[mwparserfromhell] fasttest: .[scripts]
- fasttest-py35: .[html] - fasttest-py37: .[wikitextparser] - - deeptest: .[html] - deeptest: .[mwparserfromhell] - deeptest: .[scripts] - deeptest: .[wikitextparser] - + fasttest-py35: mwparserfromhell + fasttest-py35: beautifulsoup4
[testenv:commit-message] basepython = python3 @@ -63,7 +56,6 @@ nosetests --with-doctest pywikibot {[params]doctest_skip} deps = nose - .[mwparserfromhell]
[testenv:venv] commands = {posargs}
pywikibot-commits@lists.wikimedia.org