jenkins-bot submitted this change.

View Change

Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
Revert "[parser] Make mwparserfromhell or wikitextparser mandatory"

This reverts commit 908abeb94b2eecf6f2210cff2a9280a19db85787.

Reason for revert:
CI tests are failing;
postponed to a later release until deprecation time has expired

Change-Id: I10ca06e5ca9944aa76c71f6be9269efae4f8e142
---
M README.rst
M pwb.py
M pywikibot/textlib.py
M requirements.txt
M setup.py
M tests/__init__.py
M tests/textlib_tests.py
M tox.ini
8 files changed, 165 insertions(+), 100 deletions(-)

diff --git a/README.rst b/README.rst
index 8872f7d..6b1a91f 100644
--- a/README.rst
+++ b/README.rst
@@ -44,7 +44,6 @@

::

- pip install requests
git clone https://gerrit.wikimedia.org/r/pywikibot/core.git
cd core
git submodule update --init
@@ -57,18 +56,6 @@
pip install -U setuptools
pip install pywikibot

-In addition a MediaWiki markup parser is required. Please install one of them:
-
-::
-
- pip install mwparserfromhell
-
-or
-
-::
-
- pip install wikitextparser
-
Our `installation
guide <https://www.mediawiki.org/wiki/Manual:Pywikibot/Installation>`_
has more details for advanced usage.
diff --git a/pwb.py b/pwb.py
index 95abb09..80bbc7e 100755
--- a/pwb.py
+++ b/pwb.py
@@ -14,7 +14,7 @@

python pwb.py -lang:de bot_tests -v
"""
-# (C) Pywikibot team, 2012-2021
+# (C) Pywikibot team, 2012-2020
#
# Distributed under the terms of the MIT license.
#
@@ -182,8 +182,9 @@
try:
if not check_modules():
raise RuntimeError('') # no further output needed
-except RuntimeError as e: # setup.py may also raise RuntimeError
- sys.exit(e)
+except RuntimeError as e:
+ print(e)
+ sys.exit()

from pathlib import Path # noqa: E402

@@ -212,8 +213,6 @@
# we need to re-start the entire process. Ask the user to do so.
print('Now, you have to re-execute the command to start your script.')
sys.exit(1)
-except ImportError as e: # raised in textlib
- sys.exit(e)


def find_alternates(filename, script_paths):
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 8807069..2ee62d1 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -35,18 +35,8 @@
except ImportError:
try:
import mwparserfromhell as wikitextparser
- except ImportError:
- # print required because pywikibot is not imported completely
- raise ImportError("""
-Pywikibot is missing a MediaWiki markup parser which is necessary.
-Please update the required module with either
-
- pip install "mwparserfromhell>=0.5.0"
-
-or
-
- pip install "wikitextparser>=0.47.0"
-""") from None
+ except ImportError as e:
+ wikitextparser = e

ETPType = List[Tuple[str, OrderedDictType[str, str]]]

@@ -1580,8 +1570,8 @@
# --------------------------------

def extract_templates_and_params(text: str,
- remove_disabled_parts: bool = False,
- strip: bool = False) -> ETPType:
+ remove_disabled_parts: Optional[bool] = None,
+ strip: Optional[bool] = None) -> ETPType:
"""Return a list of templates found in text.

Return value is a list of tuples. There is one tuple for each use of a
@@ -1592,14 +1582,16 @@
parameters, and if this results multiple parameters with the same name
only the last value provided will be returned.

- This uses the package L{mwparserfromhell} or L{wikitextparser} as
- MediaWiki markup parser. It is mandatory that one of them is
- installed.
+ This uses the package L{mwparserfromhell} (mwpfh) if it is installed.
+ Otherwise it falls back on a regex based implementation.

There are minor differences between the two implementations.

- The parser packages preserves whitespace in parameter names and
- values.
+ The two implementations return nested templates in a different order.
+ i.e. for {{a|b={{c}}}}, mwpfh returns [a, c], whereas regex returns [c, a].
+
+ mwpfh preserves whitespace in parameter names and values. regex excludes
+ anything between <!-- --> before parsing the text.

If there are multiple numbered parameters in the wikitext for the same
position, MediaWiki will only use the last parameter value.
@@ -1607,9 +1599,43 @@
To replicate that behaviour, enable both remove_disabled_parts and strip.

@param text: The wikitext from which templates are extracted
- @param remove_disabled_parts: If enabled, remove disabled wikitext
- such as comments and pre.
- @param strip: If enabled, strip arguments and values of templates.
+ @param remove_disabled_parts: Remove disabled wikitext such as comments
+ and pre. If None (default), this is enabled when neither
+ mwparserfromhell nor wikitextparser package is available and
+ disabled otherwise.
+ @param strip: if enabled, strip arguments and values of templates.
+ If None (default), this is enabled when neither mwparserfromhell
+ nor wikitextparser package is available and disabled otherwise.
+ @return: list of template name and params
+ """
+ use_regex = isinstance(wikitextparser, ImportError)
+
+ if remove_disabled_parts is None:
+ remove_disabled_parts = use_regex
+ if remove_disabled_parts:
+ text = removeDisabledParts(text)
+
+ if strip is None:
+ strip = use_regex
+
+ if use_regex:
+ return extract_templates_and_params_regex(text, False, strip)
+ return _extract_templates_and_params_parser(text, strip)
+
+
+def _extract_templates_and_params_parser(text: str,
+ strip: bool = False) -> ETPType:
+ """
+ Extract templates with params using mwparserfromhell.
+
+ This function should not be called directly.
+
+ Use extract_templates_and_params, which will select this parser
+ implementation if the mwparserfromhell or wikitextparser package is
+ installed.
+
+ @param text: The wikitext from which templates are extracted
+ @param strip: if enabled, strip arguments and values of templates
@return: list of template name and params
"""
def explicit(param):
@@ -1619,9 +1645,6 @@
attr = not param.positional
return attr

- if remove_disabled_parts:
- text = removeDisabledParts(text)
-
parser_name = wikitextparser.__name__
pywikibot.log('Using {!r} wikitext parser'.format(parser_name))

@@ -1660,21 +1683,20 @@
future_warning=True)
def extract_templates_and_params_mwpfh(text: str,
strip: bool = False) -> ETPType:
- """DEPRECATED. Extract templates with params using mwparserfromhell."""
+ """Extract templates with params using mwparserfromhell."""
global wikitextparser
saved_parser = wikitextparser
import mwparserfromhell as wikitextparser
- result = extract_templates_and_params(text, strip=strip)
+ result = _extract_templates_and_params_parser(text, strip)
wikitextparser = saved_parser
return result


-@deprecated('extract_templates_and_params', since='20210331',
- future_warning=True)
def extract_templates_and_params_regex(text: str,
remove_disabled_parts: bool = True,
strip: bool = True) -> ETPType:
- """DEPRECATED. Extract templates with params using a regex.
+ """
+ Extract templates with params using a regex with additional processing.

This function should not be called directly.

diff --git a/requirements.txt b/requirements.txt
index 2a6cbc4..8b64aec 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,7 @@
# It is organised so that simple requirements
# are processed first, and more difficult packages
# are last.
+# All dependencies other than requests are optional.
#
# It is good practise to install packages using the system
# package manager if it has a packaged version. If you are
@@ -17,18 +18,12 @@
# or
# $ awk -F '[#>=]' '{print $1}' requirements.txt | xargs apt-cache search

-# mandatory dependencies, others are optional
+# mandatory
requests>=2.20.1, < 2.26.0; python_version < '3.6'
requests>=2.20.1 ; python_version >= '3.6'
setuptools>=20.2, !=50.0.0, <50.2.0 ; python_version < '3.6'
setuptools>=20.2 ; python_version >= '3.6'

-# MediaWiki markup parser
-# mwparserfromhell is default, wikitextparser can be used instead
-# mwparserfromhell is still required for commons_information.py and patrol.py
-# wikitextparser>=0.47.0
-mwparserfromhell>=0.5.0
-
# OAuth support
# mwoauth 0.2.4 is needed because it supports getting identity information
# about the user
@@ -47,6 +42,9 @@
google >= 1.7
sseclient >= 0.0.18,!=0.0.23,!=0.0.24

+# textlib.py, commons_information and patrol.py
+mwparserfromhell>=0.5.0
+
# The mysql generator in pagegenerators depends on PyMySQL
PyMySQL >= 0.6.7, < 1.0.0 ; python_version < '3.6'
PyMySQL >= 1.0.0 ; python_version >= '3.6'
diff --git a/setup.py b/setup.py
index 2ae5ca7..6b34da3 100644
--- a/setup.py
+++ b/setup.py
@@ -62,7 +62,6 @@
'Graphviz': ['pydot>=1.2'],
'Google': ['google>=1.7'],
'mwparserfromhell': ['mwparserfromhell>=0.5.0'],
- 'wikitextparser': ['wikitextparser>=0.47.0'],
'Tkinter': [ # vulnerability found in Pillow<8.1.1
'Pillow>=8.1.1;python_version>="3.6"',
],
@@ -111,7 +110,7 @@
'setuptools>=20.2, !=50.0.0, <50.2.0 ; python_version < "3.6"',
'setuptools>=20.2 ; python_version >= "3.6"',
]
-# in addition either mwparserfromhell or wikitextparser is required
+

# ------- setup tests_require ------- #
test_deps = ['mock']
diff --git a/tests/__init__.py b/tests/__init__.py
index 596f1ce..04f61c3 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -24,8 +24,7 @@
# Verify that the unit tests have a base working environment:
# - requests is mandatory
# however if unavailable this will fail on use; see pywikibot/tools.py
-# - mwparserfromhell or wikitextparser is mandatory but the dependency
-# is checked by textlib already
+# - mwparserfromhell is optional, so is only imported in textlib_tests
import requests # noqa: F401

import pywikibot.data.api
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index aed174f..dadc2a3 100644
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -23,10 +23,8 @@
from pywikibot import UnknownSite

from tests.aspects import (
- DefaultDrySiteTestCase,
- require_modules,
- SiteAttributeTestCase,
- TestCase,
+ require_modules, TestCase, DefaultDrySiteTestCase,
+ PatchingTestCase, SiteAttributeTestCase,
)
from tests import mock

@@ -304,7 +302,7 @@
'Invalid category title extracted: nasty{{{!}}')


-WARNING_MSG = (r'.*extract_templates_and_params_.*'
+WARNING_MSG = (r'.*extract_templates_and_params_mwpfh .*'
r'is deprecated for .*; use extract_templates_and_params')


@@ -504,7 +502,7 @@
@require_modules('mwparserfromhell')
def test_extract_templates_params_parser_stripped(self):
"""Test using mwparserfromhell with stripping."""
- func = functools.partial(textlib.extract_templates_and_params,
+ func = functools.partial(textlib._extract_templates_and_params_parser,
strip=True)

self._common_results(func)
@@ -537,39 +535,37 @@
"""Test using many complex regexes."""
func = functools.partial(textlib.extract_templates_and_params_regex,
remove_disabled_parts=False, strip=False)
- with suppress_warnings(WARNING_MSG, category=FutureWarning):
- self._common_results(func)
- self._order_differs(func)
- self._unstripped(func)
- # FIXME: {} is normal text
- self.assertEqual(func('{{a|b={} }}'), [])
+ self._common_results(func)
+ self._order_differs(func)
+ self._unstripped(func)
+
+ self.assertEqual(func('{{a|b={} }}'), []) # FIXME: {} is normal text

def test_extract_templates_params_regex_stripped(self):
"""Test using many complex regexes with stripping."""
func = textlib.extract_templates_and_params_regex
- with suppress_warnings(WARNING_MSG, category=FutureWarning):
- self._common_results(func)
- self._order_differs(func)
- self._stripped(func)

- self.assertEqual(func('{{a|b=<!--{{{1}}}-->}}'),
- [('a', OrderedDict((('b', ''), )))])
+ self._common_results(func)
+ self._order_differs(func)
+ self._stripped(func)

- # Identical to mwpfh
- self.assertCountEqual(func('{{a|{{c|{{d}}}}}}'),
- [('c', OrderedDict((('1', '{{d}}'), ))),
- ('a', OrderedDict([('1', '{{c|{{d}}}}')])),
- ('d', OrderedDict())
- ])
+ self.assertEqual(func('{{a|b=<!--{{{1}}}-->}}'),
+ [('a', OrderedDict((('b', ''), )))])

- # However fails to correctly handle three levels of balanced
- # brackets with empty parameters
- self.assertCountEqual(func('{{a|{{c|{{d|}}}}}}'),
- [('c', OrderedDict((('1', '{{d|}}}'), ))),
- ('d', OrderedDict([('1', '}')]))
- ])
+ # Identical to mwpfh
+ self.assertCountEqual(func('{{a|{{c|{{d}}}}}}'),
+ [('c', OrderedDict((('1', '{{d}}'), ))),
+ ('a', OrderedDict([('1', '{{c|{{d}}}}')])),
+ ('d', OrderedDict())
+ ])

- @require_modules('mwparserfromhell')
+ # However fails to correctly handle three levels of balanced brackets
+ # with empty parameters
+ self.assertCountEqual(func('{{a|{{c|{{d|}}}}}}'),
+ [('c', OrderedDict((('1', '{{d|}}}'), ))),
+ ('d', OrderedDict([('1', '}')]))
+ ])
+
def test_extract_templates_params(self):
"""Test that the normal entry point works."""
func = functools.partial(textlib.extract_templates_and_params,
@@ -703,6 +699,79 @@
self.assertTrue(m.group(0).endswith('foo {{bar}}'))


+class TestGenericTemplateParams(PatchingTestCase):
+
+ """Test whether the generic function forwards the call correctly."""
+
+ net = False
+
+ @PatchingTestCase.patched(textlib, '_extract_templates_and_params_parser')
+ def extract_mwpfh(self, text, *args, **kwargs):
+ """Patched call to _extract_templates_and_params_parser."""
+ self._text = text
+ self._args = args
+ self._mwpfh = True
+
+ @PatchingTestCase.patched(textlib, 'extract_templates_and_params_regex')
+ def extract_regex(self, text, *args, **kwargs):
+ """Patched call to extract_templates_and_params_regex."""
+ self._text = text
+ self._args = args
+ self._mwpfh = False
+
+ def test_removing_disabled_parts_regex(self):
+ """Test removing disabled parts when using the regex variant."""
+ self.patch(textlib, 'wikitextparser', ImportError())
+ textlib.extract_templates_and_params('{{a<!-- -->}}', True)
+ self.assertEqual(self._text, '{{a}}')
+ self.assertFalse(self._mwpfh)
+ textlib.extract_templates_and_params('{{a<!-- -->}}', False)
+ self.assertEqual(self._text, '{{a<!-- -->}}')
+ self.assertFalse(self._mwpfh)
+ textlib.extract_templates_and_params('{{a<!-- -->}}')
+ self.assertEqual(self._text, '{{a}}')
+ self.assertFalse(self._mwpfh)
+
+ @require_modules('mwparserfromhell')
+ def test_removing_disabled_parts_mwpfh(self):
+ """Test removing disabled parts when using the mwpfh variant."""
+ textlib.extract_templates_and_params('{{a<!-- -->}}', True)
+ self.assertEqual(self._text, '{{a}}')
+ self.assertTrue(self._mwpfh)
+ textlib.extract_templates_and_params('{{a<!-- -->}}', False)
+ self.assertEqual(self._text, '{{a<!-- -->}}')
+ self.assertTrue(self._mwpfh)
+ textlib.extract_templates_and_params('{{a<!-- -->}}')
+ self.assertEqual(self._text, '{{a<!-- -->}}')
+ self.assertTrue(self._mwpfh)
+
+ def test_strip_regex(self):
+ """Test stripping values when using the regex variant."""
+ self.patch(textlib, 'wikitextparser', ImportError())
+ textlib.extract_templates_and_params('{{a| foo }}', False, True)
+ self.assertEqual(self._args, (False, True))
+ self.assertFalse(self._mwpfh)
+ textlib.extract_templates_and_params('{{a| foo }}', False, False)
+ self.assertEqual(self._args, (False, False))
+ self.assertFalse(self._mwpfh)
+ textlib.extract_templates_and_params('{{a| foo }}', False)
+ self.assertEqual(self._args, (False, True))
+ self.assertFalse(self._mwpfh)
+
+ @require_modules('mwparserfromhell')
+ def test_strip_mwpfh(self):
+ """Test stripping values when using the mwpfh variant."""
+ textlib.extract_templates_and_params('{{a| foo }}', None, True)
+ self.assertEqual(self._args, (True, ))
+ self.assertTrue(self._mwpfh)
+ textlib.extract_templates_and_params('{{a| foo }}', None, False)
+ self.assertEqual(self._args, (False, ))
+ self.assertTrue(self._mwpfh)
+ textlib.extract_templates_and_params('{{a| foo }}')
+ self.assertEqual(self._args, (False, ))
+ self.assertTrue(self._mwpfh)
+
+
class TestDisabledParts(DefaultDrySiteTestCase):

"""Test the removeDisabledParts function in textlib."""
diff --git a/tox.ini b/tox.ini
index cb76128..d68e21d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -38,17 +38,10 @@
fasttest: pytest-attrib>=0.1.3
fasttest: pytest-subtests >= 0.3.2
fasttest: mock
- fasttest: .[mwparserfromhell]
fasttest: .[scripts]

- fasttest-py35: .[html]
- fasttest-py37: .[wikitextparser]
-
- deeptest: .[html]
- deeptest: .[mwparserfromhell]
- deeptest: .[scripts]
- deeptest: .[wikitextparser]
-
+ fasttest-py35: mwparserfromhell
+ fasttest-py35: beautifulsoup4

[testenv:commit-message]
basepython = python3
@@ -63,7 +56,6 @@
nosetests --with-doctest pywikibot {[params]doctest_skip}
deps =
nose
- .[mwparserfromhell]

[testenv:venv]
commands = {posargs}

To view, visit change 680016. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I10ca06e5ca9944aa76c71f6be9269efae4f8e142
Gerrit-Change-Number: 680016
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: DannyS712 <dannys712.wiki@gmail.com>
Gerrit-Reviewer: JJMC89 <JJMC89.Wikimedia@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Dalba <dalba.wiki@gmail.com>
Gerrit-CC: Dvorapa <dvorapa@seznam.cz>
Gerrit-MessageType: merged