jenkins-bot has submitted this change and it was merged.
Change subject: textlib.extract_templates_and_params tests ......................................................................
textlib.extract_templates_and_params tests
Allow textlib.extract_templates_and_params_mwpfh to be accessed and tested even if config.use_mwparserfromhell is disabled.
Delay import of mwparserfromhell until it is actually used.
Merge common test results for mwpfh and regex, highlighting the case known to be different, and mention them in the docstring.
Change-Id: Id6a17940f241d95d9e8dc9b86131dec3989ea36a --- M pywikibot/textlib.py M tests/textlib_tests.py 2 files changed, 119 insertions(+), 34 deletions(-)
Approvals: John Vandenberg: Looks good to me, but someone else must approve; XZise: Looks good to me, approved; jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 859c290..ed25218 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -14,19 +14,14 @@ __version__ = '$Id$' #
-try: - import mwparserfromhell -except ImportError: - mwparserfromhell = False +import datetime +import re +import sys
try: from collections import OrderedDict except ImportError: from ordereddict import OrderedDict - -import datetime -import re -import sys
if sys.version_info[0] > 2: from html.parser import HTMLParser @@ -966,17 +961,53 @@ parameters, and if this results multiple parameters with the same name only the last value provided will be returned.
- This uses a third party library (mwparserfromhell) if it is installed - and enabled in the user-config.py. Otherwise it falls back on a - regex based function defined below. + This uses the package L{mwparserfromhell} (mwpfh) if it is installed + and enabled by config.mwparserfromhell. Otherwise it falls back on a + regex based implementation. + + There are minor differences between the two implementations. + + The two implementations return nested templates in a different order. + i.e. for {{a|b={{c}}}}, mwpfh returns [a, c], whereas regex returns [c, a]. + + mwpfh preserves whitespace in parameter names and values. regex excludes + anything between <!-- --> before parsing the text.
@param text: The wikitext from which templates are extracted @type text: unicode or string @return: list of template name and params @rtype: list of tuple """ - if not (config.use_mwparserfromhell and mwparserfromhell): + use_mwparserfromhell = config.use_mwparserfromhell + if use_mwparserfromhell: + try: + import mwparserfromhell # noqa + except ImportError: + use_mwparserfromhell = False + + if use_mwparserfromhell: + return extract_templates_and_params_mwpfh(text) + else: return extract_templates_and_params_regex(text) + + +def extract_templates_and_params_mwpfh(text): + """ + Extract templates with params using mwparserfromhell. + + This function should not be called directly. + + Use extract_templates_and_params, which will select this + mwparserfromhell implementation if based on whether the + mwparserfromhell package is installed and enabled by + config.mwparserfromhell. + + @param text: The wikitext from which templates are extracted + @type text: unicode or string + @return: list of template name and params + @rtype: list of tuple + """ + import mwparserfromhell code = mwparserfromhell.parse(text) result = [] for template in code.filter_templates(recursive=True): diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index 10dd200..630ced4 100644 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -7,10 +7,6 @@ # __version__ = '$Id$'
-try: - import mwparserfromhell -except ImportError: - mwparserfromhell = False import codecs import os
@@ -53,24 +49,6 @@
def testCurrentBehaviour(self): self.assertContains("enwiki_help_editing", u"Editing") - - def testExtractTemplates(self): - if not (pywikibot.config.use_mwparserfromhell and mwparserfromhell): - raise unittest.SkipTest('mwparserfromhell not available or enabled') - func = textlib.extract_templates_and_params # It's really long. - self.assertEqual(func('{{a}}'), [('a', OrderedDict())]) - self.assertEqual(func('{{a|b=c}}'), [('a', OrderedDict((('b', 'c'), )))]) - self.assertEqual(func('{{a|b|c=d}}'), [('a', OrderedDict((('1', 'b'), ('c', 'd'))))]) - self.assertEqual(func('{{a|b={{c}}}}'), [('a', OrderedDict((('b', '{{c}}'), ))), ('c', OrderedDict())]) - self.assertEqual(func('{{a|b=c|f=g|d=e|1=}}'), [('a', OrderedDict((('b', 'c'), ('f', 'g'), ('d', 'e'), ('1', ''))))]) - - def testExtractTemplatesRegex(self): - func = textlib.extract_templates_and_params_regex # It's really long. - self.assertEqual(func('{{a}}'), [('a', OrderedDict())]) - self.assertEqual(func('{{a|b=c}}'), [('a', OrderedDict((('b', 'c'), )))]) - self.assertEqual(func('{{a|b|c=d}}'), [('a', OrderedDict((('1', 'b'), ('c', 'd'))))]) - self.assertEqual(func('{{a|b={{c}}}}'), [('c', OrderedDict()), ('a', OrderedDict((('b', '{{c}}'), )))]) - self.assertEqual(func('{{a|b=c|f=g|d=e|1=}}'), [('a', OrderedDict((('b', 'c'), ('f', 'g'), ('d', 'e'), ('1', ''))))])
def testSpacesInSection(self): self.assertContains("enwiki_help_editing", u"Minor_edits") @@ -218,6 +196,82 @@ '[[Category:nasty{{{!}}]]', self.site)
+class TestTemplateParams(TestCase): + + """Test to verify that template params extraction works.""" + + net = False + + def _extract_templates_params(self, func): + self.assertEqual(func('{{a}}'), [('a', OrderedDict())]) + self.assertEqual(func('{{ a}}'), [('a', OrderedDict())]) + self.assertEqual(func('{{a }}'), [('a', OrderedDict())]) + self.assertEqual(func('{{ a }}'), [('a', OrderedDict())]) + self.assertEqual(func('{{a|b=c}}'), [('a', OrderedDict((('b', 'c'), )))]) + self.assertEqual(func('{{a|b|c=d}}'), [('a', OrderedDict((('1', 'b'), ('c', 'd'))))]) + self.assertEqual(func('{{a|b=c|f=g|d=e|1=}}'), [('a', OrderedDict((('b', 'c'), ('f', 'g'), ('d', 'e'), ('1', ''))))]) + self.assertEqual(func('{{a|1=2|c=d}}'), [('a', OrderedDict((('1', '2'), ('c', 'd'))))]) + self.assertEqual(func('{{a|c=d|1=2}}'), [('a', OrderedDict((('c', 'd'), ('1', '2'))))]) + self.assertEqual(func('{{a|5=d|a=b}}'), [('a', OrderedDict((('5', 'd'), ('a', 'b'))))]) + self.assertEqual(func('{{a|=2}}'), [('a', OrderedDict((('', '2'), )))]) + self.assertEqual(func('{{a|=|}}'), [('a', OrderedDict((('', ''), ('1', ''))))]) + self.assertEqual(func('{{a||}}'), [('a', OrderedDict((('1', ''), ('2', ''))))]) + self.assertEqual(func('{{a|b={{{1}}}}}'), [('a', OrderedDict((('b', '{{{1}}}'), )))]) + self.assertEqual(func('{{a|b=<noinclude>{{{1}}}</noinclude>}}'), [('a', OrderedDict((('b', '<noinclude>{{{1}}}</noinclude>'), )))]) + self.assertEqual(func('{{subst:a|b=c}}'), [('subst:a', OrderedDict((('b', 'c'), )))]) + self.assertEqual(func('{{safesubst:a|b=c}}'), [('safesubst:a', OrderedDict((('b', 'c'), )))]) + self.assertEqual(func('{{msgnw:a|b=c}}'), [('msgnw:a', OrderedDict((('b', 'c'), )))]) + self.assertEqual(func('{{Template:a|b=c}}'), [('Template:a', OrderedDict((('b', 'c'), )))]) + self.assertEqual(func('{{template:a|b=c}}'), [('template:a', OrderedDict((('b', 'c'), )))]) + self.assertEqual(func('{{:a|b=c}}'), [(':a', OrderedDict((('b', 'c'), )))]) + self.assertEqual(func('{{subst::a|b=c}}'), 
[('subst::a', OrderedDict((('b', 'c'), )))]) + + def test_extract_templates_params_mwpfh(self): + try: + import mwparserfromhell # noqa + except ImportError: + raise unittest.SkipTest('mwparserfromhell not available') + + func = textlib.extract_templates_and_params_mwpfh + self._extract_templates_params(func) + + self.assertEqual(func('{{a|}}'), [('a', OrderedDict((('1', ''), )))]) + + self.assertEqual(func('{{a| b=c}}'), [('a', OrderedDict(((' b', 'c'), )))]) + self.assertEqual(func('{{a|b =c}}'), [('a', OrderedDict((('b ', 'c'), )))]) + self.assertEqual(func('{{a|b= c}}'), [('a', OrderedDict((('b', ' c'), )))]) + self.assertEqual(func('{{a|b=c }}'), [('a', OrderedDict((('b', 'c '), )))]) + + self.assertEqual(func('{{a| b={{c}}}}'), [('a', OrderedDict(((' b', '{{c}}'), ))), ('c', OrderedDict())]) + self.assertEqual(func('{{a|b={{c}}}}'), [('a', OrderedDict((('b', '{{c}}'), ))), ('c', OrderedDict())]) + self.assertEqual(func('{{a|b= {{c}}}}'), [('a', OrderedDict((('b', ' {{c}}'), ))), ('c', OrderedDict())]) + self.assertEqual(func('{{a|b={{c}} }}'), [('a', OrderedDict((('b', '{{c}} '), ))), ('c', OrderedDict())]) + + self.assertEqual(func('{{a|b=<!--{{{1}}}-->}}'), [('a', OrderedDict((('b', '<!--{{{1}}}-->'), )))]) + + def test_extract_templates_params_regex(self): + func = textlib.extract_templates_and_params_regex + self._extract_templates_params(func) + + self.assertEqual(func('{{a|}}'), []) # FIXME: this is a bug + + self.assertEqual(func('{{a| b=c}}'), [('a', OrderedDict((('b', 'c'), )))]) + self.assertEqual(func('{{a|b =c}}'), [('a', OrderedDict((('b', 'c'), )))]) + self.assertEqual(func('{{a|b= c}}'), [('a', OrderedDict((('b', 'c'), )))]) + self.assertEqual(func('{{a|b=c }}'), [('a', OrderedDict((('b', 'c'), )))]) + + self.assertEqual(func('{{a| b={{c}}}}'), [('c', OrderedDict()), ('a', OrderedDict((('b', '{{c}}'), )))]) + self.assertEqual(func('{{a|b={{c}}}}'), [('c', OrderedDict()), ('a', OrderedDict((('b', '{{c}}'), )))]) + self.assertEqual(func('{{a|b= 
{{c}}}}'), [('c', OrderedDict()), ('a', OrderedDict((('b', '{{c}}'), )))]) + self.assertEqual(func('{{a|b={{c}} }}'), [('c', OrderedDict()), ('a', OrderedDict((('b', '{{c}}'), )))]) + + self.assertEqual(func('{{a|b=<!--{{{1}}}-->}}'), [('a', OrderedDict((('b', ''), )))]) + + def test_extract_templates_params(self): + self._extract_templates_params( + textlib.extract_templates_and_params) + + class TestLocalDigits(TestCase):
"""Test to verify that local digits are correctly being handled."""
pywikibot-commits@lists.wikimedia.org