jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/227073 )
Change subject: templatesWithParams: cache and standardise params ......................................................................
templatesWithParams: cache and standardise params
extract_templates_and_params has two implementations (regex and mwparserfromhell) which have had different settings for handling of unnecessary whitespace and disabled wikitext.
Page.templatesWithParams previously used either implementation, as-is, and therefore its returned value varied depending on the implementation in use.
Combined with 84bd04258, this change ensures that Page.templatesWithParams uses a consistent approach by always removing unnecessary whitespace and disabled wikitext, irrespective of the textlib implementation used.
This changeset means that the Page.templatesWithParams returned values can differ slightly from the results it previously returned.
The Page.templatesWithParams results are now also cached.
Bug: T113892 Change-Id: Id36011c93af673d07cb6169a7b43b562b985a102 --- M pywikibot/page.py 1 file changed, 33 insertions(+), 3 deletions(-)
Approvals: jenkins-bot: Verified Xqt: Looks good to me, approved
diff --git a/pywikibot/page.py b/pywikibot/page.py index 0f30b58..98954ce 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -637,6 +637,8 @@ @param value: basestring """ self._text = None if value is None else unicode(value) + if hasattr(self, '_raw_extracted_templates'): + del self._raw_extracted_templates
@text.deleter def text(self): @@ -645,6 +647,8 @@ del self._text if hasattr(self, '_expanded_text'): del self._expanded_text + if hasattr(self, '_raw_extracted_templates'): + del self._raw_extracted_templates
def preloadText(self): """ @@ -2220,20 +2224,46 @@ 'if source is a Site.') super(Page, self).__init__(source, title, ns)
+ @property + def raw_extracted_templates(self): + """ + Extract templates using L{textlib.extract_templates_and_params}. + + Disabled parts and whitespace are stripped, except for + whitespace in anonymous positional arguments. + + This value is cached. + + @rtype: list of (str, OrderedDict) + """ + if not hasattr(self, '_raw_extracted_templates'): + templates = textlib.extract_templates_and_params( + self.text, True, True) + self._raw_extracted_templates = templates + + return self._raw_extracted_templates + @deprecate_arg("get_redirect", None) def templatesWithParams(self): """ Return templates used on this Page.
- @return: a list that contains a tuple for each use of a template + The templates are extracted by L{textlib.extract_templates_and_params}, + with positional arguments placed first in order, and each named + argument appearing as 'name=value'. + + All parameter keys and values for each template are stripped of + whitespace. + + @return: a list of tuples with one tuple for each template invocation in the page, with the template Page as the first entry and a list of parameters as the second entry. - @rtype: list + @rtype: list of (Page, list) """ # WARNING: may not return all templates used in particularly # intricate cases such as template substitution titles = [t.title() for t in self.templates()] - templates = textlib.extract_templates_and_params(self.text) + templates = self.raw_extracted_templates # backwards-compatibility: convert the dict returned as the second # element into a list in the format used by old scripts result = []
pywikibot-commits@lists.wikimedia.org