jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/780854 )
Change subject: [IMPR] Deprecate XMLDumpOldPageGenerator in favour of a 'content' parameter ......................................................................
[IMPR] Deprecate XMLDumpOldPageGenerator in favour of a 'content' parameter
- deprecate XMLDumpOldPageGenerator; the content parameter of XMLDumpPageGenerator should be used instead - assigning entry.text to Page.text takes 400 - 800 milliseconds. The new 'content' parameter speeds up yielding pages when the old text of an entry is not required.
Bug: T306134 Change-Id: Id88c18b0122a57c1eb207076e340ad2fcc80dc31 --- M pywikibot/pagegenerators.py 1 file changed, 23 insertions(+), 18 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index 0926580..a033cfa 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -59,6 +59,7 @@ ) from pywikibot.proofreadpage import ProofreadPage from pywikibot.tools import ( + deprecated, DequeGenerator, filter_unique, intersect_generators, @@ -2775,9 +2776,11 @@ yield page
-class XMLDumpOldPageGenerator(abc.Iterator): # type: ignore[type-arg] - """ - Xml generator that yields Page objects with old text loaded. +class XMLDumpPageGenerator(abc.Iterator): # type: ignore[type-arg] + """Xml generator that yields Page objects. + + .. versionadded:: 7.2 + the `content` parameter
:param filename: filename of XML dump :param start: skip entries below that value @@ -2785,11 +2788,9 @@ :param site: current site for the generator :param text_predicate: a callable with entry.text as parameter and boolean as result to indicate the generator should return the page or not + :param content: If True, assign old page content to Page.text
- :ivar text_predicate: holds text_predicate function :ivar skipping: True if start parameter is given, else False - :ivar start: holds start parameter - :ivar namespaces: holds namespaces filter :ivar parser: holds the xmlreader.XmlDump parse method """
@@ -2798,11 +2799,11 @@ None, NAMESPACE_OR_STR_TYPE, Sequence[NAMESPACE_OR_STR_TYPE]] = None, site: OPT_SITE_TYPE = None, - text_predicate: Optional[Callable[[str], bool]] = None - ) -> None: + text_predicate: Optional[Callable[[str], bool]] = None, + content=False) -> None: """Initializer.""" self.text_predicate = text_predicate - + self.content = content self.skipping = bool(start)
self.start = None # type: Optional[str] @@ -2814,7 +2815,6 @@ self.namespaces = self.site.namespaces else: self.namespaces = self.site.namespaces.resolve(namespaces) - dump = xmlreader.XmlDump(filename) self.parser = dump.parse()
@@ -2830,19 +2830,24 @@ if page.namespace() not in self.namespaces: continue if not self.text_predicate or self.text_predicate(entry.text): - page.text = entry.text + if self.content: + page.text = entry.text return page
-class XMLDumpPageGenerator(XMLDumpOldPageGenerator): +@deprecated('XMLDumpPageGenerator with content=True parameter', since='7.2.0') +class XMLDumpOldPageGenerator(XMLDumpPageGenerator):
- """Xml generator that yields Page objects without text loaded.""" + """Xml generator that yields Page objects with old text loaded.
- def __next__(self) -> 'pywikibot.page.Page': - """Get next Page from dump and remove the text.""" - page = super().__next__() - del page.text - return page + .. deprecated:: 7.2 + :class:`XMLDumpPageGenerator` with `content` parameter should be + used instead + """ + + def __init__(self, *args, **kwargs): + """Initializer.""" + super().__init__(*args, **kwargs, content=True)
def YearPageGenerator(start: int = 1, end: int = 2050,
pywikibot-commits@lists.wikimedia.org