jenkins-bot has submitted this change and it was merged.
Change subject: [bugfix] Bugfixes for XMLDumpOldPageGenerator ......................................................................
[bugfix] Bugfixes for XMLDumpOldPageGenerator
pagegenerators.py - rename self.xmlStart to self.start and use properties for the deprecated instance variable - start parameter may be a prefix instead of an existing page title. Skip all previous titles. - handle underscores inside the start parameter - resolve namespaces parameter - don't assign a constructor parameter to a local variable. Use it directly. - self.namespaces is never an empty list anymore
reflinks.py - handle -namespace option by pagegenerators.GeneratorFactory - predefine xmlStart
reflinks_tests.py - remove expectedFailure decorators from tests which are solved now
Bug: T100524 Bug: T132663 Change-Id: I171f10165a321ae693ed939ed649e676e4b0e0de --- M pywikibot/pagegenerators.py M scripts/reflinks.py M tests/reflinks_tests.py 3 files changed, 54 insertions(+), 35 deletions(-)
Approvals: Mpaa: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index 6ce7417..e4bc8da 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -2517,26 +2517,61 @@
class XMLDumpOldPageGenerator(IteratorNextMixin):
- """Xml generator that yields Page objects with old text loaded.""" + """ + Xml generator that yields Page objects with old text loaded. + + @param filename: filename of XML dump + @type filename: str + @param start: skip entries below that value + @type start: str or None + @param namespaces: namespace filter + @type identifiers: iterable of basestring or Namespace key, + or a single instance of those types + @param site: current site for the generator + @type site: pywikibot.Site or None + @param text_predicate: a callable with entry.text as parameter and boolean + as result to indicate the generator should return the page or not + @type text_predicate: function identifier or None + + @ivar text_predicate: holds text_predicate function + @ivar skipping: True if start parameter is given, else False + @ivar start: holds start parameter + @ivar namespaces: holds namespaces filter + @ivar parser: holds the xmlreader.XmlDump parse method + """
@deprecated_args(xmlFilename='filename', xmlStart='start') - def __init__(self, filename, start=None, namespaces=[], site=None, + def __init__(self, filename, start=None, namespaces=None, site=None, text_predicate=None): """Constructor.""" - # xmlFilename and xmlStart mapped to not break git blame - # use filename and start on new/changed lines - xmlFilename = filename - xmlStart = start - self.text_predicate = text_predicate
- self.xmlStart = xmlStart - self.namespaces = namespaces - self.skipping = bool(xmlStart) - self.site = site or pywikibot.Site() + self.skipping = bool(start) + if self.skipping: + self.start = start.replace('_', ' ') + else: + self.start = None
- dump = xmlreader.XmlDump(xmlFilename) + self.site = site or pywikibot.Site() + if not namespaces: + self.namespaces = self.site.namespaces + else: + self.namespaces = self.site.namespaces.resolve(namespaces) + + dump = xmlreader.XmlDump(filename) self.parser = dump.parse() + + @property + @deprecated('self.start') + def xmlStart(self): + """Getter for deprecated xmlStart instance variable.""" + return self.start + + @xmlStart.setter + @deprecated('self.start') + def xmlStart(self, value): + """Setter for deprecated xmlStart instance variable.""" + self.start = value
def __next__(self): """Get next Page.""" @@ -2546,13 +2581,12 @@ except StopIteration: raise if self.skipping: - if entry.title != self.xmlStart: + if entry.title < self.start: continue self.skipping = False page = pywikibot.Page(self.site, entry.title) - if not self.namespaces == []: - if page.namespace() not in self.namespaces: - continue + if page.namespace() not in self.namespaces: + continue if not self.text_predicate or self.text_predicate(entry.text): page.text = entry.text return page diff --git a/scripts/reflinks.py b/scripts/reflinks.py index 0072ec0..c5daf1d 100755 --- a/scripts/reflinks.py +++ b/scripts/reflinks.py @@ -746,8 +746,8 @@ @type args: list of unicode """ xmlFilename = None + xmlStart = None options = {} - namespaces = [] generator = None
# Process global args and prepare generator args parser @@ -755,12 +755,7 @@ genFactory = pagegenerators.GeneratorFactory()
for arg in local_args: - if arg.startswith('-namespace:'): - try: - namespaces.append(int(arg[11:])) - except ValueError: - namespaces.append(arg[11:]) - elif arg.startswith('-summary:'): + if arg.startswith('-summary:'): options['summary'] = arg[9:] elif arg == '-always': options['always'] = True @@ -784,11 +779,8 @@ genFactory.handleArg(arg)
if xmlFilename: - try: - xmlStart - except NameError: - xmlStart = None - generator = XmlDumpPageGenerator(xmlFilename, xmlStart, namespaces) + generator = XmlDumpPageGenerator(xmlFilename, xmlStart, + genFactory.namespaces) if not generator: generator = genFactory.getCombinedGenerator() if not generator: diff --git a/tests/reflinks_tests.py b/tests/reflinks_tests.py index ac5f6f5..56611f3 100644 --- a/tests/reflinks_tests.py +++ b/tests/reflinks_tests.py @@ -56,7 +56,6 @@ self.assertPagelistTitles(pages, (u'Fake page', u'Talk:Fake page'), site=self.get_site())
- @unittest.expectedFailure def test_namespace_None(self): """Test namespaces=None processes all namespaces.""" gen = XmlDumpPageGenerator( @@ -68,7 +67,6 @@ self.assertPagelistTitles(pages, (u'Fake page', u'Talk:Fake page'), site=self.get_site())
- @unittest.expectedFailure def test_namespace_string_ids(self): """Test namespaces with ids as string.""" gen = XmlDumpPageGenerator( @@ -91,7 +89,6 @@ self.assertPagelistTitles(pages, (u'Talk:Fake page', ), site=self.get_site())
- @unittest.expectedFailure def test_start_with_underscore(self): """Test with underscore in start page title.""" gen = XmlDumpPageGenerator( @@ -114,7 +111,6 @@ self.assertPagelistTitles(pages, (u'Fake page', u'Talk:Fake page'), site=self.get_site())
- @unittest.expectedFailure def test_start_prefix(self): """Test with a prefix as a start page title.""" gen = XmlDumpPageGenerator( @@ -178,7 +174,6 @@ self.assertPageTitlesCountEqual(gen, [u'Fake page', u'Talk:Fake page'], site=self.get_site())
- @unittest.expectedFailure def test_xml_multiple_namespace_ids_2(self): """Test the generator using multiple namespaces in one parameter.""" main('-xml:' + join_xml_data_path('dummy-reflinks.xml'), @@ -187,7 +182,6 @@ self.assertPageTitlesCountEqual(gen, [u'Fake page', u'Talk:Fake page'], site=self.get_site())
- @unittest.expectedFailure def test_xml_start_prefix(self): """Test the generator using a start partial page.""" main('-xml:' + join_xml_data_path('dummy-reflinks.xml'), @@ -197,7 +191,6 @@ self.assertPagelistTitles(pages, [u'Talk:Fake page'], site=self.get_site())
- @unittest.expectedFailure def test_xml_start_underscore(self): """Test the generator using a start page with an underscore.""" main('-xml:' + join_xml_data_path('dummy-reflinks.xml'),
pywikibot-commits@lists.wikimedia.org