jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] Close opened files and open XMLs in binary mode ......................................................................
[FIX] Close opened files and open XMLs in binary mode
Close various files that were opened but never explicitly closed. Also verify that the XmlDump instance can handle more than just uncompressed UTF-8 files.
Also contains one unrelated fix: a deprecated assertEquals call is replaced with assertEqual.
Bug: 70969 Bug: 70977 Change-Id: Ic8454d0d1cc8c98458611efca32bbcd90620df12 --- M pywikibot/xmlreader.py A tests/data/article-pyrus-utf16.xml A tests/data/article-pyrus-utf16.xml.bz2 A tests/data/article-pyrus.xml.bz2 M tests/data_ingestion_tests.py M tests/textlib_tests.py M tests/wikibase_tests.py M tests/xmlreader_tests.py 8 files changed, 47 insertions(+), 21 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/xmlreader.py b/pywikibot/xmlreader.py index 56993c4..4f19a73 100644 --- a/pywikibot/xmlreader.py +++ b/pywikibot/xmlreader.py @@ -122,19 +122,22 @@ bufsize=65535).stdout else: # assume it's an uncompressed XML file - source = open(self.filename) - context = iterparse(source, events=("start", "end", "start-ns")) - self.root = None + source = open(self.filename, 'rb') + try: + context = iterparse(source, events=("start", "end", "start-ns")) + self.root = None
- for event, elem in context: - if event == "start-ns" and elem[0] == "": - self.uri = elem[1] - continue - if event == "start" and self.root is None: - self.root = elem - continue - for rev in self._parse(event, elem): - yield rev + for event, elem in context: + if event == "start-ns" and elem[0] == "": + self.uri = elem[1] + continue + if event == "start" and self.root is None: + self.root = elem + continue + for rev in self._parse(event, elem): + yield rev + finally: + source.close()
def _parse_only_latest(self, event, elem): """Parser that yields only the latest revision.""" diff --git a/tests/data/article-pyrus-utf16.xml b/tests/data/article-pyrus-utf16.xml new file mode 100644 index 0000000..8bd2461 --- /dev/null +++ b/tests/data/article-pyrus-utf16.xml Binary files differ diff --git a/tests/data/article-pyrus-utf16.xml.bz2 b/tests/data/article-pyrus-utf16.xml.bz2 new file mode 100644 index 0000000..9ef4634 --- /dev/null +++ b/tests/data/article-pyrus-utf16.xml.bz2 Binary files differ diff --git a/tests/data/article-pyrus.xml.bz2 b/tests/data/article-pyrus.xml.bz2 new file mode 100644 index 0000000..62371d0 --- /dev/null +++ b/tests/data/article-pyrus.xml.bz2 Binary files differ diff --git a/tests/data_ingestion_tests.py b/tests/data_ingestion_tests.py index f7f6711..1735a89 100644 --- a/tests/data_ingestion_tests.py +++ b/tests/data_ingestion_tests.py @@ -53,9 +53,9 @@
def setUp(self): super(TestCSVReader, self).setUp() - fileobj = open(os.path.join(os.path.split(__file__)[0], 'data', 'csv_ingestion.csv')) - self.iterator = data_ingestion.CSVReader(fileobj, 'url') - self.obj = next(self.iterator) + with open(os.path.join(os.path.split(__file__)[0], 'data', 'csv_ingestion.csv')) as fileobj: + self.iterator = data_ingestion.CSVReader(fileobj, 'url') + self.obj = next(self.iterator)
def test_PhotoURL(self): self.assertEqual(self.obj.URL, 'http://upload.wikimedia.org/wikipedia/commons/f/fc/MP_sounds.png') diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index 299898f..49d5e5f 100644 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -23,8 +23,9 @@ dirname = os.path.join(os.path.dirname(__file__), "pages")
for f in ["enwiki_help_editing"]: - files[f] = codecs.open(os.path.join(dirname, f + ".page"), - 'r', 'utf-8').read() + with codecs.open(os.path.join(dirname, f + ".page"), + 'r', 'utf-8') as content: + files[f] = content.read()
class TestSectionFunctions(TestCase): diff --git a/tests/wikibase_tests.py b/tests/wikibase_tests.py index b288484..7726433 100644 --- a/tests/wikibase_tests.py +++ b/tests/wikibase_tests.py @@ -545,7 +545,8 @@ super(TestLinks, self).setUp() self.wdp = pywikibot.ItemPage(self.get_repo(), 'Q60') self.wdp.id = 'Q60' - self.wdp._content = json.load(open(os.path.join(os.path.split(__file__)[0], 'pages', 'Q60_only_sitelinks.wd'))) + with open(os.path.join(os.path.split(__file__)[0], 'pages', 'Q60_only_sitelinks.wd')) as f: + self.wdp._content = json.load(f) self.wdp.get()
def test_iterlinks_page_object(self): @@ -852,8 +853,8 @@ wikidata = self.get_repo() self.wdp = pywikibot.ItemPage(wikidata, 'Q60') self.wdp.id = 'Q60' - self.wdp._content = json.load( - open(os.path.join(os.path.split(__file__)[0], 'pages', 'Q60.wd'))) + with open(os.path.join(os.path.split(__file__)[0], 'pages', 'Q60.wd')) as f: + self.wdp._content = json.load(f) self.wdp.get() del self.wdp._content['id'] del self.wdp._content['type'] @@ -863,7 +864,7 @@ old = json.dumps(self.wdp._content, indent=2, sort_keys=True) new = json.dumps(self.wdp.toJSON(), indent=2, sort_keys=True)
- self.assertEquals(old, new) + self.assertEqual(old, new)
def test_json_diff(self): del self.wdp.labels['en'] diff --git a/tests/xmlreader_tests.py b/tests/xmlreader_tests.py index 8a99c0e..23ac9f5 100644 --- a/tests/xmlreader_tests.py +++ b/tests/xmlreader_tests.py @@ -51,6 +51,27 @@ "article-pyrus.xml")).parse()] self.assertTrue(pages[0].isredirect)
+ def _compare(self, previous, variant, all_revisions): + result = [entry.__dict__ for entry in xmlreader.XmlDump( + os.path.join(self.path, 'data', 'article-pyrus' + variant), + all_revisions).parse()] + if previous: + self.assertEqual(previous, result) + return result + + def _compare_variants(self, all_revisions): + previous = None + previous = self._compare(previous, '.xml', all_revisions) + previous = self._compare(previous, '-utf16.xml', all_revisions) + previous = self._compare(previous, '.xml.bz2', all_revisions) + previous = self._compare(previous, '-utf16.xml.bz2', all_revisions) + + def test_XmlDump_compare_all(self): + self._compare_variants(True) + + def test_XmlDump_compare_single(self): + self._compare_variants(False) +
if __name__ == '__main__': try:
pywikibot-commits@lists.wikimedia.org