jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] Close opened files and open XMLs in binary mode
......................................................................
[FIX] Close opened files and open XMLs in binary mode
Close various files which were opened but never closed. Also verify that
the XmlDump instance can handle more than just uncompressed UTF-8 files
(UTF-16 encoded and bz2-compressed variants are now tested as well).
Also contains one unrelated assertEquals -> assertEqual fix.
Bug: 70969
Bug: 70977
Change-Id: Ic8454d0d1cc8c98458611efca32bbcd90620df12
---
M pywikibot/xmlreader.py
A tests/data/article-pyrus-utf16.xml
A tests/data/article-pyrus-utf16.xml.bz2
A tests/data/article-pyrus.xml.bz2
M tests/data_ingestion_tests.py
M tests/textlib_tests.py
M tests/wikibase_tests.py
M tests/xmlreader_tests.py
8 files changed, 47 insertions(+), 21 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/xmlreader.py b/pywikibot/xmlreader.py
index 56993c4..4f19a73 100644
--- a/pywikibot/xmlreader.py
+++ b/pywikibot/xmlreader.py
@@ -122,19 +122,22 @@
bufsize=65535).stdout
else:
# assume it's an uncompressed XML file
- source = open(self.filename)
- context = iterparse(source, events=("start", "end",
"start-ns"))
- self.root = None
+ source = open(self.filename, 'rb')
+ try:
+ context = iterparse(source, events=("start", "end",
"start-ns"))
+ self.root = None
- for event, elem in context:
- if event == "start-ns" and elem[0] == "":
- self.uri = elem[1]
- continue
- if event == "start" and self.root is None:
- self.root = elem
- continue
- for rev in self._parse(event, elem):
- yield rev
+ for event, elem in context:
+ if event == "start-ns" and elem[0] == "":
+ self.uri = elem[1]
+ continue
+ if event == "start" and self.root is None:
+ self.root = elem
+ continue
+ for rev in self._parse(event, elem):
+ yield rev
+ finally:
+ source.close()
def _parse_only_latest(self, event, elem):
"""Parser that yields only the latest revision."""
diff --git a/tests/data/article-pyrus-utf16.xml b/tests/data/article-pyrus-utf16.xml
new file mode 100644
index 0000000..8bd2461
--- /dev/null
+++ b/tests/data/article-pyrus-utf16.xml
Binary files differ
diff --git a/tests/data/article-pyrus-utf16.xml.bz2 b/tests/data/article-pyrus-utf16.xml.bz2
new file mode 100644
index 0000000..9ef4634
--- /dev/null
+++ b/tests/data/article-pyrus-utf16.xml.bz2
Binary files differ
diff --git a/tests/data/article-pyrus.xml.bz2 b/tests/data/article-pyrus.xml.bz2
new file mode 100644
index 0000000..62371d0
--- /dev/null
+++ b/tests/data/article-pyrus.xml.bz2
Binary files differ
diff --git a/tests/data_ingestion_tests.py b/tests/data_ingestion_tests.py
index f7f6711..1735a89 100644
--- a/tests/data_ingestion_tests.py
+++ b/tests/data_ingestion_tests.py
@@ -53,9 +53,9 @@
def setUp(self):
super(TestCSVReader, self).setUp()
- fileobj = open(os.path.join(os.path.split(__file__)[0], 'data',
'csv_ingestion.csv'))
- self.iterator = data_ingestion.CSVReader(fileobj, 'url')
- self.obj = next(self.iterator)
+ with open(os.path.join(os.path.split(__file__)[0], 'data',
'csv_ingestion.csv')) as fileobj:
+ self.iterator = data_ingestion.CSVReader(fileobj, 'url')
+ self.obj = next(self.iterator)
def test_PhotoURL(self):
self.assertEqual(self.obj.URL,
'http://upload.wikimedia.org/wikipedia/commons/f/fc/MP_sounds.png')
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index 299898f..49d5e5f 100644
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -23,8 +23,9 @@
dirname = os.path.join(os.path.dirname(__file__), "pages")
for f in ["enwiki_help_editing"]:
- files[f] = codecs.open(os.path.join(dirname, f + ".page"),
- 'r', 'utf-8').read()
+ with codecs.open(os.path.join(dirname, f + ".page"),
+ 'r', 'utf-8') as content:
+ files[f] = content.read()
class TestSectionFunctions(TestCase):
diff --git a/tests/wikibase_tests.py b/tests/wikibase_tests.py
index b288484..7726433 100644
--- a/tests/wikibase_tests.py
+++ b/tests/wikibase_tests.py
@@ -545,7 +545,8 @@
super(TestLinks, self).setUp()
self.wdp = pywikibot.ItemPage(self.get_repo(), 'Q60')
self.wdp.id = 'Q60'
- self.wdp._content = json.load(open(os.path.join(os.path.split(__file__)[0],
'pages', 'Q60_only_sitelinks.wd')))
+ with open(os.path.join(os.path.split(__file__)[0], 'pages',
'Q60_only_sitelinks.wd')) as f:
+ self.wdp._content = json.load(f)
self.wdp.get()
def test_iterlinks_page_object(self):
@@ -852,8 +853,8 @@
wikidata = self.get_repo()
self.wdp = pywikibot.ItemPage(wikidata, 'Q60')
self.wdp.id = 'Q60'
- self.wdp._content = json.load(
- open(os.path.join(os.path.split(__file__)[0], 'pages',
'Q60.wd')))
+ with open(os.path.join(os.path.split(__file__)[0], 'pages',
'Q60.wd')) as f:
+ self.wdp._content = json.load(f)
self.wdp.get()
del self.wdp._content['id']
del self.wdp._content['type']
@@ -863,7 +864,7 @@
old = json.dumps(self.wdp._content, indent=2, sort_keys=True)
new = json.dumps(self.wdp.toJSON(), indent=2, sort_keys=True)
- self.assertEquals(old, new)
+ self.assertEqual(old, new)
def test_json_diff(self):
del self.wdp.labels['en']
diff --git a/tests/xmlreader_tests.py b/tests/xmlreader_tests.py
index 8a99c0e..23ac9f5 100644
--- a/tests/xmlreader_tests.py
+++ b/tests/xmlreader_tests.py
@@ -51,6 +51,27 @@
"article-pyrus.xml")).parse()]
self.assertTrue(pages[0].isredirect)
+ def _compare(self, previous, variant, all_revisions):
+ result = [entry.__dict__ for entry in xmlreader.XmlDump(
+ os.path.join(self.path, 'data', 'article-pyrus' + variant),
+ all_revisions).parse()]
+ if previous:
+ self.assertEqual(previous, result)
+ return result
+
+ def _compare_variants(self, all_revisions):
+ previous = None
+ previous = self._compare(previous, '.xml', all_revisions)
+ previous = self._compare(previous, '-utf16.xml', all_revisions)
+ previous = self._compare(previous, '.xml.bz2', all_revisions)
+ previous = self._compare(previous, '-utf16.xml.bz2', all_revisions)
+
+ def test_XmlDump_compare_all(self):
+ self._compare_variants(True)
+
+ def test_XmlDump_compare_single(self):
+ self._compare_variants(False)
+
if __name__ == '__main__':
try:
--
To view, visit
https://gerrit.wikimedia.org/r/161195
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ic8454d0d1cc8c98458611efca32bbcd90620df12
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>