jenkins-bot has submitted this change and it was merged.
Change subject: Do not remove sortkey using category.py move
......................................................................
Do not remove sortkey using category.py move
Add a -keepsortkey parameter to category.py to explicitly request that the
sortkey used in the old category be kept when moving pages from one
category to another with the "move" action.
Bug: T95804
Change-Id: Icea0c87083c045d35e8ab26db135c39465b77f8d
---
M pywikibot/page.py
M scripts/category.py
2 files changed, 26 insertions(+), 12 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 5f58dee..908b5a8 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -1744,6 +1744,7 @@
@param sortKey: sortKey to use for the added category.
Unused if newCat is None, or if inPlace=True
+ If sortKey=True, the sortKey used for oldCat will be used.
@param inPlace: if True, change categories in place rather than
rearranging them.
@@ -1764,9 +1765,6 @@
if cat not in cats:
cats.append(cat)
- if not sortKey:
- sortKey = oldCat.sortKey
-
if not self.canBeEdited():
pywikibot.output(u"Can't edit %s, skipping it..."
% self.title(asLink=True))
@@ -1781,17 +1779,21 @@
if newCat in cats:
newCat = None
+ oldtext = self.text
if inPlace or self.namespace() == 10:
- oldtext = self.get(get_redirect=True)
newtext = textlib.replaceCategoryInPlace(oldtext, oldCat, newCat,
site=self.site)
else:
+ old_cat_pos = cats.index(oldCat)
if newCat:
- cats[cats.index(oldCat)] = Category(self.site, newCat.title(),
- sortKey=sortKey)
+ if sortKey is True:
+ # Fetch sortKey from oldCat in current page.
+ sortKey = cats[old_cat_pos].sortKey
+ cats[old_cat_pos] = Category(self.site, newCat.title(),
+ sortKey=sortKey)
else:
- cats.pop(cats.index(oldCat))
- oldtext = self.get(get_redirect=True)
+ cats.pop(old_cat_pos)
+
try:
newtext = textlib.replaceCategoryLinks(oldtext, cats)
except ValueError:
diff --git a/scripts/category.py b/scripts/category.py
index dd8fe46..3577ac7 100755
--- a/scripts/category.py
+++ b/scripts/category.py
@@ -47,6 +47,10 @@
* -mvtogether - Only move the pages/subcategories of a category, if the
target page (and talk page, if -allowsplit is not set)
doesn't exist.
+ * -keepsortkey - Use sortKey of the old category also for the new category.
+ If not specified, sortKey is removed.
+ An alternative method to keep sortKey is to use -inplace
+ option.
Options for "tidy" action:
* -namespaces Filter the arcitles in the specified namespaces. Separate
@@ -406,7 +410,8 @@
inplace=False, move_oldcat=True, delete_oldcat=True,
title_regex=None, history=False, pagesonly=False,
deletion_comment=DELETION_COMMENT_AUTOMATIC,
- wikibase=True, allow_split=False, move_together=False):
+ wikibase=True, allow_split=False, move_together=False,
+ keep_sortkey=None):
"""Store all given parameters in the objects attributes.
@param oldcat: The move source.
@@ -460,6 +465,7 @@
self.wikibase = wikibase and self.site.has_data_repository
self.allow_split = allow_split
self.move_together = move_together
+ self.keep_sortkey = keep_sortkey
if not self.can_move_cats:
repo = self.site.data_repository()
@@ -590,7 +596,8 @@
page.change_category(self.oldcat, self.newcat,
comment=self.comment,
- inPlace=self.inplace)
+ inPlace=self.inplace,
+ sortKey=self.keep_sortkey)
# Categories for templates can be included in <includeonly> section
# of Template:Page/doc subpage.
@@ -610,7 +617,8 @@
doc_page.change_category(self.oldcat, self.newcat,
comment=self.comment,
inPlace=self.inplace,
- include=['includeonly'])
+ include=['includeonly'],
+ sortKey=self.keep_sortkey)
@staticmethod
def check_move(name, old_page, new_page):
@@ -1096,6 +1104,7 @@
rebuild = False
allow_split = False
move_together = False
+ keep_sortkey = None
depth = 5
# Process global args and prepare generator args parser
@@ -1167,6 +1176,8 @@
withHistory = True
elif arg.startswith('-depth:'):
depth = int(arg[len('-depth:'):])
+ elif arg == '-keepsortkey':
+ keep_sortkey = True
else:
genFactory.handleArg(arg)
@@ -1229,7 +1240,8 @@
deletion_comment=deletion_comment,
wikibase=wikibase,
allow_split=allow_split,
- move_together=move_together)
+ move_together=move_together,
+ keep_sortkey=keep_sortkey)
elif action == 'tidy':
catTitle = pywikibot.input(u'Which category do you want to tidy up?')
bot = CategoryTidyRobot(catTitle, catDB, genFactory.namespaces)
--
To view, visit https://gerrit.wikimedia.org/r/203565
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Icea0c87083c045d35e8ab26db135c39465b77f8d
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] Move compression detection to tools
......................................................................
[FEAT] Move compression detection to tools
Instead of keeping the logic that determines which compression module to
use in xmlreader, move it into tools so that other modules can benefit
from it too.
Change-Id: I008754a55dd3d2e0a1cb346826981d83d79c1b61
---
M README.rst
M pywikibot/tools/__init__.py
M pywikibot/xmlreader.py
A tests/data/xml/article-pyrus.xml.7z
A tests/data/xml/article-pyrus.xml.gz
A tests/data/xml/article-pyrus.xml_invalid.7z
A tests/tools_tests.py
M tox.ini
8 files changed, 216 insertions(+), 19 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/README.rst b/README.rst
index 19e685e..e3dc096 100644
--- a/README.rst
+++ b/README.rst
@@ -35,6 +35,13 @@
page.text = page.text.replace('foo', 'bar')
page.save('Replacing "foo" with "bar"') # Saves the page
+Required external programs
+--------------------------
+
+It may require the following programs to function properly:
+
+* `7za`: To extract 7z files
+
Contributing
------------
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 53513ac..41714aa 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -8,9 +8,12 @@
from __future__ import print_function, unicode_literals
__version__ = '$Id$'
+import bz2
import collections
+import gzip
import inspect
import re
+import subprocess
import sys
import threading
import time
@@ -660,6 +663,101 @@
return self.next()
+class ContextManagerWrapper(object):
+
+ """
+ Wraps an object in a context manager.
+
+ It redirects all access to the wrapped object and executes 'close' when
+ used as a context manager in with-statements. In such statements the value
+ set via 'as' is directly the wrapped object. For example:
+
+ wrapped = ContextManagerWrapper(an_object)
+ with wrapped as another_object:
+ assert(another_object is an_object)
+
+ It does not subclass the object though, so isinstance checks will fail
+ outside a with-statement.
+ """
+
+ def __init__(self, wrapped):
+ """Create a new wrapper."""
+ super(ContextManagerWrapper, self).__init__()
+ super(ContextManagerWrapper, self).__setattr__('_wrapped', wrapped)
+
+ def __enter__(self):
+ """Enter a context manager and use the wrapped object directly."""
+ return self._wrapped
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ """Call close on the wrapped object when exiting a context manager."""
+ self._wrapped.close()
+
+ def __getattr__(self, name):
+ """Get the attribute from the wrapped object."""
+ return getattr(self._wrapped, name)
+
+ def __setattr__(self, name, value):
+ """Set the attribute in the wrapped object."""
+ setattr(self._wrapped, name, value)
+
+
+def open_compressed(filename):
+ """
+ Open a file and uncompress it if needed.
+
+ This function supports bzip2, gzip and 7zip as compression containers. It
+ uses the packages available in the standard library for bzip2 and gzip so
+ they are always available. 7zip is only available when a 7za program is
+ available.
+
+ The compression is selected via the file ending.
+
+ @param filename: The filename.
+ @type filename: str
+ @raises ValueError: When 7za is not available.
+ @raises OSError: When it's not a 7z archive but the file extension is 7z.
+ It is also raised by bz2 when its content is invalid. gzip does not
+ immediately raise that error but only on reading it.
+ @return: A file like object returning the uncompressed data in binary mode.
+ Before Python 2.7 the object returned by BZ2File and gzip is wrapped
+ in a ContextManagerWrapper, so its advantages/disadvantages apply there.
+ @rtype: file like object
+ """
+ def wrap(wrapped):
+ """Wrap in a wrapper when this is below Python version 2.7."""
+ if sys.version_info < (2, 7):
+ return ContextManagerWrapper(wrapped)
+ else:
+ return wrapped
+
+ if filename.endswith('.bz2'):
+ return wrap(bz2.BZ2File(filename))
+ elif filename.endswith('.gz'):
+ return wrap(gzip.open(filename))
+ elif filename.endswith('.7z'):
+ try:
+ process = subprocess.Popen(['7za', 'e', '-bd', '-so', filename],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ bufsize=65535)
+ except OSError:
+ raise ValueError('7za is not installed and can not '
+ 'uncompress "{0}"'.format(filename))
+ else:
+ stderr = process.stderr.read()
+ process.stderr.close()
+ if b'Everything is Ok' not in stderr:
+ process.stdout.close()
+ # OSError is also raised when bz2 is invalid
+ raise OSError('Invalid 7z archive.')
+ else:
+ return process.stdout
+ else:
+ # assume it's an uncompressed XML file
+ return open(filename, 'rb')
+
+
# Decorators
#
# Decorator functions without parameters are _invoked_ differently from
diff --git a/pywikibot/xmlreader.py b/pywikibot/xmlreader.py
index 2b77234..86185cf 100644
--- a/pywikibot/xmlreader.py
+++ b/pywikibot/xmlreader.py
@@ -23,6 +23,8 @@
from xml.etree.cElementTree import iterparse
import xml.sax
+from pywikibot.tools import open_compressed
+
def parseRestrictions(restrictions):
"""
@@ -116,23 +118,7 @@
def parse(self):
"""Generator using cElementTree iterparse function."""
- if self.filename.endswith('.bz2'):
- import bz2
- source = bz2.BZ2File(self.filename)
- elif self.filename.endswith('.gz'):
- import gzip
- source = gzip.open(self.filename)
- elif self.filename.endswith('.7z'):
- import subprocess
- source = subprocess.Popen('7za e -bd -so %s 2>/dev/null'
- % self.filename,
- shell=True,
- stdout=subprocess.PIPE,
- bufsize=65535).stdout
- else:
- # assume it's an uncompressed XML file
- source = open(self.filename, 'rb')
- try:
+ with open_compressed(self.filename) as source:
# iterparse's event must be a str but they are unicode with
# unicode_literals in Python 2
context = iterparse(source, events=(str('start'), str('end'),
@@ -148,8 +134,6 @@
continue
for rev in self._parse(event, elem):
yield rev
- finally:
- source.close()
def _parse_only_latest(self, event, elem):
"""Parser that yields only the latest revision."""
diff --git a/tests/data/xml/article-pyrus.xml.7z b/tests/data/xml/article-pyrus.xml.7z
new file mode 100644
index 0000000..4e7404e
--- /dev/null
+++ b/tests/data/xml/article-pyrus.xml.7z
Binary files differ
diff --git a/tests/data/xml/article-pyrus.xml.gz b/tests/data/xml/article-pyrus.xml.gz
new file mode 100644
index 0000000..825b719
--- /dev/null
+++ b/tests/data/xml/article-pyrus.xml.gz
Binary files differ
diff --git a/tests/data/xml/article-pyrus.xml_invalid.7z b/tests/data/xml/article-pyrus.xml_invalid.7z
new file mode 100644
index 0000000..7d25ee8
--- /dev/null
+++ b/tests/data/xml/article-pyrus.xml_invalid.7z
@@ -0,0 +1,2 @@
+This file is not a valid XML file (but that is not important) and not a valid
+7z file (which is important).
diff --git a/tests/tools_tests.py b/tests/tools_tests.py
new file mode 100644
index 0000000..9ece411
--- /dev/null
+++ b/tests/tools_tests.py
@@ -0,0 +1,105 @@
+#!/usr/bin/python
+"""Test tools package alone which don't fit into other tests."""
+# -*- coding: utf-8 -*-
+#
+# (C) Pywikibot team, 2015
+#
+# Distributed under the terms of the MIT license.
+from __future__ import unicode_literals
+
+__version__ = '$Id$'
+
+import os.path
+import subprocess
+
+from pywikibot import tools
+
+from tests import _data_dir
+from tests.aspects import unittest, TestCase
+
+_xml_data_dir = os.path.join(_data_dir, 'xml')
+
+
+class ContextManagerWrapperTestCase(TestCase):
+
+ """Test that ContextManagerWrapper is working correctly."""
+
+ net = False
+
+ def test_wrapper(self):
+ """Create a test instance and verify the wrapper redirects."""
+ class DummyClass(object):
+
+ """A dummy class which has some values and a close method."""
+
+ class_var = 42
+
+ def __init__(self):
+ """Create instance with dummy values."""
+ self.instance_var = 1337
+ self.closed = False
+
+ def close(self):
+ """Just store that it has been closed."""
+ self.closed = True
+
+ obj = DummyClass()
+ wrapped = tools.ContextManagerWrapper(obj)
+ self.assertIs(wrapped.class_var, obj.class_var)
+ self.assertIs(wrapped.instance_var, obj.instance_var)
+ self.assertIs(wrapped._wrapped, obj)
+ self.assertFalse(obj.closed)
+ with wrapped as unwrapped:
+ self.assertFalse(obj.closed)
+ self.assertIs(unwrapped, obj)
+ self.assertTrue(obj.closed)
+
+
+class OpenCompressedTestCase(TestCase):
+
+ """
+ Unit test class for tools.
+
+ The tests for open_compressed require that article-pyrus.xml* all contain
+ the same content after extraction. The content itself is not important.
+ The file article-pyrus.xml_invalid.7z is not a valid 7z file and
+ open_compressed will fail extracting it using 7za.
+ """
+
+ net = False
+
+ @classmethod
+ def setUpClass(cls):
+ """Define base_file and original_content."""
+ super(OpenCompressedTestCase, cls).setUpClass()
+ cls.base_file = os.path.join(_xml_data_dir, 'article-pyrus.xml')
+ with open(cls.base_file, 'rb') as f:
+ cls.original_content = f.read()
+
+ @staticmethod
+ def _get_content(*args):
+ """Use open_compressed and return content using a with-statement."""
+ with tools.open_compressed(*args) as f:
+ return f.read()
+
+ def test_open_compressed(self):
+ """Test open_compressed with all compressors in the standard library."""
+ self.assertEqual(self._get_content(self.base_file), self.original_content)
+ self.assertEqual(self._get_content(self.base_file + '.bz2'), self.original_content)
+ self.assertEqual(self._get_content(self.base_file + '.gz'), self.original_content)
+
+ def test_open_compressed_7z(self):
+ """Test open_compressed with 7za if installed."""
+ try:
+ subprocess.Popen(['7za'], stdout=subprocess.PIPE).stdout.close()
+ except OSError:
+ raise unittest.SkipTest('7za not installed')
+ self.assertEqual(self._get_content(self.base_file + '.7z'), self.original_content)
+ self.assertRaises(OSError, self._get_content, self.base_file + '_invalid.7z')
+
+
+if __name__ == '__main__':
+ try:
+ unittest.main()
+ except SystemExit:
+ pass
diff --git a/tox.ini b/tox.ini
index ef3fa9c..412f9a0 100644
--- a/tox.ini
+++ b/tox.ini
@@ -110,6 +110,7 @@
tests/pwb/ \
tests/pwb_tests.py \
tests/script_tests.py \
+ tests/tools_tests.py \
tests/upload_tests.py \
tests/wikidataquery_tests.py
--
To view, visit https://gerrit.wikimedia.org/r/209965
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I008754a55dd3d2e0a1cb346826981d83d79c1b61
Gerrit-PatchSet: 9
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>