jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/576912 )
Change subject: [IMPR] Use UserDict for LogEntry
......................................................................
[IMPR] Use UserDict for LogEntry
A UserDict holds its data in a data attribute which is a dict.
The dict can be accessed directly or via the data attribute. This
makes it possible to access all LogEntry entries directly by key,
even if there is no corresponding method to retrieve the data.
Access via the data attribute still works.
- Remove the LogDict helper dict which is no longer used.
- Copy the one and only __missing__ method from LogDict
to LogEntry class which works like a dict now.
- Remove data._type attribute which was used by LogDict
- Shorten each data lookup from self.data[key] to self[key]; going
through the data attribute is no longer necessary.
- Remove __ne__ method which just returns the opposite
of __eq__; this is done by default.
- Remove the logid, pageid, ns, type, action, user and comment
methods, which just return the corresponding data, and
replace them with __getattr__.
- Some tests added to verify that LogEntry.data[item] is
equal to LogEntry[item] managed by UserDict.
Change-Id: Ic04f63150fd29123b2459dbf291d79e71fb95bf8
---
M pywikibot/logentries.py
M tests/logentries_tests.py
2 files changed, 71 insertions(+), 83 deletions(-)
Approvals:
Zhuyifei1999: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/logentries.py b/pywikibot/logentries.py
index 3daf72c..b4dd0e7 100644
--- a/pywikibot/logentries.py
+++ b/pywikibot/logentries.py
@@ -1,47 +1,30 @@
# -*- coding: utf-8 -*-
"""Objects representing Mediawiki log entries."""
#
-# (C) Pywikibot team, 2007-2019
+# (C) Pywikibot team, 2007-2020
#
# Distributed under the terms of the MIT license.
#
-from __future__ import absolute_import, division, unicode_literals
+from collections import UserDict
import pywikibot
from pywikibot.exceptions import Error, HiddenKeyError
-from pywikibot.tools import deprecated, classproperty, UnicodeType
+from pywikibot.tools import deprecated, classproperty
_logger = 'wiki'
-class LogDict(dict):
+class LogEntry(UserDict):
+ """Generic log entry.
+
+ LogEntry parameters may be retrieved by the corresponding method
+ or the LogEntry key. The following statements are equivalent:
+
+ action = logentry.action()
+ action = logentry['action']
+ action = logentry.data['action']
"""
- Simple custom dict that raises custom Errors when a key is missing.
-
- HiddenKeyError is raised when the user does not have permission.
- KeyError is raised otherwise.
-
- It also logs debugging information when a key is missing.
- """
-
- def __missing__(self, key):
- """Debug when the key is missing."""
- pywikibot.debug('API log entry received:\n' + repr(self),
- _logger)
- if ((key in ('ns', 'title', 'pageid', 'logpage', 'params', 'action')
- and 'actionhidden' in self)
- or (key == 'comment' and 'commenthidden' in self)
- or (key == 'user' and 'userhidden' in self)):
- raise HiddenKeyError(
- "Log entry ({0}) has a hidden '{1}' key and you don't have "
- 'permission to view it.'.format(self._type, key))
- raise KeyError("Log entry (%s) has no '%s' key" % (self._type, key))
-
-
-class LogEntry(object):
-
- """Generic log entry."""
# Log type expected. None for every type, or one of the (letype) str :
# block/patrol/etc...
@@ -50,13 +33,32 @@
def __init__(self, apidata, site):
"""Initialize object from a logevent dict returned by MW API."""
- self.data = LogDict(apidata)
+ super(LogEntry, self).__init__(apidata)
self.site = site
expected_type = self._expected_type
if expected_type is not None and expected_type != self.type():
raise Error('Wrong log type! Expecting %s, received %s instead.'
% (expected_type, self.type()))
- self.data._type = self.type()
+
+ def __missing__(self, key):
+ """Debug when the key is missing.
+
+ HiddenKeyError is raised when the user does not have permission.
+ KeyError is raised otherwise.
+
+ It also logs debugging information when a key is missing.
+ """
+ pywikibot.debug('API log entry received:\n' + repr(self),
+ _logger)
+ hidden = {'action', 'logpage', 'ns', 'pageid', 'params', 'title'}
+ if ((key in hidden and 'actionhidden' in self)
+ or (key == 'comment' and 'commenthidden' in self)
+ or (key == 'user' and 'userhidden' in self)):
+ raise HiddenKeyError(
+ "Log entry ({}) has a hidden '{}' key and you don't have "
+ 'permission to view it.'.format(self['type'], key))
+ raise KeyError("Log entry ({}) has no '{}' key"
+ .format(self['type'], key))
def __repr__(self):
"""Return a string representation of LogEntry object."""
@@ -76,9 +78,13 @@
return False
return self.logid() == other.logid() and self.site == other.site
- def __ne__(self, other):
- """Compare if self is not equal to other."""
- return not self == other
+ def __getattr__(self, item):
+ """Return several items from dict used as methods."""
+ if item in ('action', 'comment', 'logid', 'ns', 'pageid', 'type',
+ 'user'): # TODO use specific User class for 'user'?
+ return lambda: self[item]
+
+ return super(LogEntry, self).__getattribute__(item)
@property
def _params(self):
@@ -87,22 +93,10 @@
@rtype: dict or None
"""
- if 'params' in self.data:
- return self.data['params']
+ if 'params' in self:
+ return self['params']
else: # try old mw style preceding mw 1.19
- return self.data[self._expected_type]
-
- def logid(self):
- """Return the id of the log entry."""
- return self.data['logid']
-
- def pageid(self):
- """Return the log id of the page handled by this log entry."""
- return self.data['pageid']
-
- def ns(self):
- """Return the namespace id of the page handled by this log entry."""
- return self.data['ns']
+ return self[self._expected_type]
@deprecated('page()', since='20150617')
def title(self):
@@ -122,36 +116,16 @@
@rtype: pywikibot.Page
"""
if not hasattr(self, '_page'):
- self._page = pywikibot.Page(self.site, self.data['title'])
+ self._page = pywikibot.Page(self.site, self['title'])
return self._page
- def type(self):
- """The type of this logentry."""
- return self.data['type']
-
- def action(self):
- """The action of this log entry."""
- return self.data['action']
-
- def user(self):
- """Return the user name doing this action."""
- # TODO use specific User class ?
- return self.data['user']
-
def timestamp(self):
"""Timestamp object corresponding to event timestamp."""
if not hasattr(self, '_timestamp'):
self._timestamp = pywikibot.Timestamp.fromISOformat(
- self.data['timestamp'])
+ self['timestamp'])
return self._timestamp
- def comment(self):
- """Return the logentry's comment.
-
- @rtype: str
- """
- return self.data['comment']
-
class OtherLogEntry(LogEntry):
@@ -174,7 +148,7 @@
@rtype: pywikibot.User
"""
if not hasattr(self, '_page'):
- self._page = pywikibot.User(self.site, self.data['title'])
+ self._page = pywikibot.User(self.site, self['title'])
return self._page
@@ -194,10 +168,10 @@
super(BlockEntry, self).__init__(apidata, site)
# When an autoblock is removed, the "title" field is not a page title
# See bug T19781
- pos = self.data['title'].find('#')
+ pos = self['title'].find('#')
self.isAutoblockRemoval = pos > 0
if self.isAutoblockRemoval:
- self._blockid = int(self.data['title'][pos + 1:])
+ self._blockid = int(self['title'][pos + 1:])
def page(self):
"""
@@ -228,7 +202,7 @@
if not hasattr(self, '_flags'):
self._flags = self._params['flags']
# pre mw 1.19 returned a delimited string.
- if isinstance(self._flags, UnicodeType):
+ if isinstance(self._flags, str):
if self._flags:
self._flags = self._flags.split(',')
else:
@@ -300,7 +274,7 @@
@rtype: pywikibot.FilePage
"""
if not hasattr(self, '_page'):
- self._page = pywikibot.FilePage(self.site, self.data['title'])
+ self._page = pywikibot.FilePage(self.site, self['title'])
return self._page
diff --git a/tests/logentries_tests.py b/tests/logentries_tests.py
index 37a376a..6d8ce78 100644
--- a/tests/logentries_tests.py
+++ b/tests/logentries_tests.py
@@ -5,8 +5,6 @@
#
# Distributed under the terms of the MIT license.
#
-from __future__ import absolute_import, division, unicode_literals
-
import datetime
import pywikibot
@@ -20,7 +18,6 @@
from tests.aspects import (
unittest, MetaTestCaseClass, TestCase, DeprecationTestCase
)
-from tests.utils import add_metaclass
class TestLogentriesBase(TestCase):
@@ -67,13 +64,17 @@
logentry = self._get_logentry(logtype)
self.assertIn(logtype, logentry.__class__.__name__.lower())
self.assertEqual(logentry._expected_type, logtype)
+
if logtype not in LogEntryFactory._logtypes:
self.assertIsInstance(logentry, OtherLogEntry)
+
if self.site_key == 'old':
self.assertNotIn('params', logentry.data)
else:
self.assertNotIn(logentry.type(), logentry.data)
+
self.assertIsInstance(logentry.action(), UnicodeType)
+
try:
self.assertIsInstance(logentry.comment(), UnicodeType)
except HiddenKeyError as e:
@@ -83,11 +84,21 @@
r"don't have permission to view it\.")
except KeyError as e:
self.assertRegex(str(e), "Log entry ([^)]+) has no 'comment' key")
+ else:
+ self.assertEqual(logentry.comment(), logentry['comment'])
+
self.assertIsInstance(logentry.logid(), int)
self.assertIsInstance(logentry.timestamp(), pywikibot.Timestamp)
+
if 'title' in logentry.data: # title may be missing
self.assertIsInstance(logentry.ns(), int)
self.assertIsInstance(logentry.pageid(), int)
+
+ # test new UserDict style
+ self.assertEqual(logentry.data['title'], logentry['title'])
+ self.assertEqual(logentry.ns(), logentry['ns'])
+ self.assertEqual(logentry.pageid(), logentry['pageid'])
+
self.assertGreaterEqual(logentry.ns(), -2)
self.assertGreaterEqual(logentry.pageid(), 0)
if logtype == 'block' and logentry.isAutoblockRemoval:
@@ -100,10 +111,16 @@
self.assertIsInstance(logentry.page(), pywikibot.Page)
else:
self.assertRaises(KeyError, logentry.page)
+
self.assertEqual(logentry.type(), logtype)
self.assertIsInstance(logentry.user(), UnicodeType)
self.assertGreaterEqual(logentry.logid(), 0)
+ # test new UserDict style
+ self.assertEqual(logentry.type(), logentry['type'])
+ self.assertEqual(logentry.user(), logentry['user'])
+ self.assertEqual(logentry.logid(), logentry['logid'])
+
class TestLogentriesMeta(MetaTestCaseClass):
@@ -128,13 +145,10 @@
return super(TestLogentriesMeta, cls).__new__(cls, name, bases, dct)
-@add_metaclass
-class TestLogentries(TestLogentriesBase):
+class TestLogentries(TestLogentriesBase, metaclass=TestLogentriesMeta):
"""Test general LogEntry properties."""
- __metaclass__ = TestLogentriesMeta
-
class TestSimpleLogentries(TestLogentriesBase):
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/576912
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ic04f63150fd29123b2459dbf291d79e71fb95bf8
Gerrit-Change-Number: 576912
Gerrit-PatchSet: 11
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: Zhuyifei1999 <zhuyifei1999(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Dvorapa <dvorapa(a)seznam.cz>
Gerrit-CC: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/610553 )
Change subject: Update URLs for new toolforge.org domain
......................................................................
Update URLs for new toolforge.org domain
All tools hosted on tools.wmflabs.org/$TOOL are now located at
$TOOL.toolforge.org.
Change-Id: Ic91d51043d64b77ebe4e7f5ca847d3311b645e68
---
M pywikibot/__metadata__.py
M pywikibot/config2.py
M pywikibot/proofreadpage.py
M scripts/imagecopy.py
M scripts/wikisourcetext.py
M tests/imagecopy_tests.py
6 files changed, 16 insertions(+), 16 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/__metadata__.py b/pywikibot/__metadata__.py
index 468a3fb..8eaf1f2 100644
--- a/pywikibot/__metadata__.py
+++ b/pywikibot/__metadata__.py
@@ -14,7 +14,7 @@
__maintainer_email__ = 'pywikibot(a)lists.wikimedia.org'
__license__ = 'MIT License'
__url__ = 'https://www.mediawiki.org/wiki/Manual:Pywikibot'
-__download_url__ = 'https://tools.wmflabs.org/pywikibot/'
+__download_url__ = 'https://pywikibot.toolforge.org/'
__copyright__ = '(C) Pywikibot team, 2003-2020'
__keywords__ = 'API bot client framework mediawiki pwb python pywiki ' \
'pywikibase pywikibot pywikipedia pywikipediabot wiki ' \
diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index 01dee15..c193e8f 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -706,7 +706,7 @@
db_connect_file = user_home_path('.my.cnf')
# local port for mysql server
# ssh -L 4711:enwiki.analytics.db.svc.eqiad.wmflabs:3306 \
-# user(a)login.tools.wmflabs.org
+# user(a)login.toolforge.org
db_port = 3306
# ############# SEARCH ENGINE SETTINGS ##############
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index ffbced8..3e27874 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -12,11 +12,11 @@
OCR support of page scans via:
-- https://tools.wmflabs.org/phetools/hocr_cgi.py
-- https://tools.wmflabs.org/phetools/ocr.php
+- https://phetools.toolforge.org/hocr_cgi.py
+- https://phetools.toolforge.org/ocr.php
- inspired by https://en.wikisource.org/wiki/MediaWiki:Gadget-ocr.js
-- https://tools.wmflabs.org/ws-google-ocr/
+- https://ws-google-ocr.toolforge.org/
- inspired by https://wikisource.org/wiki/MediaWiki:GoogleOCR.js
- see also: https://wikisource.org/wiki/Wikisource:Google_OCR
@@ -129,14 +129,14 @@
p_close_no_div = re.compile('</noinclude>') # V2 page format.
# phetools ocr utility
- _HOCR_CMD = ('https://tools.wmflabs.org/phetools/hocr_cgi.py?'
+ _HOCR_CMD = ('https://phetools.toolforge.org/hocr_cgi.py?'
'cmd=hocr&book={book}&lang={lang}&user={user}')
- _OCR_CMD = ('https://tools.wmflabs.org/phetools/ocr.php?'
+ _OCR_CMD = ('https://phetools.toolforge.org/ocr.php?'
'cmd=ocr&url={url_image}&lang={lang}&user={user}')
# googleOCR ocr utility
- _GOCR_CMD = ('https://tools.wmflabs.org/ws-google-ocr/api.php?'
+ _GOCR_CMD = ('https://ws-google-ocr.toolforge.org/api.php?'
'image={url_image}&lang={lang}')
_MULTI_PAGE_EXT = ['djvu', 'pdf']
@@ -643,7 +643,7 @@
return (error, parser_func(_text))
def _do_hocr(self):
- """Do hocr using //tools.wmflabs.org/phetools/hocr_cgi.py?cmd=hocr.
+ """Do hocr using https://phetools.toolforge.org/hocr_cgi.py?cmd=hocr.
This is the main method for 'phetools'.
Fallback method is ocr.
diff --git a/scripts/imagecopy.py b/scripts/imagecopy.py
index 9f1a728..e82a8e1 100644
--- a/scripts/imagecopy.py
+++ b/scripts/imagecopy.py
@@ -219,7 +219,7 @@
while not gotInfo:
try:
commonsHelperPage = fetch(
- 'https://tools.wmflabs.org/commonshelper/',
+ 'https://commonshelper.toolforge.org/',
method='POST',
data=parameters)
data = commonsHelperPage.data.content.decode('utf-8')
@@ -257,7 +257,7 @@
}
pywikibot.output(tosend)
- CH = pageTextPost('http://tools.wmflabs.org/commonshelper/index.php',
+ CH = pageTextPost('https://commonshelper.toolforge.org/index.php',
tosend)
pywikibot.output('Got CH desc.')
diff --git a/scripts/wikisourcetext.py b/scripts/wikisourcetext.py
index 6d16ee6..e496d9f 100644
--- a/scripts/wikisourcetext.py
+++ b/scripts/wikisourcetext.py
@@ -10,7 +10,7 @@
of the file, as long as it is supported by the MW ProofreadPage extension.
As alternative, if '-ocr' option is selected,
-https://tools.wmflabs.org/phetools OCR tool will be used to get text.
+https://phetools.toolforge.org/ OCR tool will be used to get text.
In this case, also already existing pages with quality value 'Not Proofread'
can be treated. '-force' will override existing page in this case.
@@ -33,14 +33,14 @@
-showdiff: show difference between current text and new text when
saving the page.
- -ocr: use OCR tools hosted on https://tools.wmflabs.org.
+ -ocr: use OCR tools hosted on https://toolforge.org.
By default no OCR is done, i.e. only not-(yet)-existing
pages in Page ns will be treated and text will be fetched
via preload.
If -ocr is provided, default OCR method is:
- - https://tools.wmflabs.org/phetools
+ - https://phetools.toolforge.org/
If ocr:googleOCR is given, OCR method is:
- - https://tools.wmflabs.org/ws-google-ocr
+ - https://ws-google-ocr.toolforge.org/
-threads:n number of threads used to fetch OCR from OCR tools.
default is 5; valid only if '-ocr' is selected.
diff --git a/tests/imagecopy_tests.py b/tests/imagecopy_tests.py
index 90b865f..09a2def 100644
--- a/tests/imagecopy_tests.py
+++ b/tests/imagecopy_tests.py
@@ -18,7 +18,7 @@
class CommonsHelperMethodTest(TestCase):
"""Test CommonsHelper methods in imagecopy."""
- hostname = 'https://tools.wmflabs.org/commonshelper/'
+ hostname = 'https://commonshelper.toolforge.org/'
@unittest.expectedFailure # T207579
def test_pageTextPost(self):
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/610553
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ic91d51043d64b77ebe4e7f5ca847d3311b645e68
Gerrit-Change-Number: 610553
Gerrit-PatchSet: 1
Gerrit-Owner: Legoktm <legoktm(a)member.fsf.org>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: Zhuyifei1999 <zhuyifei1999(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/605962 )
Change subject: [IMPR] Make _GetDataHTML a context manager
......................................................................
[IMPR] Make _GetDataHTML a context manager
- Make _GetDataHTML a context manager and call HTMLParser.close()
at exit time. Closing is also done if an exception occurs.
- Reorder imports; we have a PY2 section already.
Change-Id: Ib1e7296967293c6102d8416fb14d98da0d2f6d57
---
M pywikibot/textlib.py
1 file changed, 24 insertions(+), 10 deletions(-)
Approvals:
Dvorapa: Looks good to me, but someone else must approve
Zhuyifei1999: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 9054917..63e9be8 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -13,14 +13,11 @@
#
from __future__ import absolute_import, division, unicode_literals
-from collections import OrderedDict, namedtuple
-try:
- from collections.abc import Sequence
-except ImportError: # Python 2.7
- from collections import Sequence
import datetime
import re
+from collections import OrderedDict, namedtuple
+
import pywikibot
from pywikibot.exceptions import InvalidTitle, SiteDefinitionError
from pywikibot.family import Family
@@ -35,8 +32,10 @@
)
if not PY2:
+ from collections.abc import Sequence
from html.parser import HTMLParser
else:
+ from collections import Sequence
from future_builtins import zip
from HTMLParser import HTMLParser
@@ -517,27 +516,42 @@
# thanks to:
# https://www.hellboundhackers.org/articles/read-article.php?article_id=841
parser = _GetDataHTML()
- parser.keeptags = keeptags
- parser.feed(text)
- parser.close()
+ with parser:
+ parser.keeptags = keeptags
+ parser.feed(text)
return parser.textdata
# thanks to https://docs.python.org/3/library/html.parser.html
class _GetDataHTML(HTMLParser):
+
+ """HTML parser which removes html tags except they are listed in keeptags.
+
+ This class is also a context manager which closes itself at exit time.
+ """
+
textdata = ''
keeptags = []
+ def __enter__(self):
+ pass
+
+ def __exit__(self, *exc_info):
+ self.close()
+
def handle_data(self, data):
+ """Add data to text."""
self.textdata += data
def handle_starttag(self, tag, attrs):
+ """Add start tag to text if tag should be kept."""
if tag in self.keeptags:
- self.textdata += '<%s>' % tag
+ self.textdata += '<{}>'.format(tag)
def handle_endtag(self, tag):
+ """Add end tag to text if tag should be kept."""
if tag in self.keeptags:
- self.textdata += '</%s>' % tag
+ self.textdata += '</{}>'.format(tag)
def isDisabled(text, index, tags=None):
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/605962
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ib1e7296967293c6102d8416fb14d98da0d2f6d57
Gerrit-Change-Number: 605962
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Dvorapa <dvorapa(a)seznam.cz>
Gerrit-Reviewer: Zhuyifei1999 <zhuyifei1999(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged