Build Update for wikimedia/pywikibot-core
-------------------------------------
Build: #2819
Status: Broken
Duration: 31 minutes and 11 seconds
Commit: a573dda (master)
Author: Fabian Neundorf
Message: [IMPROV] NamespacesDict: Lookup names from dict
Instead of iterating over all namespaces, the NamespacesDict can cache a
dictionary of names mapping to the namespace instance.
This cache is now used by `validLanguageLinks`, which avoids iterating over
all namespaces for each langlink and also avoids any magic provided by the
`Namespace` class.
Also add the possibility to look up the namespace via an already normalized name.
Change-Id: Ice7f51aef1673724c87ae2aa7a86f35f36d01692
View the changeset: https://github.com/wikimedia/pywikibot-core/compare/8919541a9d0c...a573dda1…
View the full build log and details: https://travis-ci.org/wikimedia/pywikibot-core/builds/79952262
--
You can configure recipients for build notifications in your .travis.yml file. See http://docs.travis-ci.com/user/notifications
jenkins-bot has submitted this change and it was merged.
Change subject: [IMPROV] NamespacesDict: Lookup names from dict
......................................................................
[IMPROV] NamespacesDict: Lookup names from dict
Instead of iterating over all namespaces, the NamespacesDict can cache a
dictionary of names mapping to the namespace instance.
This cache is now used by `validLanguageLinks`, which avoids iterating over
all namespaces for each langlink and also avoids any magic provided by the
`Namespace` class.
Also add the possibility to look up the namespace via an already normalized name.
Change-Id: Ice7f51aef1673724c87ae2aa7a86f35f36d01692
---
M pywikibot/site.py
M tests/api_tests.py
M tests/namespace_tests.py
M tests/site_tests.py
4 files changed, 47 insertions(+), 3 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/site.py b/pywikibot/site.py
index 72d69a3..d9d1f60 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -492,6 +492,10 @@
"""Create new dict using the given namespaces."""
super(NamespacesDict, self).__init__()
self._namespaces = namespaces
+ self._namespace_names = {}
+ for namespace in self._namespaces.values():
+ for name in namespace:
+ self._namespace_names[name.lower()] = namespace
def __iter__(self):
"""Iterate over all namespaces."""
@@ -545,7 +549,22 @@
@type name: basestring
@return: Namespace or None
"""
- return self._lookup_name(name, self._namespaces)
+ name = Namespace.normalize_name(name)
+ if name is False:
+ return None
+ return self.lookup_normalized_name(name.lower())
+
+ def lookup_normalized_name(self, name):
+ """
+ Find the Namespace for a name also checking aliases.
+
+ The name has to be normalized and must be lower case.
+
+ @param name: Name of the namespace.
+ @type name: basestring
+ @return: Namespace or None
+ """
+ return self._namespace_names.get(name)
# Temporary until Namespace.lookup_name can be removed
@staticmethod
@@ -892,9 +911,8 @@
def validLanguageLinks(self):
"""Return list of language codes that can be used in interwiki links."""
- nsnames = [name for name in self.namespaces.values()]
return [lang for lang in self.languages()
- if first_upper(lang) not in nsnames]
+ if self.namespaces.lookup_normalized_name(lang) is None]
def _interwiki_urls(self):
site_paths = [self.path()] * 3
diff --git a/tests/api_tests.py b/tests/api_tests.py
index 6cb0c83..9ad71d5 100644
--- a/tests/api_tests.py
+++ b/tests/api_tests.py
@@ -580,6 +580,7 @@
# Add custom_name for this site namespace, to match the live site.
if 'Wikipedia' not in self.site.namespaces:
self.site.namespaces[4].custom_name = 'Wikipedia'
+ self.site.namespaces._namespace_names['wikipedia'] = self.site.namespaces[4]
def test_results(self):
"""Test that PageGenerator yields pages with expected attributes."""
diff --git a/tests/namespace_tests.py b/tests/namespace_tests.py
index 9d9c6a6..c021b4d 100644
--- a/tests/namespace_tests.py
+++ b/tests/namespace_tests.py
@@ -336,6 +336,30 @@
len(positive_namespaces) + len(excluded_namespaces))
+class TestNamespacesDictLookupName(TestCase):
+
+ """Test NamespacesDict.lookup_name and lookup_normalized_name."""
+
+ net = False
+
+ def test_lookup_name(self):
+ """Test lookup_name."""
+ namespaces = builtin_NamespacesDict()
+ self.assertIs(namespaces.lookup_name('project'), namespaces[4])
+ self.assertIs(namespaces.lookup_name('PROJECT'), namespaces[4])
+ self.assertIs(namespaces.lookup_name('Project'), namespaces[4])
+ self.assertIs(namespaces.lookup_name('Project:'), namespaces[4])
+
+ def test_lookup_normalized_name(self):
+ """Test lookup_normalized_name."""
+ namespaces = builtin_NamespacesDict()
+ self.assertIs(namespaces.lookup_normalized_name('project'),
+ namespaces[4])
+ self.assertIsNone(namespaces.lookup_normalized_name('PROJECT'))
+ self.assertIsNone(namespaces.lookup_normalized_name('Project'))
+ self.assertIsNone(namespaces.lookup_normalized_name('Project:'))
+
+
class TestNamespacesDictGetItem(TestCase):
"""Test NamespacesDict.__getitem__."""
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 576b790..2439b8d 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -302,6 +302,7 @@
for item in mysite.validLanguageLinks():
self.assertIn(item, langs)
+ self.assertIsNone(self.site.namespaces.lookup_name(item))
def testNamespaceMethods(self):
"""Test cases for methods manipulating namespace names."""
--
To view, visit https://gerrit.wikimedia.org/r/206120
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ice7f51aef1673724c87ae2aa7a86f35f36d01692
Gerrit-PatchSet: 9
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Malafaya <malafaya(a)clix.pt>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>
Build Update for wikimedia/pywikibot-core
-------------------------------------
Build: #2818
Status: Passed
Duration: 14 minutes and 55 seconds
Commit: 8919541 (master)
Author: malafaya
Message: [IMPROV] Optimize/remove unneeded calls to Page.isEmpty()
A check for Page.isEmpty() is unnecessary as it will never be checked when it
would yield True.
A sequence of conditions was also optimized (quicker condition check first)
when replacing links.
Change-Id: Ib68446a77f52f59440d9f50fef416b9bfdb39827
View the changeset: https://github.com/wikimedia/pywikibot-core/compare/089a424eb607...8919541a…
View the full build log and details: https://travis-ci.org/wikimedia/pywikibot-core/builds/79931264
--
You can configure recipients for build notifications in your .travis.yml file. See http://docs.travis-ci.com/user/notifications
Hello PywikibotCommitWatcher,
I'd like you to do a code review. Please visit
https://gerrit.wikimedia.org/r/237622
to review the following change.
Change subject: Add _isempty caching attribute to Page
......................................................................
Add _isempty caching attribute to Page
Page.isEmpty() is a somewhat heavy function and can be called several times for
the same page. interwiki.py, for instance, does that.
Adding the attribute _isempty allows caching of this value without the need for
re-evaluation.
The attribute is cleared when latest_revision_id is deleted for safety's sake.
Change-Id: Ib68446a77f52f59440d9f50fef416b9bfdb39827
---
M pywikibot/page.py
1 file changed, 8 insertions(+), 5 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/22/237622/1
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 2ba2b0c..d0cd3a3 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -456,7 +456,7 @@
# * Old exceptions do not apply any more
# * Deleting _revid to force reload
# * Deleting _redirtarget, that info is now obsolete.
- for attr in ['_redirtarget', '_getexception', '_revid']:
+ for attr in ['_redirtarget', '_getexception', '_revid', '_isempty']:
if hasattr(self, attr):
delattr(self, attr)
@@ -738,10 +738,13 @@
@rtype: bool
"""
- txt = self.get()
- txt = textlib.removeLanguageLinks(txt, site=self.site)
- txt = textlib.removeCategoryLinks(txt, site=self.site)
- return len(txt) < 4
+ if not hasattr(self, "_isempty"):
+ txt = self.get()
+ txt = textlib.removeLanguageLinks(txt, site=self.site)
+ txt = textlib.removeCategoryLinks(txt, site=self.site)
+ self._isempty = len(txt) < 4
+
+ return bool(self._isempty)
def isTalkPage(self):
"""Return True if this page is in any talk namespace."""
--
To view, visit https://gerrit.wikimedia.org/r/237622
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib68446a77f52f59440d9f50fef416b9bfdb39827
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Malafaya <malafaya(a)clix.pt>
Gerrit-Reviewer: PywikibotCommitWatcher <pywikibot-commits(a)lists.wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>
Build Update for wikimedia/pywikibot-core
-------------------------------------
Build: #2817
Status: Passed
Duration: 28 minutes and 22 seconds
Commit: 089a424 (master)
Author: malafaya
Message: Build lang prefixes list once in removeLanguageLinks()
Instead of just checking whether validLanguageLinks() is
empty, it builds a list of language prefixes (including
the obsolete ones) once and then reuses that list to remove
all links which use one of the prefixes.
This indirectly reduces the runtime as it won't call
validLanguageLinks() twice if it's not empty.
Bug: T112256
Change-Id: Ie8462010561b510729e04a54447063957bd975f4
View the changeset: https://github.com/wikimedia/pywikibot-core/compare/c27464e40b5e...089a424e…
View the full build log and details: https://travis-ci.org/wikimedia/pywikibot-core/builds/79926546
--
You can configure recipients for build notifications in your .travis.yml file. See http://docs.travis-ci.com/user/notifications
jenkins-bot has submitted this change and it was merged.
Change subject: Build lang prefixes list once in removeLanguageLinks()
......................................................................
Build lang prefixes list once in removeLanguageLinks()
Instead of just checking whether validLanguageLinks() is
empty, it builds a list of language prefixes (including
the obsolete ones) once and then reuses that list to remove
all links which use one of the prefixes.
This indirectly reduces the runtime as it won't call
validLanguageLinks() twice if it's not empty.
Bug: T112256
Change-Id: Ie8462010561b510729e04a54447063957bd975f4
---
M pywikibot/textlib.py
1 file changed, 2 insertions(+), 2 deletions(-)
Approvals:
John Vandenberg: Looks good to me, but someone else must approve
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 7cefc3d..bda3503 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -793,12 +793,12 @@
"""
if site is None:
site = pywikibot.Site()
- if not site.validLanguageLinks():
- return text
# This regular expression will find every interwiki link, plus trailing
# whitespace.
languages = '|'.join(site.validLanguageLinks() +
list(site.family.obsolete.keys()))
+ if not languages:
+ return text
interwikiR = re.compile(r'\[\[(%s)\s?:[^\[\]\n]*\]\][\s]*'
% languages, re.IGNORECASE)
text = replaceExcept(text, interwikiR, '',
--
To view, visit https://gerrit.wikimedia.org/r/237612
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie8462010561b510729e04a54447063957bd975f4
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Malafaya <malafaya(a)clix.pt>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Malafaya <malafaya(a)clix.pt>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>