jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] Improved Site.sametitle
......................................................................
[FEAT] Improved Site.sametitle
This improves the Site.sametitle comparison with the following features:
- It uses (if available) the case-sensitivity option defined by the
namespace
- It collapses any run of underscores and spaces into a single space. So
'Fo__ar', 'Fo_ar' and 'Fo ar' are all the same.
- It works with servers that don't have a namespace whose name is empty.
Bug: 69118
Change-Id: I0b57ea6d7014b4ddfd8ceafbd859594b021e92b4
---
M pywikibot/site.py
M tests/site_tests.py
2 files changed, 83 insertions(+), 44 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/site.py b/pywikibot/site.py
index 799d99c..4a139fa 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -376,9 +376,9 @@
# Discard leading colon
if count >= 2 and parts[0] == '' and parts[1]:
- return parts[1]
+ return parts[1].strip()
elif parts[0]:
- return parts[0]
+ return parts[0].strip()
return False
@staticmethod
@@ -806,55 +806,42 @@
re.IGNORECASE | re.UNICODE | re.DOTALL)
def sametitle(self, title1, title2):
- """Return True if title1 and title2 identify the same wiki
page."""
- # title1 and title2 may be unequal but still identify the same page,
- # if they use different aliases for the same namespace
+ """
+ Return True if title1 and title2 identify the same wiki page.
- def valid_namespace(alias, ns):
- """Determine if a string is a valid alias for a namespace.
-
- @param alias: namespace alias
- @type alias: unicode
- @param ns: namespace
- @type ns: int
-
- @return: bool
- """
- for text in self.namespace(ns, all=True):
- if text.lower() == alias.lower():
- return True
- return False
+ title1 and title2 may be unequal but still identify the same page,
+ if they use different aliases for the same namespace.
+ """
+ def ns_split(title):
+ """Separate the namespace from the name."""
+ if ':' not in title:
+ title = ':' + title
+ ns, _, name = title.partition(':')
+ ns = Namespace.lookup_name(ns, self.namespaces) or default_ns
+ return ns, name
if title1 == title2:
return True
+ # Replace underscores with spaces and multiple combinations of them
+ # with only one space
+ title1 = re.sub(r'[_ ]+', ' ', title1)
+ title2 = re.sub(r'[_ ]+', ' ', title2)
+ if title1 == title2:
+ return True
+ default_ns = self.namespaces[0]
# determine whether titles contain namespace prefixes
- if ":" in title1:
- ns1, name1 = title1.split(":", 1)
- else:
- ns1, name1 = 0, title1
- if ":" in title2:
- ns2, name2 = title2.split(":", 1)
- else:
- ns2, name2 = 0, title2
- for space in self.namespaces(): # iterate over all valid namespaces
- if not isinstance(ns1, int) and valid_namespace(ns1, space):
- ns1 = space
- if not isinstance(ns2, int) and valid_namespace(ns2, space):
- ns2 = space
- if not isinstance(ns1, int):
- # no valid namespace prefix found, so the string followed by ":"
- # must be part of the title
- name1 = ns1 + ":" + name1
- ns1 = 0
- if not isinstance(ns2, int):
- name2 = ns2 + ":" + name2
- ns2 = 0
- if ns1 != ns2:
+ ns1_obj, name1 = ns_split(title1)
+ ns2_obj, name2 = ns_split(title2)
+ if ns1_obj != ns2_obj:
# pages in different namespaces
return False
- if self.case() == "first-letter":
- name1 = name1[:1].upper() + name1[1:]
- name2 = name2[:1].upper() + name2[1:]
+ name1 = name1.strip()
+ name2 = name2.strip()
+ # If the namespace has a case definition it's overriding the site's
+ # case definition
+ if (ns1_obj.case if hasattr(ns1_obj, 'case') else self.case()) ==
'first-letter':
+ name1 = name1[0].upper() + name1[1:]
+ name2 = name2[0].upper() + name2[1:]
return name1 == name2
# namespace shortcuts for backwards-compatibility
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 408c60b..c2b3140 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -144,6 +144,17 @@
self.assertFalse(mysite.isInterwikiLink("foo"))
self.assertIsInstance(mysite.redirectRegex().pattern, basestring)
self.assertIsInstance(mysite.category_on_one_line(), bool)
+ self.assertTrue(mysite.sametitle("Template:Test",
"Template:Test"))
+ self.assertTrue(mysite.sametitle("Template: Test", "Template:
Test"))
+ self.assertTrue(mysite.sametitle('Test name', 'Test name'))
+ self.assertFalse(mysite.sametitle('Test name', 'Test Name'))
+ # User, MediaWiki (both since 1.16) and Special are always
+ # first-letter (== only first non-namespace letter is case insensitive)
+ # See also:
https://www.mediawiki.org/wiki/Manual:$wgCapitalLinks
+ self.assertTrue(mysite.sametitle("Special:Always",
"Special:always"))
+ if LV(mysite.version()) >= LV('1.16'):
+ self.assertTrue(mysite.sametitle('User:Always',
'User:always'))
+ self.assertTrue(mysite.sametitle('MediaWiki:Always',
'MediaWiki:always'))
def testConstructors(self):
"""Test cases for site constructors."""
@@ -1611,6 +1622,47 @@
self.assertEqual(item.id, 'Q5296')
+class TestSameTitleSite(TestCase):
+
+ """Test APISite.sametitle on sites with known
behaviour."""
+
+ sites = {
+ 'enwp': {
+ 'family': 'wikipedia',
+ 'code': 'en',
+ },
+ 'dewp': {
+ 'family': 'wikipedia',
+ 'code': 'de',
+ },
+ 'enwt': {
+ 'family': 'wiktionary',
+ 'code': 'en',
+ }
+ }
+
+ def check(self, site, case_sensitive):
+ self.assertEqual(site.sametitle('Foo', 'foo'), not
case_sensitive)
+ self.assertTrue(site.sametitle('File:Foo', 'Image:Foo'))
+ self.assertTrue(site.sametitle(':Foo', 'Foo'))
+ self.assertFalse(site.sametitle('User:Foo', 'Foo'))
+
+ def test_enwp(self):
+ self.check(self.get_site('enwp'), False)
+ self.assertFalse(self.get_site('enwp').sametitle(
+ 'Template:Test template', 'Template:Test Template'))
+
+ def test_dewp(self):
+ site = self.get_site('dewp')
+ self.check(site, False)
+ self.assertTrue(site.sametitle('Benutzer:Foo', 'User:Foo'))
+ self.assertTrue(site.sametitle('Benutzerin:Foo', 'User:Foo'))
+ self.assertTrue(site.sametitle('Benutzerin:Foo',
'Benutzer:Foo'))
+
+ def test_enwt(self):
+ self.check(self.get_site('enwt'), True)
+
+
if __name__ == '__main__':
try:
unittest.main()
--
To view, visit
https://gerrit.wikimedia.org/r/151809
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I0b57ea6d7014b4ddfd8ceafbd859594b021e92b4
Gerrit-PatchSet: 12
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Nullzero <nullzero.free(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>