jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/492504 )
Change subject: [IMPR] Fix handling of interlanguage links in replace_links
......................................................................
[IMPR] Fix handling of interlanguage links in replace_links
- fix detection of iw (en:Example) links (it did not work at all)
- fix detection of il (:en:Example) links (the site should not be derived from the replacement pair) and their handling (until now replace_links turned [[:en:Foo]] into [[Bar]] for the replacement pair :en:Foo > :en:Bar)
- make site mandatory (correct detection and handling of iw and il links needs a site matching the origin or target site of the text)
- extracted from https://gerrit.wikimedia.org/r/#/c/pywikibot/core/+/491673/
Change-Id: Iae273b5440bd697b84b574bf2649996c1cec4f32
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 62 insertions(+), 13 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 7c676c2..506fa93 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -644,8 +644,8 @@ in that case it will apply the second value from the sequence. @type replace: sequence of pywikibot.Page/pywikibot.Link/str or callable - @param site: a Site object to use if replace is not a sequence or the link - to be replaced is not a Link or Page instance. + @param site: a Site object to use. It should match the origin + or target site of the text @type site: pywikibot.APISite """ def to_link(source): @@ -689,14 +689,14 @@ if isinstance(replace_list[1], basestring): replace_list[1] = pywikibot.Page(site, replace_list[1]) check_classes(replace_list[0]) - if replace_list[0].site != replace_list[1].site: - raise ValueError('Both pages in the "replace" argument ' - 'must belong to the same site.') - site = replace_list[0].site replace = replace_callable + if site is None: + issue_deprecation_warning( + 'site=None', + 'a valid site for list or tuple parameter "replace"', + 2, since='20190223') elif site is None: - raise ValueError('If "replace" is not a tuple or list of pages, ' - 'the "site" argument must be provided.') + raise ValueError('The "site" argument must be provided.')
linktrail = site.linktrail() link_pattern = re.compile( @@ -710,10 +710,15 @@ m = link_pattern.search(text, pos=curpos) if not m: break - # ignore links to sections of the same page + # Ignore links to sections of the same page if not m.group('title').strip(): curpos = m.end() continue + # Ignore interwiki links + if (site.isInterwikiLink(m.group('title').strip()) + and not m.group('title').strip().startswith(':')): + curpos = m.end() + continue groups = m.groupdict() if groups['label'] and '[[' in groups['label']: # TODO: Work on the link within the label too @@ -740,10 +745,6 @@ # unrecognized iw prefix curpos = end continue - # ignore interwiki links - if link.site != site: - curpos = end - continue
# Check whether the link found should be replaced. # Either None, False or tuple(Link, bool) @@ -792,6 +793,9 @@ is_link = False
new_title = new_link.canonical_title() + # Make correct langlink if needed + if not new_link.site == site: + new_title = ':' + new_link.site.code + ':' + new_title
if is_link: # Use link's label diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index 6b754da..860fbfb 100644 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -1060,6 +1060,51 @@ ValueError, r'unicode (str.*bytes (str', textlib.replace_links, self.text, callback, self.wp_site)
+ def test_replace_interwiki_links(self): + """Make sure interwiki links can not be replaced.""" + link = '[[fr:how]]' + self.assertEqual( + textlib.replace_links(link, ('fr:how', 'de:are'), self.wp_site), + link) + self.assertEqual( + textlib.replace_links(link, (':fr:how', ':de:are'), self.wp_site), + link) + self.assertEqual( + textlib.replace_links(link, ('how', 'de:are'), self.wp_site), + link) + self.assertEqual( + textlib.replace_links(link, ('de:how', 'de:are'), self.wp_site), + link) + + +class TestReplaceLinksNonDry(TestCase): + """Test the replace_links function in textlib non-dry.""" + + family = 'wikipedia' + code = 'en' + + cached = True + + def test_replace_interlanguage_links(self): + """Test replacing interlanguage links.""" + link = '[[:fr:how]]' + self.assertEqual( + textlib.replace_links(link, (':fr:how', ':de:are'), + self.site), + '[[:de:Are|fr:how]]') + self.assertEqual( + textlib.replace_links(link, ('fr:how', 'de:are'), + self.site), + '[[:de:Are|fr:how]]') + self.assertEqual( + textlib.replace_links(link, ('how', ':de:are'), + self.site), + link) + self.assertEqual( + textlib.replace_links(link, (':de:how', ':de:are'), + self.site), + link) +
class TestLocalDigits(TestCase):
pywikibot-commits@lists.wikimedia.org