jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] cosmetic_changes: Implement external link to wikilink ......................................................................
[FIX] cosmetic_changes: Implement external link to wikilink
The feature to replace an external link with a wikilink when it points to the same wiki was introduced in compat in f2645f85, but it has been disabled ever since it was added. Later it was ported to core in 6ac688fe, still disabled.
This change now enables an improved version which does not allow the delimiter in the text itself, so that links to diffs, for example, won't be changed. Also, instead of assuming that the URL is `code.family`, it iterates over all the URLs which are also available for `Family.from_url` and `Site(url=…)`.
Change-Id: Id06b256e2005f5730fbfbaa96cd6690ec4de1789 --- M pywikibot/cosmetic_changes.py M pywikibot/family.py M tests/cosmetic_changes_tests.py 3 files changed, 102 insertions(+), 12 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py index 6b4af85..93a19ba 100755 --- a/pywikibot/cosmetic_changes.py +++ b/pywikibot/cosmetic_changes.py @@ -696,16 +696,42 @@
# from fixes.py def fixSyntaxSave(self, text): + def replace_link(match): + replacement = '[[' + match.group('link') + if match.group('title'): + replacement += '|' + match.group('title') + return replacement + ']]' + exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace'] # link to the wiki working on - # TODO: disable this for difflinks and titled links, - # to prevent edits like this: - # https://de.wikipedia.org/w/index.php?title=Wikipedia%3aVandalismusmeldung&am... -# text = textlib.replaceExcept(text, -# r'[https?://%s.%s.org/wiki/(?P<link>\S+)\s+(?P<title>.+?)\s?]' -# % (self.site.code, self.site.family.name), -# r'[[\g<link>|\g<title>]]', exceptions) + # Do not use the first entry as it is not actually a prefix + for suffix in self.site._interwiki_urls()[1:]: + http_url = self.site.base_url(suffix, 'http') + if self.site.protocol() == 'http': + https_url = None + else: + https_url = self.site.base_url(suffix, 'https') + # compare strings without the protocol, if they are empty support + # also no prefix (//en.wikipedia.org/…) + if http_url[4:] == https_url[5:]: + urls = ['(?:https?:)?' + re.escape(http_url[5:])] + else: + urls = [re.escape(url) for url in (http_url, https_url) + if url is not None] + for url in urls: + # Only include links which don't include the separator as + # the wikilink won't support additional parameters + separator = '?' + if '?' in suffix: + separator += '&' + # Match first a non space in the title to prevent that multiple + # spaces at the end without title will be matched by it + text = textlib.replaceExcept( + text, + r'[[?' 
+ url + r'(?P<link>[^' + separator + r']+?)' + r'(\s+(?P<title>[^\s].*?))?\s*]]?', + replace_link, exceptions, site=self.site) # external link in/starting with double brackets text = textlib.replaceExcept( text, diff --git a/pywikibot/family.py b/pywikibot/family.py index ad5168c..95f7695 100644 --- a/pywikibot/family.py +++ b/pywikibot/family.py @@ -1076,18 +1076,28 @@ # Override this ONLY if the wiki family requires a path prefix return ''
- def _hostname(self, code): + def _hostname(self, code, protocol=None): """Return the protocol and hostname.""" - protocol = self.protocol(code) + if protocol is None: + protocol = self.protocol(code) if protocol == 'https': host = self.ssl_hostname(code) else: host = self.hostname(code) return protocol, host
- def base_url(self, code, uri): - """Prefix uri with port and hostname.""" - protocol, host = self._hostname(code) + def base_url(self, code, uri, protocol=None): + """ + Prefix uri with port and hostname. + + @param code: The site code + @param uri: The absolute path after the hostname + @param protocol: The protocol which is used. If None it'll determine the + protocol from the code. + @return: The full URL + @rtype: str + """ + protocol, host = self._hostname(code, protocol) if protocol == 'https': uri = self.ssl_pathprefix(code) + uri return urlparse.urljoin('{0}://{1}'.format(protocol, host), uri) diff --git a/tests/cosmetic_changes_tests.py b/tests/cosmetic_changes_tests.py index a591f18..c4fdc3e 100644 --- a/tests/cosmetic_changes_tests.py +++ b/tests/cosmetic_changes_tests.py @@ -91,6 +91,60 @@
def test_fixSyntaxSave(self): """Test fixSyntaxSave method.""" + # necessary as the fixer needs the article path to fix it + self.cct.site._siteinfo._cache['general'] = ( + {'articlepath': '/wiki/$1'}, True) + self.assertEqual( + '[[Example|Page]]\n[[Example|Page]]\n[[Example|Page]]\n' + '[[Example]]\n[[Example]]\n[[Example]]\n' + '[https://de.wikipedia.org/w/index.php?title=Example&' + 'oldid=68181978 Page]\n' + '[https://de.wikipedia.org/w/index.php?title=Example&' + 'oldid=68181978&diff=next Page]\n' + '[https://en.wikipedia.org/w/index.php?title=Example%5D%5Cn' + '[https://de.wiktionary.org/w/index.php?title=Example%5D%5Cn', + self.cct.fixSyntaxSave( + '[https://de.wikipedia.org/w/index.php?title=Example Page]\n' + '[https://de.wikipedia.org/w/index.php?title=Example Page ]\n' + '[https://de.wikipedia.org/w/index.php?title=Example Page ]\n' + '[https://de.wikipedia.org/w/index.php?title=Example%5D%5Cn' + '[https://de.wikipedia.org/w/index.php?title=Example ]\n' + '[https://de.wikipedia.org/w/index.php?title=Example ]\n' + '[https://de.wikipedia.org/w/index.php?title=Example&' + 'oldid=68181978 Page]\n' + '[https://de.wikipedia.org/w/index.php?title=Example&' + 'oldid=68181978&diff=next Page]\n' + '[https://en.wikipedia.org/w/index.php?title=Example%5D%5Cn' + '[https://de.wiktionary.org/w/index.php?title=Example%5D%5Cn' + )) + self.assertEqual( + '[[Example]]\n[[Example]]\n[[Example]]\n' + '[https://de.wikipedia.org/wiki/Example?oldid=68181978 Page]\n' + '[https://de.wikipedia.org/wiki/Example?' + 'oldid=68181978&diff=next Page]\n' + '[[Example]]\n[[Example]]\n[[Example]]\n' + '[https://de.wikipedia.org/w/index.php/Example?' + 'oldid=68181978 Page]\n' + '[https://de.wikipedia.org/w/index.php/Example?' 
+ 'oldid=68181978&diff=next Page]\n' + '[[&]]\n[[&]]\n', + self.cct.fixSyntaxSave( + '[https://de.wikipedia.org/wiki/Example%5D%5Cn' + '[https://de.wikipedia.org/wiki/Example ]\n' + '[https://de.wikipedia.org/wiki/Example ]\n' + '[https://de.wikipedia.org/wiki/Example?oldid=68181978 Page]\n' + '[https://de.wikipedia.org/wiki/Example?' + 'oldid=68181978&diff=next Page]\n' + '[https://de.wikipedia.org/w/index.php/Example%5D%5Cn' + '[https://de.wikipedia.org/w/index.php/Example ]\n' + '[https://de.wikipedia.org/w/index.php/Example ]\n' + '[https://de.wikipedia.org/w/index.php/Example?' + 'oldid=68181978 Page]\n' + '[https://de.wikipedia.org/w/index.php/Example?' + 'oldid=68181978&diff=next Page]\n' + '[https://de.wikipedia.org/wiki/&%5D%5Cn' + '[https://de.wikipedia.org/w/index.php/&%5D%5Cn' + )) self.assertEqual( '[https://de.wikipedia.org]', self.cct.fixSyntaxSave('[[https://de.wikipedia.org]]'))
pywikibot-commits@lists.wikimedia.org