jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] cosmetic_changes: Detect useless space at line end
......................................................................
[FIX] cosmetic_changes: Detect useless space at line end
Previously only useless spaces at the end of the string were trimmed, but not
those at the end of each line. Instead of using two regexes to search for
them, both are now combined into one regex so that the text only needs to be
searched once. It also passes the site parameter, as the call has been changed.
Change-Id: I8cf45d76441af49756f5933a651d07ecf61f2f82
---
M pywikibot/cosmetic_changes.py
M tests/cosmetic_changes_tests.py
2 files changed, 6 insertions(+), 4 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 93a19ba..3010ef3 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -617,13 +617,11 @@
def removeUselessSpaces(self, text):
"""Cleanup multiple or trailing spaces."""
- multipleSpacesR = re.compile(' +')
- spaceAtLineEndR = re.compile(' $')
exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace', 'table']
if self.site.sitename != 'wikipedia:cs':
exceptions.append('template')
- text = textlib.replaceExcept(text, multipleSpacesR, ' ', exceptions)
- text = textlib.replaceExcept(text, spaceAtLineEndR, '', exceptions)
+ text = textlib.replaceExcept(text, r'(?m) +( |$)', r'\1', exceptions,
+ site=self.site)
return text
def removeNonBreakingSpaceBeforePercent(self, text):
diff --git a/tests/cosmetic_changes_tests.py b/tests/cosmetic_changes_tests.py
index c4fdc3e..17e270d 100644
--- a/tests/cosmetic_changes_tests.py
+++ b/tests/cosmetic_changes_tests.py
@@ -52,6 +52,10 @@
"""Test removeUselessSpaces method."""
self.assertEqual('Foo bar',
self.cct.removeUselessSpaces('Foo bar '))
+ self.assertEqual('Foo bar',
+ self.cct.removeUselessSpaces('Foo bar '))
+ self.assertEqual('Foo bar\nsna fu',
+ self.cct.removeUselessSpaces('Foo bar \nsna fu '))
# inside comments
self.assertEqual('<!--Foo bar -->',
self.cct.removeUselessSpaces('<!--Foo bar -->'))
--
To view, visit https://gerrit.wikimedia.org/r/246795
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I8cf45d76441af49756f5933a651d07ecf61f2f82
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] cosmetic_changes: Implement external link to wikilink
......................................................................
[FIX] cosmetic_changes: Implement external link to wikilink
The feature to replace an external link with a wikilink when it points to the
same wiki was introduced in compat as of f2645f85, but it has been disabled
ever since its addition. Later it was ported, still disabled, to core in
6ac688fe. This change now enables an improved version which does not allow the
delimiter in the link itself, so that links to diffs, for example, won't be
changed. Also, instead of assuming that the URL is `code.family`, it iterates
over all the URLs which are also available for `Family.from_url` and
`Site(url=…)`.
Change-Id: Id06b256e2005f5730fbfbaa96cd6690ec4de1789
---
M pywikibot/cosmetic_changes.py
M pywikibot/family.py
M tests/cosmetic_changes_tests.py
3 files changed, 102 insertions(+), 12 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 6b4af85..93a19ba 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -696,16 +696,42 @@
# from fixes.py
def fixSyntaxSave(self, text):
+ def replace_link(match):
+ replacement = '[[' + match.group('link')
+ if match.group('title'):
+ replacement += '|' + match.group('title')
+ return replacement + ']]'
+
exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
'startspace']
# link to the wiki working on
- # TODO: disable this for difflinks and titled links,
- # to prevent edits like this:
- # https://de.wikipedia.org/w/index.php?title=Wikipedia%3aVandalismusmeldung&d…
-# text = textlib.replaceExcept(text,
-# r'\[https?://%s\.%s\.org/wiki/(?P<link>\S+)\s+(?P<title>.+?)\s?\]'
-# % (self.site.code, self.site.family.name),
-# r'[[\g<link>|\g<title>]]', exceptions)
+ # Do not use the first entry as it is not actually a prefix
+ for suffix in self.site._interwiki_urls()[1:]:
+ http_url = self.site.base_url(suffix, 'http')
+ if self.site.protocol() == 'http':
+ https_url = None
+ else:
+ https_url = self.site.base_url(suffix, 'https')
+ # compare strings without the protocol, if they are empty support
+ # also no prefix (//en.wikipedia.org/…)
+ if http_url[4:] == https_url[5:]:
+ urls = ['(?:https?:)?' + re.escape(http_url[5:])]
+ else:
+ urls = [re.escape(url) for url in (http_url, https_url)
+ if url is not None]
+ for url in urls:
+ # Only include links which don't include the separator as
+ # the wikilink won't support additional parameters
+ separator = '?'
+ if '?' in suffix:
+ separator += '&'
+ # Match first a non space in the title to prevent that multiple
+ # spaces at the end without title will be matched by it
+ text = textlib.replaceExcept(
+ text,
+ r'\[\[?' + url + r'(?P<link>[^' + separator + r']+?)'
+ r'(\s+(?P<title>[^\s].*?))?\s*\]\]?',
+ replace_link, exceptions, site=self.site)
# external link in/starting with double brackets
text = textlib.replaceExcept(
text,
diff --git a/pywikibot/family.py b/pywikibot/family.py
index ad5168c..95f7695 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -1076,18 +1076,28 @@
# Override this ONLY if the wiki family requires a path prefix
return ''
- def _hostname(self, code):
+ def _hostname(self, code, protocol=None):
"""Return the protocol and hostname."""
- protocol = self.protocol(code)
+ if protocol is None:
+ protocol = self.protocol(code)
if protocol == 'https':
host = self.ssl_hostname(code)
else:
host = self.hostname(code)
return protocol, host
- def base_url(self, code, uri):
- """Prefix uri with port and hostname."""
- protocol, host = self._hostname(code)
+ def base_url(self, code, uri, protocol=None):
+ """
+ Prefix uri with port and hostname.
+
+ @param code: The site code
+ @param uri: The absolute path after the hostname
+ @param protocol: The protocol which is used. If None it'll determine the
+ protocol from the code.
+ @return: The full URL
+ @rtype: str
+ """
+ protocol, host = self._hostname(code, protocol)
if protocol == 'https':
uri = self.ssl_pathprefix(code) + uri
return urlparse.urljoin('{0}://{1}'.format(protocol, host), uri)
diff --git a/tests/cosmetic_changes_tests.py b/tests/cosmetic_changes_tests.py
index a591f18..c4fdc3e 100644
--- a/tests/cosmetic_changes_tests.py
+++ b/tests/cosmetic_changes_tests.py
@@ -91,6 +91,60 @@
def test_fixSyntaxSave(self):
"""Test fixSyntaxSave method."""
+ # necessary as the fixer needs the article path to fix it
+ self.cct.site._siteinfo._cache['general'] = (
+ {'articlepath': '/wiki/$1'}, True)
+ self.assertEqual(
+ '[[Example|Page]]\n[[Example|Page]]\n[[Example|Page]]\n'
+ '[[Example]]\n[[Example]]\n[[Example]]\n'
+ '[https://de.wikipedia.org/w/index.php?title=Example&'
+ 'oldid=68181978 Page]\n'
+ '[https://de.wikipedia.org/w/index.php?title=Example&'
+ 'oldid=68181978&diff=next Page]\n'
+ '[https://en.wikipedia.org/w/index.php?title=Example]\n'
+ '[https://de.wiktionary.org/w/index.php?title=Example]\n',
+ self.cct.fixSyntaxSave(
+ '[https://de.wikipedia.org/w/index.php?title=Example Page]\n'
+ '[https://de.wikipedia.org/w/index.php?title=Example Page ]\n'
+ '[https://de.wikipedia.org/w/index.php?title=Example Page ]\n'
+ '[https://de.wikipedia.org/w/index.php?title=Example]\n'
+ '[https://de.wikipedia.org/w/index.php?title=Example ]\n'
+ '[https://de.wikipedia.org/w/index.php?title=Example ]\n'
+ '[https://de.wikipedia.org/w/index.php?title=Example&'
+ 'oldid=68181978 Page]\n'
+ '[https://de.wikipedia.org/w/index.php?title=Example&'
+ 'oldid=68181978&diff=next Page]\n'
+ '[https://en.wikipedia.org/w/index.php?title=Example]\n'
+ '[https://de.wiktionary.org/w/index.php?title=Example]\n'
+ ))
+ self.assertEqual(
+ '[[Example]]\n[[Example]]\n[[Example]]\n'
+ '[https://de.wikipedia.org/wiki/Example?oldid=68181978 Page]\n'
+ '[https://de.wikipedia.org/wiki/Example?'
+ 'oldid=68181978&diff=next Page]\n'
+ '[[Example]]\n[[Example]]\n[[Example]]\n'
+ '[https://de.wikipedia.org/w/index.php/Example?'
+ 'oldid=68181978 Page]\n'
+ '[https://de.wikipedia.org/w/index.php/Example?'
+ 'oldid=68181978&diff=next Page]\n'
+ '[[&]]\n[[&]]\n',
+ self.cct.fixSyntaxSave(
+ '[https://de.wikipedia.org/wiki/Example]\n'
+ '[https://de.wikipedia.org/wiki/Example ]\n'
+ '[https://de.wikipedia.org/wiki/Example ]\n'
+ '[https://de.wikipedia.org/wiki/Example?oldid=68181978 Page]\n'
+ '[https://de.wikipedia.org/wiki/Example?'
+ 'oldid=68181978&diff=next Page]\n'
+ '[https://de.wikipedia.org/w/index.php/Example]\n'
+ '[https://de.wikipedia.org/w/index.php/Example ]\n'
+ '[https://de.wikipedia.org/w/index.php/Example ]\n'
+ '[https://de.wikipedia.org/w/index.php/Example?'
+ 'oldid=68181978 Page]\n'
+ '[https://de.wikipedia.org/w/index.php/Example?'
+ 'oldid=68181978&diff=next Page]\n'
+ '[https://de.wikipedia.org/wiki/&]\n'
+ '[https://de.wikipedia.org/w/index.php/&]\n'
+ ))
self.assertEqual(
'[https://de.wikipedia.org]',
self.cct.fixSyntaxSave('[[https://de.wikipedia.org]]'))
--
To view, visit https://gerrit.wikimedia.org/r/244880
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Id06b256e2005f5730fbfbaa96cd6690ec4de1789
Gerrit-PatchSet: 7
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Remove html entities from list_to_text result
......................................................................
Remove html entities from list_to_text result
Prior to MediaWiki 1.16, messages contained html entities,
including &nbsp;. These should be converted to unicode.
Also deprecate passing non-unicode (str) arguments to list_to_text,
and require the MediaWiki messages to exist, as they
are present in MediaWiki 1.14 and fall back to English when
not provided in the site language.
Change-Id: Ieda20f72946185f7be015f728773450a080bd156
---
M pywikibot/site.py
1 file changed, 21 insertions(+), 15 deletions(-)
Approvals:
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/site.py b/pywikibot/site.py
index 8199c1f..a05e4ce 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -38,6 +38,7 @@
redirect_func, issue_deprecation_warning,
manage_wrapping, MediaWikiVersion, first_upper, normalize_username,
merge_unique_dicts,
+ PY2,
)
from pywikibot.comms.http import get_authentication
from pywikibot.tools.ip import is_IP
@@ -2341,32 +2342,37 @@
def list_to_text(self, args):
"""Convert a list of strings into human-readable text.
- The MediaWiki message 'and' is used as separator
+ The MediaWiki messages 'and' and 'word-separator' are used as separator
between the last two arguments.
- If present, other arguments are joined using a comma.
+ If more than two arguments are given, other arguments are
+ joined using MediaWiki message 'comma-separator'.
@param args: text to be expanded
- @type args: iterable
+ @type args: iterable of unicode
@return: unicode
"""
+ NEEDED_MW_MESSAGES = ('and', 'comma-separator', 'word-separator')
if not args:
return u''
+ if PY2 and any(isinstance(arg, str) for arg in args):
+ issue_deprecation_warning('arg of type str', 'type unicode', 2)
+
args = [unicode(e) for e in args]
- msgs = {
- 'and': ',',
- 'comma-separator': ', ',
- 'word-separator': ' '
- }
try:
- self.mediawiki_messages(list(msgs.keys()))
+ msgs = self.mediawiki_messages(NEEDED_MW_MESSAGES)
except KeyError:
- pass
- for msg in msgs:
- try:
- msgs[msg] = self.mediawiki_message(msg)
- except KeyError:
- pass
+ raise NotImplementedError(
+ 'MediaWiki messages missing: {0}'.format(NEEDED_MW_MESSAGES))
+
+ if MediaWikiVersion(self.version()) < MediaWikiVersion('1.16'):
+ for key, value in msgs.items():
+ if key == 'and' and value == ', and':
+ # v1.14 defined and as ', and'; fixed in v1.15
+ msgs['and'] = ' and'
+ else:
+ msgs[key] = pywikibot.html2unicode(value)
+
concat = msgs['and'] + msgs['word-separator']
return msgs['comma-separator'].join(args[:-2] + [concat.join(args[-2:])])
--
To view, visit https://gerrit.wikimedia.org/r/246193
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ieda20f72946185f7be015f728773450a080bd156
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>