jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/498727 )
Change subject: [bugfix] Fix basestring/StringTypes across scripts ......................................................................
[bugfix] Fix basestring/StringTypes across scripts
- basestring = StringTypes = (str, unicode) in PY2, but several scripts use `basestring = (str,)` for PY3, which is incorrect and produces different behavior between PY2 and PY3 - so all occurrences of `basestring = (str,)` or `unicode = str` in scripts are replaced by `UnicodeType` from pywikibot.tools
Bug: T219095 Change-Id: Ib7d1b649576dfb58e8b971b53c96b75961267fb7 --- M pywikibot/__init__.py M pywikibot/bot.py M pywikibot/data/api.py M pywikibot/data/wikistats.py M pywikibot/date.py M pywikibot/flow.py M pywikibot/logentries.py M pywikibot/login.py M pywikibot/page.py M pywikibot/pagegenerators.py M pywikibot/site.py M pywikibot/specialbots.py M pywikibot/textlib.py M pywikibot/tools/__init__.py M pywikibot/tools/chars.py M pywikibot/userinterfaces/win32_unicode.py M pywikibot/version.py M scripts/category.py M scripts/imageharvest.py M scripts/interwiki.py M scripts/misspelling.py M scripts/redirect.py M scripts/replace.py M scripts/weblinkchecker.py M tests/djvu_tests.py 25 files changed, 150 insertions(+), 203 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/__init__.py b/pywikibot/__init__.py index e674046..7fa267f 100644 --- a/pywikibot/__init__.py +++ b/pywikibot/__init__.py @@ -67,6 +67,7 @@ ModuleDeprecationWrapper as _ModuleDeprecationWrapper, PY2, UnicodeMixin, + UnicodeType ) from pywikibot.tools.formatter import color_format
@@ -74,7 +75,6 @@ if not PY2: from queue import Queue long = int - basestring = str else: from Queue import Queue
@@ -791,7 +791,7 @@ self.site = site or Site().data_repository()
# also allow entity URIs to be provided via unit parameter - if isinstance(unit, basestring) and \ + if isinstance(unit, UnicodeType) and \ unit.partition('://')[0] not in ('http', 'https'): raise ValueError("'unit' must be an ItemPage or entity uri.")
@@ -833,7 +833,7 @@ @type lazy_load: bool @return: pywikibot.ItemPage """ - if not isinstance(self._unit, basestring): + if not isinstance(self._unit, UnicodeType): return self._unit
repo = repo or self.site diff --git a/pywikibot/bot.py b/pywikibot/bot.py index 0a93d5e..2c98fc6 100644 --- a/pywikibot/bot.py +++ b/pywikibot/bot.py @@ -127,9 +127,6 @@ # Note: all output goes through python std library "logging" module _logger = 'bot'
-if not PY2: - unicode = str - # User interface initialization # search for user interface module in the 'userinterfaces' subdirectory uiModule = __import__('pywikibot.userinterfaces.%s_interface' diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py index 9f2843a..c111fd0 100644 --- a/pywikibot/data/api.py +++ b/pywikibot/data/api.py @@ -49,9 +49,7 @@ # The following solution might be removed if/once the bug is fixed, # unless the fix is not backported to py3.x versions that should # instead support PWB. - basestring = (str, ) from urllib.parse import urlencode, unquote - unicode = str
from io import BytesIO
@@ -105,7 +103,7 @@ self.code = code self.info = info self.other = kwargs - self.unicode = unicode(self.__str__()) + self.unicode = UnicodeType(self.__str__())
def __repr__(self): """Return internal representation.""" @@ -309,7 +307,7 @@ assert('mime' in result['help']) assert(result['help']['mime'] == 'text/plain') assert('help' in result['help']) - assert(isinstance(result['help']['help'], basestring)) + assert(isinstance(result['help']['help'], UnicodeType))
help_text = result['help']['help']
@@ -353,7 +351,7 @@ @type modules: iterable or basestring @rtype: set """ - if isinstance(modules, basestring): + if isinstance(modules, UnicodeType): return set(modules.split('|')) return set(modules)
@@ -1374,7 +1372,7 @@ assert(value.site == self.site) return value.title(with_section=False) else: - return unicode(value) + return UnicodeType(value)
def __getitem__(self, key): """Implement dict interface.""" @@ -1399,7 +1397,7 @@ if isinstance(value, bytes): value = value.decode(self.site.encoding())
- if isinstance(value, unicode): + if isinstance(value, UnicodeType): value = value.split('|')
if hasattr(value, 'api_iter'): @@ -1778,7 +1776,7 @@ @raises APIError: unknown action found @raises APIError: unknown query result type """ - if not isinstance(data, unicode): + if not isinstance(data, UnicodeType): data = data.decode(self.site.encoding()) pywikibot.debug(('API response received from {}:\n' .format(self.site)) + data, _logger) @@ -2005,7 +2003,7 @@ if key in ('error', 'warnings'): continue assert key not in error - assert isinstance(result[key], basestring), \ + assert isinstance(result[key], UnicodeType), \ 'Unexpected %s: %r' % (key, result[key]) error[key] = result[key]
@@ -2658,7 +2656,7 @@
return False
- if isinstance(namespaces, basestring): + if isinstance(namespaces, UnicodeType): namespaces = namespaces.split('|')
# Use Namespace id (int) here; Request will cast int to str @@ -2832,7 +2830,7 @@ if 'query' not in self.data: pywikibot.log("%s: 'query' not found in api response." % self.__class__.__name__) - pywikibot.log(unicode(self.data)) + pywikibot.log(UnicodeType(self.data)) # if (query-)continue is present, self.resultkey might not have # been fetched yet if self.continue_name not in self.data: diff --git a/pywikibot/data/wikistats.py b/pywikibot/data/wikistats.py index 309e549..14175d8 100644 --- a/pywikibot/data/wikistats.py +++ b/pywikibot/data/wikistats.py @@ -10,11 +10,10 @@
import pywikibot from pywikibot.comms import http -from pywikibot.tools import PY2 +from pywikibot.tools import PY2, UnicodeType
if not PY2: import csv - unicode = str else: try: import unicodecsv as csv @@ -185,8 +184,8 @@ site = {}
for field in row.findall('field'): - name = unicode(field.get('name')) - site[name] = unicode(field.text) + name = UnicodeType(field.get('name')) + site[name] = UnicodeType(field.text)
data.append(site)
diff --git a/pywikibot/date.py b/pywikibot/date.py index 34e38b1..fb761cb 100644 --- a/pywikibot/date.py +++ b/pywikibot/date.py @@ -19,11 +19,7 @@ from string import digits as _decimalDigits # noqa: N812
from pywikibot.textlib import NON_LATIN_DIGITS -from pywikibot.tools import first_lower, first_upper, deprecated, PY2 - -if not PY2: - unicode = str - basestring = (str,) +from pywikibot.tools import first_lower, first_upper, deprecated, UnicodeType
# # Different collections of well known formats @@ -59,7 +55,7 @@ When the 2nd function evaluates to true, the 1st function is used.
""" - if isinstance(value, basestring): + if isinstance(value, UnicodeType): # Try all functions, and test result against predicates for func, pred in tuplst: try: @@ -214,7 +210,7 @@ formats['MonthName']['en']('anything else') => raise ValueError
""" - if isinstance(value, basestring): + if isinstance(value, UnicodeType): return lst.index(value) + 1 else: return lst[value - 1] @@ -232,7 +228,7 @@ formats['CurrEvents']['en']('Current Events') => ind
""" - if isinstance(value, basestring): + if isinstance(value, UnicodeType): if value == match: return ind else: @@ -264,33 +260,33 @@
# Helper for KN: digits representation _knDigits = NON_LATIN_DIGITS['kn'] -_knDigitsToLocal = {ord(unicode(i)): _knDigits[i] for i in range(10)} -_knLocalToDigits = {ord(_knDigits[i]): unicode(i) for i in range(10)} +_knDigitsToLocal = {ord(UnicodeType(i)): _knDigits[i] for i in range(10)} +_knLocalToDigits = {ord(_knDigits[i]): UnicodeType(i) for i in range(10)}
# Helper for Urdu/Persian languages _faDigits = NON_LATIN_DIGITS['fa'] -_faDigitsToLocal = {ord(unicode(i)): _faDigits[i] for i in range(10)} -_faLocalToDigits = {ord(_faDigits[i]): unicode(i) for i in range(10)} +_faDigitsToLocal = {ord(UnicodeType(i)): _faDigits[i] for i in range(10)} +_faLocalToDigits = {ord(_faDigits[i]): UnicodeType(i) for i in range(10)}
# Helper for HI:, MR: _hiDigits = NON_LATIN_DIGITS['hi'] -_hiDigitsToLocal = {ord(unicode(i)): _hiDigits[i] for i in range(10)} -_hiLocalToDigits = {ord(_hiDigits[i]): unicode(i) for i in range(10)} +_hiDigitsToLocal = {ord(UnicodeType(i)): _hiDigits[i] for i in range(10)} +_hiLocalToDigits = {ord(_hiDigits[i]): UnicodeType(i) for i in range(10)}
# Helper for BN: _bnDigits = NON_LATIN_DIGITS['bn'] -_bnDigitsToLocal = {ord(unicode(i)): _bnDigits[i] for i in range(10)} -_bnLocalToDigits = {ord(_bnDigits[i]): unicode(i) for i in range(10)} +_bnDigitsToLocal = {ord(UnicodeType(i)): _bnDigits[i] for i in range(10)} +_bnLocalToDigits = {ord(_bnDigits[i]): UnicodeType(i) for i in range(10)}
# Helper for GU: _guDigits = NON_LATIN_DIGITS['gu'] -_guDigitsToLocal = {ord(unicode(i)): _guDigits[i] for i in range(10)} -_guLocalToDigits = {ord(_guDigits[i]): unicode(i) for i in range(10)} +_guDigitsToLocal = {ord(UnicodeType(i)): _guDigits[i] for i in range(10)} +_guLocalToDigits = {ord(_guDigits[i]): UnicodeType(i) for i in range(10)}
def intToLocalDigitsStr(value, digitsToLocalDict): """Encode an integer value into a textual form.""" - return unicode(value).translate(digitsToLocalDict) + return UnicodeType(value).translate(digitsToLocalDict)
def localDigitsStrToInt(value, digitsToLocalDict, localToDigitsDict): @@ -328,7 +324,7 @@ # %% is a % '%': '%', # %d is a decimal - 'd': (_decimalDigits, unicode, int), + 'd': (_decimalDigits, UnicodeType, int), # %R is a roman numeral. This allows for only the simplest linear # conversions based on a list of numbers 'R': ('IVX', intToRomanNum, romanNumToInt), @@ -353,7 +349,8 @@ lambda v: localDigitsStrToInt(v, _guDigitsToLocal, _guLocalToDigits)), # %T is a year in TH: -- all years are shifted: 2005 => 'พ.ศ. 2548' - 'T': (_decimalDigits, lambda v: unicode(v + 543), lambda v: int(v) - 543), + 'T': (_decimalDigits, lambda v: UnicodeType(v + 543), + lambda v: int(v) - 543), }
# Allows to search for '(%%)|(%d)|(%R)|...", and allows one digit 1-9 to set @@ -386,7 +383,7 @@ and(len(s) == 2 or s[1] in _decimalDigits)): # Must match a "%2d" or "%d" style dec = _digitDecoders[s[-1]] - if isinstance(dec, basestring): + if isinstance(dec, UnicodeType): # Special case for strings that are replaced instead of # decoded assert len(s) < 3, ( @@ -447,7 +444,7 @@
""" compPattern, strPattern, decoders = escapePattern2(pattern) - if isinstance(value, basestring): + if isinstance(value, UnicodeType): m = compPattern.match(value) if m: # decode each found value using provided decoder @@ -455,7 +452,7 @@ for i, decoder in enumerate(decoders)] decValue = decf(values)
- assert not isinstance(decValue, basestring), \ + assert not isinstance(decValue, UnicodeType), \ 'Decoder must not return a string!'
# recursive call to re-encode and see if we get the original diff --git a/pywikibot/flow.py b/pywikibot/flow.py index ab32dea..d37b3b1 100644 --- a/pywikibot/flow.py +++ b/pywikibot/flow.py @@ -11,11 +11,9 @@
from pywikibot.exceptions import NoPage, UnknownExtension, LockedPage from pywikibot.page import BasePage, User -from pywikibot.tools import PY2 +from pywikibot.tools import PY2, UnicodeType
if not PY2: - unicode = str - basestring = (str,) from urllib.parse import urlparse, parse_qs else: from urlparse import urlparse, parse_qs @@ -201,7 +199,7 @@ """ if not isinstance(board, Board): raise TypeError('board must be a pywikibot.flow.Board object.') - if not isinstance(root_uuid, basestring): + if not isinstance(root_uuid, UnicodeType): raise TypeError('Topic/root UUID must be a string.')
topic = cls(board.site, 'Topic:' + root_uuid) @@ -326,7 +324,7 @@ raise TypeError('Page must be a Topic object') if not page.exists(): raise NoPage(page, 'Topic must exist: %s') - if not isinstance(uuid, basestring): + if not isinstance(uuid, UnicodeType): raise TypeError('Post UUID must be a string')
self._page = page @@ -379,7 +377,7 @@ if 'content' in self._current_revision: content = self._current_revision.pop('content') assert isinstance(content, dict) - assert isinstance(content['content'], unicode) + assert isinstance(content['content'], UnicodeType) self._content[content['format']] = content['content']
def _load(self, format='wikitext', load_from_topic=False): diff --git a/pywikibot/logentries.py b/pywikibot/logentries.py index 1248334..3daf72c 100644 --- a/pywikibot/logentries.py +++ b/pywikibot/logentries.py @@ -9,10 +9,7 @@
import pywikibot from pywikibot.exceptions import Error, HiddenKeyError -from pywikibot.tools import deprecated, classproperty, PY2 - -if not PY2: - basestring = (str, ) +from pywikibot.tools import deprecated, classproperty, UnicodeType
_logger = 'wiki'
@@ -231,7 +228,7 @@ if not hasattr(self, '_flags'): self._flags = self._params['flags'] # pre mw 1.19 returned a delimited string. - if isinstance(self._flags, basestring): + if isinstance(self._flags, UnicodeType): if self._flags: self._flags = self._flags.split(',') else: diff --git a/pywikibot/login.py b/pywikibot/login.py index 3c6e5bb..935ef19 100644 --- a/pywikibot/login.py +++ b/pywikibot/login.py @@ -27,10 +27,7 @@ from pywikibot import config, __url__ from pywikibot.exceptions import NoUsername from pywikibot.tools import (deprecated_args, remove_last_args, - normalize_username, PY2) - -if not PY2: - unicode = basestring = str + normalize_username, UnicodeType)
class OAuthImpossible(ImportError): @@ -270,7 +267,7 @@ if (normalize_username(username) == self.username and family == self.site.family.name and code == self.site.code): - if isinstance(password, basestring): + if isinstance(password, UnicodeType): self.password = password break elif isinstance(password, BotPassword): diff --git a/pywikibot/page.py b/pywikibot/page.py index a07bf9c..2b94ee8 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -47,12 +47,11 @@ deprecated, deprecate_arg, deprecated_args, issue_deprecation_warning, add_full_name, manage_wrapping, ModuleDeprecationWrapper as _ModuleDeprecationWrapper, PY2, - first_upper, redirect_func, remove_last_args, + first_upper, redirect_func, remove_last_args, UnicodeType ) from pywikibot.tools.ip import is_IP, ip_regexp
if not PY2: - unicode = basestring = str long = int from html import entities as htmlentitydefs from urllib.parse import quote_from_bytes, unquote_to_bytes @@ -637,7 +636,7 @@ @param value: New value or None @type value: basestring """ - self._text = None if value is None else unicode(value) + self._text = None if value is None else UnicodeType(value) if hasattr(self, '_raw_extracted_templates'): del self._raw_extracted_templates
@@ -1821,7 +1820,7 @@ @rtype: tuple(username, Timestamp) """ result = self.oldest_revision - return result.user, unicode(result.timestamp.isoformat()) + return result.user, UnicodeType(result.timestamp.isoformat())
@deprecated('contributors() or revisions()', since='20150206') @deprecated_args(limit='total') @@ -1836,7 +1835,8 @@ @rtype: list """ return [ - {'user': rev.user, 'timestamp': unicode(rev.timestamp.isoformat())} + {'user': rev.user, + 'timestamp': UnicodeType(rev.timestamp.isoformat())} for rev in self.revisions(total=total)]
def merge_history(self, dest, timestamp=None, reason=None): @@ -2373,7 +2373,7 @@ @param kwargs: Arguments which are used for saving the page directly afterwards, like 'summary' for edit summary. """ - if isinstance(target_page, basestring): + if isinstance(target_page, UnicodeType): target_page = pywikibot.Page(self.site, target_page) elif self.site != target_page.site: raise pywikibot.InterwikiRedirectPage(self, target_page) @@ -2571,7 +2571,7 @@ For compatibility with compat only. """ return [self.oldest_file_info.user, - unicode(self.oldest_file_info.timestamp.isoformat())] + UnicodeType(self.oldest_file_info.timestamp.isoformat())]
@deprecated('FilePage.latest_file_info.user', since='20141106') def getLatestUploader(self): @@ -2581,7 +2581,7 @@ For compatibility with compat only. """ return [self.latest_file_info.user, - unicode(self.latest_file_info.timestamp.isoformat())] + UnicodeType(self.latest_file_info.timestamp.isoformat())]
@deprecated('FilePage.get_file_history()', since='20141106') def getFileVersionHistory(self): @@ -3629,7 +3629,7 @@ return for item in self.logevents(logtype='upload', total=total): yield (item.page(), - unicode(item.timestamp()), + UnicodeType(item.timestamp()), item.comment(), item.pageid() > 0 ) @@ -4124,7 +4124,7 @@ continue data[prop] = cls._normalizeLanguages(data[prop]) for key, value in data[prop].items(): - if isinstance(value, basestring): + if isinstance(value, UnicodeType): data[prop][key] = {'language': key, 'value': value}
if 'aliases' in data: @@ -4133,7 +4133,7 @@ if isinstance(values, list): strings = [] for value in values: - if isinstance(value, basestring): + if isinstance(value, UnicodeType): strings.append({'language': key, 'value': value}) else: strings.append(value) @@ -4621,7 +4621,7 @@ is not redirect. @type force: bool """ - if isinstance(target_page, basestring): + if isinstance(target_page, UnicodeType): target_page = pywikibot.ItemPage(self.repo, target_page) elif self.repo != target_page.repo: raise pywikibot.InterwikiRedirectPage(self, target_page) @@ -4665,15 +4665,15 @@
types = {'wikibase-item': ItemPage, # 'wikibase-property': PropertyPage, must be declared first - 'string': basestring, + 'string': UnicodeType, 'commonsMedia': FilePage, 'globe-coordinate': pywikibot.Coordinate, - 'url': basestring, + 'url': UnicodeType, 'time': pywikibot.WbTime, 'quantity': pywikibot.WbQuantity, 'monolingualtext': pywikibot.WbMonolingualText, - 'math': basestring, - 'external-id': basestring, + 'math': UnicodeType, + 'external-id': UnicodeType, 'geo-shape': pywikibot.WbGeoShape, 'tabular-data': pywikibot.WbTabularData, } @@ -5228,7 +5228,7 @@ @rtype: bool """ if (isinstance(self.target, WikibasePage) - and isinstance(value, basestring)): + and isinstance(value, UnicodeType)): return self.target.id == value
if (isinstance(self.target, pywikibot.WbTime) @@ -5236,7 +5236,7 @@ return self.target.year == int(value)
if (isinstance(self.target, pywikibot.Coordinate) - and isinstance(value, basestring)): + and isinstance(value, UnicodeType)): coord_args = [float(x) for x in value.split(',')] if len(coord_args) >= 3: precision = coord_args[2] @@ -5252,7 +5252,7 @@ and abs(self.target.lon - coord_args[1]) <= precision)
if (isinstance(self.target, pywikibot.WbMonolingualText) - and isinstance(value, basestring)): + and isinstance(value, UnicodeType)): return self.target.text == value
return self.target == value @@ -6129,7 +6129,7 @@
@raise UnicodeError: Could not convert using any encoding. """ - if isinstance(encodings, basestring): + if isinstance(encodings, UnicodeType): encodings = [encodings] elif isinstance(encodings, pywikibot.site.BaseSite): # create a list of all possible encodings for both hint sites diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index b2e9ab3..356e80c 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -56,10 +56,9 @@ UnknownExtension, ) from pywikibot.proofreadpage import ProofreadPage -from pywikibot.tools import PY2 +from pywikibot.tools import PY2, UnicodeType
if not PY2: - basestring = (str, ) from itertools import zip_longest else: from itertools import izip_longest as zip_longest @@ -676,7 +675,7 @@ # 'start or None', because start might be an empty string total = None start = start or None - if isinstance(start, basestring) and len(start) == 8: + if isinstance(start, UnicodeType) and len(start) == 8: start = pywikibot.Timestamp.strptime(start, '%Y%m%d') elif start is not None: try: @@ -1646,7 +1645,7 @@ if site is None: site = pywikibot.Site() for title in iterable: - if not isinstance(title, basestring): + if not isinstance(title, UnicodeType): break yield pywikibot.Page(pywikibot.Link(title, site))
@@ -1908,7 +1907,7 @@ regex = [regex] # Test if regex is already compiled. # We assume that all list components have the same type - if isinstance(regex[0], basestring): + if isinstance(regex[0], UnicodeType): regex = [re.compile(r, flag) for r in regex] return regex
@@ -2099,7 +2098,7 @@ @type show_filtered: bool """ if timestamp: - if isinstance(timestamp, basestring): + if isinstance(timestamp, UnicodeType): ts = pywikibot.Timestamp.fromtimestampformat(timestamp) else: ts = timestamp diff --git a/pywikibot/site.py b/pywikibot/site.py index 99af4e7..cf560f3 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -77,15 +77,13 @@ merge_unique_dicts, PY2, filter_unique, + UnicodeType ) from pywikibot.tools.ip import is_IP
if not PY2: from itertools import zip_longest from urllib.parse import urlencode, urlparse - - basestring = (str,) - unicode = str else: from itertools import izip_longest as zip_longest from urllib import urlencode @@ -344,7 +342,7 @@ return self.id == other elif isinstance(other, Namespace): return self.id == other.id - elif isinstance(other, basestring): + elif isinstance(other, UnicodeType): return other in self
def __ne__(self, other): @@ -605,7 +603,7 @@ # Temporary until Namespace.resolve can be removed @staticmethod def _resolve(identifiers, namespaces): - if isinstance(identifiers, (basestring, Namespace)): + if isinstance(identifiers, (UnicodeType, Namespace)): identifiers = [identifiers] else: # convert non-iterators to single item list @@ -618,7 +616,7 @@ # int(None) raises TypeError; however, bool needs special handling. result = [NotImplemented if isinstance(ns, bool) else NamespacesDict._lookup_name(ns, namespaces) - if isinstance(ns, basestring) + if isinstance(ns, UnicodeType) and not ns.lstrip('-').isdigit() else namespaces[int(ns)] if int(ns) in namespaces else None @@ -744,7 +742,7 @@ pywikibot.log('BaseSite: code "%s" contains invalid characters' % code) self.__code = code - if isinstance(fam, basestring) or fam is None: + if isinstance(fam, UnicodeType) or fam is None: self.__family = pywikibot.family.Family.load(fam) else: self.__family = fam @@ -1275,7 +1273,7 @@ UserWarning) from pywikibot.comms import http if data: - if not isinstance(data, basestring): + if not isinstance(data, UnicodeType): data = urlencode(data) return http.request(self, path, method='PUT', body=data) else: @@ -1518,7 +1516,7 @@ else: return False
- if isinstance(prop, basestring): + if isinstance(prop, UnicodeType): props = [prop] else: props = prop @@ -2492,7 +2490,7 @@ issue_deprecation_warning('arg of type str', 'type unicode', 2, since='20151014')
- args = [unicode(e) for e in args] + args = [UnicodeType(e) for e in args] try: msgs = self.mediawiki_messages(needed_mw_messages) except KeyError: @@ -2505,7 +2503,7 @@ # v1.14 defined and as ', and'; fixed in v1.15 msgs['and'] = ' and' else: - msgs[key] = pywikibot.html2unicode(value) + msgs[key] = pywikibot.html2UnicodeType(value)
concat = msgs['and'] + msgs['word-separator'] return msgs['comma-separator'].join( @@ -2528,7 +2526,7 @@ @type includecomments: bool @rtype: unicode """ - if not isinstance(text, basestring): + if not isinstance(text, UnicodeType): raise ValueError('text must be a string') if not text: return '' @@ -3289,7 +3287,7 @@ """ if not pageids: return - if isinstance(pageids, basestring): + if isinstance(pageids, UnicodeType): pageids = pageids.replace('|', ',') pageids = pageids.split(',') pageids = [p.strip() for p in pageids] @@ -3533,7 +3531,7 @@ query.request._warning_handler = warn_handler
for item in query: - pywikibot.debug(unicode(item), _logger) + pywikibot.debug(UnicodeType(item), _logger) for tokentype in valid_tokens: if (tokentype + 'token') in item: user_tokens[tokentype] = item[tokentype + 'token'] @@ -3556,7 +3554,7 @@ data = data['query'] if 'recentchanges' in data: item = data['recentchanges'][0] - pywikibot.debug(unicode(item), _logger) + pywikibot.debug(UnicodeType(item), _logger) if 'patroltoken' in item: user_tokens['patrol'] = item['patroltoken'] else: @@ -3933,7 +3931,7 @@ raise ValueError( 'categorymembers: startsort must be less than endsort')
- if isinstance(member_type, basestring): + if isinstance(member_type, UnicodeType): member_type = {member_type}
if member_type and sortby == 'timestamp': @@ -4106,7 +4104,7 @@ if content: rvargs['rvprop'].append('content') if section is not None: - rvargs['rvsection'] = unicode(section) + rvargs['rvsection'] = UnicodeType(section) if rollback: self.login(sysop=sysop) rvargs['rvtoken'] = 'rollback' @@ -4114,10 +4112,10 @@ rvtitle = page.title(with_section=False).encode(self.encoding()) rvargs['titles'] = rvtitle else: - if isinstance(revids, (int, basestring)): - ids = unicode(revids) + if isinstance(revids, (int, UnicodeType)): + ids = UnicodeType(revids) else: - ids = '|'.join(unicode(r) for r in revids) + ids = '|'.join(UnicodeType(r) for r in revids) rvargs['revids'] = ids
if rvdir: @@ -4301,9 +4299,9 @@ apgen.request['gapminsize'] = str(minsize) if isinstance(maxsize, int): apgen.request['gapmaxsize'] = str(maxsize) - if isinstance(protect_type, basestring): + if isinstance(protect_type, UnicodeType): apgen.request['gapprtype'] = protect_type - if isinstance(protect_level, basestring): + if isinstance(protect_level, UnicodeType): apgen.request['gapprlevel'] = protect_level if reverse: apgen.request['gapdir'] = 'descending' @@ -4547,7 +4545,7 @@ if blockids: bkgen.request['bkids'] = blockids if users: - if isinstance(users, basestring): + if isinstance(users, UnicodeType): users = users.split('|') # actual IPv6 addresses (anonymous users) are uppercase, but they # have never a :: in the username (so those are registered users) @@ -5275,7 +5273,7 @@ 'logged in' % err.code, _logger) if err.code in self._ep_errors: - if isinstance(self._ep_errors[err.code], basestring): + if isinstance(self._ep_errors[err.code], UnicodeType): errdata = { 'site': self, 'title': page.title(with_section=False), @@ -5876,9 +5874,9 @@ if all(_ is None for _ in [rcid, revid, revision]): raise Error('No rcid, revid or revision provided.')
- if isinstance(rcid, (int, basestring)): + if isinstance(rcid, (int, UnicodeType)): rcid = {rcid} - if isinstance(revid, (int, basestring)): + if isinstance(revid, (int, UnicodeType)): revid = {revid} if isinstance(revision, pywikibot.page.Revision): revision = {revision} @@ -6972,7 +6970,7 @@ """ # check old and diff types def get_param(item): - if isinstance(item, basestring): + if isinstance(item, UnicodeType): return 'title', item elif isinstance(item, pywikibot.Page): return 'title', item.title() @@ -7033,13 +7031,13 @@ namespaces=namespaces)
if lint_categories: - if isinstance(lint_categories, basestring): + if isinstance(lint_categories, UnicodeType): lint_categories = lint_categories.split('|') lint_categories = [p.strip() for p in lint_categories] query.request['lntcategories'] = '|'.join(lint_categories)
if pageids: - if isinstance(pageids, basestring): + if isinstance(pageids, UnicodeType): pageids = pageids.split('|') pageids = [p.strip() for p in pageids] # Validate pageids. @@ -7640,7 +7638,7 @@ 'Only "props" is a valid kwarg, not {0}'.format(set(params) - {'props'}) if isinstance(source, int) or \ - isinstance(source, basestring) and source.isdigit(): + isinstance(source, UnicodeType) and source.isdigit(): ids = 'q' + str(source) params = merge_unique_dicts(params, action='wbgetentities', ids=ids) diff --git a/pywikibot/specialbots.py b/pywikibot/specialbots.py index 7ac5315..0576f89 100644 --- a/pywikibot/specialbots.py +++ b/pywikibot/specialbots.py @@ -24,14 +24,12 @@ ) from pywikibot.editor import TextEditor from pywikibot.textlib import replace_links -from pywikibot.tools import PY2, deprecated, deprecated_args +from pywikibot.tools import PY2, deprecated, deprecated_args, UnicodeType from pywikibot.tools.formatter import color_format
if not PY2: from urllib.parse import urlparse from urllib.request import URLopener - - basestring = (str,) else: from urllib import URLopener from urlparse import urlparse @@ -101,7 +99,7 @@ raise ValueError('When always is set to True, the description ' 'must be set.') self.url = url - if isinstance(self.url, basestring): + if isinstance(self.url, UnicodeType): pywikibot.warning('url as string is deprecated. ' 'Use an iterable instead.') self.urlEncoding = urlEncoding @@ -474,7 +472,7 @@ return
try: - if isinstance(self.url, basestring): + if isinstance(self.url, UnicodeType): self._treat_counter = 1 return self.upload_file(self.url) for file_url in self.url: diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 3335d1d..b0da390 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -37,8 +37,6 @@
if not PY2: from html.parser import HTMLParser - basestring = (str,) - unicode = str else: from HTMLParser import HTMLParser from itertools import izip as zip @@ -318,7 +316,7 @@ dontTouchRegexes = result
for exc in keys: - if isinstance(exc, basestring): + if isinstance(exc, UnicodeType): # assume the string is a reference to a standard regex above, # which may not yet have a site specific re compiled. if exc in _regex_cache: @@ -380,7 +378,7 @@ @type count: int """ # if we got a string, compile it as a regular expression - if isinstance(old, basestring): + if isinstance(old, UnicodeType): if caseInsensitive: old = re.compile(old, re.IGNORECASE | re.UNICODE) else: @@ -652,7 +650,7 @@ """Return the link from source when it's a Page otherwise itself.""" if isinstance(source, pywikibot.Page): return source._link - elif isinstance(source, basestring): + elif isinstance(source, UnicodeType): return pywikibot.Link(source, site) else: return source @@ -686,7 +684,7 @@ 'The original value must be either basestring, Link or Page ' 'but is "{0}"'.format(type(replace_list[0]))) if replace_list[1] is not False and replace_list[1] is not None: - if isinstance(replace_list[1], basestring): + if isinstance(replace_list[1], UnicodeType): replace_list[1] = pywikibot.Page(site, replace_list[1]) check_classes(replace_list[0]) replace = replace_callable @@ -770,9 +768,10 @@
if new_link is False: # unlink - we remove the section if there's any - assert isinstance(new_label, unicode), 'link text must be unicode.' + assert isinstance(new_label, UnicodeType), \ + 'link text must be unicode.' new_link = new_label - if isinstance(new_link, unicode): + if isinstance(new_link, UnicodeType): # Nothing good can come out of the fact that bytes is returned so # force unicode text = text[:start] + new_link + text[end:] @@ -1522,7 +1521,7 @@
catLinks = [] for category in categories: - if isinstance(category, basestring): + if isinstance(category, UnicodeType): category, separator, sortKey = category.strip('[]').partition('|') sortKey = sortKey if separator else None # whole word if no ":" is present @@ -1683,7 +1682,7 @@
params[key] = value
- result.append((unicode(template.name.strip()), params)) + result.append((UnicodeType(template.name.strip()), params)) return result
@@ -1809,7 +1808,7 @@ param_name, param_val = param.split('=', 1) implicit_parameter = False else: - param_name = unicode(numbered_param) + param_name = UnicodeType(numbered_param) param_val = param numbered_param += 1 implicit_parameter = True diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py index 159395a..b3eca69 100644 --- a/pywikibot/tools/__init__.py +++ b/pywikibot/tools/__init__.py @@ -39,8 +39,8 @@ if not PY2: from itertools import zip_longest import queue - StringTypes = basestring = (str,) - UnicodeType = unicode = str + StringTypes = (str, bytes) + UnicodeType = str else: from itertools import izip_longest as zip_longest import Queue as queue # noqa: N813 @@ -776,7 +776,7 @@ return '.'.join(str(v) for v in self.version) + self.suffix
def _cmp(self, other): - if isinstance(other, basestring): + if isinstance(other, StringTypes): other = MediaWikiVersion(other)
if self.version > other.version: @@ -1369,7 +1369,7 @@ result.update(arg) if conflicts: raise ValueError('Multiple dicts contain the same keys: {0}' - .format(', '.join(sorted(unicode(key) + .format(', '.join(sorted(UnicodeType(key) for key in conflicts)))) return result
@@ -1895,7 +1895,7 @@ @param module: The module name or instance @type module: str or module """ - if isinstance(module, basestring): + if isinstance(module, StringTypes): module = sys.modules[module] super(ModuleDeprecationWrapper, self).__setattr__('_deprecated', {}) super(ModuleDeprecationWrapper, self).__setattr__('_module', module) diff --git a/pywikibot/tools/chars.py b/pywikibot/tools/chars.py index 7b09075..c2422a3 100644 --- a/pywikibot/tools/chars.py +++ b/pywikibot/tools/chars.py @@ -9,11 +9,7 @@
import sys
-from pywikibot.tools import LazyRegex, PY2 - - -if not PY2: - unicode = str +from pywikibot.tools import LazyRegex
# All characters in the Cf category in a static list. When testing each Unicode diff --git a/pywikibot/userinterfaces/win32_unicode.py b/pywikibot/userinterfaces/win32_unicode.py index 103c1fa..39002e6 100755 --- a/pywikibot/userinterfaces/win32_unicode.py +++ b/pywikibot/userinterfaces/win32_unicode.py @@ -34,13 +34,9 @@ from ctypes import c_void_p as LPVOID from io import IOBase, UnsupportedOperation
-OSWIN32 = (sys.platform == 'win32') +from pywikibot.tools import PY2, UnicodeType
-if sys.version_info[0] > 2: - unicode = str - PY3 = True -else: - PY3 = False +OSWIN32 = (sys.platform == 'win32')
stdin = sys.stdin stdout = sys.stdout @@ -84,7 +80,7 @@ if not result: raise Exception('stdin failure') data = self.buffer.value[:numrecv.value] - if not PY3: + if PY2: return data.encode(self.encoding) else: return data @@ -123,11 +119,11 @@ """Write the text to the output.""" try: if self._hConsole is None: - if not PY3 and isinstance(text, unicode): + if PY2 and isinstance(text, UnicodeType): text = text.encode('utf-8') self._stream.write(text) else: - if not isinstance(text, unicode): + if not isinstance(text, UnicodeType): text = bytes(text).decode('utf-8') remaining = len(text) while remaining > 0: @@ -338,7 +334,7 @@ _complain('exception {!r} while fixing up sys.stdout and sys.stderr' .format(e))
- if PY3: + if not PY2: # no need to work around issue2128 since it's a Python 2 only issue return stdin, stdout, stderr, argv
diff --git a/pywikibot/version.py b/pywikibot/version.py index 426ae08..f1e8437 100644 --- a/pywikibot/version.py +++ b/pywikibot/version.py @@ -44,10 +44,7 @@ import pywikibot
from pywikibot import config2 as config -from pywikibot.tools import deprecated, PY2, PYTHON_VERSION - -if not PY2: - basestring = (str, ) +from pywikibot.tools import deprecated, PY2, PYTHON_VERSION, UnicodeType
cache = None _logger = 'version' @@ -140,7 +137,7 @@ pywikibot.debug('version algorithm exceptions:\n%r' % exceptions, _logger)
- if isinstance(date, basestring): + if isinstance(date, UnicodeType): datestring = date elif isinstance(date, time.struct_time): datestring = time.strftime('%Y/%m/%d, %H:%M:%S', date) diff --git a/scripts/category.py b/scripts/category.py index 3c0985a..9e7e161 100755 --- a/scripts/category.py +++ b/scripts/category.py @@ -143,13 +143,11 @@ ) from pywikibot.cosmetic_changes import moved_links from pywikibot.tools import ( - deprecated_args, deprecated, ModuleDeprecationWrapper, open_archive, PY2 + deprecated_args, deprecated, ModuleDeprecationWrapper, open_archive, + UnicodeType ) from pywikibot.tools.formatter import color_format
-if not PY2: - basestring = (str, ) - # This is required for the text that is shown when you run this script # with the parameter -help. docuReplacements = {'&params;': pagegenerators.parameterHelp} # noqa: N816 @@ -627,7 +625,7 @@ 'category-removing', template_vars) # Set deletion reason for category page and talkpage. - if isinstance(deletion_comment, basestring): + if isinstance(deletion_comment, UnicodeType): # Deletion comment is set to given string. self.deletion_comment = deletion_comment elif deletion_comment == self.DELETION_COMMENT_SAME_AS_EDIT_COMMENT: diff --git a/scripts/imageharvest.py b/scripts/imageharvest.py index 196b60d..c437253 100644 --- a/scripts/imageharvest.py +++ b/scripts/imageharvest.py @@ -38,8 +38,6 @@ if not PY2: import urllib from urllib.request import URLopener - - basestring = (str,) else: from urllib import URLopener
diff --git a/scripts/interwiki.py b/scripts/interwiki.py index f5f5f7e..b17d4a2 100755 --- a/scripts/interwiki.py +++ b/scripts/interwiki.py @@ -363,12 +363,9 @@
from pywikibot.bot import ListOption, StandardOption from pywikibot.cosmetic_changes import moved_links -from pywikibot.tools import first_upper +from pywikibot.tools import first_upper, UnicodeType from pywikibot.tools.formatter import color_format
-if sys.version_info[0] > 2: - unicode = str - docuReplacements = { '&params;': pagegenerators.parameterHelp } @@ -1249,7 +1246,7 @@ # todo list.
if not page.exists(): - self.conf.remove.append(unicode(page)) + self.conf.remove.append(UnicodeType(page)) self.conf.note('{} does not exist. Skipping.'.format(page)) if page == self.originPage: # The page we are working on is the page that does not @@ -1310,7 +1307,7 @@ # must be behind the page.isRedirectPage() part # otherwise a redirect error would be raised elif page_empty_check(page): - self.conf.remove.append(unicode(page)) + self.conf.remove.append(UnicodeType(page)) self.conf.note('{} is empty. Skipping.'.format(page)) if page == self.originPage: for site, count in self.todo.siteCounts(): @@ -1455,7 +1452,7 @@ if page2 is None: pywikibot.output(' ' * indent + 'Given as a hint.') else: - pywikibot.output(' ' * indent + unicode(page2)) + pywikibot.output(' ' * indent + UnicodeType(page2))
def assemble(self): """Assemble language links.""" @@ -1811,7 +1808,7 @@ # put it to new means don't delete it if ( not self.conf.cleanup - or unicode(rmPage) not in self.conf.remove + or UnicodeType(rmPage) not in self.conf.remove ): new[rmsite] = rmPage pywikibot.warning( @@ -2286,7 +2283,7 @@ if not summary and \ len(adding) + len(removing) + len(modifying) <= 3: # Use an extended format for the string linking to all added pages. - fmt = lambda d, site: unicode(d[site]) # noqa: E731 + fmt = lambda d, site: UnicodeType(d[site]) # noqa: E731 else: # Use short format, just the language code fmt = lambda d, site: site.code # noqa: E731 @@ -2488,7 +2485,7 @@ elif len(namespaces) == 1: ns = namespaces[0] if ns != 'all': - if isinstance(ns, unicode) or isinstance(ns, str): + if isinstance(ns, UnicodeType) or isinstance(ns, str): index = site.namespaces.lookup_name(ns) if index is None: raise ValueError('Unknown namespace: ' + ns) diff --git a/scripts/misspelling.py b/scripts/misspelling.py index 4eafcaa..75bec4e 100755 --- a/scripts/misspelling.py +++ b/scripts/misspelling.py @@ -34,13 +34,10 @@
from pywikibot import i18n, pagegenerators
-from pywikibot.tools import PY2 +from pywikibot.tools import UnicodeType
from scripts.solve_disambiguation import DisambiguationRobot
-if not PY2: - basestring = (str, ) - HELP_MSG = """\n misspelling.py does not support site {site}.
@@ -86,7 +83,7 @@ mycode = self.site.code if mycode in self.misspellingCategory: categories = self.misspellingCategory[mycode] - if isinstance(categories, basestring): + if isinstance(categories, UnicodeType): categories = (categories, ) generators = ( pagegenerators.CategorizedPageGenerator( @@ -95,7 +92,7 @@ for misspellingCategoryTitle in categories) elif mycode in self.misspellingTemplate: templates = self.misspellingTemplate[mycode] - if isinstance(templates, basestring): + if isinstance(templates, UnicodeType): templates = (templates, ) generators = ( pagegenerators.ReferringPageGenerator( @@ -129,7 +126,7 @@ return True if self.misspellingTemplate.get(disambPage.site.code) is not None: templates = self.misspellingTemplate[disambPage.site.code] - if isinstance(templates, basestring): + if isinstance(templates, UnicodeType): templates = (templates, ) for template, params in disambPage.templatesWithParams(): if template.title(with_ns=False) in templates: diff --git a/scripts/redirect.py b/scripts/redirect.py index 8d93480..3109db1 100755 --- a/scripts/redirect.py +++ b/scripts/redirect.py @@ -88,10 +88,7 @@ RedirectPageBot) from pywikibot.exceptions import ArgumentDeprecationWarning from pywikibot.textlib import extract_templates_and_params_regex_simple -from pywikibot.tools import issue_deprecation_warning, PY2 - -if not PY2: - basestring = (str, ) +from pywikibot.tools import issue_deprecation_warning, UnicodeType
def space_to_underscore(link): @@ -455,7 +452,7 @@
def init_page(self, item): """Ensure that we process page objects.""" - if isinstance(item, basestring): + if isinstance(item, UnicodeType): item = pywikibot.Page(self.site, item) elif isinstance(item, tuple): redir_name, code, target, final = item diff --git a/scripts/replace.py b/scripts/replace.py index b6a7d63..abdb0d7 100755 --- a/scripts/replace.py +++ b/scripts/replace.py @@ -164,13 +164,13 @@ deprecated_args, issue_deprecation_warning, PY2, + UnicodeType ) from pywikibot.tools.formatter import color_format
if not PY2: from queue import Queue long = int - basestring = (str, ) else: from Queue import Queue
@@ -1073,7 +1073,7 @@ '"{0}"'.format(fix_name)) continue if 'msg' in fix: - if isinstance(fix['msg'], basestring): + if isinstance(fix['msg'], UnicodeType): set_summary = i18n.twtranslate(site, str(fix['msg'])) else: set_summary = i18n.translate(site, fix['msg'], fallback=True) @@ -1081,7 +1081,7 @@ set_summary = None if not generators_given and 'generator' in fix: gen_args = fix['generator'] - if isinstance(gen_args, basestring): + if isinstance(gen_args, UnicodeType): gen_args = [gen_args] for gen_arg in gen_args: genFactory.handleArg(gen_arg) diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py index f47af6c..b89ecee 100755 --- a/scripts/weblinkchecker.py +++ b/scripts/weblinkchecker.py @@ -133,7 +133,7 @@ from pywikibot.pagegenerators import ( XMLDumpPageGenerator as _XMLDumpPageGenerator, ) -from pywikibot.tools import deprecated, PY2 +from pywikibot.tools import deprecated, PY2, UnicodeType from pywikibot.tools.formatter import color_format
import requests @@ -142,9 +142,6 @@ import http.client as httplib import urllib.parse as urlparse import urllib.request as urllib - - basestring = (str, ) - unicode = str else: import httplib import urllib @@ -398,9 +395,9 @@ self.query.encode('ascii') except UnicodeEncodeError: encoding = self.getEncodingUsedByServer() - self.path = unicode(urllib.quote(self.path.encode(encoding))) - self.query = unicode(urllib.quote(self.query.encode(encoding), - '=&')) + self.path = UnicodeType(urllib.quote(self.path.encode(encoding))) + self.query = UnicodeType(urllib.quote(self.query.encode(encoding), + '=&'))
def resolveRedirect(self, useHEAD=False): """ @@ -487,7 +484,7 @@ # wrong or a pair (errno, string) representing an error # returned by a system call, similar to the value # accompanying os.error - if isinstance(error, basestring): + if isinstance(error, UnicodeType): msg = error else: try: diff --git a/tests/djvu_tests.py b/tests/djvu_tests.py index f4d833a..b2e375f 100644 --- a/tests/djvu_tests.py +++ b/tests/djvu_tests.py @@ -14,16 +14,13 @@ import subprocess
from pywikibot.tools.djvu import DjVuFile -from pywikibot.tools import PY2 +from pywikibot.tools import PY2, UnicodeType
from tests import join_data_path, create_path_func from tests.aspects import unittest, TestCase
join_djvu_data_path = create_path_func(join_data_path, 'djvu')
-if not PY2: - unicode = str -
class TestDjVuFile(TestCase):
@@ -69,7 +66,7 @@ djvu = DjVuFile(self.file_djvu) expected = "DjVuFile('{}')".format(self.file_djvu) if PY2: - self.assertEqual(unicode(djvu), expected) + self.assertEqual(UnicodeType(djvu), expected) else: self.assertEqual(djvu.__unicode__(), expected)
pywikibot-commits@lists.wikimedia.org