jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/612509 )
Change subject: [doc] Update ROADMAP.rst
......................................................................
[doc] Update ROADMAP.rst
Change-Id: I36bb23a0046d1916c19c59f66b250fa80089994a
---
M ROADMAP.rst
1 file changed, 3 insertions(+), 1 deletion(-)
Approvals:
D3r1ck01: Looks good to me, approved
jenkins-bot: Verified
diff --git a/ROADMAP.rst b/ROADMAP.rst
index 28cdc24..41051d7 100644
--- a/ROADMAP.rst
+++ b/ROADMAP.rst
@@ -1,6 +1,8 @@
Current release
~~~~~~~~~~~~~~~
+* Improve performance of CosmeticChangesToolkit.translateMagicWords
+* Prohibit positional arguments with Page.title()
* Functions dealing with stars list were removed
* Some pagegenerators functions were deprecated which should be replaced by site generators
* LogEntry became a UserDict; all content can be accessed by its key
@@ -17,7 +19,7 @@
Future releases
~~~~~~~~~~~~~~~
-* (current): Methods deprecated for 8 years or longer will be removed
+* 4.0.0: Methods deprecated for 8 years or longer will be removed
* 3.0.20200703: Unsupported debug parameter of UploadRobot will be removed
* 3.0.20200703: Unported compat decode parameter of Page.title() will be removed
* 3.0.20200703: tools.count, tools.Counter, tools.OrderedDict and ContextManagerWrapper will be removed
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/612509
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I36bb23a0046d1916c19c59f66b250fa80089994a
Gerrit-Change-Number: 612509
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/597263 )
Change subject: [4.0] remove Python 2 code parts in wikistats.py
......................................................................
[4.0] remove Python 2 code parts in wikistats.py
Change-Id: I321d353e7ee815c139a7fc911f7f28b9bb12a5df
---
M pywikibot/data/wikistats.py
1 file changed, 25 insertions(+), 47 deletions(-)
Approvals:
Huji: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/data/wikistats.py b/pywikibot/data/wikistats.py
index 931fcd9..19d5c7c 100644
--- a/pywikibot/data/wikistats.py
+++ b/pywikibot/data/wikistats.py
@@ -4,27 +4,15 @@
# (C) Pywikibot team, 2014-2020
#
# Distributed under the terms of the MIT license.
-from __future__ import absolute_import, division, unicode_literals
+import csv
from io import BytesIO, StringIO
import pywikibot
from pywikibot.comms import http
-from pywikibot.tools import PY2, UnicodeType
-
-if not PY2:
- import csv
-else:
- try:
- import unicodecsv as csv
- except ImportError:
- pywikibot.warning(
- 'WikiStats: unicodecsv package required for using csv in Python 2;'
- ' falling back to using the larger XML datasets.')
- csv = None
-class WikiStats(object):
+class WikiStats:
"""
Light wrapper around WikiStats data, caching responses and data.
@@ -83,18 +71,17 @@
ALL_KEYS = set(FAMILY_MAPPING.keys()) | ALL_TABLES
- def __init__(self, url='https://wikistats.wmflabs.org/'):
+ def __init__(self, url='https://wikistats.wmflabs.org/') -> None:
"""Initializer."""
self.url = url
self._raw = {}
self._data = {}
- def fetch(self, table, format='xml'):
+ def fetch(self, table: str, format='xml'):
"""
Fetch data from WikiStats.
@param table: table of data to fetch
- @type table: basestring
@param format: Format of data to use
@type format: 'xml' or 'csv'.
@rtype: bytes
@@ -114,13 +101,12 @@
r = http.fetch(url.format(table=table, format=format))
return r.raw
- def raw_cached(self, table, format):
+ def raw_cached(self, table: str, format):
"""
Cache raw data.
@param table: table of data to fetch
- @type table: basestring
- @param format: Format of data to use
+ @param format: format of data to use
@type format: 'xml' or 'csv'.
@rtype: bytes
"""
@@ -134,12 +120,11 @@
self._raw[format][table] = data
return data
- def csv(self, table):
+ def csv(self, table: str):
"""
Fetch and parse CSV for a table.
@param table: table of data to fetch
- @type table: basestring
@rtype: list
"""
if table in self._data.setdefault('csv', {}):
@@ -147,10 +132,7 @@
data = self.raw_cached(table, 'csv')
- if not PY2:
- f = StringIO(data.decode('utf8'))
- else:
- f = BytesIO(data)
+ f = StringIO(data.decode('utf8'))
reader = csv.DictReader(f)
data = list(reader)
@@ -158,12 +140,11 @@
return data
- def xml(self, table):
+ def xml(self, table: str):
"""
Fetch and parse XML for a table.
@param table: table of data to fetch
- @type table: basestring
@rtype: list
"""
if table in self._data.setdefault('xml', {}):
@@ -182,8 +163,8 @@
site = {}
for field in row.findall('field'):
- name = UnicodeType(field.get('name'))
- site[name] = UnicodeType(field.text)
+ name = str(field.get('name'))
+ site[name] = str(field.text)
data.append(site)
@@ -191,32 +172,29 @@
return data
- def get(self, table, format=None):
- """
- Get a list of a table of data using format.
+ def get(self, table: str, format='csv'):
+ """Get a list of a table of data.
@param table: table of data to fetch
- @type table: basestring
- @param format: Format of data to use
- @type format: 'xml' or 'csv', or None to autoselect.
@rtype: list
"""
- if csv or format == 'csv':
- data = self.csv(table)
- else:
- data = self.xml(table)
- return data
+ try:
+ func = getattr(self, format)
+ except AttributeError:
+ raise NotImplementedError('Format "{}" is not supported'
+ .format(format))
+ return func(table)
- def get_dict(self, table, format=None):
- """
- Get dictionary of a table of data using format.
+ def get_dict(self, table: str, format='csv'):
+ """Get dictionary of a table of data using format.
@param table: table of data to fetch
- @type table: basestring
- @param format: Format of data to use
+ @param format: format of data to use
@type format: 'xml' or 'csv', or None to autoselect.
@rtype: dict
"""
+ if format is None: # old autoselect
+ format = 'csv'
return {data['prefix']: data for data in self.get(table, format)}
def sorted(self, table, key):
@@ -230,7 +208,7 @@
key=lambda d: int(d[key]),
reverse=True)
- def languages_by_size(self, table):
+ def languages_by_size(self, table: str):
"""Return ordered list of languages by size from WikiStats."""
# This assumes they appear in order of size in the WikiStats dump.
return [d['prefix'] for d in self.get(table)]
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/597263
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I321d353e7ee815c139a7fc911f7f28b9bb12a5df
Gerrit-Change-Number: 597263
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Dvorapa <dvorapa(a)seznam.cz>
Gerrit-Reviewer: Huji <huji.huji(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/612165 )
Change subject: [IMPR] Improve performance of translateMagicWords
......................................................................
[IMPR] Improve performance of translateMagicWords
- avoid nested code
- cache mapping of aliases to preferred words to avoid
doing things repeatedly
- defer caching to when an image with some parameters is found
- remove 'math' from exceptions (unlikely to be needed,
just makes things slower)
- extend the list of magic words
Change-Id: I802948afb5f09a156497ae0497c706905eecef77
---
M pywikibot/cosmetic_changes.py
1 file changed, 38 insertions(+), 24 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index b0abb56..ff45c5f 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -442,31 +442,45 @@
# not wanted at ru
# arz uses english stylish codes
# no need to run on English wikis
- if self.site.code not in ['arz', 'en', 'ru']:
- def replace_magicword(match):
- split = match.group().split('|')
- # push ']]' out and re-add below
- split[-1] = split[-1][:-2]
- for magicword in ['img_thumbnail', 'img_left', 'img_center',
- 'img_right', 'img_none', 'img_framed',
- 'img_frameless', 'img_border', 'img_upright',
- ]:
- aliases = self.site.getmagicwords(magicword)
- preferred = aliases[0]
- aliases = set(aliases[1:])
- if not aliases:
- continue
- split[1:] = [preferred if x.strip() in aliases else x
- for x in split[1:]]
- return '|'.join(split) + ']]'
+ if self.site.code in ['arz', 'en', 'ru']:
+ return text
- exceptions = ['nowiki', 'comment', 'math', 'pre', 'source']
- regex = re.compile(
- FILE_LINK_REGEX % '|'.join(self.site.namespaces[6]),
- flags=re.X)
- text = textlib.replaceExcept(text, regex, replace_magicword,
- exceptions)
- return text
+ def init_cache():
+ for magicword in ('img_thumbnail', 'img_left', 'img_center',
+ 'img_right', 'img_none', 'img_framed',
+ 'img_frameless', 'img_border', 'img_upright',
+ 'img_baseline', 'img_sub', 'img_super',
+ 'img_top', 'img_text_top', 'img_middle',
+ 'img_bottom', 'img_text_bottom'):
+ aliases = self.site.getmagicwords(magicword)
+ if len(aliases) > 1:
+ cache.update((alias, aliases[0]) for alias in aliases[1:]
+ if '$1' not in alias)
+ if not cache:
+ cache[False] = True # signal there is nothing to replace
+
+ def replace_magicword(match):
+ if cache.get(False):
+ return match.group()
+ split = match.group().split('|')
+ if len(split) == 1:
+ return match.group()
+
+ if not cache:
+ init_cache()
+
+ # push ']]' out and re-add below
+ split[-1] = split[-1][:-2]
+ return '{}|{}]]'.format(
+ split[0], '|'.join(cache.get(x, x) for x in split[1:]))
+
+ cache = {}
+ exceptions = ['nowiki', 'comment', 'pre', 'source']
+ regex = re.compile(
+ FILE_LINK_REGEX % '|'.join(self.site.namespaces[6]),
+ flags=re.X)
+ return textlib.replaceExcept(
+ text, regex, replace_magicword, exceptions)
def cleanUpLinks(self, text):
"""Tidy up wikilinks found in a string.
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/612165
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I802948afb5f09a156497ae0497c706905eecef77
Gerrit-Change-Number: 612165
Gerrit-PatchSet: 2
Gerrit-Owner: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/612152 )
Change subject: [cleanup] Drop unnecessary calls to list
......................................................................
[cleanup] Drop unnecessary calls to list
- avoid loading whole views in memory when O(1) lookup
is possible
- use sorted, reversed and list.extend
- use list comprehension and set in translateMagicWords
Change-Id: I5deb2a32918b39d3f382ed33ab738781c9ff7cd9
---
M pywikibot/config2.py
M pywikibot/cosmetic_changes.py
M pywikibot/i18n.py
M pywikibot/page/__init__.py
M pywikibot/pagegenerators.py
M pywikibot/site/__init__.py
M pywikibot/textlib.py
7 files changed, 13 insertions(+), 15 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index d74c505..fe01116 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -1169,9 +1169,7 @@
_all = False
else:
warning('Unknown arg {0} ignored'.format(_arg))
- _k = list(globals().keys())
- _k.sort()
- for _name in _k:
+ for _name in sorted(globals().keys()):
if _name[0] != '_':
if not type(globals()[_name]) in [types.FunctionType,
types.ModuleType]:
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index e8dc359..b0abb56 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -451,13 +451,13 @@
'img_right', 'img_none', 'img_framed',
'img_frameless', 'img_border', 'img_upright',
]:
- aliases = list(self.site.getmagicwords(magicword))
- preferred = aliases.pop(0)
+ aliases = self.site.getmagicwords(magicword)
+ preferred = aliases[0]
+ aliases = set(aliases[1:])
if not aliases:
continue
- split[1:] = list(map(
- lambda x: preferred if x.strip() in aliases else x,
- split[1:]))
+ split[1:] = [preferred if x.strip() in aliases else x
+ for x in split[1:]]
return '|'.join(split) + ']]'
exceptions = ['nowiki', 'comment', 'math', 'pre', 'source']
diff --git a/pywikibot/i18n.py b/pywikibot/i18n.py
index 99f5d40..cff155a 100644
--- a/pywikibot/i18n.py
+++ b/pywikibot/i18n.py
@@ -608,7 +608,7 @@
if fallback is True:
codes += _altlang(code) + ['_default', 'en']
elif fallback is not False:
- codes += list(fallback)
+ codes.extend(fallback)
for code in codes:
if code in xdict:
trans = xdict[code]
diff --git a/pywikibot/page/__init__.py b/pywikibot/page/__init__.py
index 54695ec..a45192b 100644
--- a/pywikibot/page/__init__.py
+++ b/pywikibot/page/__init__.py
@@ -1337,12 +1337,12 @@
if config.cosmetic_changes_mylang_only:
cc = ((family == config.family
and self.site.lang == config.mylang)
- or family in list(config.cosmetic_changes_enable.keys())
+ or family in config.cosmetic_changes_enable
and self.site.lang in config.cosmetic_changes_enable[family])
else:
cc = True
cc = (cc and not
- (family in list(config.cosmetic_changes_disable.keys())
+ (family in config.cosmetic_changes_disable
and self.site.lang in config.cosmetic_changes_disable[family]))
if not cc:
return summary
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index a37e88b..b15f73b 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -2180,7 +2180,7 @@
break
pywikibot.sleep(sleep_duration)
- yield from list(filtered_generator())[::-1]
+ yield from reversed(filtered_generator())
@deprecated_args(pageNumber='groupsize', step='groupsize', lookahead=None)
diff --git a/pywikibot/site/__init__.py b/pywikibot/site/__init__.py
index 0beb08d..ea208a0 100644
--- a/pywikibot/site/__init__.py
+++ b/pywikibot/site/__init__.py
@@ -759,7 +759,7 @@
pywikibot.log('Site %s instantiated and marked "obsolete" '
'to prevent access' % self)
elif self.__code not in self.languages():
- if self.__family.name in list(self.__family.langs.keys()) and \
+ if self.__family.name in self.__family.langs and \
len(self.__family.langs) == 1:
self.__code = self.__family.name
if self.__family == pywikibot.config.family \
@@ -3686,7 +3686,7 @@
namespaces=namespaces,
content=content
)
- return itertools.chain(*list(genlist.values()))
+ return itertools.chain(*genlist.values())
return blgen
@deprecated_args(step=None, filterRedirects='filter_redirects')
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index f233926..9c74eb2 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -998,7 +998,7 @@
# language, or if it's e.g. a category tag or an internal link
if lang in fam.obsolete:
lang = fam.obsolete[lang]
- if lang in list(fam.langs.keys()):
+ if lang in fam.langs:
if '|' in pagetitle:
# ignore text after the pipe
pagetitle = pagetitle[:pagetitle.index('|')]
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/612152
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I5deb2a32918b39d3f382ed33ab738781c9ff7cd9
Gerrit-Change-Number: 612152
Gerrit-PatchSet: 2
Gerrit-Owner: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: Isaacandy <isaac(a)iznd.xyz>
Gerrit-Reviewer: Siebrand <siebrand(a)kitano.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged