jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/617103 )
Change subject: [4.0] Remove Python 2 code parts in few scripts
......................................................................
[4.0] Remove Python 2 code parts in few scripts
Change-Id: Ib6ae321b3a34d774a1b641cb7d53daad6779ecbc
---
M pywikibot/__metadata__.py
M pywikibot/bot_choice.py
M pywikibot/config2.py
M pywikibot/daemonize.py
4 files changed, 38 insertions(+), 58 deletions(-)
Approvals:
Matěj Suchánek: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/__metadata__.py b/pywikibot/__metadata__.py
index 8eaf1f2..90c4121 100644
--- a/pywikibot/__metadata__.py
+++ b/pywikibot/__metadata__.py
@@ -5,8 +5,6 @@
#
# Distributed under the terms of the MIT license.
#
-from __future__ import unicode_literals
-
__name__ = 'pywikibot'
__version__ = '4.0.0.dev0'
__description__ = 'Python MediaWiki Bot Framework'
diff --git a/pywikibot/bot_choice.py b/pywikibot/bot_choice.py
index a512f69..2242c94 100755
--- a/pywikibot/bot_choice.py
+++ b/pywikibot/bot_choice.py
@@ -1,19 +1,17 @@
# -*- coding: utf-8 -*-
"""Choices for input_choice."""
#
-# (C) Pywikibot team, 2015-2019
+# (C) Pywikibot team, 2015-2020
#
# Distributed under the terms of the MIT license.
#
-from __future__ import absolute_import, division, unicode_literals
-
import re
from textwrap import fill
import pywikibot
-class Option(object):
+class Option:
"""
A basic option for input_choice.
@@ -23,30 +21,28 @@
* result(value)
* test(value)
- The methods C{test} and C{handled} are in such a relationship that when
- C{handled} returns itself that C{test} must return True for that value. So
- if C{test} returns False C{handled} may not return itself but it may return
- not None.
+ The methods C{test} and C{handled} are in such a relationship that
+ when C{handled} returns itself that C{test} must return True for
+ that value. So if C{test} returns False C{handled} may not return
+ itself but it may return not None.
Also C{result} only returns a sensible value when C{test} returns True for
the same value.
"""
- def __init__(self, stop=True):
+ def __init__(self, stop=True) -> None:
"""Initializer."""
self._stop = stop
@staticmethod
- def formatted(text, options, default=None):
+ def formatted(text: str, options, default=None) -> str:
"""
Create a text with the options formatted into it.
@param text: Text into which options are to be formatted
- @type text: str
@param options: Option instances to be formatted
@type options: Iterable
@return: Text with the options formatted into it
- @rtype: str
"""
formatted_options = []
for option in options:
@@ -58,7 +54,7 @@
return fill(re.sub(pattern, '{}', text), width=77).format(*highlights)
@property
- def stop(self):
+ def stop(self) -> bool:
"""Return whether this option stops asking."""
return self._stop
@@ -110,20 +106,19 @@
"""An option with a description and shortcut and returning the shortcut."""
- def __init__(self, option, shortcut, **kwargs):
+ def __init__(self, option: str, shortcut, **kwargs):
"""
Initializer.
@param option: option string
- @type option: str
@param shortcut: Shortcut of the option
@type shortcut: str
"""
- super(StandardOption, self).__init__(**kwargs)
+ super().__init__(**kwargs)
self.option = option
self.shortcut = shortcut.lower()
- def format(self, default=None):
+ def format(self, default=None) -> str:
"""Return a formatted string for that option."""
index = self.option.lower().find(self.shortcut)
shortcut = self.shortcut
@@ -140,7 +135,7 @@
"""Return the lowercased shortcut."""
return self.shortcut
- def test(self, value):
+ def test(self, value) -> bool:
"""Return True whether this option applies."""
return (self.shortcut.lower() == value.lower()
or self.option.lower() == value.lower())
@@ -152,7 +147,7 @@
def __init__(self, option, shortcut, output, **kwargs):
"""Create a new option for the given sequence."""
- super(OutputProxyOption, self).__init__(option, shortcut, **kwargs)
+ super().__init__(option, shortcut, **kwargs)
self._outputter = output
def output(self):
@@ -171,14 +166,14 @@
def __init__(self, option, shortcut, description, options):
"""Initializer."""
- super(NestedOption, self).__init__(option, shortcut, stop=False)
+ super().__init__(option, shortcut, stop=False)
self.description = description
self.options = options
def format(self, default=None):
"""Return a formatted string for that option."""
self._output = Option.formatted(self.description, self.options)
- return super(NestedOption, self).format(default=default)
+ return super().format(default=default)
def handled(self, value):
"""Return itself if it applies or the applying sub option."""
@@ -187,7 +182,7 @@
if handled is not None:
return handled
else:
- return super(NestedOption, self).handled(value)
+ return super().handled(value)
def output(self):
"""Output the suboptions."""
@@ -202,7 +197,7 @@
self, option, shortcut, text, context, delta=100, start=0, end=0
):
"""Initializer."""
- super(ContextOption, self).__init__(option, shortcut, stop=False)
+ super().__init__(option, shortcut, stop=False)
self.text = text
self.context = context
self.delta = delta
@@ -212,7 +207,7 @@
def result(self, value):
"""Add the delta to the context and output it."""
self.context += self.delta
- super(ContextOption, self).result(value)
+ super().result(value)
def output(self):
"""Output the context."""
@@ -231,7 +226,7 @@
def __init__(self, option, shortcut, replacer):
"""Initializer."""
- super(Choice, self).__init__(option, shortcut)
+ super().__init__(option, shortcut)
self._replacer = replacer
@property
@@ -254,7 +249,7 @@
def __init__(self, option, shortcut, result):
"""Create instance with replacer set to None."""
- super(StaticChoice, self).__init__(option, shortcut, None)
+ super().__init__(option, shortcut, None)
self._result = result
def handle(self):
@@ -269,7 +264,7 @@
def __init__(self, option, shortcut, replacer, replace_section,
replace_label):
"""Initializer."""
- super(LinkChoice, self).__init__(option, shortcut, replacer)
+ super().__init__(option, shortcut, replacer)
self._section = replace_section
self._label = replace_label
@@ -306,7 +301,7 @@
def __init__(self, replacer, option='always', shortcut='a'):
"""Initializer."""
- super(AlwaysChoice, self).__init__(option, shortcut, replacer)
+ super().__init__(option, shortcut, replacer)
self.always = False
def handle(self):
@@ -330,7 +325,7 @@
def __init__(self, minimum=1, maximum=None, prefix='', **kwargs):
"""Initializer."""
- super(IntegerOption, self).__init__(**kwargs)
+ super().__init__(**kwargs)
if not ((minimum is None or isinstance(minimum, int))
and (maximum is None or isinstance(maximum, int))):
raise ValueError(
@@ -341,7 +336,7 @@
self._max = maximum
self.prefix = prefix
- def test(self, value):
+ def test(self, value) -> bool:
"""Return whether the value is an int and in the specified range."""
try:
value = self.parse(value)
@@ -361,7 +356,7 @@
"""Return the upper bound of the range of allowed values."""
return self._max
- def format(self, default=None):
+ def format(self, default=None) -> str:
"""Return a formatted string showing the range."""
if default is not None and self.test(default):
value = self.parse(default)
@@ -389,7 +384,7 @@
rng = 'any' + default
return '{0}<number> [{1}]'.format(self.prefix, rng)
- def parse(self, value):
+ def parse(self, value) -> int:
"""Return integer from value with prefix removed."""
if value.lower().startswith(self.prefix.lower()):
return int(value[len(self.prefix):])
@@ -409,7 +404,7 @@
"""Initializer."""
self._list = sequence
try:
- super(ListOption, self).__init__(1, self.maximum, prefix, **kwargs)
+ super().__init__(1, self.maximum, prefix, **kwargs)
except ValueError:
raise ValueError('The sequence is empty.')
del self._max
@@ -419,10 +414,10 @@
if not self._list:
raise ValueError('The sequence is empty.')
else:
- return super(ListOption, self).format(default=default)
+ return super().format(default=default)
@property
- def maximum(self):
+ def maximum(self) -> int:
"""Return the maximum value."""
return len(self._list)
@@ -445,7 +440,7 @@
@param post: Additional comment printed after the list.
@type post: str
"""
- super(ShowingListOption, self).__init__(sequence, prefix, **kwargs)
+ super().__init__(sequence, prefix, **kwargs)
self.pre = pre
self.post = post
@@ -469,7 +464,7 @@
"""An option to select multiple items from a list."""
- def test(self, value):
+ def test(self, value) -> bool:
"""Return whether the values are int and in the specified range."""
try:
values = [self.parse(val) for val in value.split(',')]
@@ -526,4 +521,4 @@
def __init__(self):
"""Constructor using the 'quit' ('q') in input_choice."""
- super(QuitKeyboardInterrupt, self).__init__('quit', 'q')
+ super().__init__('quit', 'q')
diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index fe01116..49482d5 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -36,8 +36,6 @@
#
# Distributed under the terms of the MIT license.
#
-from __future__ import absolute_import, division, unicode_literals
-
import collections
import copy
import os
@@ -56,15 +54,12 @@
from pywikibot import __version__ as pwb_version
from pywikibot.logging import error, output, warning
-from pywikibot.tools import PY2, issue_deprecation_warning
+from pywikibot.tools import issue_deprecation_warning
OSWIN32 = (sys.platform == 'win32')
if OSWIN32:
- if not PY2:
- import winreg
- else:
- import _winreg as winreg
+ import winreg
# Normalize old PYWIKIBOT2 environment variables and issue a deprecation warn.
@@ -417,16 +412,10 @@
# be 'cp850' ('cp437' for older versions). Linux users might try 'iso-8859-1'
# or 'utf-8'.
# This default code should work fine, so you don't have to think about it.
+# When using pywikibot inside a daemonized twisted application, we get
+# "StdioOnnaStick instance has no attribute 'encoding'"; assign None instead.
# TODO: consider getting rid of this config variable.
-try:
- if not PY2 or not sys.stdout.encoding:
- console_encoding = sys.stdout.encoding
- else:
- console_encoding = sys.stdout.encoding.decode('ascii')
-except AttributeError:
- # When using pywikibot inside a daemonized twisted application,
- # we get "StdioOnnaStick instance has no attribute 'encoding'"
- console_encoding = None
+console_encoding = getattr(sys.stdout, 'encoding', None)
# The encoding the user would like to see text transliterated to. This can be
# set to a charset (e.g. 'ascii', 'iso-8859-1' or 'cp850'), and we will output
diff --git a/pywikibot/daemonize.py b/pywikibot/daemonize.py
index 677481a..41fb678 100644
--- a/pywikibot/daemonize.py
+++ b/pywikibot/daemonize.py
@@ -5,8 +5,6 @@
#
# Distributed under the terms of the MIT license.
#
-from __future__ import absolute_import, division, unicode_literals
-
import codecs
import os
import stat
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/617103
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ib6ae321b3a34d774a1b641cb7d53daad6779ecbc
Gerrit-Change-Number: 617103
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/616719 )
Change subject: [IMPR] Reset counter when "era" changes
......................................................................
[IMPR] Reset counter when "era" changes
This splits the main loop into two ("divide-and-conquer algorithm"):
- The first loop scans the whole page for old threads to be archived
and groups them by archive.
- The second loop iterates the groups in order that the archive with
the oldest threads goes first. This is the one the saved counter
applies to. Any other group resets the counter (if it matters).
Bug: T215247
Change-Id: I2ab39a39a2692bedc0f6de57fb4d06b108ae1164
---
M scripts/archivebot.py
1 file changed, 78 insertions(+), 26 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index b8c81e0..292f35d 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -101,7 +101,7 @@
import re
import time
-from collections import OrderedDict
+from collections import defaultdict, OrderedDict
from hashlib import md5
from math import ceil
from typing import Any, List, Optional, Pattern, Set, Tuple
@@ -664,39 +664,89 @@
max_arch_size = str2size(self.get_attr('maxarchivesize'))
counter = int(self.get_attr('counter', '1'))
pattern = self.get_attr('archive')
- oldthreads = self.page.threads
- self.page.threads = []
+
+ keep_threads = []
+ threads_per_archive = defaultdict(list)
whys = set()
- pywikibot.output('Processing {} threads'.format(len(oldthreads)))
- for thread in oldthreads:
- threads_left = len(oldthreads) - self.archived_threads
- if threads_left <= int(self.get_attr('minthreadsleft', 5)):
- self.page.threads.append(thread)
- continue # Because there's too little threads left.
+ pywikibot.output('Processing {} threads'
+ .format(len(self.page.threads)))
+ for i, thread in enumerate(self.page.threads):
# TODO: Make an option so that unstamped (unsigned) posts get
# archived.
why = self.should_archive_thread(thread)
if not why or why[0] != 'duration':
- self.page.threads.append(thread)
+ keep_threads.append(i)
continue
-
params = self.get_params(thread.timestamp, counter)
- archive = self.get_archive_page(pattern % params, params)
+ # this is actually just a dummy key to group the threads by
+ # "era" regardless of the counter and deal with it later
+ key = pattern % params
+ threads_per_archive[key].append((i, thread))
+ whys.add(why) # xxx: we don't know if we ever archive anything
- aux_params = self.get_params(thread.timestamp, counter + 1)
- counter_matters = (pattern % params) != (pattern % aux_params)
- del aux_params
- while counter_matters and archive.is_full(max_arch_size):
- counter += 1
+ # we need to start with the oldest archive since that is
+ # the one the saved counter applies to, so sort the groups
+ # by the oldest timestamp
+ groups = sorted(threads_per_archive.values(),
+ key=lambda group: min(t.timestamp for _, t in group))
+
+ era_change = False
+ for group in groups:
+ # We will reset counter IFF:
+ # 1. it matters (AND)
+ # 2. "era" (year, month, etc.) changes (AND)
+ # 3. there is something to put to the new archive.
+ counter_matters = False
+ for i, thread in group:
+ threads_left = len(self.page.threads) - self.archived_threads
+ if threads_left <= int(self.get_attr('minthreadsleft', 5)):
+ keep_threads.append(i)
+ continue # Because there are too few threads left.
+
+ if era_change:
+ era_change = False
+ counter = 1
+
params = self.get_params(thread.timestamp, counter)
archive = self.get_archive_page(pattern % params, params)
- archive.feed_thread(thread, max_arch_size)
- whys.add(why)
- self.archived_threads += 1
+ aux_params = self.get_params(thread.timestamp, counter + 1)
+ # TODO: this variable does not change, figure out a way
+ # to only compute it once
+ counter_matters = (pattern % params) != (pattern % aux_params)
+ del aux_params
+ if counter_matters:
+ while counter > 1 and not archive.exists():
+ # This may happen when either:
+ # 1. a previous version of the bot ran and reset
+ # the counter without archiving anything
+ # (number #3 above)
+ # 2. era changed between runs.
+ # Decrease the counter.
+ # TODO: This can be VERY slow, use preloading
+ # or binary search.
+ counter -= 1
+ params = self.get_params(thread.timestamp, counter)
+ archive = self.get_archive_page(
+ pattern % params, params)
+ while archive.is_full(max_arch_size):
+ counter += 1
+ params = self.get_params(thread.timestamp, counter)
+ archive = self.get_archive_page(
+ pattern % params, params)
- self.set_attr('counter', str(counter))
- return whys
+ archive.feed_thread(thread, max_arch_size)
+ self.archived_threads += 1
+ if counter_matters:
+ era_change = True
+
+ if self.archived_threads:
+ self.page.threads = [self.page.threads[i]
+ for i in sorted(keep_threads)]
+ self.set_attr('counter', str(counter))
+ return whys
+ else:
+ return set()
def run(self) -> None:
"""Process a single DiscussionPage object."""
@@ -723,13 +773,15 @@
pywikibot.output('Archiving {0} thread(s).'
.format(self.archived_threads))
# Save the archives first (so that bugs don't cause a loss of data)
- for a in sorted(self.archives.keys()):
- self.comment_params['count'] = self.archives[
- a].archived_threads
+ for title, archive in sorted(self.archives.items()):
+ count = archive.archived_threads
+ if count == 0:
+ continue
+ self.comment_params['count'] = count
comment = i18n.twtranslate(self.site.code,
'archivebot-archive-summary',
self.comment_params)
- self.archives[a].update(comment)
+ archive.update(comment)
# Save the page itself
self.page.header = rx.sub(self.attr2text(), self.page.header)
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/616719
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I2ab39a39a2692bedc0f6de57fb4d06b108ae1164
Gerrit-Change-Number: 616719
Gerrit-PatchSet: 3
Gerrit-Owner: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: D3r1ck <alangiderick(a)gmail.com>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Dvorapa <dvorapa(a)seznam.cz>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Whym <whym(a)whym.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/616699 )
Change subject: [4.0] Remove Python 2 code parts in logging.py
......................................................................
[4.0] Remove Python 2 code parts in logging.py
Also cleanup unicode parts in tools.
Change-Id: I330064cbbd9f32f017a387962c2b211ea9f220d4
---
M pywikibot/logging.py
M pywikibot/tools/__init__.py
2 files changed, 16 insertions(+), 22 deletions(-)
Approvals:
Matěj Suchánek: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/logging.py b/pywikibot/logging.py
index 033c628..83abad9 100644
--- a/pywikibot/logging.py
+++ b/pywikibot/logging.py
@@ -5,8 +5,6 @@
#
# Distributed under the terms of the MIT license.
#
-from __future__ import absolute_import, division, unicode_literals
-
import logging
import os
import sys
@@ -18,9 +16,6 @@
VERBOSE = 18
INPUT = 25
-if sys.version_info[0] > 2:
- unicode = str
-
_init_routines = []
_inited_routines = set()
@@ -43,8 +38,8 @@
# User output/logging functions
-# Six output functions are defined. Each requires a unicode or string
-# argument. All of these functions generate a message to the log file if
+# Six output functions are defined. Each requires a string argument.
+# All of these functions generate a message to the log file if
# logging is enabled ("-log" or "-debug" command line arguments).
# The functions output(), stdout(), warning(), and error() all display a
@@ -94,17 +89,16 @@
if decoder:
text = text.decode(decoder)
- elif not isinstance(text, unicode):
- if not isinstance(text, str):
- # looks like text is a non-text object.
- # Maybe it has a __unicode__ builtin ?
- # (allows to print Page, Site...)
- text = unicode(text)
- else:
- try:
- text = text.decode('utf-8')
- except UnicodeDecodeError:
- text = text.decode('iso8859-1')
+ elif isinstance(text, bytes):
+ try:
+ text = text.decode('utf-8')
+ except UnicodeDecodeError:
+ text = text.decode('iso8859-1')
+ else:
+ # looks like text is a non-text object.
+ # Maybe it has a __str__ builtin ?
+ # (allows to print Page, Site...)
+ text = str(text)
logger.log(_level, text, extra=context, **kwargs)
@@ -210,8 +204,8 @@
exc_info = 1
else:
exc_info = sys.exc_info()
- msg = '%s: %s' % (
- repr(exc_info[1]).split('(')[0], unicode(exc_info[1]).strip())
+ msg = '{}: {}'.format(repr(exc_info[1]).split('(')[0],
+ str(exc_info[1]).strip())
if tb:
kwargs['exc_info'] = exc_info
logoutput(msg, decoder, newline, ERROR, **kwargs)
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 00735a3..ecfadb0 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -954,7 +954,7 @@
class SelfCallString(SelfCallMixin, str):
- """Unicode string with SelfCallMixin."""
+ """String with SelfCallMixin."""
pass
@@ -1089,7 +1089,7 @@
result.update(arg)
if conflicts:
raise ValueError('Multiple dicts contain the same keys: {0}'
- .format(', '.join(sorted(UnicodeType(key)
+ .format(', '.join(sorted(str(key)
for key in conflicts))))
return result
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/616699
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I330064cbbd9f32f017a387962c2b211ea9f220d4
Gerrit-Change-Number: 616699
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged