Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/821796 )
Change subject: [IMPR] use backported pairwise in archivebot.py
......................................................................
[IMPR] use backported pairwise in archivebot.py
Function itertools.pairwise was introduced in Python 3.10.
Change-Id: I7bdd71920855df782aab1a66ca2bde2f1976c484
---
M scripts/archivebot.py
1 file changed, 3 insertions(+), 4 deletions(-)
Approvals:
Xqt: Verified; Looks good to me, approved
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index e05e565..a756bbd 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -115,7 +115,6 @@
# Distributed under the terms of the MIT license.
#
import datetime
-import itertools
import locale
import os
import re
@@ -129,7 +128,7 @@
import pywikibot
from pywikibot import i18n
-from pywikibot.backports import List, Set, Tuple
+from pywikibot.backports import List, Set, Tuple, pairwise
from pywikibot.exceptions import Error, NoPageError
from pywikibot.textlib import (
TimeStripper,
@@ -138,7 +137,7 @@
findmarker,
to_local_digits,
)
-from pywikibot.time import parse_duration, str2timedelta, MW_KEYS
+from pywikibot.time import MW_KEYS, parse_duration, str2timedelta
ShouldArchive = Tuple[str, str]
@@ -392,7 +391,7 @@
if self.keep:
# set the timestamp to the previous if the current is lower
- for first, second in itertools.pairwise(self.threads):
+ for first, second in pairwise(self.threads):
second.timestamp = self.max(first.timestamp, second.timestamp)
# This extra info is not desirable when run under the unittest
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/821796
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I7bdd71920855df782aab1a66ca2bde2f1976c484
Gerrit-Change-Number: 821796
Gerrit-PatchSet: 1
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/820889 )
Change subject: [IMPR] use User:MiszaBot/config as default template
......................................................................
[IMPR] use User:MiszaBot/config as default template
Change-Id: I697a73736f21f7947b99403877d2f16fbe7f665b
---
M scripts/archivebot.py
1 file changed, 14 insertions(+), 13 deletions(-)
Approvals:
Matěj Suchánek: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index 8b3100d..c2bf860 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -1,18 +1,19 @@
#!/usr/bin/python3
-"""
-archivebot.py - discussion page archiving bot.
+"""archivebot.py - discussion page archiving bot.
usage:
- python pwb.py archivebot [OPTIONS] TEMPLATE_PAGE
+ python pwb.py archivebot [OPTIONS] [TEMPLATE_PAGE]
-Bot examines backlinks (Special:WhatLinksHere) to TEMPLATE_PAGE.
-Then goes through all pages (unless a specific page specified using options)
-and archives old discussions. This is done by breaking a page into threads,
-then scanning each thread for timestamps. Threads older than a specified
-threshold are then moved to another page (the archive), which can be named
-either basing on the thread's name or then name can contain a counter which
-will be incremented when the archive reaches a certain size.
+Several TEMPLATE_PAGE templates can be given at once. Default is
+`User:MiszaBot/config`. Bot examines backlinks (Special:WhatLinksHere)
+to all TEMPLATE_PAGE templates. Then goes through all pages (unless a
+specific page specified using options) and archives old discussions.
+This is done by breaking a page into threads, then scanning each thread
+for timestamps. Threads older than a specified threshold are then moved
+to another page (the archive), which can be named either basing on the
+thread's name or then name can contain a counter which will be
+incremented when the archive reaches a certain size.
Transcluded template may contain the following parameters:
@@ -891,9 +892,9 @@
return
if not templates:
- pywikibot.bot.suggest_help(
- additional_text='No template was specified.')
- return
+ templates = ['User:MiszaBot/config']
+ pywikibot.info('No template was specified, using default {{{{{}}}}}.'
+ .format(templates[0]))
for template_name in templates:
tmpl = pywikibot.Page(site, template_name, ns=10)
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/820889
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I697a73736f21f7947b99403877d2f16fbe7f665b
Gerrit-Change-Number: 820889
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: Whym <whym(a)whym.org>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/820746 )
Change subject: [IMPR] backport pairwise() from Python 3.10
......................................................................
[IMPR] backport pairwise() from Python 3.10
Also use pairwise to iterate pairs of elements
Change-Id: I3fde479978960a7b033719ac5c5a26185c5cdd43
---
M pywikibot/backports.py
M scripts/claimit.py
M scripts/replace.py
M scripts/template.py
4 files changed, 34 insertions(+), 18 deletions(-)
Approvals:
Mpaa: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/backports.py b/pywikibot/backports.py
index d5a0ba2..9773ac6 100644
--- a/pywikibot/backports.py
+++ b/pywikibot/backports.py
@@ -123,3 +123,19 @@
if string.endswith(suffix):
return string[:-len(suffix)]
return string
+
+
+# bpo-38200
+if PYTHON_VERSION >= (3, 10):
+ from itertools import pairwise
+else:
+ from itertools import tee
+
+ def pairwise(iterable):
+ """Return successive overlapping pairs taken from the input iterable.
+
+ .. versionadded:: 7.6
+ """
+ a, b = tee(iterable)
+ next(b, None)
+ return zip(a, b)
diff --git a/scripts/claimit.py b/scripts/claimit.py
index 3e1ac09..4fff795 100755
--- a/scripts/claimit.py
+++ b/scripts/claimit.py
@@ -52,6 +52,7 @@
#
import pywikibot
from pywikibot import WikidataBot, pagegenerators
+from pywikibot.backports import pairwise
# This is required for the text that is shown when you run this script
@@ -126,15 +127,15 @@
claims = []
repo = pywikibot.Site().data_repository()
- for i in range(0, len(commandline_claims), 2):
- claim = pywikibot.Claim(repo, commandline_claims[i])
+ for source_str, target_str in pairwise(commandline_claims):
+ claim = pywikibot.Claim(repo, source_str)
if claim.type == 'wikibase-item':
- target = pywikibot.ItemPage(repo, commandline_claims[i + 1])
+ target = pywikibot.ItemPage(repo, target_str)
elif claim.type == 'string':
- target = commandline_claims[i + 1]
+ target = target_str
elif claim.type == 'globe-coordinate':
coord_args = [
- float(c) for c in commandline_claims[i + 1].split(',')]
+ float(c) for c in target_str.split(',')]
if len(coord_args) >= 3:
precision = coord_args[2]
else:
diff --git a/scripts/replace.py b/scripts/replace.py
index ac97270..703dd5c 100755
--- a/scripts/replace.py
+++ b/scripts/replace.py
@@ -154,7 +154,7 @@
import pywikibot
from pywikibot import editor, fixes, i18n, pagegenerators, textlib
-from pywikibot.backports import Dict, Generator, List, Pattern, Tuple
+from pywikibot.backports import pairwise, Dict, Generator, List, Pattern, Tuple
from pywikibot.bot import ExistingPageBot, SingleSiteBot
from pywikibot.exceptions import InvalidPageError, NoPageError
from pywikibot.tools import chars
@@ -979,9 +979,9 @@
# The summary stored here won't be actually used but is only an example
site = pywikibot.Site()
single_summary = None
- for i in range(0, len(commandline_replacements), 2):
- replacement = Replacement(commandline_replacements[i],
- commandline_replacements[i + 1])
+
+ for old, new in pairwise(commandline_replacements):
+ replacement = Replacement(old, new)
if not single_summary:
single_summary = i18n.twtranslate(
site, 'replace-replacing',
diff --git a/scripts/template.py b/scripts/template.py
index 7e83f71..58421bd 100755
--- a/scripts/template.py
+++ b/scripts/template.py
@@ -113,6 +113,7 @@
import pywikibot
from pywikibot import i18n, pagegenerators, textlib
+from pywikibot.backports import pairwise
from pywikibot.bot import SingleSiteBot
from pywikibot.pagegenerators import XMLDumpPageGenerator
from pywikibot.tools.itertools import filter_unique, roundrobin_generators
@@ -215,7 +216,6 @@
:param args: command line arguments
"""
template_names = []
- templates = {}
options = {}
# If xmlfilename is None, references will be loaded from the live wiki.
xmlfilename = None
@@ -266,17 +266,16 @@
return
if bool(options.get('subst', False)) ^ options.get('remove', False):
- for template_name in template_names:
- templates[template_name] = None
+ templates = {name: None for name in template_names}
else:
- try:
- for i in range(0, len(template_names), 2):
- templates[template_names[i]] = template_names[i + 1]
- except IndexError:
- pywikibot.output('Unless using solely -subst or -remove, '
- 'you must give an even number of template names.')
+ if len(template_names) % 2:
+ pywikibot.warning('Unless using solely -subst or -remove, you'
+ 'must give an even number of template names.')
return
+ templates = {key: value
+ for key, value in pairwise(template_names)}
+
old_templates = [pywikibot.Page(site, template_name, ns=10)
for template_name in templates]
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/820746
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I3fde479978960a7b033719ac5c5a26185c5cdd43
Gerrit-Change-Number: 820746
Gerrit-PatchSet: 6
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/820837 )
Change subject: [IMPR]: fix docstring for page.ocr() method.
......................................................................
[IMPR]: fix docstring for page.ocr() method.
Fix docstring for page.ocr() method:
- specify also 'wmfOCR' for ocr_tool parameter.
Change-Id: I4829d80d9ec79354df97c1a2e95dab0f4939ecc5
---
M pywikibot/proofreadpage.py
1 file changed, 2 insertions(+), 1 deletion(-)
Approvals:
Xqt: Verified; Looks good to me, approved
Mpaa: Looks good to me, approved
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index 2f98b6c..6004a73 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -736,7 +736,8 @@
It is the user's responsibility to reset quality level accordingly.
- :param ocr_tool: 'phetools' or 'googleOCR', default is 'phetools'
+ :param ocr_tool: 'phetools', 'wmfOCR' or 'googleOCR';
+ default is 'phetools'
:return: OCR text for the page.
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/820837
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I4829d80d9ec79354df97c1a2e95dab0f4939ecc5
Gerrit-Change-Number: 820837
Gerrit-PatchSet: 1
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/817748 )
Change subject: [IMPR] Speed up archivebot.py a bit
......................................................................
[IMPR] Speed up archivebot.py a bit
- early return DiscussionPage.is_full
- reorder conditions to calculate the most used first
- return get_archive_page from archives if present
- don't walk down and up to find a counter if the first was found
- not necessary to delete local values
Change-Id: Idfd271f32020a5f2c56557641f3331d6e3db7233
---
M scripts/archivebot.py
1 file changed, 25 insertions(+), 15 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index 6c73eca..8b3100d 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -407,10 +407,14 @@
def is_full(self, max_archive_size: Size) -> bool:
"""Check whether archive size exceeded."""
+ if self.full:
+ return True
+
size, unit = max_archive_size
- if (self.size() > self.archiver.maxsize
- or unit == 'B' and self.size() >= size
- or unit == 'T' and len(self.threads) >= size):
+ self_size = self.size()
+ if (unit == 'B' and self_size >= size
+ or unit == 'T' and len(self.threads) >= size
+ or self_size > self.archiver.maxsize):
self.full = True # xxx: this is one-way flag
return self.full
@@ -584,21 +588,21 @@
return None
def get_archive_page(self, title: str, params=None) -> DiscussionPage:
- """
- Return the page for archiving.
+ """Return the page for archiving.
If it doesn't exist yet, create and cache it.
Also check for security violations.
"""
- page_title = self.page.title()
- archive = pywikibot.Page(self.site, title)
- if not (self.force or title.startswith(page_title + '/')
- or self.key_ok()):
- raise ArchiveSecurityError(
- 'Archive page {} does not start with page title ({})!'
- .format(archive, page_title))
if title not in self.archives:
- self.archives[title] = DiscussionPage(archive, self, params)
+ page_title = self.page.title()
+ archive_link = pywikibot.Link(title, self.site)
+ if not (title.startswith(page_title + '/') or self.force
+ or self.key_ok()):
+ raise ArchiveSecurityError(
+ 'Archive page {} does not start with page title ({})!'
+ .format(archive_link, page_title))
+ self.archives[title] = DiscussionPage(archive_link, self, params)
+
return self.archives[title]
def get_params(self, timestamp, counter: int) -> dict:
@@ -666,7 +670,6 @@
params = self.get_params(self.now, counter)
aux_params = self.get_params(self.now, counter + 1)
counter_matters = (pattern % params) != (pattern % aux_params)
- del params, aux_params
# we need to start with the oldest archive since that is
# the one the saved counter applies to, so sort the groups
@@ -680,6 +683,7 @@
# 1. it matters (AND)
# 2. "era" (year, month, etc.) changes (AND)
# 3. there is something to put to the new archive.
+ counter_found = False
for i, thread in group:
threads_left = len(self.page.threads) - self.archived_threads
if threads_left <= int(self.get_attr('minthreadsleft', 5)):
@@ -694,7 +698,8 @@
archive = self.get_archive_page(pattern % params, params)
if counter_matters:
- while counter > 1 and not archive.exists():
+ while not counter_found and counter > 1 \
+ and not archive.exists():
# This may happen when either:
# 1. a previous version of the bot run and reset
# the counter without archiving anything
@@ -707,6 +712,10 @@
params = self.get_params(thread.timestamp, counter)
archive = self.get_archive_page(
pattern % params, params)
+ else:
+ # There are only non existing pages found by count down
+ counter_found = True
+
while archive.is_full(max_arch_size):
counter += 1
params = self.get_params(thread.timestamp, counter)
@@ -715,6 +724,7 @@
archive.feed_thread(thread, max_arch_size)
self.archived_threads += 1
+
if counter_matters:
era_change = True
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/817748
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Idfd271f32020a5f2c56557641f3331d6e3db7233
Gerrit-Change-Number: 817748
Gerrit-PatchSet: 6
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: MarcoAurelio <maurelio(a)toolforge.org>
Gerrit-CC: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-MessageType: merged