jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/674613 )
Change subject: [bugfix] Avoid duplicate reference names
......................................................................
[bugfix] Avoid duplicate reference names
The old implementation failed if there were autogenerated references
not starting with 1. Now an iterator was implemented which creates the
next unused number (up to 999, but this should be enough)
Other improvements:
- always use double quotes with references
- modify pattern to find single quotes in reference names
- remove regex to clean title (\s includes [\n\r\t])
- rename self.NAMES.match(params) to found
- use removeDisabledPart function only once
Bug: T278040
Change-Id: Ie082dea4334e62f6818a208b86704b3a8afcd0ad
---
M scripts/reflinks.py
1 file changed, 25 insertions(+), 21 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/reflinks.py b/scripts/reflinks.py
index f21b808..205f5c8 100755
--- a/scripts/reflinks.py
+++ b/scripts/reflinks.py
@@ -47,6 +47,7 @@
import subprocess
import tempfile
+from contextlib import suppress
from functools import partial
from textwrap import shorten
from urllib.error import URLError
@@ -56,6 +57,7 @@
import pywikibot
from pywikibot import comms, i18n, pagegenerators, textlib
+from pywikibot.backports import removeprefix
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot, SingleSiteBot
from pywikibot import config2 as config
from pywikibot.pagegenerators import (
@@ -233,7 +235,6 @@
self.title = re.sub(r'[\.+\-=]{4,}', ' ', self.title)
# remove \n and \r and unicode spaces from titles
self.title = re.sub(r'\s', ' ', self.title)
- self.title = re.sub(r'[\n\r\t]', ' ', self.title)
# remove extra whitespaces
# remove leading and trailing ./;/,/-/_/+/ /
self.title = re.sub(r' +', ' ', self.title.strip(r'=.;,-+_ '))
@@ -285,10 +286,9 @@
# Match references
self.REFS = re.compile(
r'(?i)<ref(?P<params>[^>/]*)>(?P<content>.*?)</ref>')
- self.NAMES = re.compile(
- r'(?i).*name\s*=\s*(?P<quote>"?)\s*(?P<name>.+)\s*(?P=quote).*')
- self.GROUPS = re.compile(
- r'(?i).*group\s*=\s*(?P<quote>"?)\s*(?P<group>.+)\s*(?P=quote).*')
+ fmt = r'(?i).*{0}\s*=\s*(?P<quote>["\']?)\s*(?P<{0}>.+)\s*(?P=quote).*'
+ self.NAMES = re.compile(fmt.format('name'))
+ self.GROUPS = re.compile(fmt.format('group'))
self.autogen = i18n.twtranslate(site, 'reflinks-autogen')
def process(self, text):
@@ -320,10 +320,10 @@
else:
v = [None, [match.group()], False, False]
- name = self.NAMES.match(params)
- if name:
- quoted = name.group('quote') == '"'
- name = name.group('name')
+ found = self.NAMES.match(params)
+ if found:
+ quoted = found.group('quote') in ['"', "'"]
+ name = found.group('name')
if v[0]:
if v[0] != name:
named_repl[name] = [v[0], v[2]]
@@ -344,14 +344,20 @@
found_ref_names[name] = 1
groupdict[content] = v
- id_ = 1
- while self.autogen + str(id_) in found_ref_names:
- id_ += 1
+ used_numbers = set()
+ for name in found_ref_names:
+ number = removeprefix(name, self.autogen)
+ with suppress(ValueError):
+ used_numbers.add(int(number))
+
+ # iterator to give the next free number
+ free_number = iter({str(i) for i in range(1, 1000) # should be enough
+ if i not in used_numbers})
for (g, d) in found_refs.items():
group = ''
if g:
- group = 'group=\"{}\" '.format(group)
+ group = 'group="{}" '.format(group)
for (k, v) in d.items():
if len(v[1]) == 1 and not v[3]:
@@ -359,10 +365,9 @@
name = v[0]
if not name:
- name = '"{}{}"'.format(self.autogen, id_)
- id_ += 1
+ name = '"{}{}"'.format(self.autogen, next(free_number))
elif v[2]:
- name = '{!r}'.format(name)
+ name = '"{}"'.format(name)
named = '<ref {}name={}>{}</ref>'.format(group, name, k)
text = text.replace(v[1][0], named, 1)
@@ -384,10 +389,10 @@
# TODO : Support ref groups
name = v[0]
if v[1]:
- name = '{!r}'.format(name)
+ name = '"{}"'.format(name)
text = re.sub(
- '<ref name\\s*=\\s*(?P<quote>"?)\\s*{}\\s*(?P=quote)\\s*/>'
+ r'<ref name\s*=\s*(?P<quote>["\']?)\s*{}\s*(?P=quote)\s*/>'
.format(k),
'<ref name={} />'.format(name), text)
return text
@@ -518,10 +523,9 @@
"""Process one page."""
# Load the page's text from the wiki
new_text = page.text
-
+ raw_text = textlib.removeDisabledParts(new_text)
# for each link to change
- for match in linksInRef.finditer(
- textlib.removeDisabledParts(page.get())):
+ for match in linksInRef.finditer(raw_text):
link = match.group('url')
if 'jstor.org' in link:
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/674613
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ie082dea4334e62f6818a208b86704b3a8afcd0ad
Gerrit-Change-Number: 674613
Gerrit-PatchSet: 7
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Rubin <rubin.happy(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/674559 )
Change subject: [doc] Update ROADMAP.rst
......................................................................
[doc] Update ROADMAP.rst
Change-Id: I78470a6741e09eb36345265e30df5fe396f85fc5
---
M ROADMAP.rst
1 file changed, 1 insertion(+), 0 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/ROADMAP.rst b/ROADMAP.rst
index 588cf8d..cea9631 100644
--- a/ROADMAP.rst
+++ b/ROADMAP.rst
@@ -11,6 +11,7 @@
* 6.0.0: User.name() method will be removed in favour of User.username property
* 5.6.0: pagenenerators.handleArg() method will be removed in favour of handle_arg() (T271437)
+* 5.6.0: Family.ignore_certificate_error() method will be removed in favour of verify_SSL_certificate() (T265205)
* 5.0.0: OptionHandler.options dict will be removed in favour of OptionHandler.opt
* 5.0.0: Methods deprecated for 5 years or longer will be removed
* 5.0.0: pagegenerators.ReferringPageGenerator is desupported and will be removed
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/674559
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I78470a6741e09eb36345265e30df5fe396f85fc5
Gerrit-Change-Number: 674559
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/674578 )
Change subject: [cleanup] use revisions instead of deprecated getLatestEditors
......................................................................
[cleanup] use revisions instead of deprecated getLatestEditors
- replace getLatestEditors with list(revisions)
- use len(history) instead of comparing timestamps
- second_text is set previously; no need to set it to False
- remove return statement at the end of the block
Change-Id: I136b68986fb4a24f1a131283b56d671d3b594747
---
M scripts/checkimages.py
1 file changed, 6 insertions(+), 11 deletions(-)
Approvals:
Huji: Looks good to me, but someone else must approve
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/checkimages.py b/scripts/checkimages.py
index cde33f7..cc6a476 100755
--- a/scripts/checkimages.py
+++ b/scripts/checkimages.py
@@ -652,26 +652,24 @@
emailPageName = i18n.translate(self.site, emailPageWithText)
emailSubj = i18n.translate(self.site, emailSubject)
if self.notification2:
- self.notification2 = self.notification2 % self.image_to_report
+ self.notification2 %= self.image_to_report
else:
self.notification2 = self.notification
- second_text = False
+ second_text = False
# Getting the talk page's history, to check if there is another
# advise...
try:
testoattuale = self.talk_page.get()
- history = self.talk_page.getLatestEditors(limit=10)
+ history = list(self.talk_page.revisions(total=10))
latest_user = history[0]['user']
pywikibot.output(
'The latest user that has written something is: '
+ latest_user)
- if latest_user in self.bots:
+ # A block to prevent the second message if the bot also
+ # welcomed users...
+ if latest_user in self.bots and len(history) > 1:
second_text = True
- # A block to prevent the second message if the bot also
- # welcomed users...
- if history[0]['timestamp'] == history[-1]['timestamp']:
- second_text = False
except pywikibot.IsRedirectPage:
pywikibot.output(
'The user talk is a redirect, trying to get the right talk...')
@@ -679,11 +677,9 @@
self.talk_page = self.talk_page.getRedirectTarget()
testoattuale = self.talk_page.get()
except pywikibot.NoPage:
- second_text = False
testoattuale = i18n.translate(self.site, empty)
except pywikibot.NoPage:
pywikibot.output('The user page is blank')
- second_text = False
testoattuale = i18n.translate(self.site, empty)
if self.commTalk:
@@ -725,7 +721,6 @@
emailClass.send_email(emailSubj, text_to_send)
except NotEmailableError:
pywikibot.output('User is not mailable, aborted')
- return
def regexGenerator(self, regexp, textrun) -> Generator[pywikibot.FilePage,
None, None]:
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/674578
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I136b68986fb4a24f1a131283b56d671d3b594747
Gerrit-Change-Number: 674578
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Huji <huji.huji(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/674536 )
Change subject: [6.1] Update current release to 6.1
......................................................................
[6.1] Update current release to 6.1
- update ROADMAP.rst
Change-Id: I75b41a1c156a1a075ee6dc9d97249bbfa87077e9
---
M ROADMAP.rst
M pywikibot/__metadata__.py
2 files changed, 4 insertions(+), 65 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/ROADMAP.rst b/ROADMAP.rst
index 427fb8f..588cf8d 100644
--- a/ROADMAP.rst
+++ b/ROADMAP.rst
@@ -1,77 +1,16 @@
Current release changes
~~~~~~~~~~~~~~~~~~~~~~~
-* (No changes yet)
-
-6.0.1
-~~~~~
-
-* Add support for taywiki, trvwiki and mnwwiktionary (T275838, T276128, T276250)
-
-6.0.0
-~~~~~
-
-Breaking changes
-^^^^^^^^^^^^^^^^
-
-* interwiki_graph module was removed (T223826)
-* Require setuptools >= 20.2 due to PEP 440
-* Support of MediaWiki < 1.23 has been dropped (T268979)
-* APISite.loadimageinfo will no longer return any content
-* Return requests.Response with http.request() instead of plain text (T265206)
-* config.db_hostname has been renamed to db_hostname_format
-
-Code cleanups
-^^^^^^^^^^^^^
-
-* tools.PY2 was removed (T213287)
-* Site.language() method was removed in favour of Site.lang property
-* Deprecated Page.getMovedTarget() method was removed in favour of moved_target()
-* Remove deprecated Wikibase.lastrevid attribute
-* config settings of archived scripts were removed (T223826)
-* Drop startsort/endsort parameter for site.categorymembers method (T74101)
-* Deprecated data attribute of http.fetch() result has been dropped (T265206)
-* toStdout parameter of pywikibot.output() has been dropped
-* Deprecated Site.getToken() and Site.case was removed
-* Deprecated Family.known_families dict was removed (T89451)
-* Deprecated DataSite.get_* methods was removed
-* Deprecated LogEntryFactory.logtypes classproperty was removed
-* Unused comms.threadedhttp module was removed; threadedhttp.HttpRequest was already replaced with requests.Response (T265206)
-
-Other changes
-^^^^^^^^^^^^^
-
-* Raise a SiteDefinitionError if api request response is Non-JSON and site is AutoFamily (T272911)
-* Support deleting and undeleting specific file versions (T276725)
-* Only add bot option generator if the bot class have it already
-* Raise a RuntimeError if pagegenerators -namespace option is provided too late (T276916)
-* Check for LookupError exception in http._decide_encoding (T276715)
-* Re-enable setting private family files (T270949)
-* Move the hardcoded namespace identifiers to an IntEnum
-* Buffer 'pageprops' in api.QueryGenerator
-* Ensure that BaseBot.generator is a Generator
-* Add additional info into log if 'messagecode' is missing during login (T261061, T269503)
-* Use hardcoded messages if i18n system is not available (T275981)
-* Move wikibase data structures to page/_collections.py
* L10N updates
-* Add support for altwiki (T271984)
-* Add support for mniwiki and mniwiktionary (T273467, T273462)
-* Don't use mime parameter as boolean in api.Request (T274723)
-* textlib.removeDisabledPart is able to remove templates (T274138)
-* Create a SiteLink with __getitem__ method and implement lazy load (T273386, T245809, T238471, T226157)
-* Fix date.formats['MonthName'] behaviour (T273573)
-* Implement pagegenerators.handle_args() to process all options at once
-* Add enabled_options, disabled_options to GeneratorFactory (T271320)
-* Move interwiki() interwiki_prefix() and local_interwiki() methods from BaseSite to APISite
-* Add requests.Response.headers to log when an API error occurs (T272325)
+* Family files can be collected from a zip folder (T278076)
+* Deprecated getuserinfo and getglobaluserinfo Site methods were removed
+* compat2core.py script was archived
Future release notes
~~~~~~~~~~~~~~~~~~~~
* 6.0.0: User.name() method will be removed in favour of User.username property
* 5.6.0: pagenenerators.handleArg() method will be removed in favour of handle_arg() (T271437)
-* 5.5.0: Site.getuserinfo() method will be dropped in favour of userinfo property
-* 5.5.0: Site.getglobaluserinfo() method will be dropped in favour of globaluserinfo property
* 5.0.0: OptionHandler.options dict will be removed in favour of OptionHandler.opt
* 5.0.0: Methods deprecated for 5 years or longer will be removed
* 5.0.0: pagegenerators.ReferringPageGenerator is desupported and will be removed
diff --git a/pywikibot/__metadata__.py b/pywikibot/__metadata__.py
index b512ae8..9c335de 100644
--- a/pywikibot/__metadata__.py
+++ b/pywikibot/__metadata__.py
@@ -5,7 +5,7 @@
# Distributed under the terms of the MIT license.
#
__name__ = 'pywikibot'
-__version__ = '6.0.2.dev0'
+__version__ = '6.1.0.dev0'
__description__ = 'Python MediaWiki Bot Framework'
__maintainer__ = 'The Pywikibot team'
__maintainer_email__ = 'pywikibot(a)lists.wikimedia.org'
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/674536
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I75b41a1c156a1a075ee6dc9d97249bbfa87077e9
Gerrit-Change-Number: 674536
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/674317 )
Change subject: [IMPR] collect family files from zip folder
......................................................................
[IMPR] collect family files from zip folder
if the given folder_path of register_family_file function is not a
directory it probably resides in a zip folder. In that case check
the current path and its parents whether it is a zip file. If no
zipfile is found raise a NotADirectoryError. Otherwise collect all
files inside the folder and filter the family files by the suffix.
Bug: T278076
Change-Id: Ifb4a75c8f81a7d727b149ed6354e8896ffeb4e0c
---
M pywikibot/config2.py
1 file changed, 34 insertions(+), 5 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index d55dbf5..f514ce1 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -45,6 +45,7 @@
from textwrap import fill
from typing import Optional, Union
from warnings import warn
+from zipfile import is_zipfile, ZipFile
from pywikibot.__metadata__ import __version__ as pwb_version
from pywikibot.backports import Dict, List, removesuffix, Tuple
@@ -397,13 +398,41 @@
family_files[family_name] = file_path
-def register_families_folder(folder_path):
- """Register all family class files contained in a directory."""
+def register_families_folder(folder_path: str):
+ """Register all family class files contained in a directory.
+
+ @param folder_path: The path of a folder containing family files.
+ The families may also be inside a zip archive structure.
+ @raises NotADirectoryError: folder_path is not a directory
+ """
suffix = '_family.py'
- for file_name in os.listdir(folder_path):
+ if os.path.isdir(folder_path):
+ for file_name in os.listdir(folder_path):
+ if file_name.endswith(suffix):
+ family_name = removesuffix(file_name, suffix)
+ family_files[family_name] = os.path.join(folder_path,
+ file_name)
+ return
+
+ # probably there is a zip file chain (T278076)
+ # find the parent zip folder
+ path = Path(folder_path)
+ if not is_zipfile(path):
+ for path in path.parents:
+ if is_zipfile(path):
+ break
+ else:
+ raise NotADirectoryError('20', 'Not a directory', folder_path)
+
+ # read the family files from zip folder
+ # assume that all files ending with suffix reside in family folder
+ zip_file = ZipFile(path)
+ for file_name in zip_file.namelist():
if file_name.endswith(suffix):
- family_name = removesuffix(file_name, suffix)
- family_files[family_name] = os.path.join(folder_path, file_name)
+ file_path = Path(file_name)
+ family_name = removesuffix(file_path.name, suffix)
+ family_files[family_name] = os.path.join(folder_path,
+ file_path.name)
# Get the names of all known families, and initialize with empty dictionaries.
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/674317
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ifb4a75c8f81a7d727b149ed6354e8896ffeb4e0c
Gerrit-Change-Number: 674317
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: WolfgangFahl <wf(a)bitplan.com>
Gerrit-MessageType: merged