jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/979673 )
Change subject: [doc] Fix typo in category_redirect.py
......................................................................
[doc] Fix typo in category_redirect.py
Bug: T351951
Change-Id: I78c5cc5c0ec888b5c6d3cc6dbbc01eddcb9ac227
---
M scripts/category_redirect.py
1 file changed, 13 insertions(+), 3 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/category_redirect.py b/scripts/category_redirect.py
index 04153a6..1b337c1 100755
--- a/scripts/category_redirect.py
+++ b/scripts/category_redirect.py
@@ -14,8 +14,8 @@
-always If used, the bot won't ask if it should add the specified
text
--delay:# Set an amount of days. If the category is edited more recenty
- than given days, ignore it. Default is 7.
+-delay:# Set an amount of days. If the category is edited more
+ recently than given days, ignore it. Default is 7.
-tiny Only loops over Category:Non-empty_category_redirects and
moves all images, pages and categories in redirect categories
@@ -30,7 +30,7 @@
can be set within a settings file which is scripts.ini by default.
"""
#
-# (C) Pywikibot team, 2008-2022
+# (C) Pywikibot team, 2008-2023
#
# Distributed under the terms of the MIT license.
#
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/979673
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I78c5cc5c0ec888b5c6d3cc6dbbc01eddcb9ac227
Gerrit-Change-Number: 979673
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <dalangi-ctr(a)wikimedia.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/969516 )
Change subject: proofreadpage.py: fetch URL of page scan via API
......................................................................
proofreadpage.py: fetch URL of page scan via API
Fetch URL of Page image using new API for MW >= 1.40:
- query+prop=imageforpage
No more HTML page scraping is needed, except for MW version < 1.40.
This should also fix bug T181913, tests are re-added.
Change-Id: I374e878d0b321024903be8d5194b2878355667b6
Bug: T352524
Bug: T181913
Bug: T114318
---
M tests/proofreadpage_tests.py
M pywikibot/page/_basepage.py
M pywikibot/site/_extensions.py
M pywikibot/data/api/_generators.py
M pywikibot/proofreadpage.py
5 files changed, 82 insertions(+), 9 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/data/api/_generators.py b/pywikibot/data/api/_generators.py
index 902c561..07f0338 100644
--- a/pywikibot/data/api/_generators.py
+++ b/pywikibot/data/api/_generators.py
@@ -1038,3 +1038,8 @@
page._lintinfo.pop('pageid')
page._lintinfo.pop('title')
page._lintinfo.pop('ns')
+
+ if 'imageforpage' in props and 'imagesforpage' in pagedict:
+ # proofreadpage will work always on dicts
+ # it serves also as workaround for T352482
+ page._imageforpage = pagedict['imagesforpage'] or {}
diff --git a/pywikibot/page/_basepage.py b/pywikibot/page/_basepage.py
index 796eac5..caef2f3 100644
--- a/pywikibot/page/_basepage.py
+++ b/pywikibot/page/_basepage.py
@@ -73,7 +73,7 @@
'_contentmodel', '_langlinks', '_isredir', '_coords',
'_preloadedtext', '_timestamp', '_applicable_protections',
'_flowinfo', '_quality', '_pageprops', '_revid', '_quality_text',
- '_pageimage', '_item', '_lintinfo',
+ '_pageimage', '_item', '_lintinfo', '_imageforpage',
)
def __init__(self, source, title: str = '', ns=0) -> None:
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index a4ae60f..7157316 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -54,7 +54,7 @@
from pywikibot.data.api import ListGenerator, Request
from pywikibot.exceptions import Error, InvalidTitleError, OtherPageSaveError
from pywikibot.page import PageSourceType
-from pywikibot.tools import cached
+from pywikibot.tools import MediaWikiVersion, cached
try:
@@ -825,9 +825,7 @@
"""
return f'/* {self.status} */ '
- @property
- @cached
- def url_image(self) -> str:
+ def __url_image_lt_140(self) -> str:
"""Get the file url of the scan of ProofreadPage.
:return: file url of the scan ProofreadPage or None.
@@ -864,6 +862,36 @@
return url_image
+ def __url_image(self) -> str:
+ """Get the file url of the scan of ProofreadPage.
+
+ :return: file url of the scan of ProofreadPage or None.
+ :raises ValueError: in case of no image found for scan
+ """
+ self.site.loadpageurls(self)
+ url = self._imageforpage.get('fullsize')
+ if url is not None:
+ return f'{self.site.family.protocol(self.site.code)}:{url}'
+ else:
+ raise ValueError(f'imagesforpage is empty for {self}.')
+
+ @property
+ @cached
+ def url_image(self) -> str:
+ """Get the file url of the scan of ProofreadPage.
+
+ :return: file url of the scan of ProofreadPage or None.
+
+ For MW version < 1.40:
+ :raises Exception: in case of http errors
+ :raises ImportError: if bs4 is not installed, _bs4_soup() will raise
+ :raises ValueError: in case of no prp_page_image src found for scan
+ """
+ if self.site.version() < MediaWikiVersion('1.40'):
+ return self.__url_image_lt_140()
+ else:
+ return self.__url_image()
+
def _ocr_callback(self, cmd_uri: str,
parser_func: Optional[Callable[[str], str]] = None,
ocr_tool: Optional[str] = None
diff --git a/pywikibot/site/_extensions.py b/pywikibot/site/_extensions.py
index f6c5859..4e6977e 100644
--- a/pywikibot/site/_extensions.py
+++ b/pywikibot/site/_extensions.py
@@ -141,6 +141,29 @@
self._cache_proofreadinfo()
return self._proofread_levels
+ @need_extension('ProofreadPage')
+ def loadpageurls(
+ self,
+ page: 'pywikibot.page.BasePage'
+ ) -> None:
+ """Load URLs from api and store in page attributes.
+
+ Load URLs to images for a given page in the "Page:" namespace.
+ No effect for pages in other namespaces.
+
+ .. seealso:: :api:`imageforpage`
+ """
+ title = page.title(with_section=False)
+ # responsiveimages: server would try to render the other images as well
+ # let's not load the server unless needed.
+ prppifpprop = 'filename|size|fullsize'
+
+ query = self._generator(api.PropertyGenerator,
+ type_arg='imageforpage',
+ titles=title.encode(self.encoding()),
+ prppifpprop=prppifpprop)
+ self._update_page(page, query)
+
class GeoDataMixin:
diff --git a/tests/proofreadpage_tests.py b/tests/proofreadpage_tests.py
index 40a1728..13534e1 100755
--- a/tests/proofreadpage_tests.py
+++ b/tests/proofreadpage_tests.py
@@ -26,7 +26,6 @@
BasePageLoadRevisionsCachingTestBase,
BasePageMethodsTestBase,
)
-from tests.utils import skipping
class TestPagesTagParser(TestCase):
@@ -250,7 +249,7 @@
'footer': '\n{{smallrefs}}',
'url_image': ('https://upload.wikimedia.org/wikipedia/commons/'
'thumb/a/ac/Popular_Science_Monthly_Volume_1.djvu/'
- 'page12-1024px-Popular_Science_Monthly_Volume_1.djvu'
+ 'page12-2267px-Popular_Science_Monthly_Volume_1.djvu'
'.jpg'),
}
@@ -412,8 +411,7 @@
page.url_image
page = ProofreadPage(self.site, self.valid_redlink['title'])
- with skipping(ValueError, msg='T181913, T114318'):
- self.assertEqual(page.url_image, self.valid_redlink['url_image'])
+ self.assertEqual(page.url_image, self.valid_redlink['url_image'])
class TestPageQuality(TestCase):
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/969516
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I374e878d0b321024903be8d5194b2878355667b6
Gerrit-Change-Number: 969516
Gerrit-PatchSet: 13
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Sohom Datta <sohomdatta1(a)gmail.com>
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/978143 )
Change subject: proofreadpage: sort page names before loading pages
......................................................................
proofreadpage: sort page names before loading pages
Sort the list of pages before converting to a preload generator.
Otherwise the sorted() operation will transform everything to a list,
loading all pages, defeating the purpose of having a generator.
Change-Id: Ie8ca5d7dd37b7dd9fadca42ed5dcf339164fc427
---
M pywikibot/proofreadpage.py
1 file changed, 22 insertions(+), 6 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index e72b676..a4ae60f 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -1313,7 +1313,14 @@
filter_ql = list(self.site.proofread_levels)
filter_ql.remove(ProofreadPage.WITHOUT_TEXT)
- gen = (self.get_page(i) for i in range(start, end + 1))
+ gen = [self.get_page(i) for i in range(start, end + 1)]
+
+ # Decorate and sort by page number because preloadpages does not
+ # guarantee order.
+ # TODO: remove if preloadpages will guarantee order.
+ gen = [(self.get_number(p), p) for p in gen]
+ gen = [p for n, p in sorted(gen)]
+
if content:
gen = self.site.preloadpages(gen)
# Filter by QL.
@@ -1321,11 +1328,6 @@
# Yield only existing.
if only_existing:
gen = (p for p in gen if p.exists())
- # Decorate and sort by page number because preloadpages does not
- # guarantee order.
- # TODO: remove if preloadpages will guarantee order.
- gen = ((self.get_number(p), p) for p in gen)
- gen = (p for n, p in sorted(gen))
return gen
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/978143
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ie8ca5d7dd37b7dd9fadca42ed5dcf339164fc427
Gerrit-Change-Number: 978143
Gerrit-PatchSet: 1
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
JJMC89 has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/979136 )
Change subject: [FIX] fix typo in explanation of options
......................................................................
[FIX] fix typo in explanation of options
Change-Id: Idc396cbf2ecab8043da1740ae86a4ea817e1f1c8
---
M scripts/weblinkchecker.py
1 file changed, 10 insertions(+), 1 deletion(-)
Approvals:
Mpaa: Looks good to me, approved
JJMC89: Verified; Looks good to me, approved
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index a04d25d..02be28c 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -31,7 +31,7 @@
These command line parameters can be used to specify which pages to work on:
--repeat Work on all pages were dead links were found before. This is
+-repeat Work on all pages where dead links were found before. This is
useful to confirm that the links are dead after some time (at
least one week), which is required before the script will report
the problem.
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/979136
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Idc396cbf2ecab8043da1740ae86a4ea817e1f1c8
Gerrit-Change-Number: 979136
Gerrit-PatchSet: 2
Gerrit-Owner: Mevo1961 <mevo1961(a)gmail.com>
Gerrit-Reviewer: D3r1ck01 <dalangi-ctr(a)wikimedia.org>
Gerrit-Reviewer: JJMC89 <JJMC89.Wikimedia(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/971559 )
Change subject: [fix] check for valid family and site option after -help is processed
......................................................................
[fix] check for valid family and site option after -help is processed
pywikibot.handle_args() checks for a valid default site and prints an
error if either the family or the site code is invalid.
Make this test after -help option is processed to avoid unnecessary
api call when -help option is used. This is a workaround unless the
APISite.login(cookie_only=True) call no longer retrieves userinfo in
an early state i.e. instantiating the site.
Bug: T350272
Change-Id: I04a55c96a826c76b5bcf89893be92413985c6429
---
M pywikibot/bot.py
1 file changed, 27 insertions(+), 9 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/bot.py b/pywikibot/bot.py
index d364f6a..2d73000 100644
--- a/pywikibot/bot.py
+++ b/pywikibot/bot.py
@@ -991,15 +991,6 @@
# argument not global -> specific bot script will take care
non_global_args.append(arg)
- if calledModuleName() != 'generate_user_files': # T261771
- try:
- pywikibot.Site()
- except (UnknownFamilyError, UnknownSiteError):
- pywikibot.exception(exc_info=False)
- sys.exit(1)
- if calledModuleName() == 'wrapper':
- pywikibot._sites.clear()
-
if username:
config.usernames[config.family][config.mylang] = username
@@ -1014,6 +1005,15 @@
show_help(show_global=do_help_val == 'global')
sys.exit(0)
+ if calledModuleName() != 'generate_user_files': # T261771
+ try:
+ pywikibot.Site()
+ except (UnknownFamilyError, UnknownSiteError):
+ pywikibot.exception(exc_info=False)
+ sys.exit(1)
+ if calledModuleName() == 'wrapper':
+ pywikibot._sites.clear()
+
debug('handle_args() completed.')
return non_global_args
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/971559
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I04a55c96a826c76b5bcf89893be92413985c6429
Gerrit-Change-Number: 971559
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Martineznovo <martineznovo(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged