jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/756699 )
Change subject: [doc] Update documentation
......................................................................
[doc] Update documentation
* Fix typo
** precessors -> processors
Change-Id: Id0d6ada40c78bc16d31b5e735633260b3c68ff77
---
M scripts/maintenance/preload_sites.py
1 file changed, 2 insertions(+), 2 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/maintenance/preload_sites.py b/scripts/maintenance/preload_sites.py
index 6c790e0..7e5b051 100755
--- a/scripts/maintenance/preload_sites.py
+++ b/scripts/maintenance/preload_sites.py
@@ -4,7 +4,7 @@
The following parameters are supported:
-worker:<num> The number of parallel tasks to be run. Default is the
- number of precessors on the machine
+ number of processors on the machine
Usage:
@@ -16,7 +16,7 @@
"""
#
-# (C) Pywikibot team, 2021
+# (C) Pywikibot team, 2022
#
# Distributed under the terms of the MIT license.
#
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/756699
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Id0d6ada40c78bc16d31b5e735633260b3c68ff77
Gerrit-Change-Number: 756699
Gerrit-PatchSet: 3
Gerrit-Owner: Meno25 <meno25mail(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/756165 )
Change subject: [bugfix] Always use low limit for bulk load of redirects
......................................................................
[bugfix] Always use low limit for bulk load of redirects
- Always load 50 pages at once with _next_redirect_group
- rename apiQ to chunk
- print a dot every 500 pages
- only yield multiple or broken redirects with get_redirects_via_api();
this ensures that no page title is printed if redirect.py does
not process it.
Bug: T299859
Change-Id: I7be7b5bbdc35ec98d7007792d95971202119ffcc
---
M scripts/redirect.py
1 file changed, 18 insertions(+), 10 deletions(-)
Approvals:
Meno25: Looks good to me, but someone else must approve
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/redirect.py b/scripts/redirect.py
index ec74455..a836bf9 100755
--- a/scripts/redirect.py
+++ b/scripts/redirect.py
@@ -213,22 +213,28 @@
def _next_redirect_group(self) -> Generator[List[pywikibot.Page], None,
None]:
- """Generator that yields batches of 500 redirects as a list."""
- apiQ = []
+ """Generator that yields batches of 50 redirects as a list."""
+ chunk = []
+ chunks = 0
for page in self.get_redirect_pages_via_api():
- apiQ.append(str(page.pageid))
- if len(apiQ) >= 500:
- pywikibot.output('.', newline=False)
- yield apiQ
- apiQ = []
- if apiQ:
- yield apiQ
+ chunk.append(str(page.pageid))
+ if len(chunk) >= 50: # T299859
+ chunks += 1
+ if not chunks % 10:
+ pywikibot.output('.', newline=False)
+ yield chunk
+ chunk.clear()
+ if chunk:
+ yield chunk
def get_redirects_via_api(self, maxlen=8) -> Generator[Tuple[
str, Optional[int], str, Optional[str]], None, None]:
r"""
Return a generator that yields tuples of data about redirect Pages.
+ .. versionchanged:: 7.0
+ only yield tuple if type of redirect is not 1 (normal redirect)
+
The description of returned tuple items is as follows:
:[0]: page title of a redirect page
@@ -281,7 +287,9 @@
result += 1
final = redirects[final]
- yield (redirect, result, target, final)
+ # only yield multiple or broken redirects
+ if result != 1:
+ yield redirect, result, target, final
def retrieve_broken_redirects(self) -> Generator[
Union[str, pywikibot.Page], None, None]:
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/756165
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I7be7b5bbdc35ec98d7007792d95971202119ffcc
Gerrit-Change-Number: 756165
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Meno25 <meno25mail(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/756156 )
Change subject: [bugfix] Use the right counter when saving a page
......................................................................
[bugfix] Use the right counter when saving a page
Change-Id: I8cac6ec1ef2936f0916b4978b62b8dd97e8b0b82
---
M pywikibot/bot.py
1 file changed, 1 insertion(+), 1 deletion(-)
Approvals:
Meno25: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/bot.py b/pywikibot/bot.py
index ef061b2..df367b0 100644
--- a/pywikibot/bot.py
+++ b/pywikibot/bot.py
@@ -1423,7 +1423,7 @@
try:
func(*args, **kwargs)
- self.counter['save'] += 1
+ self.counter['write'] += 1
except PageSaveRelatedError as e:
if not ignore_save_related_errors:
raise
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/756156
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I8cac6ec1ef2936f0916b4978b62b8dd97e8b0b82
Gerrit-Change-Number: 756156
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Meno25 <meno25mail(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/756153 )
Change subject: [IMPR] move page checking code from batchLoaded to check_page
......................................................................
[IMPR] move page checking code from batchLoaded to check_page
- decrease nested flow statements
- decrease cyclomatic complexity of batchLoaded method by 10 %
Change-Id: Ic51f2571cbffd47930df5bb50e6f0b8dbecefab6
---
M scripts/interwiki.py
1 file changed, 179 insertions(+), 181 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index 1f222fe..edfb329 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -1098,6 +1098,184 @@
if self.conf.hintsareright:
self.hintedsites.add(page.site)
+ def check_page(self, page, counter):
+ """Check whether any iw links should be added to the todo list."""
+ if not page.exists():
+ self.conf.remove.append(str(page))
+ self.conf.note('{} does not exist. Skipping.'.format(page))
+ if page == self.origin:
+ # The page we are working on is the page that does not
+ # exist. No use in doing any work on it in that case.
+ for site, count in self.todo.iter_values_len():
+ counter.minus(site, count)
+ self.todo.clear()
+ # In some rare cases it might be we already did check some
+ # 'automatic' links
+ self.done.clear()
+ return
+
+ if page.isRedirectPage():
+ redirectTargetPage = page.getRedirectTarget()
+ redir = ''
+ elif page.isCategoryRedirect():
+ redirectTargetPage = page.getCategoryRedirectTarget()
+ redir = 'category '
+ else:
+ redir = None
+
+ if redir is not None:
+ self.conf.note('{} is {}redirect to {}'
+ .format(page, redir, redirectTargetPage))
+ if self.origin is None or page == self.origin:
+ # the 1st existig page becomes the origin page, if none was
+ # supplied
+ if self.conf.initialredirect:
+ # don't follow another redirect; it might be a self
+ # loop
+ if not redirectTargetPage.isRedirectPage() \
+ and not redirectTargetPage.isCategoryRedirect():
+ self.origin = redirectTargetPage
+ self.todo.append(redirectTargetPage)
+ counter.plus(redirectTargetPage.site)
+ else:
+ # This is a redirect page to the origin. We don't need
+ # to follow the redirection.
+ # In this case we can also stop all hints!
+ for site, count in self.todo.iter_values_len():
+ counter.minus(site, count)
+ self.todo.clear()
+ elif not self.conf.followredirect:
+ self.conf.note('not following {}redirects.'.format(redir))
+ elif page.isStaticRedirect():
+ self.conf.note('not following static {}redirects.'
+ .format(redir))
+ elif (page.site.family == redirectTargetPage.site.family
+ and not self.skipPage(page, redirectTargetPage, counter)):
+ if self.addIfNew(redirectTargetPage, counter, page):
+ if config.interwiki_shownew:
+ pywikibot.output('{}: {} gives new {}redirect {}'
+ .format(self.origin, page, redir,
+ redirectTargetPage))
+ return
+
+ # must be behind the page.isRedirectPage() part
+ # otherwise a redirect error would be raised
+ if page_empty_check(page):
+ self.conf.remove.append(str(page))
+ self.conf.note('{} is empty. Skipping.'.format(page))
+ if page == self.origin:
+ for site, count in self.todo.iter_values_len():
+ counter.minus(site, count)
+ self.todo.clear()
+ self.done.clear()
+ self.origin = None
+ return
+
+ if page.section():
+ self.conf.note('{} is a page section. Skipping.'.format(page))
+ return
+
+ # Page exists, isn't a redirect, and is a plain link (no section)
+ if self.origin is None:
+ # the 1st existig page becomes the origin page, if none was
+ # supplied
+ self.origin = page
+
+ try:
+ iw = page.langlinks()
+ except UnknownSiteError:
+ self.conf.note('site {} does not exist.'.format(page.site))
+ return
+
+ (skip, alternativePage) = self.disambigMismatch(page, counter)
+ if skip:
+ pywikibot.output('NOTE: ignoring {} and its interwiki links'
+ .format(page))
+ self.done.remove(page)
+ iw = ()
+ if alternativePage:
+ # add the page that was entered by the user
+ self.addIfNew(alternativePage, counter, None)
+
+ duplicate = None
+ for p in self.done.filter(page.site):
+ if p != page and p.exists() \
+ and not p.isRedirectPage() and not p.isCategoryRedirect():
+ duplicate = p
+ break
+
+ if self.origin == page:
+ self.untranslated = not iw
+ if self.conf.untranslatedonly:
+ # Ignore the interwiki links.
+ iw = ()
+ if self.conf.lacklanguage:
+ if self.conf.lacklanguage in (link.site.lang for link in iw):
+ iw = ()
+ self.workonme = False
+ if len(iw) < self.conf.minlinks:
+ iw = ()
+ self.workonme = False
+
+ elif self.conf.autonomous and duplicate and not skip:
+ pywikibot.output('Stopping work on {} because duplicate pages'
+ ' {} and {} are found'
+ .format(self.originP, duplicate, page))
+ self.makeForcedStop(counter)
+ try:
+ with codecs.open(
+ pywikibot.config.datafilepath('autonomous_problems.dat'),
+ 'a', 'utf-8') as f:
+ f.write('* {} {{Found more than one link for {}}}'
+ .format(self.origin, page.site))
+ if config.interwiki_graph and config.interwiki_graph_url:
+ filename = interwiki_graph.getFilename(
+ self.origin,
+ extension=config.interwiki_graph_formats[0])
+ f.write(' [{}{} graph]'
+ .format(config.interwiki_graph_url, filename))
+ f.write('\n')
+ # FIXME: What errors are we catching here?
+ except Exception:
+ pywikibot.output(
+ 'File autonomous_problems.dat open or corrupted! '
+ 'Try again with -restore.')
+ sys.exit()
+ iw = ()
+
+ for link in iw:
+ linkedPage = pywikibot.Page(link)
+ if self.conf.hintsareright and linkedPage.site in self.hintedsites:
+ pywikibot.output(
+ 'NOTE: {}: {} extra interwiki on hinted site ignored {}'
+ .format(self.origin, page, linkedPage))
+ break
+
+ if not self.skipPage(page, linkedPage, counter):
+ if self.conf.followinterwiki or page == self.origin:
+ if self.addIfNew(linkedPage, counter, page):
+ # It is new. Also verify whether it is the second
+ # on the same site
+ lpsite = linkedPage.site
+ for prevPage in self.found_in:
+ if prevPage != linkedPage and \
+ prevPage.site == lpsite:
+ # Still, this could be "no problem" as
+ # either may be a redirect to the other.
+ # No way to find out quickly!
+ pywikibot.output(
+ 'NOTE: {}: {} gives duplicate '
+ 'interwiki on same site {}'
+ .format(self.origin, page, linkedPage))
+ break
+ else:
+ if config.interwiki_shownew:
+ pywikibot.output(
+ '{}: {} gives new interwiki {}'
+ .format(self.origin, page, linkedPage))
+ if self.forcedStop:
+ break
+
def batchLoaded(self, counter):
"""
Notify that the promised batch of pages was loaded.
@@ -1136,187 +1314,7 @@
# Now check whether any interwiki links should be added to the
# todo list.
-
- if not page.exists():
- self.conf.remove.append(str(page))
- self.conf.note('{} does not exist. Skipping.'.format(page))
- if page == self.origin:
- # The page we are working on is the page that does not
- # exist. No use in doing any work on it in that case.
- for site, count in self.todo.iter_values_len():
- counter.minus(site, count)
- self.todo.clear()
- # In some rare cases it might be we already did check some
- # 'automatic' links
- self.done.clear()
- continue
-
- if page.isRedirectPage() or page.isCategoryRedirect():
- if page.isRedirectPage():
- redirectTargetPage = page.getRedirectTarget()
- redir = ''
- else:
- redirectTargetPage = page.getCategoryRedirectTarget()
- redir = 'category '
- self.conf.note('{} is {}redirect to {}'
- .format(page, redir, redirectTargetPage))
- if self.origin is None or page == self.origin:
- # the 1st existig page becomes the origin page, if none was
- # supplied
- if self.conf.initialredirect:
- # don't follow another redirect; it might be a self
- # loop
- if not redirectTargetPage.isRedirectPage() \
- and not redirectTargetPage.isCategoryRedirect():
- self.origin = redirectTargetPage
- self.todo.append(redirectTargetPage)
- counter.plus(redirectTargetPage.site)
- else:
- # This is a redirect page to the origin. We don't need
- # to follow the redirection.
- # In this case we can also stop all hints!
- for site, count in self.todo.iter_values_len():
- counter.minus(site, count)
- self.todo.clear()
- elif not self.conf.followredirect:
- self.conf.note('not following {}redirects.'.format(redir))
- elif page.isStaticRedirect():
- self.conf.note('not following static {}redirects.'
- .format(redir))
- elif (page.site.family == redirectTargetPage.site.family
- and not self.skipPage(page, redirectTargetPage,
- counter)):
- if self.addIfNew(redirectTargetPage, counter, page):
- if config.interwiki_shownew:
- pywikibot.output('{}: {} gives new {}redirect {}'
- .format(self.origin,
- page, redir,
- redirectTargetPage))
- continue
-
- # must be behind the page.isRedirectPage() part
- # otherwise a redirect error would be raised
- if page_empty_check(page):
- self.conf.remove.append(str(page))
- self.conf.note('{} is empty. Skipping.'.format(page))
- if page == self.origin:
- for site, count in self.todo.iter_values_len():
- counter.minus(site, count)
- self.todo.clear()
- self.done.clear()
- self.origin = None
- continue
-
- if page.section():
- self.conf.note('{} is a page section. Skipping.'.format(page))
- continue
-
- # Page exists, isn't a redirect, and is a plain link (no section)
- if self.origin is None:
- # the 1st existig page becomes the origin page, if none was
- # supplied
- self.origin = page
-
- try:
- iw = page.langlinks()
- except UnknownSiteError:
- self.conf.note('site {} does not exist.'.format(page.site))
- continue
-
- (skip, alternativePage) = self.disambigMismatch(page, counter)
- if skip:
- pywikibot.output('NOTE: ignoring {} and its interwiki links'
- .format(page))
- self.done.remove(page)
- iw = ()
- if alternativePage:
- # add the page that was entered by the user
- self.addIfNew(alternativePage, counter, None)
-
- duplicate = None
- for p in self.done.filter(page.site):
- if p != page and p.exists() and \
- not p.isRedirectPage() and not p.isCategoryRedirect():
- duplicate = p
- break
-
- if self.origin == page:
- self.untranslated = not iw
- if self.conf.untranslatedonly:
- # Ignore the interwiki links.
- iw = ()
- if self.conf.lacklanguage:
- if self.conf.lacklanguage in (link.site.lang
- for link in iw):
- iw = ()
- self.workonme = False
- if len(iw) < self.conf.minlinks:
- iw = ()
- self.workonme = False
-
- elif self.conf.autonomous and duplicate and not skip:
- pywikibot.output('Stopping work on {} because duplicate pages'
- ' {} and {} are found'
- .format(self.originP, duplicate, page))
- self.makeForcedStop(counter)
- try:
- with codecs.open(
- pywikibot.config.datafilepath(
- 'autonomous_problems.dat'),
- 'a', 'utf-8') as f:
- f.write('* {} {{Found more than one link for {}}}'
- .format(self.origin, page.site))
- if config.interwiki_graph \
- and config.interwiki_graph_url:
- filename = interwiki_graph.getFilename(
- self.origin,
- extension=config.interwiki_graph_formats[0])
- f.write(' [{}{} graph]'
- .format(config.interwiki_graph_url,
- filename))
- f.write('\n')
- # FIXME: What errors are we catching here?
- except Exception:
- # raise
- pywikibot.output(
- 'File autonomous_problems.dat open or corrupted! '
- 'Try again with -restore.')
- sys.exit()
- iw = ()
-
- for link in iw:
- linkedPage = pywikibot.Page(link)
- if self.conf.hintsareright \
- and linkedPage.site in self.hintedsites:
- pywikibot.output(
- 'NOTE: {}: {} extra interwiki on hinted site '
- 'ignored {}'.format(self.origin, page, linkedPage))
- break
-
- if not self.skipPage(page, linkedPage, counter):
- if self.conf.followinterwiki or page == self.origin:
- if self.addIfNew(linkedPage, counter, page):
- # It is new. Also verify whether it is the second
- # on the same site
- lpsite = linkedPage.site
- for prevPage in self.found_in:
- if prevPage != linkedPage and \
- prevPage.site == lpsite:
- # Still, this could be "no problem" as
- # either may be a redirect to the other.
- # No way to find out quickly!
- pywikibot.output(
- 'NOTE: {}: {} gives duplicate '
- 'interwiki on same site {}'
- .format(self.origin, page, linkedPage))
- break
- else:
- if config.interwiki_shownew:
- pywikibot.output(
- '{}: {} gives new interwiki {}'
- .format(self.origin, page, linkedPage))
- if self.forcedStop:
- break
+ self.check_page(page, counter)
# These pages are no longer 'in progress'
self.pending.clear()
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/756153
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ic51f2571cbffd47930df5bb50e6f0b8dbecefab6
Gerrit-Change-Number: 756153
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged