jenkins-bot submitted this change.

View Change

Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
[IMPR] Improvements for interwiki.py

- simplify InterwikiBotConfig arg parsing for two items
- [bugfix] exceptions must derive from BaseException
- shorten comparing len
- decrease nested flow statements
- use defaultdict in interwiki_graph.Subject

Change-Id: Ie559eb9063b52acf4d531819a4474c908b7d032e
---
M scripts/interwiki.py
1 file changed, 162 insertions(+), 156 deletions(-)

diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index 8cac302..0096eab 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -516,12 +516,10 @@
self.parenthesesonly = True
elif arg == 'localright':
self.followinterwiki = False
- elif arg == 'array':
- if value.isdigit():
- self.minsubjects = int(value)
- elif arg == 'query':
- if value.isdigit():
- self.maxquerysize = int(value)
+ elif arg == 'array' and value.isdigit():
+ self.minsubjects = int(value)
+ elif arg == 'query' and value.isdigit():
+ self.maxquerysize = int(value)
elif arg == 'back':
self.nobackonly = True
elif arg == 'async':
@@ -811,8 +809,9 @@
# Bug-check: Isn't there any work still in progress? We can't work on
# different sites at a time!
if self.pending:
- raise "BUG: Can't start to work on {}; still working on {}".format(
- site, self.pending)
+ raise RuntimeError(
+ "BUG: Can't start to work on {}; still working on {}"
+ .format(site, self.pending))
# Prepare a list of suitable pages
result = []
for page in self.todo.filter(site):
@@ -1262,7 +1261,7 @@
break

if self.origin == page:
- self.untranslated = (len(iw) == 0)
+ self.untranslated = not iw
if self.conf.untranslatedonly:
# Ignore the interwiki links.
iw = ()
@@ -1307,13 +1306,12 @@

for link in iw:
linkedPage = pywikibot.Page(link)
- if self.conf.hintsareright:
- if linkedPage.site in self.hintedsites:
- pywikibot.output(
- 'NOTE: {}: {} extra interwiki on hinted site '
- 'ignored {}'
- .format(self.origin, page, linkedPage))
- break
+ if self.conf.hintsareright \
+ and linkedPage.site in self.hintedsites:
+ pywikibot.output(
+ 'NOTE: {}: {} extra interwiki on hinted site '
+ 'ignored {}'.format(self.origin, page, linkedPage))
+ break

if not self.skipPage(page, linkedPage, counter):
if self.conf.followinterwiki or page == self.origin:
@@ -1372,7 +1370,7 @@
errorCount = self.problemfound
# Build up a dictionary of all pages found, with the site as key.
# Each value will be a list of pages.
- new = {}
+ new = defaultdict(list)
for page in self.done:
if page.exists() and not page.isRedirectPage() \
and not page.isCategoryRedirect():
@@ -1382,16 +1380,13 @@

# inhibit the forwarding families pages to be updated.
continue
- if site == self.origin.site:
- if page != self.origin:
- self.problem('Found link to ' + page)
- self.whereReport(page)
- errorCount += 1
- else:
- if site in new:
- new[site].append(page)
- else:
- new[site] = [page]
+
+ if site != self.origin.site:
+ new[site].append(page)
+ elif page != self.origin:
+ self.problem('Found link to ' + page)
+ self.whereReport(page)
+ errorCount += 1

# See if new{} contains any problematic values
result = {}
@@ -1519,10 +1514,10 @@
# processing
# TODO: should be move to assemble()
# replaceLinks will skip the site it's working on.
- if self.origin.site not in new:
# TODO: make this possible as well.
- if not self.origin.site.family.interwiki_forward:
- new[self.origin.site] = self.origin
+ if self.origin.site not in new \
+ and not self.origin.site.family.interwiki_forward:
+ new[self.origin.site] = self.origin

updatedSites = []
notUpdatedSites = []
@@ -1607,21 +1602,23 @@

def replaceLinks(self, page, newPages):
"""Return True if saving was successful."""
- if self.conf.localonly:
- # In this case only continue on the Page we started with
- if page != self.origin:
- raise SaveError('-localonly and page != origin')
+ # In this case only continue on the Page we started with
+ if self.conf.localonly and page != self.origin:
+ raise SaveError('-localonly and page != origin')
+
if page.section():
# This is not a page, but a subpage. Do not edit it.
pywikibot.output('Not editing {}: not doing interwiki on subpages'
.format(page))
raise SaveError('Link has a #section')
+
try:
pagetext = page.get()
except pywikibot.NoPage:
pywikibot.output('Not editing {}: page does not exist'
.format(page))
raise SaveError("Page doesn't exist")
+
if page_empty_check(page):
pywikibot.output('Not editing {}: page is empty'.format(page))
raise SaveError('Page is empty.')
@@ -1667,9 +1664,9 @@
# Do not add interwiki links to foreign families that page.site() does
# not forward to
for stmp in new.keys():
- if stmp.family != page.site.family:
- if stmp.family.name != page.site.family.interwiki_forward:
- del new[stmp]
+ if stmp.family != page.site.family \
+ and stmp.family.name != page.site.family.interwiki_forward:
+ del new[stmp]

# Put interwiki links into a map
old = {}
@@ -1751,87 +1748,86 @@
ask = False
if self.conf.confirm and not self.conf.always:
ask = True
- # If we need to ask, do so
- if ask:
- if self.conf.autonomous:
- # If we cannot ask, deny permission
- answer = 'n'
- else:
- answer = pywikibot.input_choice('Submit?',
- [('Yes', 'y'), ('No', 'n'),
- ('open in Browser', 'b'),
- ('Give up', 'g'),
- ('Always', 'a')],
- automatic_quit=False)
- if answer == 'b':
- pywikibot.bot.open_webbrowser(page)
- return True
- if answer == 'a':
- # don't ask for the rest of this subject
- self.conf.always = True
- answer = 'y'
- else:
+
+ if not ask:
# If we do not need to ask, allow
answer = 'y'
-
- # If we got permission to submit, do so
- if answer == 'y':
- self.conf.note('Updating live wiki...')
- timeout = 60
- page.text = newtext
- while True:
- try:
- page.save(summary=mcomment,
- asynchronous=self.conf.asynchronous,
- nocreate=True)
- except pywikibot.NoCreateError:
- pywikibot.exception()
- return False
- except pywikibot.LockedPage:
- pywikibot.output('Page {} is locked. Skipping.'
- .format(page))
- raise SaveError('Locked')
- except pywikibot.EditConflict:
- pywikibot.output(
- 'ERROR putting page: An edit conflict occurred. '
- 'Giving up.')
- raise SaveError('Edit conflict')
- except pywikibot.SpamblacklistError as error:
- pywikibot.output(
- 'ERROR putting page: {0} blacklisted by spamfilter. '
- 'Giving up.'.format(error.url))
- raise SaveError('Spam filter')
- except pywikibot.PageSaveRelatedError as error:
- pywikibot.output('ERROR putting page: {}'
- .format(error.args,))
- raise SaveError('PageSaveRelatedError')
- except (socket.error, IOError) as error:
- if timeout > 3600:
- raise
- pywikibot.output('ERROR putting page: {}'
- .format(error.args,))
- pywikibot.output('Sleeping {} seconds before trying again.'
- .format(timeout,))
- timeout *= 2
- pywikibot.sleep(timeout)
- except pywikibot.ServerError:
- if timeout > 3600:
- raise
- pywikibot.output('ERROR putting page: ServerError.')
- pywikibot.output('Sleeping {} seconds before trying again.'
- .format(timeout,))
- timeout *= 2
- pywikibot.sleep(timeout)
- else:
- break
- return True
+ elif self.conf.autonomous:
+ # If we cannot ask, deny permission
+ answer = 'n'
+ else: # If we need to ask, do so
+ answer = pywikibot.input_choice('Submit?',
+ [('Yes', 'y'), ('No', 'n'),
+ ('open in Browser', 'b'),
+ ('Give up', 'g'),
+ ('Always', 'a')],
+ automatic_quit=False)
+ if answer == 'b':
+ pywikibot.bot.open_webbrowser(page)
+ return True
+ if answer == 'a':
+ # don't ask for the rest of this subject
+ self.conf.always = True
+ answer = 'y'

if answer == 'g':
raise GiveUpOnPage('User asked us to give up')

- raise LinkMustBeRemoved('Found incorrect link to {} in {}'
- .format(', '.join(x.code for x in removing),
- page))
+ # If we got permission to submit, do so
+ if answer != 'y':
+ raise LinkMustBeRemoved(
+ 'Found incorrect link to {} in {}'
+ .format(', '.join(x.code for x in removing), page))
+
+ self.conf.note('Updating live wiki...')
+ timeout = 60
+ page.text = newtext
+ while True:
+ try:
+ page.save(summary=mcomment,
+ asynchronous=self.conf.asynchronous,
+ nocreate=True)
+ except pywikibot.NoCreateError:
+ pywikibot.exception()
+ return False
+ except pywikibot.LockedPage:
+ pywikibot.output('Page {} is locked. Skipping.'
+ .format(page))
+ raise SaveError('Locked')
+ except pywikibot.EditConflict:
+ pywikibot.output(
+ 'ERROR putting page: An edit conflict occurred. '
+ 'Giving up.')
+ raise SaveError('Edit conflict')
+ except pywikibot.SpamblacklistError as error:
+ pywikibot.output(
+ 'ERROR putting page: {0} blacklisted by spamfilter. '
+ 'Giving up.'.format(error.url))
+ raise SaveError('Spam filter')
+ except pywikibot.PageSaveRelatedError as error:
+ pywikibot.output('ERROR putting page: {}'
+ .format(error.args,))
+ raise SaveError('PageSaveRelatedError')
+ except (socket.error, IOError) as error:
+ if timeout > 3600:
+ raise
+ pywikibot.output('ERROR putting page: {}'
+ .format(error.args,))
+ pywikibot.output('Sleeping {} seconds before trying again.'
+ .format(timeout,))
+ timeout *= 2
+ pywikibot.sleep(timeout)
+ except pywikibot.ServerError:
+ if timeout > 3600:
+ raise
+ pywikibot.output('ERROR putting page: ServerError.')
+ pywikibot.output('Sleeping {} seconds before trying again.'
+ .format(timeout,))
+ timeout *= 2
+ pywikibot.sleep(timeout)
+ else:
+ break
+ return True

def reportBacklinks(self, new, updatedSites):
"""
@@ -1847,45 +1843,49 @@
try:
for site in expectedSites - set(updatedSites):
page = new[site]
- if not page.section():
+ if page.section():
+ continue
+
+ try:
+ linkedPages = {pywikibot.Page(link)
+ for link in page.iterlanglinks()}
+ except pywikibot.NoPage:
+ pywikibot.warning(
+ 'Page {} does no longer exist?!'.format(page))
+ break
+
+ # To speed things up, create a dictionary which maps sites
+ # to pages. This assumes that there is only one interwiki
+ # link per language.
+ linkedPagesDict = {}
+ for linkedPage in linkedPages:
+ linkedPagesDict[linkedPage.site] = linkedPage
+ for expectedPage in expectedPages - linkedPages:
+ if expectedPage == page:
+ continue
try:
- linkedPages = {pywikibot.Page(link)
- for link in page.iterlanglinks()}
- except pywikibot.NoPage:
+ linkedPage = linkedPagesDict[expectedPage.site]
pywikibot.warning(
- 'Page {} does no longer exist?!'.format(page))
- break
- # To speed things up, create a dictionary which maps sites
- # to pages. This assumes that there is only one interwiki
- # link per language.
- linkedPagesDict = {}
- for linkedPage in linkedPages:
- linkedPagesDict[linkedPage.site] = linkedPage
- for expectedPage in expectedPages - linkedPages:
- if expectedPage != page:
- try:
- linkedPage = linkedPagesDict[expectedPage.site]
- pywikibot.warning(
- '{}: {} does not link to {} but to {}'
- .format(page.site.family.name,
- page, expectedPage, linkedPage))
- except KeyError:
- if not expectedPage.site.is_data_repository():
- pywikibot.warning(
- '{}: {} does not link to {}'
- .format(page.site.family.name,
- page, expectedPage))
- # Check for superfluous links
- for linkedPage in linkedPages:
- if linkedPage not in expectedPages:
- # Check whether there is an alternative page on
- # that language.
- # In this case, it was already reported above.
- if linkedPage.site not in expectedSites:
- pywikibot.warning(
- '{}: {} links to incorrect {}'
- .format(page.site.family.name,
- page, linkedPage))
+ '{}: {} does not link to {} but to {}'
+ .format(page.site.family.name,
+ page, expectedPage, linkedPage))
+ except KeyError:
+ if not expectedPage.site.is_data_repository():
+ pywikibot.warning('{}: {} does not link to {}'
+ .format(page.site.family.name,
+ page, expectedPage))
+ # Check for superfluous links
+ for linkedPage in linkedPages:
+ if linkedPage in expectedPages:
+ continue
+ # Check whether there is an alternative page on
+ # that language.
+ # In this case, it was already reported above.
+ if linkedPage.site not in expectedSites:
+ pywikibot.warning('{}: {} links to incorrect {}'
+ .format(page.site.family.name,
+ page, linkedPage))
+
except (socket.error, IOError):
pywikibot.output('ERROR: could not report backlinks')

@@ -2006,6 +2006,7 @@
"""Return the first subject that is still being worked on."""
if self.subjects:
return self.subjects[0]
+ return None

def maxOpenSite(self):
"""
@@ -2014,7 +2015,7 @@
If there is nothing left, return None.
Only languages that are TODO for the first Subject are returned.
"""
- max = 0
+ max_ = 0
maxlang = None
if not self.firstSubject():
return None
@@ -2024,12 +2025,15 @@
# because we have to wait before submitting another modification to
# go live. Select any language from counts.
oc = self.counts
- if pywikibot.Site() in oc:
- return pywikibot.Site()
+
+ default_site = pywikibot.Site()
+ if default_site in oc:
+ return default_site
+
for lang in oc:
count = self.counts[lang]
- if count > max:
- max = count
+ if count > max_:
+ max_ = count
maxlang = lang
return maxlang

@@ -2091,9 +2095,11 @@
if len(pageGroup) >= self.conf.maxquerysize:
# We have found enough pages to fill the bandwidth.
break
- if len(pageGroup) == 0:
+
+ if not pageGroup:
pywikibot.output('NOTE: Nothing left to do 2')
return False
+
# Get the content of the assembled list in one blow
gen = site.preloadpages(pageGroup, templates=True, langlinks=True,
pageprops=True)
@@ -2117,7 +2123,7 @@

def isDone(self):
"""Check whether there is still more work to do."""
- return len(self) == 0 and self.pageGenerator is None
+ return self and self.pageGenerator is None

def plus(self, site, count=1):
"""Helper routine that the Subject class expects in a counter."""

To view, visit change 675844. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ie559eb9063b52acf4d531819a4474c908b7d032e
Gerrit-Change-Number: 675844
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged