jenkins-bot merged this change.

View Change

Approvals: Merlijn van Deen: Looks good to me, approved jenkins-bot: Verified
[bugfix] Ensure that BaseBot.treat is always processing a Page object

bot.py:
- BaseBot.generator should always yield Page objects but there are several
cases where it yields items of other classes. Ensure that the BaseBot
is always processing a Page object in skip_page and treat.
- init_page is the right place to handle page initializing. Therefore
init_page returns the page object back to the loop inside BaseBot.run.
- For backward compatibility BaseBot.run uses the item yielded by
BaseBot.generator as default if init_page returns None. The run method
also ensures that page is a pywikibot.Page or raises an AssertionError.
- Some docs added.

followlive.py:
- init_page gets the tuple from generator, sets some instance variables
and returns the page object for further processing with the bot's
standard methods.
- Derive the bot from CurrentPageBot and use treat_page to get a pretty
printed header, also improve show_page_info printing.

pagefromfile.py:
- init_page gets the tuple and sets the page content accordingly.
The result is returned for further processing with standard methods.

redirect.py:
- Re-enable the -fullscan option by passing the namespaces option only if
namespaces are set in the main function. Otherwise use the default setting.
- Use init_page to ensure that Page objects are processed only.
- In "both" action mode the "code" content is passed as an instance property
to the page object to select the treat method.
- site setting of RedirectRobot is done by the SingleSiteBot super class.

Bug: T196562
Bug: T196813
Change-Id: I8be29333a29650b10fdf443cd870533361ba32db
---
M pywikibot/bot.py
M scripts/followlive.py
M scripts/pagefromfile.py
M scripts/redirect.py
4 files changed, 87 insertions(+), 63 deletions(-)

diff --git a/pywikibot/bot.py b/pywikibot/bot.py
index 84f4073..6b7f809 100644
--- a/pywikibot/bot.py
+++ b/pywikibot/bot.py
@@ -1406,20 +1406,37 @@
pywikibot.output('by exception:\n')
pywikibot.exception()

- def init_page(self, page):
- """Initialize a page before treating.
+ def init_page(self, item):
+ """Initialize a generator item before treating.
+
+ Ensure that the result of init_page is always a pywikibot.Page object
+ even when the generator returns something else.

Also used to set the arrange the current site. This is called before
skip_page and treat.
+
+ @param item: any item from self.generator
+ @return: return the page object to be processed further or None if
+ page is a pywikibot.Page already and superclass' init_page isn't
+ to be called
+ @rtype: pywikibot.Page or None
"""
- pass
+ return item

def skip_page(self, page):
- """Return whether treat should be skipped for the page."""
+ """Return whether treat should be skipped for the page.
+
+ @param page: Page object to be processed
+ @type page: pywikibot.Page
+ """
return False

def treat(self, page):
- """Process one page (Abstract method)."""
+ """Process one page (abstract method).
+
+ @param page: Page object to be processed
+ @type page: pywikibot.Page
+ """
raise NotImplementedError('Method %s.treat() not implemented.'
% self.__class__.__name__)

@@ -1437,7 +1454,10 @@
pass

def run(self):
- """Process all pages in generator."""
+ """Process all pages in generator.
+
+ @raise AssertionError: "page" is not a pywikibot.Page object
+ """
self._start_ts = pywikibot.Timestamp.now()
if not hasattr(self, 'generator'):
raise NotImplementedError('Variable %s.generator not set.'
@@ -1452,23 +1472,33 @@
sys.exc_clear()
self.setup()
try:
- for page in self.generator:
+ for item in self.generator:
# preprocessing of the page
try:
- self.init_page(page)
+ initialized_page = self.init_page(item)
except SkipPageError as e:
issue_deprecation_warning('Use of SkipPageError',
'BaseBot.skip_page() method', 2)
pywikibot.warning('Skipped "{0}" due to: {1}'.format(
- page, e.reason))
+ item, e.reason))
if PY2:
# Python 2 does not clear the exception and it may seem
# that the generator stopped due to an exception
sys.exc_clear()
continue
+ else:
+ if initialized_page is None:
+ page = item
+ else:
+ page = initialized_page
+
+ assert isinstance(page, pywikibot.Page), \
+ '"page" is not a pywikibot.Page object but {}.'.format(
+ page.__class__)

if self.skip_page(page):
continue
+
# Process the page
self.treat(page)

@@ -1563,13 +1593,15 @@
% self.__class__.__name__)
super(Bot, self).run()

- def init_page(self, page):
+ def init_page(self, item):
"""Update site before calling treat."""
# When in auto update mode, set the site when it changes,
# so subclasses can hook onto changes to site.
+ page = super(Bot, self).init_page(item)
if (self._auto_update_site and
(not self._site or page.site != self.site)):
self.site = page.site
+ return page


class SingleSiteBot(BaseBot):
@@ -1616,10 +1648,12 @@
'"{1}"'.format(self._site, value))
self._site = value

- def init_page(self, page):
+ def init_page(self, item):
"""Set site if not defined."""
+ page = super(SingleSiteBot, self).init_page(item)
if not self._site:
self.site = page.site
+ return page

def skip_page(self, page):
"""Skip page it's site is not on the defined site."""
@@ -1665,9 +1699,11 @@
super(MultipleSitesBot, self).run()
self._site = None

- def init_page(self, page):
+ def init_page(self, item):
"""Define the site for this page."""
+ page = super(MultipleSitesBot, self).init_page(item)
self._site = page.site
+ return page


class CurrentPageBot(BaseBot):
diff --git a/scripts/followlive.py b/scripts/followlive.py
index 2e76b86..71d096b 100644
--- a/scripts/followlive.py
+++ b/scripts/followlive.py
@@ -26,7 +26,7 @@
import pywikibot

from pywikibot import i18n, pagegenerators, editor
-from pywikibot.bot import SingleSiteBot, QuitKeyboardInterrupt
+from pywikibot.bot import SingleSiteBot, CurrentPageBot, QuitKeyboardInterrupt

__metaclass__ = type

@@ -380,7 +380,7 @@
# TODO: merge 'done' with 'templates' above


-class CleaningBot(SingleSiteBot):
+class CleaningBot(SingleSiteBot, CurrentPageBot):

"""Bot meant to facilitate customized cleaning of the page."""

@@ -401,9 +401,10 @@

def show_page_info(self):
"""Display informations about an article."""
- pywikibot.output(u'[[%s]] %s ' % (self.page.title(), self.date))
- pywikibot.output('Length: %i bytes' % self.length)
- pywikibot.output(u'User : %s' % self.user)
+ pywikibot.output('Date: {info.date}\n'
+ 'Length: {info.length} bytes\n'
+ 'User: {info.user}'
+ .format(info=self))

def could_be_bad(self):
"""Check whether the page could be bad."""
@@ -507,24 +508,18 @@
self.page.put(self.content, summary=summary)
pywikibot.output(u'with comment %s\n' % summary)

- def treat(self, args):
+ def treat_page(self):
"""Process one page."""
- page, date, length, logged_in, user, comment = args
- self.page = page
- self.date = date
- self.length = length
- self.loggedIn = logged_in
- self.user = user
self.show_page_info()
if self.could_be_bad():
pywikibot.output('Integrity of page doubtful...')
self.handle_bad_page()
pywikibot.output('----- Current time: %s' % datetime.datetime.now())

- def init_page(self, args):
- """Init the page tuple before processing."""
- page, date, length, logged_in, user, comment = args
- super(CleaningBot, self).init_page(page)
+ def init_page(self, item):
+ """Init the page tuple before processing and return a page object."""
+ self.page, self.date, self.length, self.loggedIn, self.user, _ = item
+ return super(CleaningBot, self).init_page(self.page)

def setup(self):
"""Setup bot before running."""
diff --git a/scripts/pagefromfile.py b/scripts/pagefromfile.py
index cb2fb6b..31ad263 100755
--- a/scripts/pagefromfile.py
+++ b/scripts/pagefromfile.py
@@ -114,16 +114,12 @@
self.availableOptions.update(
{'always': False if self.getOption('showdiff') else True})

- def init_page(self, page):
- """Do not try to update site before calling treat."""
- pass
-
- def treat(self, page_tuple):
- """Process page tuple, set page to current page and treat it."""
- title, content = page_tuple
+ def init_page(self, item):
+ """Get the tuple and return the page object to be processed."""
+ title, content = item
page = pywikibot.Page(self.site, title)
page.text = content.strip()
- super(PageFromFileRobot, self).treat(page)
+ return super(PageFromFileRobot, self).init_page(page)

def treat_page(self):
"""Upload page content."""
diff --git a/scripts/redirect.py b/scripts/redirect.py
index f95bd6d1..c2bcfdd 100755
--- a/scripts/redirect.py
+++ b/scripts/redirect.py
@@ -108,7 +108,7 @@
availableOptions = {
'fullscan': False,
'moves': False,
- 'namespaces': [0],
+ 'namespaces': {0},
'offset': -1,
'page': None,
'start': None,
@@ -226,6 +226,7 @@
for page in self.get_redirect_pages_via_api():
apiQ.append(str(page.pageid))
if len(apiQ) >= 500:
+ pywikibot.output('.', newline=False)
yield apiQ
apiQ = []
if apiQ:
@@ -325,9 +326,8 @@
def retrieve_double_redirects(self):
"""Retrieve double redirects."""
if self.use_move_log:
- gen = self.get_moved_pages_redirects()
- for redir_page in gen:
- yield redir_page.title()
+ for redir_page in self.get_moved_pages_redirects():
+ yield redir_page
elif self.use_api:
count = 0
for (pagetitle, type, target, final) \
@@ -412,7 +412,6 @@
'sdtemplate': None,
})
super(RedirectRobot, self).__init__(**kwargs)
- self.site = pywikibot.Site()
self.repo = self.site.data_repository()
self.is_repo = self.repo if self.repo == self.site else None
self.exiting = False
@@ -457,9 +456,15 @@
''.format('"{0}" '.format(title) if title else ''))
return None

- def init_page(self, page):
- """Overwrite super class method."""
- pass
+ def init_page(self, item):
+ """Ensure that we process page objects."""
+ if isinstance(item, basestring):
+ item = pywikibot.Page(self.site, item)
+ elif isinstance(item, tuple):
+ redir_name, code, target, final = item
+ item = pywikibot.Page(self.site, redir_name)
+ item._redirect_type = code
+ return super(RedirectRobot, self).init_page(item)

def delete_redirect(self, page, summary_key):
"""Delete the redirect page."""
@@ -483,12 +488,8 @@
except pywikibot.PageSaveRelatedError as e:
pywikibot.error(e)

- def delete_1_broken_redirect(self, redir_name):
+ def delete_1_broken_redirect(self, redir_page):
"""Treat one broken redirect."""
- if isinstance(redir_name, basestring):
- redir_page = pywikibot.Page(self.site, redir_name)
- else:
- redir_page = redir_name
# Show the title of the page we're working on.
# Highlight the title in purple.
done = not self.getOption('delete')
@@ -582,12 +583,8 @@
"Won't delete anything."
if self.getOption('delete') else "Skipping."))

- def fix_1_double_redirect(self, redir_name):
+ def fix_1_double_redirect(self, redir):
"""Treat one double redirect."""
- if isinstance(redir_name, basestring):
- redir = pywikibot.Page(self.site, redir_name)
- else:
- redir = redir_name
# Show the title of the page we're working on.
# Highlight the title in purple.
pywikibot.output(color_format(
@@ -724,13 +721,12 @@

def fix_double_or_delete_broken_redirect(self, page):
"""Treat one broken or double redirect."""
- redir_name, code, target, final = page
- if code == 1:
+ if page._redirect_type == 1:
return
- elif code == 0:
- self.delete_1_broken_redirect(redir_name)
+ if page._redirect_type == 0:
+ self.delete_1_broken_redirect(page)
else:
- self.fix_1_double_redirect(redir_name)
+ self.fix_1_double_redirect(page)

def treat(self, page):
"""Treat a single page."""
@@ -755,7 +751,7 @@
# what the bot should do (either resolve double redirs, or process broken
# redirs)
action = None
- namespaces = []
+ namespaces = set()
source = set()

for arg in pywikibot.handle_args(args):
@@ -796,8 +792,8 @@
# -namespace:all Process all namespaces.
# Only works with the API read interface.
pass
- if ns not in namespaces:
- namespaces.append(ns)
+ else:
+ namespaces.add(ns)
elif option == 'offset':
gen_options[option] = int(value)
elif option in ('page', 'start', 'until'):
@@ -809,7 +805,8 @@
else:
pywikibot.output(u'Unknown argument: %s' % arg)

- gen_options['namespaces'] = namespaces
+ if namespaces:
+ gen_options['namespaces'] = namespaces

if len(source) > 1:
problem = 'You can only use one of {0} options.'.format(

To view, visit change 439446. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I8be29333a29650b10fdf443cd870533361ba32db
Gerrit-Change-Number: 439446
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: John Vandenberg <jayvdb@gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw@arctus.nl>
Gerrit-Reviewer: Wesalius <jonas.dyba@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: Zoranzoki21 <zorandori4444@gmail.com>
Gerrit-Reviewer: jenkins-bot