jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/439446 )
Change subject: [bugfix] Ensure that BaseBot.treat is always processing a Page object ......................................................................
[bugfix] Ensure that BaseBot.treat is always processing a Page object
bot.py: - BaseBot.generator should always yield Page objects, but there are several cases where it yields items of other classes. Ensure that the BaseBot is always processing a Page object in skip_page and treat. - init_page is the right place to handle page initialization. Therefore init_page returns the page object back to the loop inside BaseBot.run. - For backward compatibility BaseBot.run uses the item yielded by BaseBot.generator as default if init_page returns None. The run method also ensures that page is a pywikibot.Page or raises an AssertionError. - Some docs added.
followlive.py: - init_page gets the tuple from generator, sets some instance variables and returns the page object for further processing with the bot's standard methods. - Derive the bot from CurrentPageBot and use treat_page to get a pretty printed header, also improve show_page_info printing.
pagefromfile.py: - init_page gets the tuple and sets the page content accordingly. The result is returned for further processing with standard methods.
redirect.py: - Re-enable the -fullscan option by passing the namespaces option only if namespaces are set in the main function. Otherwise use the default setting. - Use init_page to ensure that only Page objects are processed. - In "both" action mode the "code" content is passed as an instance property to the page object to select the treat method. - The site setting of RedirectRobot is done by the SingleSiteBot super class.
Bug: T196562 Bug: T196813 Change-Id: I8be29333a29650b10fdf443cd870533361ba32db --- M pywikibot/bot.py M scripts/followlive.py M scripts/pagefromfile.py M scripts/redirect.py 4 files changed, 87 insertions(+), 63 deletions(-)
Approvals: Merlijn van Deen: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/bot.py b/pywikibot/bot.py index 84f4073..6b7f809 100644 --- a/pywikibot/bot.py +++ b/pywikibot/bot.py @@ -1406,20 +1406,37 @@ pywikibot.output('by exception:\n') pywikibot.exception()
- def init_page(self, page): - """Initialize a page before treating. + def init_page(self, item): + """Initialize a generator item before treating. + + Ensure that the result of init_page is always a pywikibot.Page object + even when the generator returns something else.
Also used to set the arrange the current site. This is called before skip_page and treat. + + @param item: any item from self.generator + @return: return the page object to be processed further or None if + page is a pywikibot.Page already and superclass' init_page isn't + to be called + @rtype: pywikibot.Page or None """ - pass + return item
def skip_page(self, page): - """Return whether treat should be skipped for the page.""" + """Return whether treat should be skipped for the page. + + @param page: Page object to be processed + @type page: pywikibot.Page + """ return False
def treat(self, page): - """Process one page (Abstract method).""" + """Process one page (abstract method). + + @param page: Page object to be processed + @type page: pywikibot.Page + """ raise NotImplementedError('Method %s.treat() not implemented.' % self.__class__.__name__)
@@ -1437,7 +1454,10 @@ pass
def run(self): - """Process all pages in generator.""" + """Process all pages in generator. + + @raise AssertionError: "page" is not a pywikibot.Page object + """ self._start_ts = pywikibot.Timestamp.now() if not hasattr(self, 'generator'): raise NotImplementedError('Variable %s.generator not set.' @@ -1452,23 +1472,33 @@ sys.exc_clear() self.setup() try: - for page in self.generator: + for item in self.generator: # preprocessing of the page try: - self.init_page(page) + initialized_page = self.init_page(item) except SkipPageError as e: issue_deprecation_warning('Use of SkipPageError', 'BaseBot.skip_page() method', 2) pywikibot.warning('Skipped "{0}" due to: {1}'.format( - page, e.reason)) + item, e.reason)) if PY2: # Python 2 does not clear the exception and it may seem # that the generator stopped due to an exception sys.exc_clear() continue + else: + if initialized_page is None: + page = item + else: + page = initialized_page + + assert isinstance(page, pywikibot.Page), \ + '"page" is not a pywikibot.Page object but {}.'.format( + page.__class__)
if self.skip_page(page): continue + # Process the page self.treat(page)
@@ -1563,13 +1593,15 @@ % self.__class__.__name__) super(Bot, self).run()
- def init_page(self, page): + def init_page(self, item): """Update site before calling treat.""" # When in auto update mode, set the site when it changes, # so subclasses can hook onto changes to site. + page = super(Bot, self).init_page(item) if (self._auto_update_site and (not self._site or page.site != self.site)): self.site = page.site + return page
class SingleSiteBot(BaseBot): @@ -1616,10 +1648,12 @@ '"{1}"'.format(self._site, value)) self._site = value
- def init_page(self, page): + def init_page(self, item): """Set site if not defined.""" + page = super(SingleSiteBot, self).init_page(item) if not self._site: self.site = page.site + return page
def skip_page(self, page): """Skip page it's site is not on the defined site.""" @@ -1665,9 +1699,11 @@ super(MultipleSitesBot, self).run() self._site = None
- def init_page(self, page): + def init_page(self, item): """Define the site for this page.""" + page = super(MultipleSitesBot, self).init_page(item) self._site = page.site + return page
class CurrentPageBot(BaseBot): diff --git a/scripts/followlive.py b/scripts/followlive.py index 2e76b86..71d096b 100644 --- a/scripts/followlive.py +++ b/scripts/followlive.py @@ -26,7 +26,7 @@ import pywikibot
from pywikibot import i18n, pagegenerators, editor -from pywikibot.bot import SingleSiteBot, QuitKeyboardInterrupt +from pywikibot.bot import SingleSiteBot, CurrentPageBot, QuitKeyboardInterrupt
__metaclass__ = type
@@ -380,7 +380,7 @@ # TODO: merge 'done' with 'templates' above
-class CleaningBot(SingleSiteBot): +class CleaningBot(SingleSiteBot, CurrentPageBot):
"""Bot meant to facilitate customized cleaning of the page."""
@@ -401,9 +401,10 @@
def show_page_info(self): """Display informations about an article.""" - pywikibot.output(u'[[%s]] %s ' % (self.page.title(), self.date)) - pywikibot.output('Length: %i bytes' % self.length) - pywikibot.output(u'User : %s' % self.user) + pywikibot.output('Date: {info.date}\n' + 'Length: {info.length} bytes\n' + 'User: {info.user}' + .format(info=self))
def could_be_bad(self): """Check whether the page could be bad.""" @@ -507,24 +508,18 @@ self.page.put(self.content, summary=summary) pywikibot.output(u'with comment %s\n' % summary)
- def treat(self, args): + def treat_page(self): """Process one page.""" - page, date, length, logged_in, user, comment = args - self.page = page - self.date = date - self.length = length - self.loggedIn = logged_in - self.user = user self.show_page_info() if self.could_be_bad(): pywikibot.output('Integrity of page doubtful...') self.handle_bad_page() pywikibot.output('----- Current time: %s' % datetime.datetime.now())
- def init_page(self, args): - """Init the page tuple before processing.""" - page, date, length, logged_in, user, comment = args - super(CleaningBot, self).init_page(page) + def init_page(self, item): + """Init the page tuple before processing and return a page object.""" + self.page, self.date, self.length, self.loggedIn, self.user, _ = item + return super(CleaningBot, self).init_page(self.page)
def setup(self): """Setup bot before running.""" diff --git a/scripts/pagefromfile.py b/scripts/pagefromfile.py index cb2fb6b..31ad263 100755 --- a/scripts/pagefromfile.py +++ b/scripts/pagefromfile.py @@ -114,16 +114,12 @@ self.availableOptions.update( {'always': False if self.getOption('showdiff') else True})
- def init_page(self, page): - """Do not try to update site before calling treat.""" - pass - - def treat(self, page_tuple): - """Process page tuple, set page to current page and treat it.""" - title, content = page_tuple + def init_page(self, item): + """Get the tuple and return the page object to be processed.""" + title, content = item page = pywikibot.Page(self.site, title) page.text = content.strip() - super(PageFromFileRobot, self).treat(page) + return super(PageFromFileRobot, self).init_page(page)
def treat_page(self): """Upload page content.""" diff --git a/scripts/redirect.py b/scripts/redirect.py index f95bd6d1..c2bcfdd 100755 --- a/scripts/redirect.py +++ b/scripts/redirect.py @@ -108,7 +108,7 @@ availableOptions = { 'fullscan': False, 'moves': False, - 'namespaces': [0], + 'namespaces': {0}, 'offset': -1, 'page': None, 'start': None, @@ -226,6 +226,7 @@ for page in self.get_redirect_pages_via_api(): apiQ.append(str(page.pageid)) if len(apiQ) >= 500: + pywikibot.output('.', newline=False) yield apiQ apiQ = [] if apiQ: @@ -325,9 +326,8 @@ def retrieve_double_redirects(self): """Retrieve double redirects.""" if self.use_move_log: - gen = self.get_moved_pages_redirects() - for redir_page in gen: - yield redir_page.title() + for redir_page in self.get_moved_pages_redirects(): + yield redir_page elif self.use_api: count = 0 for (pagetitle, type, target, final) \ @@ -412,7 +412,6 @@ 'sdtemplate': None, }) super(RedirectRobot, self).__init__(**kwargs) - self.site = pywikibot.Site() self.repo = self.site.data_repository() self.is_repo = self.repo if self.repo == self.site else None self.exiting = False @@ -457,9 +456,15 @@ ''.format('"{0}" '.format(title) if title else '')) return None
- def init_page(self, page): - """Overwrite super class method.""" - pass + def init_page(self, item): + """Ensure that we process page objects.""" + if isinstance(item, basestring): + item = pywikibot.Page(self.site, item) + elif isinstance(item, tuple): + redir_name, code, target, final = item + item = pywikibot.Page(self.site, redir_name) + item._redirect_type = code + return super(RedirectRobot, self).init_page(item)
def delete_redirect(self, page, summary_key): """Delete the redirect page.""" @@ -483,12 +488,8 @@ except pywikibot.PageSaveRelatedError as e: pywikibot.error(e)
- def delete_1_broken_redirect(self, redir_name): + def delete_1_broken_redirect(self, redir_page): """Treat one broken redirect.""" - if isinstance(redir_name, basestring): - redir_page = pywikibot.Page(self.site, redir_name) - else: - redir_page = redir_name # Show the title of the page we're working on. # Highlight the title in purple. done = not self.getOption('delete') @@ -582,12 +583,8 @@ "Won't delete anything." if self.getOption('delete') else "Skipping."))
- def fix_1_double_redirect(self, redir_name): + def fix_1_double_redirect(self, redir): """Treat one double redirect.""" - if isinstance(redir_name, basestring): - redir = pywikibot.Page(self.site, redir_name) - else: - redir = redir_name # Show the title of the page we're working on. # Highlight the title in purple. pywikibot.output(color_format( @@ -724,13 +721,12 @@
def fix_double_or_delete_broken_redirect(self, page): """Treat one broken or double redirect.""" - redir_name, code, target, final = page - if code == 1: + if page._redirect_type == 1: return - elif code == 0: - self.delete_1_broken_redirect(redir_name) + if page._redirect_type == 0: + self.delete_1_broken_redirect(page) else: - self.fix_1_double_redirect(redir_name) + self.fix_1_double_redirect(page)
def treat(self, page): """Treat a single page.""" @@ -755,7 +751,7 @@ # what the bot should do (either resolve double redirs, or process broken # redirs) action = None - namespaces = [] + namespaces = set() source = set()
for arg in pywikibot.handle_args(args): @@ -796,8 +792,8 @@ # -namespace:all Process all namespaces. # Only works with the API read interface. pass - if ns not in namespaces: - namespaces.append(ns) + else: + namespaces.add(ns) elif option == 'offset': gen_options[option] = int(value) elif option in ('page', 'start', 'until'): @@ -809,7 +805,8 @@ else: pywikibot.output(u'Unknown argument: %s' % arg)
- gen_options['namespaces'] = namespaces + if namespaces: + gen_options['namespaces'] = namespaces
if len(source) > 1: problem = 'You can only use one of {0} options.'.format(
pywikibot-commits@lists.wikimedia.org