jenkins-bot submitted this change.

View Change

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
[IMPR] Improvements for patrol.py

- Type checking of generator items in BaseBot can be changed by setting
treat_page_type on the bot. Therefore remove the run() method and
inherit it from BaseBot (see the sketch below)
- Derive PatrolBot from BaseBot to avoid the site check in
SingleSiteBot.skip_page()
- remove unused attribute repeat_start_ts
- remove the rc_item_counter and patrol_counter counters and use the
default BaseBot behavior
- rename load_whitelist() to setup(), which is called from the run()
method before the generator is processed
- remove the try statement in treat(). These exceptions cannot be raised
because there is no Page object. This also fixes the error that
title is not callable, introduced 6 years ago.
- remove LinkedPagesRule.title() which is never used
- update docs
- DummyPatrolBot is no longer necessary for the tests because the
whitelist is loaded by the run() method and not by the constructor
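
A minimal sketch of the BaseBot pattern adopted here (the bot class name
and the feed contents are made up for illustration; only treat_page_type,
setup() and treat() receiving plain dicts mirror the actual change):

    from pywikibot.bot import BaseBot

    class SketchPatrolBot(BaseBot):

        """Toy bot whose generator yields dicts instead of Page objects."""

        def setup(self):
            """Called once by run() before the generator is processed."""
            # this is where PatrolBot now loads its whitelist
            self.whitelist = {}

        def treat(self, page):
            """Handle one recent changes entry (a dict, not a Page)."""
            print('would patrol rcid {rcid} of {title}'.format_map(page))

    feed = iter([{'rcid': 1, 'title': 'Example', 'user': 'Alice', 'ns': 0}])
    bot = SketchPatrolBot(generator=feed)
    bot.treat_page_type = dict  # relax the item type check, as in main() below
    bot.run()  # run() calls setup() first and then treat() for each feed item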

Change-Id: I370b9c0108c7860e053242af97ac6779a1547a7c
---
M scripts/patrol.py
M tests/patrolbot_tests.py
2 files changed, 77 insertions(+), 110 deletions(-)
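
Note on the prefix handling in is_wikisource_author_page() below:
pywikibot.backports.removeprefix backports str.removeprefix from
Python 3.9, i.e. it strips the prefix only when present and otherwise
returns the string unchanged, so the caller has to compare the result
with the original title to find out whether the prefix matched
(the sample titles are made up for illustration):

    from pywikibot.backports import removeprefix

    title = 'Author:Jane Doe'
    name = removeprefix(title, 'Author:')              # 'Jane Doe'
    unchanged = removeprefix('Main Page', 'Author:')   # 'Main Page'
    prefix_matched = name != title                     # True only if removed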

diff --git a/scripts/patrol.py b/scripts/patrol.py
index 4843f8d..f33f956 100755
--- a/scripts/patrol.py
+++ b/scripts/patrol.py
@@ -5,7 +5,8 @@
This bot obtains a list of recent changes and newpages and marks the
edits as patrolled based on a whitelist.

-WHITELIST FORMAT
+Whitelist Format
+~~~~~~~~~~~~~~~~

The whitelist is formatted as a number of list entries. Any links outside of
lists are ignored and can be used for documentation. In a list the first link
@@ -41,7 +42,7 @@

"""
#
-# (C) Pywikibot team, 2011-2020
+# (C) Pywikibot team, 2011-2021
#
# Distributed under the terms of the MIT license.
#
@@ -50,16 +51,17 @@
from collections import defaultdict
from contextlib import suppress

-try:
- import mwparserfromhell
-except ImportError as e:
- mwparserfromhell = e
-
import pywikibot

from pywikibot import pagegenerators

-from pywikibot.bot import SingleSiteBot, QuitKeyboardInterrupt, suggest_help
+from pywikibot.backports import removeprefix
+from pywikibot.bot import BaseBot, suggest_help
+
+try:
+ import mwparserfromhell
+except ImportError as e:
+ mwparserfromhell = e

_logger = 'patrol'

@@ -70,7 +72,7 @@
pywikibot.output(string)


-class PatrolBot(SingleSiteBot):
+class PatrolBot(BaseBot):

"""Bot marks the edits as patrolled based on info obtained by whitelist."""

@@ -79,7 +81,7 @@
'en': 'patrol_whitelist',
}

- def __init__(self, site=True, **kwargs):
+ def __init__(self, site=None, **kwargs):
"""
Initializer.

@@ -94,7 +96,8 @@
'versionchecktime': 300,
'autopatroluserns': False
})
- super().__init__(site, **kwargs)
+ super().__init__(**kwargs)
+ self.site = site or pywikibot.Site()
self.recent_gen = True
self.user = None
if self.opt.whitelist:
@@ -111,10 +114,11 @@

self.highest_rcid = 0 # used to track loops
self.last_rcid = 0
- self.repeat_start_ts = 0

- self.rc_item_counter = 0 # counts how many items have been reviewed
- self.patrol_counter = 0 # and how many times an action was taken
+ self._load_prefix_index_aliases()
+
+ def _load_prefix_index_aliases(self):
+ """Load _prefixindex_aliases."""
for entry in self.site.siteinfo['specialpagealiases']:
if entry['realname'] == 'Prefixindex':
self._prefixindex_aliases = {alias.lower()
@@ -123,10 +127,8 @@
else:
raise RuntimeError('No alias for "prefixindex"')

- def load_whitelist(self):
+ def setup(self):
"""Load most recent watchlist_page for further processing."""
- if isinstance(mwparserfromhell, ImportError):
- raise mwparserfromhell
# Check for a more recent version after versionchecktime in sec.
if (self.whitelist_load_ts and (time.time() - self.whitelist_load_ts
< self.opt.versionchecktime)):
@@ -260,7 +262,7 @@
return dict(whitelist)

def is_wikisource_author_page(self, title):
- """Initialise author_ns if site family is 'wikisource' else pass."""
+ """Patrol a single item."""
if self.site.family.name != 'wikisource':
return False

@@ -269,107 +271,81 @@
author_ns = self.site.family.authornamespaces[self.site.lang][0]

author_ns_prefix = self.site.namespace(author_ns) + ':'
- if title.startswith(author_ns_prefix):
- author_page_name = title[len(author_ns_prefix):]
+ author_page_name = removeprefix(title, author_ns_prefix)
+ if title != author_page_name:
verbose_output('Found author ' + author_page_name)
return True
return False

- def run(self, feed):
- """Process 'whitelist' page absent in generator."""
- if self.whitelist is None:
- self.load_whitelist()
- try:
- for page in feed:
- self.treat(page)
- except QuitKeyboardInterrupt:
- pywikibot.output('\nUser quit {} bot run.'
- .format(self.__class__.__name__))
-
def treat(self, page):
"""It loads the given page, does some changes, and saves it."""
choice = False
- try:
- # page: title, date, username, comment, loginfo, rcid, token
- username = page['user']
- # when the feed isn't from the API, it used to contain
- # '(not yet written)' or '(page does not exist)' when it was
- # a redlink
- rcid = page['rcid']
- title = page['title']
- if not rcid:
- raise Exception('rcid not present')

- # check whether we have wrapped around to higher rcids
- # which indicates a new RC feed is being processed
- if rcid > self.last_rcid:
- # refresh the whitelist
- self.load_whitelist()
- self.repeat_start_ts = time.time()
+ # page: title, date, username, comment, loginfo, rcid, token
+ username = page['user']
+ # when the feed isn't from the API, it used to contain
+ # '(not yet written)' or '(page does not exist)' when it was
+ # a redlink
+ rcid = page['rcid']
+ title = page['title']
+ if not rcid:
+ raise Exception('rcid not present')

- if pywikibot.config.verbose_output or self.opt.ask:
- pywikibot.output('User {0} has created or modified page {1}'
- .format(username, title))
+ # check whether we have wrapped around to higher rcids
+ # which indicates a new RC feed is being processed
+ if rcid > self.last_rcid:
+ # refresh the whitelist
+ self.setup()

- if (self.opt.autopatroluserns
- and page['ns'] in (2, 3)):
- # simple rule to whitelist any user editing their own userspace
- if title.partition(':')[2].split('/')[0].startswith(username):
- verbose_output('{0} is whitelisted to modify {1}'
- .format(username, title))
- choice = True
+ if pywikibot.config.verbose_output or self.opt.ask:
+ pywikibot.output('User {0} has created or modified page {1}'
+ .format(username, title))

- if not choice and username in self.whitelist:
- if self.in_list(self.whitelist[username], title):
- verbose_output('{0} is whitelisted to modify {1}'
- .format(username, title))
- choice = True
+ if (self.opt.autopatroluserns
+ and page['ns'] in (2, 3)):
+ # simple rule to whitelist any user editing their own userspace
+ if title.partition(':')[2].split('/')[0].startswith(username):
+ verbose_output('{0} is whitelisted to modify {1}'
+ .format(username, title))
+ choice = True

- if self.opt.ask:
- choice = pywikibot.input_yn(
- 'Do you want to mark page as patrolled?')
+ if not choice and username in self.whitelist:
+ if self.in_list(self.whitelist[username], title):
+ verbose_output('{0} is whitelisted to modify {1}'
+ .format(username, title))
+ choice = True

- # Patrol the page
- if choice:
- # list() iterates over patrol() which returns a generator
- list(self.site.patrol(rcid))
- self.patrol_counter = self.patrol_counter + 1
- pywikibot.output('Patrolled {0} (rcid {1}) by user {2}'
- .format(title, rcid, username))
- else:
- verbose_output('Skipped')
+ if self.opt.ask:
+ choice = pywikibot.input_yn(
+ 'Do you want to mark page as patrolled?')

- if rcid > self.highest_rcid:
- self.highest_rcid = rcid
- self.last_rcid = rcid
- self.rc_item_counter = self.rc_item_counter + 1
+ # Patrol the page
+ if choice:
+ # list() iterates over patrol() which returns a generator
+ list(self.site.patrol(rcid))
+ pywikibot.output('Patrolled {0} (rcid {1}) by user {2}'
+ .format(title, rcid, username))
+ else:
+ verbose_output('Skipped')

- except pywikibot.NoPage:
- pywikibot.output('Page {0} does not exist; skipping.'
- .format(title(as_link=True)))
- except pywikibot.IsRedirectPage:
- pywikibot.output('Page {0} is a redirect; skipping.'
- .format(title(as_link=True)))
+ if rcid > self.highest_rcid:
+ self.highest_rcid = rcid
+ self.last_rcid = rcid


class LinkedPagesRule:

"""Matches of page site title and linked pages title."""

- def __init__(self, page_title):
+ def __init__(self, page_title: str):
"""Initializer.

@param page_title: The page title for this rule
- @type page_title: pywikibot.Page
"""
self.site = pywikibot.Site()
self.page_title = page_title
self.linkedpages = None

- def title(self):
- """Obtain page title."""
- return self.page_title
-
def match(self, page_title):
"""Match page_title to linkedpages elements."""
if page_title == self.page_title:
@@ -392,7 +368,8 @@
verbose_output("Checking against '{0}'".format(p))
if page_title.startswith(p):
verbose_output('Matched.')
- return p
+ return True
+ return False


def api_feed_repeater(gen, delay=0, repeat=False, namespaces=None,
@@ -471,8 +448,6 @@
else:
recentchanges = True

- bot = PatrolBot(**options)
-
if isinstance(mwparserfromhell, ImportError):
suggest_help(missing_dependencies=('mwparserfromhell',))
return
@@ -484,7 +459,9 @@
user=usercontribs,
namespaces=gen_factory.namespaces,
recent_new_gen=False)
- bot.run(feed)
+ bot = PatrolBot(site=site, generator=feed, **options)
+ bot.treat_page_type = dict
+ bot.run()

if recentchanges or usercontribs:
pywikibot.output('Recentchanges:')
@@ -492,10 +469,9 @@
feed = api_feed_repeater(gen, delay=60, repeat=repeat,
namespaces=gen_factory.namespaces,
user=usercontribs)
- bot.run(feed)
-
- pywikibot.output('{0}/{1} patrolled'
- .format(bot.patrol_counter, bot.rc_item_counter))
+ bot = PatrolBot(site=site, generator=feed, **options)
+ bot.treat_page_type = dict
+ bot.run()


if __name__ == '__main__':
diff --git a/tests/patrolbot_tests.py b/tests/patrolbot_tests.py
index 081af28..864a3fb 100644
--- a/tests/patrolbot_tests.py
+++ b/tests/patrolbot_tests.py
@@ -1,12 +1,12 @@
"""Tests for the patrol script."""
#
-# (C) Pywikibot team, 2015-2020
+# (C) Pywikibot team, 2015-2021
#
# Distributed under the terms of the MIT license.
#
from contextlib import suppress

-from scripts import patrol
+from scripts.patrol import PatrolBot

from tests.aspects import require_modules, unittest, DefaultDrySiteTestCase

@@ -27,15 +27,6 @@
"""


-class DummyPatrolBot(patrol.PatrolBot):
-
- """Dummy Patrol Bot for Tests."""
-
- def load_whitelist(self):
- """Do not try to load the whitelist."""
- pass
-
-
@require_modules('mwparserfromhell')
class TestPatrolBot(DefaultDrySiteTestCase):

@@ -44,7 +35,7 @@
def setUp(self):
"""Create a bot dummy instance."""
super().setUp()
- self.bot = DummyPatrolBot(self.site)
+ self.bot = PatrolBot(self.site)

def test_parse_page_tuples(self):
"""Test parsing the page tuples from a dummy text."""

To view, visit change 676969. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I370b9c0108c7860e053242af97ac6779a1547a7c
Gerrit-Change-Number: 676969
Gerrit-PatchSet: 6
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Zoranzoki21 <zorandori4444@gmail.com>
Gerrit-MessageType: merged