jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] unlink: Use replace_links
[FEAT] unlink: Use replace_links
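This changes the unlink script to remove links through textlib's replace_links instead of its own regex-based search loop. InteractiveReplace is modified to keep the information about the link currently being handled (link, text, groups and range) available while the user is asked, and adds a way to ask the user again about the same link via the new UnhandledAnswer exception. It also adds an option ('more context', controlled by the context_delta attribute) to increase the amount of context shown around the link.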
Change-Id: I7056c773761a5d1673d6e1d0ad5252861ff0d02f
---
M pywikibot/bot.py
M scripts/unlink.py
2 files changed, 169 insertions(+), 145 deletions(-)
Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/bot.py b/pywikibot/bot.py index 9f0e686..2410708 100644 --- a/pywikibot/bot.py +++ b/pywikibot/bot.py @@ -77,6 +77,15 @@ self.page = page
+class UnhandledAnswer(Exception): + + """The given answer didn't suffice.""" + + def __init__(self, stop=False): + """Constructor.""" + self.stop = stop + + # Logging module configuration class RotatingFileHandler(logging.handlers.RotatingFileHandler):
@@ -719,6 +728,10 @@ """Handle this choice. Must be implemented.""" raise NotImplementedError()
+ def handle_link(self): + """The current link will be handled by this choice.""" + return False +
class StaticChoice(Choice):
@@ -756,6 +769,30 @@ **kwargs)
+class AlwaysChoice(Choice): + + """Add an option to always apply the default.""" + + def __init__(self, replacer, option='always', shortcut='a'): + """Constructor.""" + super(AlwaysChoice, self).__init__(option, shortcut, replacer) + self.always = False + + def handle(self): + """Handle the custom shortcut.""" + self.always = True + return self.answer + + def handle_link(self): + """Directly return answer whether it's applying it always.""" + return self.always + + @property + def answer(self): + """Get the actual default answer instructing the replacement.""" + return self.replacer.handle_answer(self.replacer._default) + + class InteractiveReplace(object):
""" @@ -770,9 +807,11 @@ * allow_replace_all = False (replace target, section and label) (The boolean values are the default values)
- It has also a 'context' attribute which must be a non-negative integer. If + It has also a C{context} attribute which must be a non-negative integer. If it is greater 0 it shows that many characters before and after the link in - question. + question. The C{context_delta} attribute can be defined too and adds an + option to increase C{context} by the given amount each time the option is + selected.
Additional choices can be defined using the 'additional_choices' and will be amended to the choices defined by this class. This list is mutable and the @@ -788,8 +827,9 @@ @type old_link: Link or Page @param new_link: The new link with which it should be replaced. Depending on the replacement mode it'll use this link's label and - section. - @type new_link: Link or Page + section. If False it'll unlink all and the attributes beginning with + allow_replace are ignored. + @type new_link: Link or Page or False @param default: The default answer as the shortcut @type default: None or str @param automatic_quit: Add an option to quit and raise a @@ -808,6 +848,7 @@ self._quit = automatic_quit self._current_match = None self.context = 30 + self.context_delta = 0 self.allow_skip_link = True self.allow_unlink = True self.allow_replace = True @@ -818,15 +859,18 @@ self._own_choices = [ ('skip_link', StaticChoice('Do not change', 'n', None)), ('unlink', StaticChoice('Unlink', 'u', False)), - ('replace', StaticChoice('Change link target', 't', - self._new.canonical_title())), - ('replace_section', LinkChoice('Change link target and section', - 's', self, True)), - ('replace_label', LinkChoice('Change link target and label', - 'l', self, False)), - ('replace_all', StaticChoice('Change complete link', 'c', - self._new)), ] + if self._new: + self._own_choices += [ + ('replace', StaticChoice('Change link target', 't', + self._new.canonical_title())), + ('replace_section', LinkChoice('Change link target and section', + 's', self, True)), + ('replace_label', LinkChoice('Change link target and label', + 'l', self, False)), + ('replace_all', StaticChoice('Change complete link', 'c', + self._new)), + ]
self.additional_choices = []
@@ -838,42 +882,67 @@ else: raise ValueError('Invalid choice "{0}"'.format(choice))
+ def __call__(self, link, text, groups, rng): + """Ask user how the selected link should be replaced.""" + if self._old == link: + self._current_match = (link, text, groups, rng) + while True: + try: + answer = self.handle_link() + except UnhandledAnswer as e: + if e.stop: + raise + else: + break + self._current_match = None # don't reset in case of an exception + return answer + else: + return None + @property def choices(self): """Return the tuple of choices.""" choices = [] for name, choice in self._own_choices: if getattr(self, 'allow_' + name): - choices += [choice] + choices += [self._own_choices[name]] + if self.context_delta > 0: + choices += [HighlightContextOption( + 'more context', 'm', self.current_text, self.context, + self.context_delta, *self.current_range)] choices += self.additional_choices return tuple(choices)
- def __call__(self, link, text, groups, rng): - """Ask user how the selected link should be replaced.""" - if self._old == link: - self._current_match = (link, text, groups, rng) - if self.context > 0: - # at the beginning of the link, start red color. - # at the end of the link, reset the color to default - pywikibot.output(text[max(0, rng[0] - self.context): rng[0]] + - '\03{lightred}' + text[rng[0]: rng[1]] + - '\03{default}' + text[rng[1]: rng[1] + self.context]) - question = ('Should the link target to ' - '\03{{lightpurple}}{0}\03{{default}}?') - else: - question = ('Should the link \03{{lightred}}{1}\03{{default}} ' - 'target to \03{{lightpurple}}{0}\03{{default}}?') + def handle_link(self): + """Handle the currently given replacement.""" + choices = self.choices + for choice in choices: + if isinstance(choice, Choice) and choice.handle_link(): + return choice.answer
- choice = pywikibot.input_choice( - question.format(self._new.canonical_title(), - self._old.canonical_title()), - self.choices, default=self._default, automatic_quit=self._quit) - - answer = self.handle_answer(choice, link) - self._current_match = None - return answer + if self.context > 0: + rng = self.current_range + text = self.current_text + # at the beginning of the link, start red color. + # at the end of the link, reset the color to default + pywikibot.output(text[max(0, rng[0] - self.context): rng[0]] + + '\03{lightred}' + text[rng[0]: rng[1]] + + '\03{default}' + text[rng[1]: rng[1] + self.context]) + question = 'Should the link ' else: - return None + question = 'Should the link \03{{lightred}}{0}\03{{default}} ' + + if self._new is False: + question += 'be unlinked?' + else: + question += 'target to \03{{lightpurple}}{0}\03{{default}}?'.format( + self._new.canonical_title()) + + choice = pywikibot.input_choice( + question.format(self._old.canonical_title()), + choices, default=self._default, automatic_quit=self._quit) + + return self.handle_answer(choice)
@property def current_link(self): diff --git a/scripts/unlink.py b/scripts/unlink.py index a5deb82..1b663d8 100755 --- a/scripts/unlink.py +++ b/scripts/unlink.py @@ -30,14 +30,51 @@ __version__ = '$Id$' #
-import re import pywikibot -from pywikibot.editor import TextEditor from pywikibot import i18n -from pywikibot.bot import SingleSiteBot +from pywikibot.bot import ( + SingleSiteBot, ExistingPageBot, NoRedirectPageBot, InteractiveReplace, + ChoiceException, UnhandledAnswer, AlwaysChoice, +) +from pywikibot.editor import TextEditor +from pywikibot.textlib import replace_links
-class UnlinkBot(SingleSiteBot): +class EditReplacement(ChoiceException, UnhandledAnswer): + + """The text should be edited and replacement should be restarted.""" + + def __init__(self): + """Constructor.""" + super(EditReplacement, self).__init__('edit', 'e') + self.stop = True + + +class InteractiveUnlink(InteractiveReplace): + + """An implementation which just allows unlinking.""" + + def __init__(self, bot): + """Create default settings.""" + super(InteractiveUnlink, self).__init__( + old_link=bot.pageToUnlink, new_link=False, default='u') + self._always = AlwaysChoice(self, 'unlink all pages', 'a') + self._always.always = bot.getOption('always') + self.additional_choices = [AlwaysChoice(self, 'unlink all on page', 'p'), + self._always, EditReplacement()] + self._bot = bot + self.allow_replace = False + self.context = 100 + self.context_change = 100 + + def handle_answer(self, choice): + """Handle choice and store in bot's options.""" + answer = super(InteractiveUnlink, self).handle_answer(choice) + self._bot.options['always'] = self._always.always + return answer + + +class UnlinkBot(SingleSiteBot, ExistingPageBot, NoRedirectPageBot):
"""Page unlinking bot."""
@@ -51,119 +88,37 @@
super(UnlinkBot, self).__init__(site=pageToUnlink.site, **kwargs) self.pageToUnlink = pageToUnlink - linktrail = self.pageToUnlink.site.linktrail()
self.generator = pageToUnlink.getReferences( namespaces=self.getOption('namespaces'), content=True) - # The regular expression which finds links. Results consist of four - # groups: - # - # group title is the target page title, that is, everything - # before | or ]. - # - # group section is the page section. - # It'll include the # to make life easier for us. - # - # group label is the alternative link title, that's everything - # between | and ]. - # - # group linktrail is the link trail, that's letters after ]] which are - # part of the word. - # note that the definition of 'letter' varies from language to language. - self.linkR = re.compile(r'[[(?P<title>[^]|#]*)(?P<section>#[^]|]*)?(|(?P<label>[^]]*))?]](?P<linktrail>%s)' - % linktrail) self.comment = i18n.twtranslate(self.pageToUnlink.site, 'unlink-unlinking', self.pageToUnlink.title())
- def handleNextLink(self, text, match, context=100): - """ - Return a tuple (text, jumpToBeginning). + def _create_callback(self): + """Create a new callback instance for replace_links.""" + return InteractiveUnlink(self)
- text is the unicode string after the current link has been processed. - jumpToBeginning is a boolean which specifies if the cursor position - should be reset to 0. This is required after the user has edited the - article. - """ - # ignore interwiki links and links to sections of the same page as well - # as section links - if not match.group('title') \ - or self.pageToUnlink.site.isInterwikiLink(match.group('title')) \ - or match.group('section'): - return text, False - linkedPage = pywikibot.Page(self.pageToUnlink.site, - match.group('title')) - # Check whether the link found is to the current page itself. - if linkedPage != self.pageToUnlink: - # not a self-link - return text, False - else: - # at the beginning of the link, start red color. - # at the end of the link, reset the color to default - if self.getOption('always'): - choice = 'a' - else: - pywikibot.output( - text[max(0, match.start() - context):match.start()] + - '\03{lightred}' + text[match.start():match.end()] + - '\03{default}' + text[match.end():match.end() + context]) - choice = pywikibot.input_choice( - u'\nWhat shall be done with this link?\n', - [('unlink', 'u'), ('skip', 's'), ('edit', 'e'), - ('more context', 'm'), ('unlink all', 'a')], 'u') - pywikibot.output(u'') - - if choice == 's': - # skip this link - return text, False - elif choice == 'e': - editor = TextEditor() - newText = editor.edit(text, jumpIndex=match.start()) - # if user didn't press Cancel - if newText: - return newText, True - else: - return text, True - elif choice == 'm': - # show more context by recursive self-call - return self.handleNextLink(text, match, - context=context + 100) - elif choice == 'a': - self.options['always'] = True - new = match.group('label') or match.group('title') - new += match.group('linktrail') - return text[:match.start()] + new + text[match.end():], False - - def treat(self, page): + def treat_page(self): """Remove links pointing to the configured page from the given page.""" - 
self.current_page = page - try: - oldText = page.get() - text = oldText - curpos = 0 - while curpos < len(text): - match = self.linkR.search(text, pos=curpos) - if not match: - break - # Make sure that next time around we will not find this same - # hit. - curpos = match.start() + 1 - text, jumpToBeginning = self.handleNextLink(text, match) - if jumpToBeginning: - curpos = 0 - if oldText == text: - pywikibot.output(u'No changes necessary.') + text = self.current_page.text + while True: + unlink_callback = self._create_callback() + try: + text = replace_links(text, unlink_callback, + self.pageToUnlink.site) + except EditReplacement: + new_text = TextEditor().edit( + unlink_callback.current_text, + jumpIndex=unlink_callback.current_range[0]) + # if user didn't press Cancel + if new_text: + text = new_text + else: + text = unlink_callback.current_text else: - pywikibot.showDiff(oldText, text) - page.text = text - page.save(self.comment) - except pywikibot.NoPage: - pywikibot.output(u"Page %s does not exist?!" - % page.title(asLink=True)) - except pywikibot.IsRedirectPage: - pywikibot.output(u"Page %s is a redirect; skipping." - % page.title(asLink=True)) - except pywikibot.LockedPage: - pywikibot.output(u"Page %s is locked?!" % page.title(asLink=True)) + break + + self.put_current(text, summary=self.comment)
def main(*args):