jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/760553 )
Change subject: Revert "[bugfix] Enable default -mysqlquery with replace.py"
......................................................................
Revert "[bugfix] Enable default -mysqlquery with replace.py"
This reverts commit 15b2db3906276030b8b475c66c3d2ad4ce8eddc0.
Reason for revert: See T301185
Change-Id: I74d0e1345f70e33c7f0129e70e9c0c2a7366f6b1
---
M scripts/replace.py
M tests/replacebot_tests.py
2 files changed, 125 insertions(+), 152 deletions(-)
Approvals:
  Dvorapa: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/scripts/replace.py b/scripts/replace.py index 5552141..12cf5fe 100755 --- a/scripts/replace.py +++ b/scripts/replace.py @@ -146,11 +146,9 @@ import re from collections.abc import Sequence from contextlib import suppress -from typing import Optional
import pywikibot from pywikibot import editor, fixes, i18n, pagegenerators, textlib -from pywikibot.backports import Dict, Generator, List, Pattern, Tuple from pywikibot.bot import ExistingPageBot, SingleSiteBot from pywikibot.exceptions import InvalidPageError, NoPageError from pywikibot.tools import chars @@ -770,113 +768,6 @@ return pattern
-EXC_KEYS = { - '-excepttitle': 'title', - '-requiretitle:': 'require-title', - '-excepttext': 'text-contains', - '-exceptinside': 'inside', - '-exceptinsidetag': 'inside-tags' -} -"""Dictionary to convert exceptions command line options to exceptions keys. - - .. versionadded:: 7.0 -""" - - -def handle_exceptions(*args: str) -> Tuple[List[str], Dict[str, str]]: - """Handle exceptions args to ignore pages which contain certain texts. - - .. versionadded:: 7.0 - """ - exceptions = {key: [] for key in EXC_KEYS.values()} - local_args = [] - for argument in args: - arg, _, value = argument.partition(':') - if arg in EXC_KEYS: - exceptions[EXC_KEYS[arg]].append(value) - else: - local_args.append(argument) - return local_args, exceptions - - -def handle_pairsfile(filename: str) -> List[str]: - """Handle -pairsfile argument. - - .. versionadded:: 7.0 - """ - if not filename: - filename = pywikibot.input( - 'Please enter the filename to read replacements from:') - - try: - with codecs.open(filename, 'r', 'utf-8') as f: - # strip newlines, but not other characters - replacements = f.read().splitlines() - if not replacements: - raise OSError('{} is empty.'.format(filename)) - except OSError as e: - pywikibot.error('Error loading {}: {}'.format(filename, e)) - return None - - if len(replacements) % 2: - pywikibot.error( - '{} contains an incomplete pattern replacement pair.'.format( - filename)) - return None - - # Strip BOM from first line - replacements[0].lstrip('\uFEFF') - return replacements - - -def handle_manual() -> List[str]: - """Handle manual input. - - .. 
versionadded:: 7.0 - """ - pairs = [] - old = pywikibot.input('Please enter the text that should be replaced:') - while old: - new = pywikibot.input('Please enter the new text:') - pairs += [old, new] - old = pywikibot.input( - 'Please enter another text that should be replaced,\n' - 'or press Enter to start:') - return pairs - - -def handle_sql(sql: str, - replacements: List[Pattern], - exceptions: List[Pattern]) -> Generator: - """Handle default sql query. - - .. versionadded:: 7.0 - """ - if not sql: - where_clause = 'WHERE ({})'.format(' OR '.join( - "old_text RLIKE '{}'" - .format(prepareRegexForMySQL(repl.old_regex.pattern)) - for repl in replacements)) - - if exceptions: - except_clause = 'AND NOT ({})'.format(' OR '.join( - "old_text RLIKE '{}'" - .format(prepareRegexForMySQL(exc.pattern)) - for exc in exceptions)) - else: - except_clause = '' - - sql = """ -SELECT page_namespace, page_title -FROM page -JOIN text ON (page_id = old_id) -{} -{} -LIMIT 200""".format(where_clause, except_clause) - - return pagegenerators.MySQLPageGenerator(sql) - - def main(*args: str) -> None: """ Process command line arguments and invoke bot. @@ -892,9 +783,16 @@ # Array which will collect commandline parameters. # First element is original text, second element is replacement text. commandline_replacements = [] - file_replacements = [] # A list of 2-tuples of original text and replacement text. replacements = [] + # Don't edit pages which contain certain texts. + exceptions = { + 'title': [], + 'text-contains': [], + 'inside': [], + 'inside-tags': [], + 'require-title': [], # using a separate requirements dict needs some + } # major refactoring of code.
# Should the elements of 'replacements' and 'exceptions' be interpreted # as regular expressions? @@ -904,36 +802,53 @@ # the dump's path, either absolute or relative, which will be used # if -xml flag is present xmlFilename = None - xmlStart = None - sql_query = None # type: Optional[str] + useSql = False + sql_query = None # Set the default regular expression flags flags = 0 # Request manual replacements even if replacements are already defined manual_input = False + # Replacements loaded from a file + replacement_file = None + replacement_file_arg_misplaced = False
# Read commandline parameters. - genFactory = pagegenerators.GeneratorFactory( - disabled_options=['mysqlquery']) local_args = pywikibot.handle_args(args) + genFactory = pagegenerators.GeneratorFactory() local_args = genFactory.handle_args(local_args) - local_args, exceptions = handle_exceptions(*local_args)
- for argument in local_args: - arg, _, value = argument.partition(':') + for arg in local_args: if arg == '-regex': regex = True - elif arg == '-xmlstart': - xmlStart = value or pywikibot.input( - 'Please enter the dumped article to start with:') - elif arg == '-xml': - xmlFilename = value or i18n.input('pywikibot-enter-xml-filename') - elif arg == '-mysqlquery': - sql_query = value - elif arg == '-fix': - fixes_set.append(value) - elif arg == '-sleep': - options['sleep'] = float(value) - elif arg in ('-allowoverlap', '-always', '-recursive'): + elif arg.startswith('-xmlstart'): + if len(arg) == 9: + xmlStart = pywikibot.input( + 'Please enter the dumped article to start with:') + else: + xmlStart = arg[10:] + elif arg.startswith('-xml'): + if len(arg) == 4: + xmlFilename = i18n.input('pywikibot-enter-xml-filename') + else: + xmlFilename = arg[5:] + elif arg.startswith('-mysqlquery'): + useSql = True + sql_query = arg.partition(':')[2] + elif arg.startswith('-excepttitle:'): + exceptions['title'].append(arg[13:]) + elif arg.startswith('-requiretitle:'): + exceptions['require-title'].append(arg[14:]) + elif arg.startswith('-excepttext:'): + exceptions['text-contains'].append(arg[12:]) + elif arg.startswith('-exceptinside:'): + exceptions['inside'].append(arg[14:]) + elif arg.startswith('-exceptinsidetag:'): + exceptions['inside-tags'].append(arg[17:]) + elif arg.startswith('-fix:'): + fixes_set += [arg[5:]] + elif arg.startswith('-sleep:'): + options['sleep'] = float(arg[7:]) + elif arg in ('-always', '-recursive', '-allowoverlap'): options[arg[1:]] = True elif arg == '-nocase': flags |= re.IGNORECASE @@ -941,32 +856,67 @@ flags |= re.DOTALL elif arg == '-multiline': flags |= re.MULTILINE - elif arg == '-addcat': - options['addcat'] = value - elif arg == '-summary': - edit_summary = value - elif arg == '-automaticsummary': + elif arg.startswith('-addcat:'): + options['addcat'] = arg[8:] + elif arg.startswith('-summary:'): + edit_summary = arg[9:] + elif 
arg.startswith('-automaticsummary'): edit_summary = True - elif arg == '-manualinput': + elif arg.startswith('-manualinput'): manual_input = True - elif arg == '-pairsfile': - file_replacements = handle_pairsfile(value) + elif arg.startswith('-pairsfile'): + if len(commandline_replacements) % 2: + replacement_file_arg_misplaced = True + + if arg == '-pairsfile': + replacement_file = pywikibot.input( + 'Please enter the filename to read replacements from:') + else: + replacement_file = arg[len('-pairsfile:'):] else: commandline_replacements.append(arg)
- if file_replacements is None: - return + site = pywikibot.Site()
if len(commandline_replacements) % 2: pywikibot.error('Incomplete command line pattern replacement pair.') return
- commandline_replacements += file_replacements + if replacement_file_arg_misplaced: + pywikibot.error( + '-pairsfile used between a pattern replacement pair.') + return + + if replacement_file: + try: + with codecs.open(replacement_file, 'r', 'utf-8') as f: + # strip newlines, but not other characters + file_replacements = f.read().splitlines() + except OSError as e: + pywikibot.error('Error loading {}: {}'.format( + replacement_file, e)) + return + + if len(file_replacements) % 2: + pywikibot.error( + '{} contains an incomplete pattern replacement pair.'.format( + replacement_file)) + return + + # Strip BOM from first line + file_replacements[0].lstrip('\uFEFF') + commandline_replacements.extend(file_replacements) + if not(commandline_replacements or fixes_set) or manual_input: - commandline_replacements += handle_manual() + old = pywikibot.input('Please enter the text that should be replaced:') + while old: + new = pywikibot.input('Please enter the new text:') + commandline_replacements += [old, new] + old = pywikibot.input( + 'Please enter another text that should be replaced,' + '\nor press Enter to start:')
# The summary stored here won't be actually used but is only an example - site = pywikibot.Site() single_summary = None for i in range(0, len(commandline_replacements), 2): replacement = Replacement(commandline_replacements[i], @@ -1079,12 +1029,33 @@ precompile_exceptions(exceptions, regex, flags)
if xmlFilename: + try: + xmlStart + except NameError: + xmlStart = None gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements, exceptions, site) - elif sql_query is not None: - # Only -excepttext option is considered by the query. Other - # exceptions are taken into account by the ReplaceRobot - gen = handle_sql(sql_query, replacements, exceptions['text-contains']) + elif useSql: + if not sql_query: + whereClause = 'WHERE ({})'.format(' OR '.join( + "old_text RLIKE '{}'" + .format(prepareRegexForMySQL(old_regexp.pattern)) + for (old_regexp, new_text) in replacements)) + if exceptions: + exceptClause = 'AND NOT ({})'.format(' OR '.join( + "old_text RLIKE '{}'" + .format(prepareRegexForMySQL(exc.pattern)) + for exc in exceptions)) + else: + exceptClause = '' + query = sql_query or """ +SELECT page_namespace, page_title +FROM page +JOIN text ON (page_id = old_id) +{} +{} +LIMIT 200""".format(whereClause, exceptClause) + gen = pagegenerators.MySQLPageGenerator(query)
gen = genFactory.getCombinedGenerator(gen, preload=True) if pywikibot.bot.suggest_help(missing_generator=not gen): diff --git a/tests/replacebot_tests.py b/tests/replacebot_tests.py index a4ac228..9ee153b 100644 --- a/tests/replacebot_tests.py +++ b/tests/replacebot_tests.py @@ -100,14 +100,16 @@
def test_invalid_replacements(self): """Test invalid command line replacement configurations.""" - # old and new no longer need to be together but pairsfile must exist - self._run('foo', '-pairsfile:/dev/null', 'bar') - self.assertIn('Error loading /dev/null:', - pywikibot.bot.ui.pop_output()[0]) + # old and new need to be together + self.assertFalse(self._run('foo', '-pairsfile:/dev/null', 'bar')) + + self.assertEqual([ + '-pairsfile used between a pattern replacement pair.', + ], pywikibot.bot.ui.pop_output())
# only old provided with empty_sites(): - self._run('foo') + self.assertFalse(self._run('foo')) self.assertEqual([ 'Incomplete command line pattern replacement pair.', ], pywikibot.bot.ui.pop_output()) @@ -144,7 +146,7 @@
def _get_bot(self, only_confirmation, *args): """Run with arguments, assert and return one bot.""" - self._run(*args) + self.assertIsNone(self._run(*args)) self.assertLength(self.bots, 1) bot = self.bots[0] if only_confirmation is not None:
pywikibot-commits@lists.wikimedia.org