jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/336103 )
Change subject: [IMPR] Use bot classes for table2wiki.py ......................................................................
[IMPR] Use bot classes for table2wiki.py
- Remove the -skip option, which has not worked for a very long time - rename the -auto option to -always and -sql to -mysqlquery, which we use for other bots, and print a deprecation warning when the old options are used. - a new option -skipwarning skips pages with warnings. The old settings table2wikiAskOnlyWarnings and table2wikiSkipWarnings are deleted - use treat_page method instead of treat - use pagegenerators for -namespace option - use positional_arg_name with 'page' as default - a new tools method has_module() to check whether a library can be imported - docs added
Change-Id: Ie50078ae3315ba8ba70946b889a31e403c998dd7 --- M pywikibot/config2.py M pywikibot/tools/__init__.py M scripts/table2wiki.py 3 files changed, 115 insertions(+), 134 deletions(-)
Approvals: Mpaa: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/config2.py b/pywikibot/config2.py index 85ad1a7..9f732f2 100644 --- a/pywikibot/config2.py +++ b/pywikibot/config2.py @@ -661,9 +661,6 @@ # sometimes HTML-tables are indented for better reading. # That can do very ugly results. deIndentTables = True -# table2wiki.py works quite stable, so you might switch to True -table2wikiAskOnlyWarnings = True -table2wikiSkipWarnings = False
# ############# WEBLINK CHECKER SETTINGS ##############
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py index c89bd6a..7e1c9c1 100644 --- a/pywikibot/tools/__init__.py +++ b/pywikibot/tools/__init__.py @@ -155,6 +155,16 @@ count = itertools.count
+def has_module(module): + """Check whether a module can be imported.""" + try: + __import__(module) + except ImportError: + return False + else: + return True + + def empty_iterator(): # http://stackoverflow.com/a/13243870/473890 """An iterator which does nothing.""" diff --git a/scripts/table2wiki.py b/scripts/table2wiki.py index b2527f7..82dba5b 100644 --- a/scripts/table2wiki.py +++ b/scripts/table2wiki.py @@ -7,32 +7,22 @@
&params;
--xml Retrieve information from a local XML dump (pages_current, see - http://download.wikimedia.org). +-always The bot won't ask for confirmation when putting a page + +-skipwarning Skip processing a page when a warning occurred. + Only used when -always is or becomes True. + +-quiet Don't show diffs in -always mode + +-mysqlquery Retrieve information from a local mirror. + Searches for pages with HTML tables, and tries to convert + them on the live wiki. + +-xml Retrieve information from a local XML dump + (pages_current, see http://download.wikimedia.org). Argument can also be given as "-xml:filename". - Searches for pages with HTML tables, and tries to convert them - on the live wiki. - --sql Retrieve information from a local mirror. - Searches for pages with HTML tables, and tries to convert them - on the live wiki. - --namespace:n Number or name of namespace to process. The parameter can be - used multiple times. It works in combination with all other - parameters, except for the -start parameter. If you e.g. - want to iterate over all categories starting at M, use - -start:Category:M. - -This SQL query can be used to find pages to work on: - - SELECT CONCAT('[[', cur_title, ']]') - FROM cur - WHERE (cur_text LIKE '%<table%' - OR cur_text LIKE '%<TABLE%') - AND cur_title REGEXP "^[A-N]" - AND cur_namespace=0 - ORDER BY cur_title - LIMIT 500 + Searches for pages with HTML tables, and tries to convert + them on the live wiki.
Example:
@@ -67,6 +57,11 @@ from pywikibot import pagegenerators from pywikibot import xmlreader
+from pywikibot.bot import (SingleSiteBot, ExistingPageBot, NoRedirectPageBot, + suggest_help, input_yn) +from pywikibot.exceptions import ArgumentDeprecationWarning +from pywikibot.tools import has_module, issue_deprecation_warning + # This is required for the text that is shown when you run this script # with the parameter -help. docuReplacements = { @@ -88,14 +83,23 @@ yield pywikibot.Page(pywikibot.Site(), entry.title)
-class Table2WikiRobot(object): +class Table2WikiRobot(SingleSiteBot, ExistingPageBot, NoRedirectPageBot):
- """Bot to convert HTML tables to wiki syntax.""" + """Bot to convert HTML tables to wiki syntax.
- def __init__(self, generator, quietMode=False): + @param generator: the page generator that determines on which pages + to work + @type generator: generator + """ + + def __init__(self, **kwargs): """Constructor.""" - self.generator = generator - self.quietMode = quietMode + self.availableOptions.update({ + 'quiet': False, # quiet mode, less output + 'skipwarning': False # on warning skip that page + }) + + super(Table2WikiRobot, self).__init__(site=True, **kwargs)
def convertTable(self, table): """ @@ -447,14 +451,10 @@ if not table: # no more HTML tables left break - pywikibot.output(">> Table %i <<" % (convertedTables + 1)) + # convert the current table newTable, warningsThisTable, warnMsgsThisTable = self.convertTable( table) - # show the changes for this table - if not self.quietMode: - pywikibot.showDiff(table.replace('##table##', 'table'), - newTable) warningSum += warningsThisTable for msg in warnMsgsThisTable: warningMessages += 'In table %i: %s' % (convertedTables + 1, @@ -465,23 +465,10 @@ pywikibot.output(warningMessages) return text, convertedTables, warningSum
- def treat(self, page): - """ - Load a page, convert all HTML tables in its text to wiki syntax, and save the result. - - Returns True if the converted table was successfully saved, otherwise returns False. - """ - pywikibot.output(u'\n>>> %s <<<' % page.title()) - site = page.site - try: - text = page.get() - except pywikibot.NoPage: - pywikibot.error(u"couldn't find %s" % page.title()) - return False - except pywikibot.IsRedirectPage: - pywikibot.output(u'Skipping redirect %s' % page.title()) - return False - newText, convertedTables, warningSum = self.convertAllHTMLTables(text) + def treat_page(self): + """Convert all HTML tables in text to wiki syntax and save it.""" + text = self.current_page.text + newText, convertedTables, warnings = self.convertAllHTMLTables(text)
# Check if there are any marked tags left markedTableTagR = re.compile("<##table##|</##table##>", re.IGNORECASE) @@ -492,33 +479,34 @@
if convertedTables == 0: pywikibot.output(u"No changes were necessary.") - else: - if config.table2wikiAskOnlyWarnings and warningSum == 0: - doUpload = True - else: - if config.table2wikiSkipWarnings: - doUpload = True - else: - pywikibot.output("There were %i replacement(s) that might lead to bad " - "output." % warningSum) - doUpload = (pywikibot.input( - u'Do you want to change the page anyway? [y|N]') == "y") - if doUpload: - # get edit summary message - if warningSum == 0: - editSummaryMessage = i18n.twtranslate(site.code, 'table2wiki-no-warning') - else: - editSummaryMessage = i18n.twntranslate( - site.code, - 'table2wiki-warnings', - {'count': warningSum} - ) - page.put_async(newText, summary=editSummaryMessage) + return
- def run(self): - """Check each page passed.""" - for page in self.generator: - self.treat(page) + if warnings: + if self.getOption('always') and self.getOption('skipwarning'): + pywikibot.output( + 'There were %i replacements that might lead to bad ' + 'output. Skipping.' % warnings) + return + if not self.getOption('always'): + pywikibot.output( + 'There were %i replacements that might lead to bad ' + 'output.' % warnings) + if not input_yn('Do you want to change the page anyway'): + return + + # get edit summary message + if warnings == 0: + editSummaryMessage = i18n.twtranslate( + self.site.code, 'table2wiki-no-warning') + else: + editSummaryMessage = i18n.twntranslate( + self.site.code, + 'table2wiki-warnings', + {'count': warnings} + ) + self.put_current(newText, summary=editSummaryMessage, + show_diff=not (self.getOption('quiet') and + self.getOption('always')))
def main(*args): @@ -530,76 +518,62 @@ @param args: command line arguments @type args: list of unicode """ - quietMode = False # use -quiet to get less output - # if the -file argument is used, page titles are stored in this array. - # otherwise it will only contain one page. - articles = [] - # if -file is not used, this temporary array is used to read the page title. - page_title = [] - - # Which namespaces should be processed? - # default to [] which means all namespaces will be processed - namespaces = [] - - xmlfilename = None + options = {} gen = None + + local_args = pywikibot.handle_args(args)
# This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. - genFactory = pagegenerators.GeneratorFactory() + genFactory = pagegenerators.GeneratorFactory(positional_arg_name='page')
- for arg in pywikibot.handle_args(args): - if arg.startswith('-xml'): - if len(arg) == 4: - xmlfilename = pywikibot.input( - u'Please enter the XML dump's filename:') - else: - xmlfilename = arg[5:] - gen = TableXmlDumpPageGenerator(xmlfilename) - elif arg == '-sql': - query = u""" + for arg in local_args: + option, sep, value = arg.partition(':') + if option == '-xml': + filename = value or pywikibot.input( + "Please enter the XML dump's filename:") + gen = TableXmlDumpPageGenerator(filename) + elif option == '-auto': + issue_deprecation_warning( + 'The usage of "-auto"', '-always', + 1, ArgumentDeprecationWarning) + options['always'] = True + elif option in ['-always', '-quiet', '-skipwarning']: + options[option[1:]] = True + else: + if option in ['-sql', '-mysqlquery']: + if not (has_module('oursql') or has_module('MySQLdb')): + raise NotImplementedError( + 'Neither "oursql" nor "MySQLdb" library is installed.') + if option == '-sql': + issue_deprecation_warning( + 'The usage of "-sql"', '-mysqlquery', + 1, ArgumentDeprecationWarning) + + query = value or """ SELECT page_namespace, page_title FROM page JOIN text ON (page_id = old_id) WHERE old_text LIKE '%<table%' -LIMIT 200""" - gen = pagegenerators.MySQLPageGenerator(query) - elif arg.startswith('-namespace:'): - try: - namespaces.append(int(arg[11:])) - except ValueError: - namespaces.append(arg[11:]) - elif arg.startswith('-skip:'): - articles = articles[articles.index(arg[6:]):] - elif arg.startswith('-auto'): - config.table2wikiAskOnlyWarnings = True - config.table2wikiSkipWarnings = True - pywikibot.output('Automatic mode!\n') - elif arg.startswith('-quiet'): - quietMode = True - else: - if not genFactory.handleArg(arg): - page_title.append(arg) +""" + arg = '-mysqlquery:' + query + genFactory.handleArg(arg)
- # if the page is given as a command line argument, - # connect the title's parts with spaces - if page_title != []: - page_title = ' '.join(page_title) - page = pywikibot.Page(pywikibot.Site(), page_title) - gen = iter([page]) - - if not gen: + if gen: + gen = pagegenerators.NamespaceFilterPageGenerator( + gen, genFactory.namespaces) + else: gen = genFactory.getCombinedGenerator()
if gen: - if namespaces != []: - gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) if not genFactory.nopreload: gen = pagegenerators.PreloadingGenerator(gen) - bot = Table2WikiRobot(gen, quietMode) + bot = Table2WikiRobot(generator=gen, **options) bot.run() + return True else: - pywikibot.showHelp('table2wiki') + suggest_help(missing_generator=True) + return False
if __name__ == "__main__":
pywikibot-commits@lists.wikimedia.org