jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/399179 )
Change subject: Create a maintenance script to download Wikimedia database dump ......................................................................
Create a maintenance script to download Wikimedia database dump
The script is a one-time-run script that downloads a Wikimedia database dump (selected by lang, family, and filename) from dumps.wikimedia.org. If the host already has a local copy of the dumps under /public/dumps/public/, as in the Toolforge environment, the script copies the file from there instead of downloading it.
Bug: T123885 Change-Id: I3f2bad7a4bfa622017765958c3f7d6bcc9b42105 --- A scripts/maintenance/download_dump.py 1 file changed, 142 insertions(+), 0 deletions(-)
Approvals: Framawiki: Verified; Looks good to me, approved jenkins-bot: Verified
diff --git a/scripts/maintenance/download_dump.py b/scripts/maintenance/download_dump.py new file mode 100644 index 0000000..9793255 --- /dev/null +++ b/scripts/maintenance/download_dump.py @@ -0,0 +1,142 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +This bot downloads dump from dumps.wikimedia.org. + +This script supports the following command line parameters: + + -filename:# The name of the file (e.g. abstract.xml) + + -storepath:# The stored file's path. + +""" +# +# (C) Pywikibot team, 2017 +# (C) Yifei He, 2017 +# +# Distributed under the terms of the MIT license. +# +from __future__ import absolute_import, division, unicode_literals + +import io +import os.path +import sys + +from shutil import copyfile, copyfileobj + +import pywikibot + +from pywikibot import Bot + +from pywikibot.comms.http import fetch + + +class DownloadDumpBot(Bot): + + """Download dump bot.""" + + availableOptions = { + 'wikiname': '', + 'filename': '', + 'storepath': './', + } + + def __init__(self, **kwargs): + """Constructor.""" + super(DownloadDumpBot, self).__init__(**kwargs) + + def get_dump_name(self, db_name, typ): + """Check if dump file exists locally in a Toolforge server.""" + db_path = '/public/dumps/public/{0}/'.format(db_name) + if os.path.isdir(db_path): + dates = map(int, os.listdir(db_path)) + dates = sorted(dates, reverse=True) + for date in dates: + dump_filepath = ('/public/dumps/public/{0}/{1}/{2}-{3}-{4}' + .format(db_name, date, db_name, date, typ)) + if os.path.isfile(dump_filepath): + return dump_filepath + return None + + def run(self): + """Run bot.""" + pywikibot.output('Downloading dump from ' + self.getOption('wikiname')) + + download_filename = self.getOption('wikiname') + \ + '-latest-' + self.getOption('filename') + file_storepath = os.path.join( + self.getOption('storepath'), download_filename) + + # https://wikitech.wikimedia.org/wiki/Help:Toolforge#Dumps + toolforge_dump_filepath = self.get_dump_name( + self.getOption('wikiname'), 
self.getOption('filename')) + if toolforge_dump_filepath: + pywikibot.output('Copying file from ' + toolforge_dump_filepath) + copyfile(toolforge_dump_filepath, file_storepath) + else: + url = 'https://dumps.wikimedia.org/' + \ + os.path.join(self.getOption('wikiname'), + 'latest', download_filename) + pywikibot.output('Downloading file from ' + url) + response = fetch(url) + if response.status == 200: + try: + with open(file_storepath, 'wb') as result_file: + copyfileobj(io.BytesIO(response.raw), result_file) + except IOError: + pywikibot.exception() + return False + else: + return + + pywikibot.output('Done! File stored as ' + file_storepath) + return + + +def main(*args): + """ + Process command line arguments and invoke bot. + + If args is an empty list, sys.argv is used. + + @param args: command line arguments + @type args: list of unicode + """ + opts = {} + unknown_args = [] + + local_args = pywikibot.handle_args(args) + for arg in local_args: + option, sep, value = arg.partition(':') + if option.startswith('-'): + option = option[1:] + if option == 'filename': + opts[option] = value or pywikibot.input( + 'Enter the filename: ') + continue + elif option == 'storepath': + opts[option] = os.path.abspath(value) or pywikibot.input( + 'Enter the store path: ') + continue + unknown_args += [arg] + + missing = [] + if 'filename' not in opts: + missing += ['-filename'] + + if missing or unknown_args: + pywikibot.bot.suggest_help(missing_parameters=missing, + unknown_parameters=unknown_args) + return 1 + + site = pywikibot.Site() + opts['wikiname'] = site.dbName() + + bot = DownloadDumpBot(**opts) + bot.run() + + return 0 + + +if __name__ == '__main__': + sys.exit(main())
pywikibot-commits@lists.wikimedia.org