Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/914038 )
Change subject: Add transwikiimport.py script ......................................................................
Add transwikiimport.py script
Patch submitted by Draco flavus
Bug: T335246 Change-Id: Ic4353c3863d8d6abc60200dc7707981884c7f055 --- A scripts/transwikiimport.py M tox.ini 2 files changed, 323 insertions(+), 0 deletions(-)
Approvals: Xqt: Verified; Looks good to me, approved
diff --git a/scripts/transwikiimport.py b/scripts/transwikiimport.py new file mode 100644 index 0000000..2f80c48 --- /dev/null +++ b/scripts/transwikiimport.py @@ -0,0 +1,310 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +This script transfers pages from a source wiki to a target wiki + over the transwiki import mechanism. + +It is also able to copy the full edit history. + +The following parameters are supported: + +-interwikisource: The interwiki code of the source wiki. + +-fullhistory: Include all versions of the page. + +-includealltemplates: All templates and transcluded pages will + be copied (dangerous). + +-assignknownusers: If user exists on target wiki, assign the + editions to them + +-correspondingnamespace: The number of the corresponding namespace. + +-rootpage: Import as subpages of ... + +-summary: Log entry import summary. + +-tags: Change tags to apply to the entry in the import log + and to the null revision on the imported pages. + +-test: No import, the names of the pages are output. + +-overwrite: Existing pages are skipped by default. + Use this option to overwrite pages. + +-target Use page generator of the target site + This also affects the correspondingnamespace. + + +Internal links are *not* repaired! 
+ +Pages to work on can be specified using any of: + +&params; + +Examples +-------- + +Transfer all pages in category "Query service" from the English Wikipedia to +the home Wikipedia, adding "Wikipedia:Import enwp/" as prefix: + + python pwb.py transwikiimport -interwikisource:en -cat:"Query service" \ +-prefix:"Wikipedia:Import enwp/" -fullhistory -assignknownusers + +Copy the template "Query service" from the English Wikipedia to the +home Wiktionary: + + python pwb.py transwikiimport -interwikisource:w:en \ +-page:"Template:Query service" -fullhistory -assignknownusers + +Copy 10 wanted templates of the home Wikipedia from English Wikipedia \ +to the home Wikipedia + python pwb.py transwikiimport -interwikisource:en \ +-wantedtemplates:10 -target -fullhistory -assignknownusers + +Advice +------ + +The module gives access to all parameters of the API (and specialpage) + and is compatible with the transferbot module. +However for most scenarios the following parameters should be avoided: + -overwrite (by default set as False) + -target (by default set as False) + -includealltemplates (by default set as False) + +The correspondingnamespace is used only if the namespaces on both wikis do not + correspond one with another. + +Correspondingnamespace and rootpage are mutually exclusive. + +Target and rootpage are mutually exclusive. + (This combination does not seem to be feasible.) + +If the target page already exists, the target page will be overwritten + if -overwrite is set or skipped otherwise. + +The list of pages to be imported can be generated outside of pywikibot: + + for i in {1..10} ; do python3 pwb.py transwikiimport -interwikisource:mul \ +-page:"Page:How to become famous.djvu/$i" -fullhistory \ +-assignknownusers ; done + +(The pages Page:How to become famous.djvu/1, Page:How to become famous.djvu/2 \ +..
Page:How to become famous.djvu/10 will be copied + from wikisource (mul) to the home-wikisource, all versions will be imported + and the usernames will be identified + (existing pages will be skipped.) + +Or generated using the usual pywikibot generators: + + python3 pwb.py transwikiimport -interwikisource:mul \ +-prefixindex:"Page:How to become famous.djvu" \ +-fullhistory -assignknownusers \ +-summary:"Book copied from oldwiki." + +(All pages like Page:How to become famous.djvu... will be copied + from wikisource (mul) to the home-wikisource, all versions will be + imported and the usernames will be identified + (existing pages will be skipped.) + +The parameter -test disables the import and the bot prints the names + of the pages that would be imported. +Since the import of pages is quite an exceptional process and potentially + dangerous it should be made carefully and tested in advance. +The -test parameter can help to find out which pages would be moved + and what would be the target of the import. +However it does not print the titles of the transcluded pages (e.g. templates) + if -includealltemplates is set. +This option is quite *dangerous*. If the title of an existing page on home wiki + clashes with the title of one of the linked pages it would be *overwritten*. + The histories would be merged. (If the imported version is newer.) + Even if -overwrite is not set the linked page *can be overwritten*. + + +Interwikisource +--------------- + +The list of wikis that can be used as a source + is defined in the variable $wgImportSources +It can be viewed on the Specialpage:Import. + + +Rights +------ + +For transwikiimport (and even to access the Specialpage:Import) + the appropriate flag on the account + must be set (usually administrator, transwiki importer or importer). + + +""" +# +# (C) Draco flavus +# +# Distributed under the terms of the MIT license.
#
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import suggest_help
# from pywikibot.i18n import twtranslate
from pywikibot.data import api


# Maps the placeholder used in the module docstring to the help text of the
# page generators.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816


def api_query(site, params):
    """Send an API request to ``site`` and return its result.

    :param site: site the request is submitted to
    :param params: mapping of API parameters passed to ``api.Request``
    :return: the value returned by ``api.Request.submit()``
    """
    return api.Request(site, parameters=params).submit()


def _corresponding_title(page, site, correspondingnamespace):
    """Return the title of ``page`` as it is expected on the other wiki.

    With ``correspondingnamespace == 'all'`` the canonical prefix of the
    page's own namespace is put in front of the namespace-less title.
    Otherwise ``correspondingnamespace`` is read as a namespace number and
    the prefix is taken from ``site.namespaces``.
    """
    if correspondingnamespace == 'all':
        prefix = page.namespace().canonical_prefix()
    else:
        prefix = str(site.namespaces[int(correspondingnamespace)])
    return prefix + page.title(with_ns=False)


def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Every page yielded by the page generator is either imported into the
    home wiki with one ``action=import`` API request, or - with ``-test`` -
    only listed. Nothing is imported when ``-interwikisource`` is missing,
    when no generator is given, when unknown parameters are present, when
    source and target site are the same, or when mutually exclusive options
    are combined.

    :param args: command line arguments
    """
    local_args = pywikibot.handle_args(args)

    # Options that carry a value ("-name:value"). ``None`` for the summary
    # means "not given", so the default can be built after parsing and does
    # not depend on the order of the arguments.
    options = {
        '-interwikisource': '',
        '-correspondingnamespace': 'all',
        '-rootpage': '',
        '-tags': '',
        '-summary': None,
    }
    # Plain switches; they only match when given without a value.
    flags = dict.fromkeys(
        ('-test', '-overwrite', '-target', '-fullhistory',
         '-includealltemplates', '-assignknownusers'),
        False,
    )
    gen_args = []

    for arg in local_args:
        # Split at the first colon only: values such as "w:en" keep theirs.
        name, _, value = arg.partition(':')
        if name in options:
            options[name] = value
        elif arg in flags:
            flags[arg] = True
        else:
            # Everything else is left to the page generator factory.
            gen_args.append(arg)

    interwikisource = options['-interwikisource']
    correspondingnamespace = options['-correspondingnamespace']
    rootpage = options['-rootpage']
    tags = options['-tags']
    summary = options['-summary']
    if summary is None:
        summary = 'Importing page from ' + interwikisource

    test = flags['-test']
    overwrite = flags['-overwrite']
    target = flags['-target']

    if not interwikisource:
        # Without a source prefix there is no wiki to import from.
        suggest_help(missing_parameters=['-interwikisource'])
        return

    tosite = pywikibot.Site()
    csrf = tosite.tokens['csrf']
    fromsite = pywikibot.Site().interwiki(interwikisource)
    additional_text = ('Target site not different from source site.'
                       if fromsite == tosite else '')

    # With -target the generator lists pages of the target wiki, otherwise
    # pages of the source wiki.
    gen_factory = pagegenerators.GeneratorFactory(
        site=tosite if target else fromsite)
    unknown_args = [arg for arg in gen_args
                    if not gen_factory.handle_arg(arg)]

    gen = gen_factory.getCombinedGenerator()

    if suggest_help(missing_generator=not gen,
                    additional_text=additional_text,
                    unknown_parameters=unknown_args):
        return

    generator_description = ' '.join(gen_args)
    pywikibot.output("""
    Page transfer configuration
    ---------------------------
    Source: {fromsite}
    Target: {tosite}

    Generator of pages to transfer: {gen_args}
    {target}
    Prefix for transferred pages: {rootpage}
    """.format(fromsite=fromsite, tosite=tosite,
               gen_args=generator_description,
               rootpage=rootpage if rootpage else '(none)',
               target='from target site\n' if target else ''))

    if correspondingnamespace != 'all' and rootpage != '':
        pywikibot.output('Both the correspondingnamespace and the rootpage '
                         'are set! Exiting.')
        return
    if target and rootpage != '':
        pywikibot.output('Both the target and the rootpage are set! Exiting.')
        return

    params = {
        'action': 'import',
        'token': csrf,
        'interwikisource': interwikisource,
        'fullhistory': flags['-fullhistory'],
        'assignknownusers': flags['-assignknownusers'],
        'templates': flags['-includealltemplates'],
        'summary': summary,
    }
    if correspondingnamespace != 'all':
        params['namespace'] = correspondingnamespace
    if rootpage != '':
        params['rootpage'] = rootpage
    if tags != '':
        params['tags'] = tags

    for page in gen:
        if target:
            # The generator ran on the target wiki: derive the source title.
            fromtitle = _corresponding_title(page, fromsite,
                                             correspondingnamespace)
            targetpage = page
        else:
            # The generator ran on the source wiki: derive the target page.
            fromtitle = page.title(with_ns=True)
            totitle = _corresponding_title(page, tosite,
                                           correspondingnamespace)
            targetpage = pywikibot.Page(tosite, totitle)

        if not overwrite:
            if targetpage.exists():
                pywikibot.warning(
                    'Skipped {} (target page {} exists)'.format(
                        page.title(as_link=True, force_interwiki=True),
                        targetpage.title(as_link=True)))
                continue
        elif not targetpage.botMayEdit():
            pywikibot.warning(
                'Target page {} is not editable by bots'.format(
                    targetpage.title(as_link=True)))
            continue

        # The same parameter dict is reused; only the page title changes.
        params['interwikipage'] = fromtitle
        if test:
            pywikibot.output('Simulation: {} → {}'.format(
                fromtitle, targetpage.title(with_ns=True)))
        else:
            # For testing, comment out the following line.
            api_query(tosite, params)
            # For testing, remove the hash sign from the following two lines.
+ # pywikibot.output(params) + # pywikibot.output(fromtitle + ' → ' + page.title(with_ns=True) if target else totitle) + + +if __name__ == '__main__': + main() diff --git a/tox.ini b/tox.ini index 17b9aca..7069a31 100644 --- a/tox.ini +++ b/tox.ini @@ -192,6 +192,7 @@ scripts/reflinks.py: N802, N816 scripts/replace.py: N802, N803, N806, N816 scripts/solve_disambiguation.py: N802, N806 + scripts/transwikiimport.py: C103, D103, D205, D400, E123, E501 setup.py: T001, T201 tests/api_tests.py: N802 tests/archivebot_tests.py: N802
pywikibot-commits@lists.wikimedia.org