Xqt submitted this change.

View Change


Approvals: Xqt: Verified; Looks good to me, approved
Add transwikiimport.py script

Patch submitted by Draco flavus

Bug: T335246
Change-Id: Ic4353c3863d8d6abc60200dc7707981884c7f055
---
A scripts/transwikiimport.py
M tox.ini
2 files changed, 323 insertions(+), 0 deletions(-)

diff --git a/scripts/transwikiimport.py b/scripts/transwikiimport.py
new file mode 100644
index 0000000..2f80c48
--- /dev/null
+++ b/scripts/transwikiimport.py
@@ -0,0 +1,310 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+This script transfers pages from a source wiki to a target wiki
+ over the transwiki import mechanism.
+
+It is also able to copy the full edit history.
+
+The following parameters are supported:
+
+-interwikisource: The interwiki code of the source wiki.
+
+-fullhistory: Include all versions of the page.
+
+-includealltemplates: All templates and transcluded pages will
+ be copied (dangerous).
+
+-assignknownusers: If user exists on target wiki, assign the
+ edits to them
+
+-correspondingnamespace: The number of the corresponding namespace.
+
+-rootpage: Import as subpages of ...
+
+-summary: Log entry import summary.
+
+-tags: Change tags to apply to the entry in the import log
+ and to the null revision on the imported pages.
+
+-test: No import, the names of the pages are output.
+
+-overwrite: Existing pages are skipped by default.
+ Use this option to overwrite pages.
+
+-target: Use page generator of the target site.
+ This also affects the correspondingnamespace.
+
+
+Internal links are *not* repaired!
+
+Pages to work on can be specified using any of:
+
+&params;
+
+Examples
+--------
+
+Transfer all pages in category "Query service" from the English Wikipedia to
+the home Wikipedia, adding "Wikipedia:Import enwp/" as prefix:
+
+ python pwb.py transwikiimport -interwikisource:en -cat:"Query service" \
+-rootpage:"Wikipedia:Import enwp/" -fullhistory -assignknownusers
+
+Copy the template "Query service" from the English Wikipedia to the
+home Wiktionary:
+
+ python pwb.py transwikiimport -interwikisource:w:en \
+-page:"Template:Query service" -fullhistory -assignknownusers
+
+Copy 10 wanted templates of the home Wikipedia from English Wikipedia
+to the home Wikipedia:
+ python pwb.py transwikiimport -interwikisource:en \
+-wantedtemplates:10 -target -fullhistory -assignknownusers
+
+Advice
+-------
+
+The module gives access to all parameters of the API (and specialpage)
+ and is compatible with the transferbot module.
+However for most scenarios the following parameters should be avoided:
+ -overwrite (by default set as False)
+ -target (by default set as False)
+ -includealltemplates (by default set as False)
+
+The correspondingnamespace is used only if the namespaces on both wikis do not
+ correspond to one another.
+
+Correspondingnamespace and rootpage are mutually exclusive.
+
+Target and rootpage are mutually exclusive.
+ (This combination does not seem to be feasible.)
+
+If the target page already exists, the target page will be overwritten
+ if -overwrite is set or skipped otherwise.
+
+The list of pages to be imported can be generated outside of pywikibot:
+
+ for i in {1..10} ; do python3 pwb.py transwikiimport -interwikisource:mul \
+-page:"Page:How to become famous.djvu/$i" -fullhistory \
+-assignknownusers ; done
+
+(The pages Page:How to become famous.djvu/1, Page:How to become famous.djvu/2 \
+.. Page:How to become famous.djvu/10 will be copied
+ from wikisource (mul) to the home-wikisource, all versions will be imported
+ and the usernames will be identified
+ (existing pages will be skipped).)
+
+Or generated using the usual pywikibot generators:
+
+ python3 pwb.py transwikiimport -interwikisource:mul \
+-prefixindex:"Page:How to become famous.djvu" \
+-fullhistory -assignknownusers \
+-summary:"Book copied from oldwiki."
+
+(All pages like Page:How to become famous.djvu... will be copied
+ from wikisource (mul) to the home-wikisource, all versions will be
+ imported and the usernames will be identified
+ (existing pages will be skipped).)
+
+The parameter -test disables the import and the bot prints the names
+ of the pages that would be imported.
+Since the import of pages is quite an exceptional process and potentially
+ dangerous, it should be done carefully and tested in advance.
+The -test parameter can help to find out which pages would be moved
+ and what would be the target of the import.
+However it does not print the titles of the transcluded pages (e.g. templates)
+ if -includealltemplates is set.
+This option is quite *dangerous*. If the title of an existing page on home wiki
+ clashes with the title of one of the linked pages it would be *overwritten*.
+ The histories would be merged. (If the imported version is newer.)
+ Even if -overwrite is not set the linked page *can be overwritten*.
+
+
+Interwikisource
+---------------
+
+The list of wikis that can be used as a source
+ is defined in the variable $wgImportSources
+It can be viewed on the Specialpage:Import.
+
+
+Rights
+------
+
+For transwikiimport (and even to access the Specialpage:Import)
+ the appropriate flag on the account
+ must be set (usually administrator, transwiki importer or importer).
+
+
+"""
+#
+# (C) Draco flavus
+#
+# Distributed under the terms of the MIT license.
+#
+import pywikibot
+from pywikibot import pagegenerators
+from pywikibot.bot import suggest_help
+# from pywikibot.i18n import twtranslate
+from pywikibot.data import api
+
+# Maps the '&params;' placeholder of the module docstring to the generator help text (presumably substituted when help is shown - confirm).
+docuReplacements = {'&params;': pagegenerators.parameterHelp} # noqa: N816
+
+
+def api_query(site, params):  # build an api.Request for site from params, submit it, return the result
+ query = api.Request(site, parameters=params)
+ datas = query.submit()  # whatever Request.submit() returns is passed back unchanged
+ return datas
+
+
+def main(*args: str) -> None:
+ """
+ Process command line arguments and import the selected pages.
+
+ If args is an empty list, sys.argv is used.
+
+ :param args: command line arguments
+ """
+ local_args = pywikibot.handle_args(args)
+
+ interwikisource = ''
+ correspondingnamespace = 'all'
+ rootpage = ''
+ tags = ''
+ summary = 'Importing page from '  # default; the source code is appended when -interwikisource is parsed
+ test = False
+ overwrite = False
+ target = False
+ fullhistory = False
+ includealltemplates = False
+ assignknownusers = False
+ gen_args = []  # arguments not handled below; offered to the generator factory later
+
+ for arg in local_args:
+ if arg.startswith('-interwikisource'):
+ interwikisource = arg[len('-interwikisource:'):]
+ summary += interwikisource  # NOTE(review): also appended to a -summary given earlier on the command line
+ elif arg.startswith('-correspondingnamespace'):
+ correspondingnamespace = arg[len('-correspondingnamespace:'):]
+ elif arg.startswith('-rootpage'):
+ rootpage = arg[len('-rootpage:'):]
+ elif arg.startswith('-tags'):
+ tags = arg[len('-tags:'):]
+ elif arg.startswith('-summary'):
+ summary = arg[len('-summary:'):]
+ elif arg == '-test':
+ test = True
+ elif arg == '-overwrite':
+ overwrite = True
+ elif arg == '-target':
+ target = True
+ elif arg == '-fullhistory':
+ fullhistory = True
+ elif arg == '-includealltemplates':
+ includealltemplates = True
+ elif arg == '-assignknownusers':
+ assignknownusers = True
+ else:
+ gen_args.append(arg)
+
+ tosite = pywikibot.Site()
+ csrf = tosite.tokens['csrf']
+ fromsite = pywikibot.Site().interwiki(interwikisource)  # NOTE(review): interwikisource is '' when the option is missing
+ additional_text = ('Target site not different from source site.'
+ if fromsite == tosite else '')
+
+ gen_factory = pagegenerators.GeneratorFactory(site=tosite if target
+ else fromsite)
+ unknown_args = [arg for arg in gen_args if not gen_factory.handle_arg(arg)]
+
+ gen = gen_factory.getCombinedGenerator()
+
+ if suggest_help(missing_generator=not gen,
+ additional_text=additional_text,
+ unknown_parameters=unknown_args):
+ return
+
+ gen_args = ' '.join(gen_args)  # flattened to a string for the configuration printout only
+ pywikibot.output("""
+ Page transfer configuration
+ ---------------------------
+ Source: {fromsite}
+ Target: {tosite}
+
+ Generator of pages to transfer: {gen_args}
+ {target}
+ Prefix for transferred pages: {rootpage}
+ """.format(fromsite=fromsite, tosite=tosite, gen_args=gen_args,
+ rootpage=rootpage if rootpage else '(none)',
+ target='from target site\n' if target else ''))
+
+ if correspondingnamespace != 'all' and rootpage != '':
+ pywikibot.output('Both the correspondingnamespace and the rootpage are set! Exiting.')
+ elif target and rootpage != '':
+ pywikibot.output('Both the target and the rootpage are set! Exiting.')
+ else:
+ params = {
+ 'action': 'import',
+ 'token': csrf,
+ 'interwikisource': interwikisource,
+ 'fullhistory': fullhistory,
+ 'assignknownusers': assignknownusers,
+ 'templates': includealltemplates,
+ 'summary': summary
+ }
+ if correspondingnamespace != 'all':
+ params['namespace'] = correspondingnamespace
+ if rootpage != '':
+ params['rootpage'] = rootpage
+ if tags != '':
+ params['tags'] = tags
+ for page in gen:
+ if target:
+ if correspondingnamespace == 'all':
+ fromtitle = page.namespace().canonical_prefix() + page.title(with_ns=False)
+ else:
+ fromtitle = str(fromsite.namespaces[int(correspondingnamespace)]) + page.title(with_ns=False)
+ targetpage = page
+ else:
+ fromtitle = page.title(with_ns=True)
+ if correspondingnamespace == 'all':
+ totitle = page.namespace().canonical_prefix() + page.title(with_ns=False)
+ else:
+ totitle = str(tosite.namespaces[int(correspondingnamespace)]) + page.title(with_ns=False)
+ targetpage = pywikibot.Page(tosite, totitle)
+ if not overwrite:
+ if targetpage.exists():
+ pywikibot.warning(
+ 'Skipped {} (target page {} exists)'.format(
+ page.title(as_link=True, force_interwiki=True),
+ targetpage.title(as_link=True)
+ )
+ )
+ continue
+ else:
+ if not targetpage.botMayEdit():
+ pywikibot.warning(
+ 'Target page {} is not editable by bots'.format(
+ targetpage.title(as_link=True)
+ )
+ )
+ continue
+ params['interwikipage'] = fromtitle  # the same params dict is reused; only the page title changes per page
+ if test:
+ pywikibot.output('Simulation: {} → {}'.format(
+ fromtitle,
+ targetpage.title(with_ns=True)
+ )
+ )
+ else:
+ # For testing, comment out the following line.
+ api_query(tosite, params)
+ # For testing, remove the hash sign from the following two lines.
+ # pywikibot.output(params)
+ # pywikibot.output(fromtitle + ' → ' + page.title(with_ns=True) if target else totitle)
+
+
+if __name__ == '__main__':  # call main() only when the module is run as a script
+ main()
diff --git a/tox.ini b/tox.ini
index 17b9aca..7069a31 100644
--- a/tox.ini
+++ b/tox.ini
@@ -192,6 +192,7 @@
scripts/reflinks.py: N802, N816
scripts/replace.py: N802, N803, N806, N816
scripts/solve_disambiguation.py: N802, N806
+ scripts/transwikiimport.py: C103, D103, D205, D400, E123, E501
setup.py: T001, T201
tests/api_tests.py: N802
tests/archivebot_tests.py: N802

To view, visit change 914038. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ic4353c3863d8d6abc60200dc7707981884c7f055
Gerrit-Change-Number: 914038
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged