jenkins-bot has submitted this change and it was merged.
Change subject: listpages.py: Add option to save page in local file ......................................................................
listpages.py: Add option to save page in local file
Provide the option to save Page to a local file.
File has the same name as the page.
File encoding can be specified and it defaults to config.textfile_encoding.
Directory can be optionally specified and created.
Change-Id: I03f13f605301c84583c7f1f9b3916ba357e08b9d --- M scripts/listpages.py 1 file changed, 63 insertions(+), 18 deletions(-)
Approvals: XZise: Looks good to me, approved jenkins-bot: Verified
diff --git a/scripts/listpages.py b/scripts/listpages.py index 0580a54..72cfcb5 100644 --- a/scripts/listpages.py +++ b/scripts/listpages.py @@ -2,7 +2,8 @@ r""" Print a list of pages, as defined by page generator parameters.
-Optionally, it also prints page content to STDOUT. +Optionally, it also prints page content to STDOUT or save it to a file +in the current directory.
These parameters are supported to specify which pages titles to print:
@@ -34,30 +35,39 @@
num is the sequential number of the listed page.
--outputlang Language for translation of namespaces +-outputlang Language for translation of namespaces.
-notitle Page title is not printed.
-get Page content is printed.
+-save Save Page content to a file named as page.title(as_filename=True). + Directory can be set with -dir:dir_name + If no dir is specified, current direcory will be used. + +-encode File encoding can be specified with '-encode:name' (name must be a + valid python encoding: utf-8, etc.). + If not specified, it defaults to config.textfile_encoding. +
Custom format can be applied to the following items extrapolated from a page object:
- site: obtained from page._link._site + site: obtained from page._link._site.
- title: obtained from page._link._title + title: obtained from page._link._title.
- loc_title: obtained from page._link.canonical_title() + loc_title: obtained from page._link.canonical_title().
- can_title: obtained from page._link.ns_title() + can_title: obtained from page._link.ns_title(). based either the canonical namespace name or on the namespace name in the language specified by the -trans param; a default value '******' will be used if no ns is found.
- onsite: obtained from pywikibot.Site(outputlang, self.site.family) + onsite: obtained from pywikibot.Site(outputlang, self.site.family).
- trs_title: obtained from page._link.ns_title(onsite=onsite) + trs_title: obtained from page._link.ns_title(onsite=onsite). + If selected format requires trs_title, outputlang must be set.
¶ms; @@ -70,8 +80,10 @@ __version__ = '$Id$' #
+import os
import pywikibot +from pywikibot import config2 as config from pywikibot.pagegenerators import GeneratorFactory, parameterHelp
docuReplacements = {'¶ms;': parameterHelp} @@ -103,12 +115,12 @@ @param outputlang: language code in which namespace before title should be translated.
- Page namespace will be searched in Site(outputlang, page.site.family) + Page ns will be searched in Site(outputlang, page.site.family) and, if found, its custom name will be used in page.title().
@type outputlang: str or None, if no translation is wanted. - @param default: default string to be used if no corresponding namespace - is found when outputlang is not None. + @param default: default string to be used if no corresponding + namespace is found when outputlang is not None.
""" self.site = page._link.site @@ -117,7 +129,7 @@ self.can_title = page._link.ns_title() self.outputlang = outputlang if outputlang is not None: - # Cache onsite in case of tranlations. + # Cache onsite in case of translations. if not hasattr(self, "onsite"): self.onsite = pywikibot.Site(outputlang, self.site.family) try: @@ -155,6 +167,8 @@ fmt = '1' outputlang = None page_get = False + base_dir = None + encoding = config.textfile_encoding
# Process global args and prepare generator args parser local_args = pywikibot.handle_args(args) @@ -163,15 +177,39 @@ for arg in local_args: if arg == '-notitle': notitle = True - elif arg.startswith("-format:"): - fmt = arg[len("-format:"):] + elif arg.startswith('-format:'): + fmt = arg[len('-format:'):] fmt = fmt.replace(u'\03{{', u'\03{{') - elif arg.startswith("-outputlang:"): - outputlang = arg[len("-outputlang:"):] + elif arg.startswith('-outputlang:'): + outputlang = arg[len('-outputlang:'):] elif arg == '-get': page_get = True + elif arg.startswith('-save'): + base_dir = arg.partition(':')[2] or '.' + elif arg.startswith('-encode:'): + encoding = arg.partition(':')[2] else: genFactory.handleArg(arg) + + if base_dir: + base_dir = os.path.expanduser(base_dir) + if not os.path.isabs(base_dir): + base_dir = os.path.normpath(os.path.join(os.getcwd(), base_dir)) + + if not os.path.exists(base_dir): + pywikibot.output(u'Directory "%s" does not exist.' % base_dir) + choice = pywikibot.input_yn( + u'Do you want to create it ("No" to continue without saving)?') + if choice: + os.makedirs(base_dir, mode=0o744) + else: + base_dir = None + elif not os.path.isdir(base_dir): + # base_dir is a file. + pywikibot.warning(u'Not a directory: "%s"\n' + u'Skipping saving ...' + % base_dir) + base_dir = None
gen = genFactory.getCombinedGenerator() if gen: @@ -180,8 +218,15 @@ page_fmt = Formatter(page, outputlang) pywikibot.stdout(page_fmt.output(num=i, fmt=fmt)) if page_get: - # TODO: catch exceptions - pywikibot.output(page.text, toStdout=True) + try: + pywikibot.output(page.text, toStdout=True) + except pywikibot.Error as err: + pywikibot.output(err) + if base_dir: + filename = os.path.join(base_dir, page.title(as_filename=True)) + pywikibot.output(u'Saving %s to %s' % (page.title(), filename)) + with open(filename, mode='wb') as f: + f.write(page.text.encode(encoding)) else: pywikibot.showHelp()