jenkins-bot has submitted this change and it was merged.
Change subject: Add custom formatter to listpages.py ......................................................................
Add custom formatter to listpages.py
Custom format can be applied to the following items extrapolated from a page object:
site: obtained from page._link._site; title: obtained from page._link._title; loc_title: obtained from page._link.canonical_title(); can_title: obtained from page._link.ns_title(), based either on the canonical ns name or on the ns name in the language specified by the -outputlang param; onsite: obtained from pywikibot.Site(outputlang, self.site.family); trs_title: obtained from page._link.ns_title(onsite=onsite)
Added tests for Link.ns_title().
Change-Id: Ia911be7fb45a1e29515208b1b54ee6a213ffc29e --- M pywikibot/page.py M scripts/listpages.py M tests/page_tests.py 3 files changed, 184 insertions(+), 6 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved XZise: Looks good to me, but someone else must approve jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py index 9a865a8..a6fe416 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -3840,6 +3840,41 @@ else: return self.title
+ def ns_title(self, onsite=None): + """Return full page title, including namespace. + + @param onsite: site object + if specified, present title using onsite local namespace, + otherwise use self canonical namespace. + + if no corresponding namespace is found in onsite, + pywikibot.Error is raised. + + """ + + ns_id = self.namespace + ns = self.site.namespaces()[ns_id] + ns_names = list(self.site.namespaces()[ns_id]) + + if onsite is None: + namespace = ns.canonical_name + else: + # look for corresponding ns in onsite by name comparison + for name in ns_names: + onsite_ns = ns.lookup_name(name, namespaces=onsite.namespaces()) + # not found + if onsite_ns is None: + raise pywikibot.Error( + u'No corresponding namespace found for namespace %s on %s.' + % (self.site.namespaces()[ns_id], onsite)) + else: + namespace = onsite_ns.custom_name + + if namespace: + return u'%s:%s' % (namespace, self.title) + else: + return self.title + def astext(self, onsite=None): """Return a text representation of the link.
diff --git a/scripts/listpages.py b/scripts/listpages.py index 3557566..aa10f47 100644 --- a/scripts/listpages.py +++ b/scripts/listpages.py @@ -5,12 +5,61 @@
These parameters are supported to specify which pages titles to print:
+-format Defines the output format. + + Can be a custom string according to python string.format() notation or + can be selected by a number from the following list (1 is default format): + 1 - u'{num:4d} {page.title}' + --> 10 PageTitle + + 2 - u'{num:4d} [[{page.title}]]' + --> 10 [[PageTitle]] + + 3 - u'{page.title}' + --> PageTitle + + 4 - u'[[{page.title}]]' + --> [[PageTitle]] + + 5 - u'{num:4d} \03{{lightred}}{page.loc_title:<40}\03{{default}}' + --> 10 PageTitle (colorised in lightred) + + 6 - u'{num:4d} {page.loc_title:<40} {page.can_title:<40}' + --> 10 localised_Namespace:PageTitle canonical_Namespace:PageTitle + + 7 - u'{num:4d} {page.loc_title:<40} {page.trs_title:<40}' + --> 10 localised_Namespace:PageTitle outputlang_Namespace:PageTitle + (*) requires "-outputlang:lang" to be set. + + num is the sequential number of the listed page. + +-outputlang Language for translation of namespaces + +-notitle Page title is not printed. + +-get Page content is printed. + + +Custom format can be applied to the following items extrapolated from a + page object: + + site: obtained from page._link._site + + title: obtained from page._link._title + + loc_title: obtained from page._link.canonical_title() + + can_title: obtained from page._link.ns_title() + based either on the canonical namespace name or on the namespace name + in the language specified by the -outputlang param; + a default value '******' will be used if no ns is found. + + onsite: obtained from pywikibot.Site(outputlang, self.site.family) + + trs_title: obtained from page._link.ns_title(onsite=onsite) + + &params; - --notitle Page title is not printed. - --get Page content is printed. - """ # # (C) Pywikibot team, 2008-2014 @@ -20,15 +69,83 @@ __version__ = '$Id$' #
+ import pywikibot from pywikibot.pagegenerators import GeneratorFactory, parameterHelp
docuReplacements = {'&params;': parameterHelp}
+class Formatter(object): + + """Structure with Page attributes exposed for formatting from cmd line.""" + + fmt_options = { + '1': u"{num:4d} {page.title}", + '2': u"{num:4d} [[{page.title}]]", + '3': u"{page.title}", + '4': u"[[{page.title}]]", + '5': u"{num:4d} \03{{lightred}}{page.loc_title:<40}\03{{default}}", + '6': u"{num:4d} {page.loc_title:<40} {page.can_title:<40}", + '7': u"{num:4d} {page.loc_title:<40} {page.trs_title:<40}", + } + + # Identify which formats need outputlang + fmt_need_lang = [k for k, v in fmt_options.items() if 'trs_title' in v] + + def __init__(self, page, outputlang=None, default='******'): + """ + Constructor. + + @param page: the page to be formatted. + @type page: Page object. + @param outputlang: language code in which namespace before title should + be translated. + + Page namespace will be searched in Site(outputlang, page.site.family) + and, if found, its custom name will be used in page.title(). + + @type outputlang: str or None, if no translation is wanted. + @param default: default string to be used if no corresponding namespace + is found when outputlang is not None. + + """ + + self.site = page._link._site + self.title = page._link.title + self.loc_title = page._link.canonical_title() + self.can_title = page._link.ns_title() + self.outputlang = outputlang + if outputlang is not None: + # Cache onsite in case of translations. + if not hasattr(self, "onsite"): + self.onsite = pywikibot.Site(outputlang, self.site.family) + try: + self.trs_title = page._link.ns_title(onsite=self.onsite) + # Fallback if no corresponding namespace is found in onsite. + except pywikibot.Error: + self.trs_title = u'%s:%s' % (default, page._link.title) + + def output(self, num=None, fmt=1): + """Output formatted string.""" + fmt = self.fmt_options.get(fmt, fmt) + # If selected format requires trs_title, outputlang must be set. 
+ if (fmt in self.fmt_need_lang or + 'trs_title' in fmt and + self.outputlang is None): + raise ValueError( + u"Required format code needs 'outputlang' parameter set.") + if num is None: + return fmt.format(page=self) + else: + return fmt.format(num=num, page=self) + + def main(*args): gen = None notitle = False + fmt = '1' + outputlang = None page_get = False
# Process global args and prepare generator args parser @@ -38,6 +155,11 @@ for arg in local_args: if arg == '-notitle': notitle = True + elif arg.startswith("-format:"): + fmt = arg[len("-format:"):] + fmt = fmt.replace(u'\03{{', u'\03{{') + elif arg.startswith("-outputlang:"): + outputlang = arg[len("-outputlang:"):] elif arg == '-get': page_get = True else: @@ -47,12 +169,14 @@ if gen: for i, page in enumerate(gen, start=1): if not notitle: - pywikibot.stdout("%4d: %s" % (i, page.title())) + page_fmt = Formatter(page, outputlang) + pywikibot.stdout(page_fmt.output(num=i, fmt=fmt)) if page_get: # TODO: catch exceptions pywikibot.output(page.text, toStdout=True) else: pywikibot.showHelp()
+ if __name__ == "__main__": main() diff --git a/tests/page_tests.py b/tests/page_tests.py index 4383cb1..b112ee3 100644 --- a/tests/page_tests.py +++ b/tests/page_tests.py @@ -27,6 +27,8 @@ enwiki = pywikibot.Site("en", "wikipedia") frwiki = pywikibot.Site("fr", "wikipedia") itwikt = pywikibot.Site("it", "wiktionary") + enws = pywikibot.Site("en", "wikisource") + itws = pywikibot.Site("it", "wikisource")
namespaces = {0: [u""], # en.wikipedia.org namespaces for testing 1: [u"Talk:"], # canonical form first, then others @@ -91,6 +93,7 @@ self.assertEqual(m.title, self.titles[title])
def testHashCmp(self): + """Test hash comparison.""" # All links point to en:wikipedia:Test l1 = pywikibot.page.Link('Test', source=self.enwiki) l2 = pywikibot.page.Link('en:Test', source=self.frwiki) @@ -110,6 +113,22 @@ self.assertNotEqual(l1, other) self.assertNotEqual(hash(l1), hash(other))
+ def test_ns_title(self): + """Test that title is returned with correct namespace.""" + l1 = pywikibot.page.Link('Indice:Test', source=self.itws) + self.assertEqual(l1.ns_title(), 'Index:Test') + self.assertEqual(l1.ns_title(onsite=self.enws), 'Index:Test') + + # wikisource:it kept Autore as canonical name + l2 = pywikibot.page.Link('Autore:Albert Einstein', source=self.itws) + self.assertEqual(l2.ns_title(), 'Autore:Albert Einstein') + self.assertEqual(l2.ns_title(onsite=self.enws), 'Author:Albert Einstein') + + # Translation namespace does not exist on wikisource:it + l3 = pywikibot.page.Link('Translation:Albert Einstein', source=self.enws) + self.assertEqual(l3.ns_title(), 'Translation:Albert Einstein') + self.assertRaises(pywikibot.Error, l3.ns_title, onsite=self.itws) +
class TestPageObject(PywikibotTestCase):