Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/810472 )
Change subject: [IMPR] Only follow redirects in harvest_template.py if no wikibase item exists
......................................................................
[IMPR] Only follow redirects in harvest_template.py if no wikibase item exists
This implements proposal no. 2 of T311883.
- Make _template_link_target a public staticmethod
- Test for InvalidPageError with linked_page.exists() call
- Do not follow redirect in template_link_target if the redirect page has a wikibase item
- Solve multiple return statements issue
- Add test for this new behaviour
Bug: T311883
Change-Id: I87fe427009f9bbe5db2208d0ed850eb0d48bd505
---
M scripts/harvest_template.py
M tests/__init__.py
A tests/harvest_templates_tests.py
3 files changed, 74 insertions(+), 19 deletions(-)
Approvals:
  jenkins-bot: Verified
  Xqt: Looks good to me, approved
diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py index c1df570..a225b8d 100755 --- a/scripts/harvest_template.py +++ b/scripts/harvest_template.py @@ -106,7 +106,11 @@ from pywikibot import textlib from pywikibot.backports import List from pywikibot.bot import ConfigParserBot, OptionHandler, WikidataBot -from pywikibot.exceptions import InvalidTitleError, NoPageError +from pywikibot.exceptions import ( + InvalidPageError, + InvalidTitleError, + NoPageError, +)
willstop = False @@ -203,15 +207,23 @@ titles.append(temp.title(with_ns=False)) return titles
- def _template_link_target(self, item, link_text - ) -> Optional[pywikibot.ItemPage]: + @staticmethod + def template_link_target(item: pywikibot.ItemPage, + link_text: str) -> Optional[pywikibot.ItemPage]: + """Find the ItemPage target for a given link text. + + .. versionchanged:: 7.4 + Only follow the redirect target if redirect page has no + wikibase item. + """ link = pywikibot.Link(link_text) linked_page = pywikibot.Page(link) try: exists = linked_page.exists() - except InvalidTitleError: - pywikibot.error('"{}" is not a valid title so it cannot be linked.' - ' Skipping.'.format(link_text)) + except (InvalidTitleError, InvalidPageError): + pywikibot.error('"{}" is not a valid title or the page itself is ' + 'invalid so it cannot be linked. Skipping.' + .format(link_text)) return None
if not exists: @@ -219,23 +231,24 @@ 'Skipping.'.format(linked_page)) return None
- if linked_page.isRedirectPage(): - linked_page = linked_page.getRedirectTarget() - - try: - linked_item = pywikibot.ItemPage.fromPage(linked_page) - except NoPageError: - linked_item = None + while True: + try: + linked_item = pywikibot.ItemPage.fromPage(linked_page) + except NoPageError: + if linked_page.isRedirectPage(): + linked_page = linked_page.getRedirectTarget() + continue + linked_item = None + break
if not linked_item or not linked_item.exists(): pywikibot.output('{} does not have a wikidata item to link with. ' 'Skipping.'.format(linked_page)) - return None - - if linked_item.title() == item.title(): + linked_item = None + elif linked_item.title() == item.title(): pywikibot.output('{} links to itself. Skipping.' .format(linked_page)) - return None + linked_item = None
return linked_item
@@ -295,7 +308,7 @@ for match in pywikibot.link_regex.finditer(value): matched = True link_text = match.group(1) - linked_item = self._template_link_target( + linked_item = self.template_link_target( item, link_text) added = False if linked_item: @@ -321,7 +334,7 @@ .format(claim.getID(), field, value)) continue
- linked_item = self._template_link_target(item, value) + linked_item = self.template_link_target(item, value) if not linked_item: continue
diff --git a/tests/__init__.py b/tests/__init__.py index 39ce2b0..fa97e19 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -148,6 +148,7 @@ 'fixing_redirects', 'generate_family_file', 'generate_user_files', + 'harvest_templates', 'interwikidata', 'l10n', 'patrolbot', diff --git a/tests/harvest_templates_tests.py b/tests/harvest_templates_tests.py new file mode 100644 index 0000000..4ea7a6e --- /dev/null +++ b/tests/harvest_templates_tests.py @@ -0,0 +1,41 @@ +#!/usr/bin/python3 +"""Tests for scripts/harvest_template.py.""" +# +# (C) Pywikibot team, 2022 +# +# Distributed under the terms of the MIT license. +# +import unittest +from contextlib import suppress + +from pywikibot import ItemPage +from scripts.harvest_template import HarvestRobot + +from tests.aspects import ScriptMainTestCase + + +class TestHarvestRobot(ScriptMainTestCase): + + """Test HarvestRobot.""" + + family = 'wikipedia' + code = 'cs' + + def test_template_link_target(self): + """Test template_link_target static method.""" + tests = [ + ('Pes', 'Q144'), + ('Imaginární číslo', 'Q9165172'), + ('Sequana', 'Q472766'), + ] + for link, item in tests: + with self.subTest(link=link, item=item): + dummy_item = ItemPage(self.site.data_repository(), 'Q1') + target = HarvestRobot.template_link_target(dummy_item, link) + self.assertIsInstance(target, ItemPage) + self.assertEqual(target.title(), item) + + +if __name__ == '__main__': # pragma: no cover + with suppress(SystemExit): + unittest.main()