jenkins-bot submitted this change.

View Change

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
[bugfix] Do not rely on self.current_page.site

self.current_page is set to the item whenever a suitable value
to import is found. This causes page existence checks to run
against Wikidata instead of the original site, and leads to
imports of wrong references.

Change-Id: I99e6914d996d9a7ae540d01cde073e52a4afd286
---
M pywikibot/bot.py
M scripts/harvest_template.py
M tests/harvest_templates_tests.py
3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/pywikibot/bot.py b/pywikibot/bot.py
index 6ca5c61..3fd0700 100644
--- a/pywikibot/bot.py
+++ b/pywikibot/bot.py
@@ -2218,6 +2218,9 @@
page save will be reported and ignored (default: False)
:return: whether the item was saved successfully

+ .. note:: calling this method sets the current_page property
+ to the item which changes the site property
+
.. note:: calling this method with the 'source' argument modifies
the provided claim object in place
"""
@@ -2266,6 +2269,9 @@
:param logger_callback: function logging the output of the method
:return: whether the claim could be added

+ .. note:: calling this method may change the current_page property
+ to the item which will also change the site property
+
.. note:: calling this method with the 'source' argument modifies
the provided claim object in place
"""
diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py
index 2f28dd5..6dd4677 100755
--- a/scripts/harvest_template.py
+++ b/scripts/harvest_template.py
@@ -218,6 +218,7 @@

def template_link_target(self,
item: pywikibot.ItemPage,
+ site: pywikibot.site.BaseSite,
link_text: str) -> Optional[pywikibot.ItemPage]:
"""Find the ItemPage target for a given link text.

@@ -225,7 +226,7 @@
Only follow the redirect target if redirect page has no
wikibase item.
"""
- linked_page = pywikibot.Page(self.current_page.site, link_text)
+ linked_page = pywikibot.Page(site, link_text)
try:
exists = linked_page.exists()
except (InvalidTitleError, InvalidPageError):
@@ -296,10 +297,11 @@

# We found the template we were looking for
for field_item in fielddict.items():
- self.treat_field(item, field_item)
+ self.treat_field(item, page.site, field_item)

def treat_field(self,
item: pywikibot.page.ItemPage,
+ site: pywikibot.site.BaseSite,
field_item: Tuple[str, str]) -> None:
"""Process a single field of template fielddict.

@@ -310,8 +312,6 @@
if not field or field not in self.fields:
return

- site = self.current_page.site
-
# todo: extend the list of tags to ignore
value = textlib.removeDisabledParts(
# todo: eventually we may want to import the references
@@ -334,7 +334,7 @@
do_multi = self._get_option_with_fallback(options, 'multi')
inverse_prop = self._get_option_with_fallback(options, 'inverse')

- for target in handler(value, item, field):
+ for target in handler(value, site, item, field):
claim = ppage.newClaim()
claim.setTarget(target)
# A generator might yield pages from multiple sites
@@ -362,6 +362,7 @@
exists_arg.add('p')

def handle_wikibase_item(self, value: str,
+ site: pywikibot.site.BaseSite,
item: pywikibot.page.ItemPage,
field: str) -> Iterator[pywikibot.ItemPage]:
"""Handle 'wikibase-item' claim type.
@@ -375,7 +376,7 @@
for match in pywikibot.link_regex.finditer(value):
matched = True
link_text = match.group(1)
- linked_item = self.template_link_target(item, link_text)
+ linked_item = self.template_link_target(item, site, link_text)
if linked_item:
yield linked_item

@@ -388,7 +389,7 @@
.format(prop, field, value))
return

- linked_item = self.template_link_target(item, value)
+ linked_item = self.template_link_target(item, site, value)
if linked_item:
yield linked_item

@@ -410,13 +411,13 @@
for match in self.linkR.finditer(value):
yield match.group('url')

- def handle_commonsmedia(self, value, *args
+ def handle_commonsmedia(self, value, site, *args
) -> Iterator[pywikibot.FilePage]:
"""Handle 'commonsMedia' claim type.

.. versionadded:: 7.5
"""
- repo = self.current_page.site.image_repository()
+ repo = site.image_repository()
image = pywikibot.FilePage(repo, value)
if image.isRedirectPage():
image = pywikibot.FilePage(image.getRedirectTarget())
diff --git a/tests/harvest_templates_tests.py b/tests/harvest_templates_tests.py
index b0804ee..3909727 100644
--- a/tests/harvest_templates_tests.py
+++ b/tests/harvest_templates_tests.py
@@ -44,7 +44,7 @@
with self.subTest(link=link, item=item):
dummy_item = ItemPage(self.site.data_repository(), 'Q1')
target = HarvestRobot.template_link_target(
- DummyBot(self.site), dummy_item, link)
+ DummyBot(self.site), dummy_item, self.site, link)
self.assertIsInstance(target, ItemPage)
self.assertEqual(target.title(), item)


To view, visit change 815234. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I99e6914d996d9a7ae540d01cde073e52a4afd286
Gerrit-Change-Number: 815234
Gerrit-PatchSet: 3
Gerrit-Owner: Matěj Suchánek <matejsuchanek97@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged