Xqt submitted this change.

View Change

Approvals: Xqt: Verified; Looks good to me, approved
Return iterators from handlers

- Transform handlers into generators and handle everything
else (exists arg, multiple values, actual import) from
the dispatcher.
- Optimize treat_field for the common case when the field
was not specified by the user.

Change-Id: I457eb45adb8e5cd8109fd03279bae186693916ce
---
M scripts/harvest_template.py
1 file changed, 45 insertions(+), 56 deletions(-)

diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py
index 3d9e6f6..e07362b 100755
--- a/scripts/harvest_template.py
+++ b/scripts/harvest_template.py
@@ -99,7 +99,7 @@
#
import signal
import sys
-from typing import Any, Optional
+from typing import Any, Iterator, Optional

import pywikibot
from pywikibot import pagegenerators as pg
@@ -177,7 +177,6 @@
:type multi: bool
"""
super().__init__(**kwargs)
- # TODO: Make it a list including the redirects to the template
self.fields = {}
for key, value in fields.items():
if isinstance(value, tuple):
@@ -185,6 +184,7 @@
else: # backwards compatibility
self.fields[key] = (value, PropertyOptionHandler())
self.cacheSources()
+ # TODO: Make it a list including the redirects to the template
template_title = template_title.replace('_', ' ')
self.templateTitles = self.getTemplateSynonyms(template_title)
self.linkR = textlib.compileLinkR()
@@ -296,12 +296,15 @@
def treat_field(self,
item: pywikibot.page.ItemPage,
field_item: Tuple[str, str]) -> None:
- """Process a single field of template fileddict.
+ """Process a single field of template fielddict.

.. versionadded:: 7.4
"""
field, value = field_item
field = field.strip()
+ if not field or field not in self.fields:
+ return
+
site = self.current_page.site

# todo: extend the list of tags to ignore
@@ -309,37 +312,46 @@
# todo: eventually we may want to import the references
value, tags=['ref'], site=site).strip()

- if not field or not value or field not in self.fields:
+ if not value:
return

# This field contains something useful for us
prop, options = self.fields[field]
- exists_arg = list(self._get_option_with_fallback(options, 'exists'))
- claim = pywikibot.Claim(self.repo, prop)
+ ppage = pywikibot.PropertyPage(self.repo, prop)
handler = getattr(self, 'handle_'
- + claim.type.lower().replace('-', '_'), None)
+ + ppage.type.lower().replace('-', '_'), None)
if not handler:
pywikibot.info('{} is not a supported datatype.'
- .format(claim.type))
+ .format(ppage.type))
return

- if handler(claim, value, item, field, exists_arg):
+ exists_arg = set(self._get_option_with_fallback(options, 'exists'))
+ do_multi = self._get_option_with_fallback(options, 'multi')
+
+ for target in handler(value, item, field):
+ claim = ppage.newClaim()
+ claim.setTarget(target)
# A generator might yield pages from multiple sites
- self.user_add_claim_unless_exists(
+ added = self.user_add_claim_unless_exists(
item, claim, ''.join(exists_arg), site, pywikibot.info)

- def handle_wikibase_item(self, claim, value: str,
+ # Stop after the first match if not supposed to add
+ # multiple values
+ if not do_multi:
+ break
+
+ # Update exists_arg, so we can add more values
+ if added:
+ exists_arg.add('p')
+
+ def handle_wikibase_item(self, value: str,
item: pywikibot.page.ItemPage,
- field: str,
- exists_arg: List[str]) -> bool:
+ field: str) -> Iterator[pywikibot.ItemPage]:
"""Handle 'wikibase-item' claim type.

- .. note:: `exists_arg` may be modified in place which is reused
- by the caller method
.. versionadded:: 7.4
"""
prop, options = self.fields[field]
- do_multi = self._get_option_with_fallback(options, 'multi')
matched = False

# Try to extract a valid page
@@ -347,63 +359,41 @@
matched = True
link_text = match.group(1)
linked_item = self.template_link_target(item, link_text)
- added = False
-
if linked_item:
- claim.setTarget(linked_item)
- added = self.user_add_claim_unless_exists(
- item, claim, exists_arg, self.current_page.site,
- pywikibot.info)
- claim = pywikibot.Claim(self.repo, prop)
-
- # stop after the first match if not supposed to add
- # multiple values
- if not do_multi:
- break
-
- # update exists_arg, so we can add more values
- if 'p' not in exists_arg and added:
- exists_arg += 'p'
+ yield linked_item

if matched:
- return False
+ return

if not self._get_option_with_fallback(options, 'islink'):
pywikibot.info(
- '{} field {} value {} is not a wikilink. Skipping.'
- .format(claim.getID(), field, value))
- return False
+ '{} field {} value "{}" is not a wikilink. Skipping.'
+ .format(prop, field, value))
+ return

linked_item = self.template_link_target(item, value)
- if not linked_item:
- return False
+ if linked_item:
+ yield linked_item

- claim.setTarget(linked_item)
- return True
-
- def handle_string(self, claim, value, *args) -> bool:
+ def handle_string(self, value, *args) -> Iterator[str]:
"""Handle 'string' and 'external-id' claim type.

.. versionadded:: 7.4
"""
- claim.setTarget(value.strip())
- return True
+ yield value.strip()

handle_external_id = handle_string

- def handle_url(self, claim, value, *args) -> bool:
+ def handle_url(self, value, *args) -> Iterator[str]:
"""Handle 'url' claim type.

.. versionadded:: 7.4
"""
- match = self.linkR.search(value)
- if not match:
- return False
+ for match in self.linkR.finditer(value):
+ yield match.group('url')

- claim.setTarget(match.group('url'))
- return True
-
- def handle_commonsmedia(self, claim, value, *args) -> bool:
+ def handle_commonsmedia(self, value, *args
+ ) -> Iterator[pywikibot.FilePage]:
"""Handle 'commonsMedia' claim type.

.. versionadded:: 7.4
@@ -414,12 +404,11 @@
image = pywikibot.FilePage(image.getRedirectTarget())

if not image.exists():
- pywikibot.info("{} doesn't exist. I can't link to it"
+ pywikibot.info("{} doesn't exist so it cannot be linked"
.format(image.title(as_link=True)))
- return False
+ return

- claim.setTarget(image)
- return True
+ yield image


def main(*args: str) -> None:

To view, visit change 810940. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I457eb45adb8e5cd8109fd03279bae186693916ce
Gerrit-Change-Number: 810940
Gerrit-PatchSet: 3
Gerrit-Owner: Matěj Suchánek <matejsuchanek97@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged