jenkins-bot has submitted this change and it was merged.
Change subject: Add ItemClaimFilterPageGenerator ......................................................................
Add ItemClaimFilterPageGenerator
The generator filters ItemPages that do or do not contain a specified claim. It can be used via the -onlyif and -onlyifnot command-line options.
Bug: T69568 Bug: T57005 Bug: T76547 Change-Id: I850f1063016fd0c8845c9509634f85b1830724ef --- M pywikibot/page.py M pywikibot/pagegenerators.py M tests/pagegenerators_tests.py 3 files changed, 254 insertions(+), 0 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py index 45e41c3..a10291f 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -3931,6 +3931,72 @@ else: self.qualifiers[qualifier.getID()] = [qualifier]
def target_equals(self, value):
    """
    Check whether the Claim's target is equal to specified value.

    The function checks for:
    - ItemPage ID equality
    - WbTime year equality
    - Coordinate equality, regarding precision
    - direct equality

    A value that cannot be parsed as a year, or as a
    "latitude,longitude[,precision]" string, simply fails that particular
    check instead of raising; the direct equality check is still applied.

    @param value: the value to compare with
    @return: true if the Claim's target is equal to the value provided,
        false otherwise
    @rtype: bool
    """
    target = self.target

    if (isinstance(target, pywikibot.ItemPage) and
            isinstance(value, str) and
            target.id == value):
        return True

    if (isinstance(target, pywikibot.WbTime) and
            not isinstance(value, pywikibot.WbTime)):
        # Anything that is not convertible to an integer cannot be a year.
        try:
            year = int(value)
        except (TypeError, ValueError):
            year = None
        if year is not None and target.year == year:
            return True

    if (isinstance(target, pywikibot.Coordinate) and
            isinstance(value, str)):
        try:
            coord_args = [float(part) for part in value.split(',')]
        except ValueError:
            coord_args = []  # not a coordinate string

        # Latitude and longitude are both required for a comparison.
        if len(coord_args) >= 2:
            if len(coord_args) >= 3:
                precision = coord_args[2]
            else:
                precision = 0.0001  # Default value (~10 m at equator)
            try:
                if target.precision is not None:
                    precision = max(precision, target.precision)
            except TypeError:
                # A target precision that cannot be compared with a float
                # is ignored; the requested precision is used as is.
                pass

            if (abs(target.lat - coord_args[0]) <= precision and
                    abs(target.lon - coord_args[1]) <= precision):
                return True

    return bool(target == value)


def has_qualifier(self, qualifier_id, target):
    """
    Check whether Claim contains specified qualifier.

    @param qualifier_id: id of the qualifier
    @type qualifier_id: str
    @param target: qualifier target to check presence of
    @return: true if the qualifier was found, false otherwise
    @rtype: bool
    @raise ValueError: if this Claim is itself a qualifier or a reference
    """
    if self.isQualifier or self.isReference:
        raise ValueError(u'Qualifiers and references cannot have '
                         u'qualifiers.')

    # Every qualifier stored under the id is compared via target_equals.
    return any(qualifier.target_equals(target)
               for qualifier in self.qualifiers.get(qualifier_id, []))


# Unchanged diff context that follows this hunk in pywikibot/page.py:
#     def _formatValue(self):
#         """
#         Format the target into the proper JSON value that Wikibase wants.
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index aeb2d15..6f01605 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -235,6 +235,24 @@ Case insensitive regular expressions will be used and dot matches any character, including a newline.
+-onlyif A claim the page needs to contain, otherwise the item won't + be returned. + The format is property=value,qualifier=value. Multiple (or + none) qualifiers can be passed, separated by commas. + Examples: P1=Q2 (property P1 must contain value Q2), + P3=Q4,P5=Q6,P6=Q7 (property P3 with value Q4 and + qualifiers: P5 with value Q6 and P6 with value Q7). + Value can be page ID, coordinate in format: + latitude,longitude[,precision] (all values are in decimal + degrees), year, or plain string. + The argument can be provided multiple times and the item + page will be returned only if all of the claims are present. + Argument can be also given as "-onlyif:expression". + +-onlyifnot A claim the page must not contain, otherwise the item won't + be returned. + For usage and examples, see -onlyif above. + -intersect Work on the intersection of all the provided generators. """
@@ -270,6 +288,7 @@ self.step = None self.limit = None self.articlefilter_list = [] + self.claimfilter_list = [] self.intersect = False self._site = site
@@ -353,6 +372,13 @@ else: gensList = CombinedPageGenerator(self.gens) dupfiltergen = DuplicateFilterPageGenerator(gensList) + + if self.claimfilter_list: + dupfiltergen = PreloadingItemGenerator(dupfiltergen) + for claim in self.claimfilter_list: + dupfiltergen = ItemClaimFilterPageGenerator(dupfiltergen, + claim[0], claim[1], + claim[2], claim[3])
if self.articlefilter_list: return RegexBodyFilterPageGenerator( @@ -664,6 +690,20 @@ u'Which pattern do you want to grep?')) else: self.articlefilter_list.append(arg[6:]) + return True + elif arg.startswith('-onlyif') or arg.startswith('-onlyifnot'): + ifnot = arg.startswith('-onlyifnot') + if (len(arg) == 7 and not ifnot) or (len(arg) == 10 and ifnot): + claim = pywikibot.input(u'Which claim do you want to filter?') + else: + claim = arg[11 if ifnot else 8:] + + p = re.compile(r'(?<!\),') # Match "," only if there no "" before + temp = [] # Array to store split argument + for arg in p.split(claim): + temp.append(arg.replace(',', ',').split('=')) + self.claimfilter_list.append((temp[0][0], temp[0][1], + dict(temp[1:]), ifnot)) return True elif arg.startswith('-yahoo'): gen = YahooSearchPageGenerator(arg[7:], site=self.site) @@ -1184,6 +1224,56 @@ yield page
class ItemClaimFilter(object):

    """Item claim filter."""

    @classmethod
    def __filter_match(cls, page, prop, claim, qualifiers=None):
        """
        Return true if the page contains the claim given.

        A claim matches when its target equals C{claim} and, if qualifiers
        are given, the same claim carries every one of them. All claims of
        the property are tried before giving up.

        @param page: the page to check
        @param prop: property id to check
        @type prop: str
        @param claim: value the property should contain
        @param qualifiers: dict of qualifiers that must be present, or None
            if qualifiers are irrelevant
        @type qualifiers: dict or None
        @return: true if page contains the claim, false otherwise
        @rtype: bool
        """
        if not isinstance(page, pywikibot.ItemPage):
            pywikibot.output(u'%s is not an ItemPage. Skipping.' % page)
            return False

        try:
            page_claims = page.get()['claims'][prop]
        except KeyError:
            # The item has no statement for this property at all.
            return False

        for page_claim in page_claims:
            if not page_claim.target_equals(claim):
                continue
            if not qualifiers:
                return True
            if all(page_claim.has_qualifier(qualifier_id, val)
                   for qualifier_id, val in qualifiers.items()):
                return True
        return False

    @classmethod
    def filter(cls, generator, prop, claim, qualifiers=None, negate=False):
        """
        Yield all ItemPages which does contain certain claim in a property.

        Pages that are not ItemPages are never yielded, whatever the value
        of C{negate}.

        @param generator: iterable of pages to filter
        @param prop: property id to check
        @type prop: str
        @param claim: value of the property to check. Can be exact value (for
            instance, ItemPage instance) or ItemPage ID string (e.g. 'Q37470').
        @param qualifiers: dict of qualifiers that must be present, or None if
            qualifiers are irrelevant
        @type qualifiers: dict or None
        @param negate: true if pages that does *not* contain specified claim
            should be yielded, false otherwise
        @type negate: bool
        """
        for page in generator:
            matched = cls.__filter_match(page, prop, claim, qualifiers)
            if negate:
                # __filter_match already reported non-items as skipped, so
                # only real items lacking the claim are passed on.
                if not matched and isinstance(page, pywikibot.ItemPage):
                    yield page
            elif matched:
                yield page


# name the generator methods
ItemClaimFilterPageGenerator = ItemClaimFilter.filter


# Unchanged diff context that follows this hunk in
# pywikibot/pagegenerators.py:
#     class RegexFilter(object):
"""Regex filter.""" diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py index 1bbecab..ae7e2d8 100755 --- a/tests/pagegenerators_tests.py +++ b/tests/pagegenerators_tests.py @@ -437,6 +437,73 @@ self.assertEqual(gf.namespaces, set([1, 6]))
class TestItemClaimFilterPageGenerator(WikidataTestCase):

    """Tests for the item claim filter page generator."""

    def _simple_claim_test(self, prop, claim, qualifiers, valid):
        """
        Run the filter on the sample (India) item and check the outcome.

        @param prop: the property to check
        @type prop: str
        @param claim: the claim the property should contain
        @param qualifiers: qualifiers to check or None
        @type qualifiers: dict or None
        @param valid: true if the item is expected to pass the filter,
            false otherwise
        @type valid: bool
        """
        india = pywikibot.ItemPage(self.get_repo(), 'Q668')
        filtered = pagegenerators.ItemClaimFilterPageGenerator(
            [india], prop, claim, qualifiers)
        yielded = set(filtered)
        expected_count = 1 if valid else 0
        self.assertEqual(len(yielded), expected_count)

    def _get_council_page(self):
        """Return United Nations Security Council Wikidata page."""
        return pywikibot.Page(self.get_site(), 'Q37470')

    def test_valid_qualifiers(self):
        """Qualifiers present on the claim let the item through."""
        start = pywikibot.WbTime(1950, 1, 1, precision=9,
                                 site=self.get_site())
        wanted = {'P580': start, 'P582': '1951'}
        self._simple_claim_test('P463', self._get_council_page(), wanted,
                                True)

    def test_invalid_qualifiers(self):
        """A qualifier with a different value filters the item out."""
        end = pywikibot.WbTime(1960, 1, 1, precision=9, site=self.site)
        wanted = {'P580': 1950, 'P582': end}
        self._simple_claim_test('P463', self._get_council_page(), wanted,
                                False)

    def test_nonexisting_qualifiers(self):
        """Qualifiers the claim does not have filter the item out."""
        wanted = {
            'P370': pywikibot.WbTime(1950, 1, 1, precision=9,
                                     site=self.get_site()),
            'P232': pywikibot.WbTime(1960, 1, 1, precision=9,
                                     site=self.get_site()),
        }
        self._simple_claim_test('P463', self._get_council_page(), wanted,
                                False)

    def test_no_qualifiers(self):
        """Check plain string, item ID and coordinate values."""
        cases = (
            ('P474', '+91', True),
            ('P463', 'Q37470', True),
            ('P625', '21,78', True),
            ('P625', '21,78.05,0.01', False),
        )
        for prop, claim, valid in cases:
            self._simple_claim_test(prop, claim, None, valid)


# Unchanged diff context that follows this hunk in
# tests/pagegenerators_tests.py:
#     class TestFactoryGenerator(DefaultSiteTestCase):
"""Test pagegenerators.GeneratorFactory.""" @@ -560,6 +627,37 @@ self.assertPagesInNamespaces(gen, set([1, 3]))
class TestFactoryGeneratorWikibase(WikidataTestCase):

    """Test pagegenerators.GeneratorFactory on Wikibase site."""

    def _count_pages(self, *args):
        """Feed the arguments to a new factory and count the pages yielded."""
        factory = pagegenerators.GeneratorFactory(site=self.site)
        for argument in args:
            factory.handleArg(argument)
        combined = factory.getCombinedGenerator()
        return len(set(combined))

    def test_onlyif(self):
        """Test -onlyif without qualifiers."""
        # NOTE(review): the claim value itself contains commas, which
        # -onlyif also uses to separate qualifiers - confirm whether the
        # commas are meant to be escaped here.
        count = self._count_pages(
            '-page:Q15745378',
            '-onlyif:P357=International Journal of Minerals, '
            'Metallurgy, and Materials')
        self.assertEqual(count, 1)

    def test_onlyifnot(self):
        """Test -onlyifnot without qualifiers."""
        count = self._count_pages(
            '-page:Q15745378',
            '-onlyifnot:P357=International Journal of Minerals, '
            'Metallurgy, and Materials')
        self.assertEqual(count, 0)

    def test_onlyif_qualifiers(self):
        """Test -onlyif with qualifiers."""
        count = self._count_pages('-page:Q668',
                                  '-onlyif:P47=Q837,P805=Q3088768')
        self.assertEqual(count, 1)


# Unchanged diff context that follows this hunk in
# tests/pagegenerators_tests.py:
#     class TestLogeventsFactoryGenerator(DefaultSiteTestCase):
"""Test GeneratorFactory with pagegenerators.LogeventsPageGenerator."""
pywikibot-commits@lists.wikimedia.org