jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/970820 )
Change subject: [FEAT] Add a filter to check if a page is redirect ......................................................................
[FEAT] Add a filter to check if a page is redirect
Added a -redirect filter that keeps only pages that are redirects. The filter can be negated with -redirect:false, which keeps only pages that are not redirects.
This restores a feature that compat provided as -redirectonly but that was missing from core. The new option is more useful because it can be negated, and it has a different name to make its purpose clearer.
Bug: T261549 Change-Id: If1a2c9bb420d9b5af69371537c7ca724096d8a23 --- M pywikibot/pagegenerators/__init__.py M pywikibot/pagegenerators/_factory.py 2 files changed, 39 insertions(+), 1 deletion(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators/__init__.py b/pywikibot/pagegenerators/__init__.py index d3fe693..a18aeb6 100644 --- a/pywikibot/pagegenerators/__init__.py +++ b/pywikibot/pagegenerators/__init__.py @@ -500,6 +500,9 @@ Valid values are in range 0-4. Multiple values can be comma-separated.
+-redirect Filter pages based on whether they are redirects. To return + only pages that are not redirects, use -redirect:false + -subpage -subpage:n filters pages to only those that have depth n i.e. a depth of 0 filters out all pages that are subpages, and a depth of 1 filters out all pages that are subpages of diff --git a/pywikibot/pagegenerators/_factory.py b/pywikibot/pagegenerators/_factory.py index 4a6f5c4..4a19361 100644 --- a/pywikibot/pagegenerators/_factory.py +++ b/pywikibot/pagegenerators/_factory.py @@ -33,6 +33,7 @@ ItemClaimFilterPageGenerator, NamespaceFilterPageGenerator, QualityFilterPageGenerator, + RedirectFilterPageGenerator, RegexBodyFilterPageGenerator, RegexFilterPageGenerator, SubpageFilterGenerator, @@ -54,6 +55,7 @@ WikibaseSearchItemPageGenerator, WikidataSPARQLPageGenerator, ) +from pywikibot.tools import strtobool from pywikibot.tools.collections import DequeGenerator from pywikibot.tools.itertools import ( filter_unique, @@ -113,6 +115,7 @@ self.catfilter_list: List['pywikibot.Category'] = [] self.intersect = False self.subpage_max_depth: Optional[int] = None + self.redirectfilter: Optional[bool] = None self._site = site self._positional_arg_name = positional_arg_name self._sparql: Optional[str] = None @@ -241,7 +244,8 @@ self.claimfilter_list, self.catfilter_list, self.qualityfilter_list, - self.subpage_max_depth is not None)): + self.subpage_max_depth is not None, + self.redirectfilter is not None)): pywikibot.warning('filter(s) specified but no generators.') return None
@@ -264,6 +268,12 @@ dupfiltergen = SubpageFilterGenerator( dupfiltergen, self.subpage_max_depth)
+ if self.redirectfilter is not None: + # Generator expects second parameter true to exclude redirects, but + # our logic is true to assert it is a redirect, false when it isn't + dupfiltergen = RedirectFilterPageGenerator( + dupfiltergen, not self.redirectfilter) + if self.claimfilter_list: for claim in self.claimfilter_list: dupfiltergen = ItemClaimFilterPageGenerator(dupfiltergen, @@ -902,6 +912,14 @@ f'Invalid -logevents parameter "{params[0]}"') return self._parse_log_events(*params)
+ def _handle_redirect(self, value: str) -> HANDLER_RETURN_TYPE: + """Handle `-redirect` argument.""" + if not value: + # True by default + value = 'true' + self.redirectfilter = strtobool(value) + return True + def handle_args(self, args: Iterable[str]) -> List[str]: """Handle command line arguments and return the rest as a list.
pywikibot-commits@lists.wikimedia.org