jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/960172 )
Change subject: [cleanup] cleanup redirect methods ......................................................................
[cleanup] cleanup redirect methods
- add a new redirects method to BaseSite that returns the generic redirect tag list (previously returned by BaseSite.redirect()) - add a new redirects method to APISite that returns the redirect tag list extracted from getmagicwords('redirect') - return the default redirect tag with BaseSite.redirect() instead of a list. This makes it possible to get the default tag from APISite as well if APISite.redirects is defined - move redirect_regex method from APISite to BaseSite but simplify the implementation. It is not necessary to assume that getmagicwords('redirect') does not exist; that was a guess in the very first 2.0 implementation and was implemented because the redirect tags were part of the family files sometime in 2008. - remove deprecated redirectRegex
Bug: T347226 Change-Id: I3647cf8cb154686f075bc53a054b937675d74765 --- M ROADMAP.rst M pywikibot/site/_apisite.py M pywikibot/site/_basesite.py 3 files changed, 65 insertions(+), 46 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/ROADMAP.rst b/ROADMAP.rst index 9814645..d91c5b4 100644 --- a/ROADMAP.rst +++ b/ROADMAP.rst @@ -76,4 +76,3 @@ * 6.2.0: Throttle.multiplydelay attribute is deprecated * 6.2.0: SequenceOutputter.format_list() is deprecated in favour of 'out' property * 6.0.0: config.register_family_file() is deprecated -* 5.5.0: APISite.redirectRegex() will be removed in favour of APISite.redirect_regex() diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py index 33d1752..ba3e8ca 100644 --- a/pywikibot/site/_apisite.py +++ b/pywikibot/site/_apisite.py @@ -17,7 +17,7 @@ from pywikibot import login from pywikibot.backports import DefaultDict, Dict, List, Match from pywikibot.backports import OrderedDict as OrderedDictType -from pywikibot.backports import Iterable, Pattern, Set, Tuple, removesuffix +from pywikibot.backports import Iterable, Set, Tuple, removesuffix from pywikibot.comms import http from pywikibot.data import api from pywikibot.exceptions import ( @@ -1036,32 +1036,18 @@ return self._magicwords[word] return [word]
- def redirect(self) -> str: - """Return the localized #REDIRECT keyword.""" - # return the magic word without the preceding '#' character - return self.getmagicwords('redirect')[0].lstrip('#') + def redirects(self) -> List[str]: + """Return a list of localized tags for the site without preceding '#'.
- @deprecated('redirect_regex', since='5.5.0') - def redirectRegex(self) -> Pattern[str]: # noqa: N802 - """Return a compiled regular expression matching on redirect pages.""" - return self.redirect_regex + .. seealso:: + :meth:`BaseSite.redirect() + <pywikibot.site._basesite.BaseSite.redirect>` and + :meth:`BaseSite.redirects() + <pywikibot.site._basesite.BaseSite.redirects>`
- @property - def redirect_regex(self) -> Pattern[str]: - """Return a compiled regular expression matching on redirect pages. - - Group 1 in the regex match object will be the target title. - + .. versionadded:: 8.4 """ - # NOTE: this is needed, since the API can give false positives! - try: - keywords = {s.lstrip('#') for s in self.getmagicwords('redirect')} - keywords.add('REDIRECT') # just in case - pattern = '(?:' + '|'.join(keywords) + ')' - except KeyError: - # no localized keyword for redirects - pattern = None - return super().redirectRegex(pattern) + return [s.lstrip('#') for s in self.getmagicwords('redirect')]
def pagenamecodes(self) -> List[str]: """Return list of localized PAGENAME tags for the site.""" diff --git a/pywikibot/site/_basesite.py b/pywikibot/site/_basesite.py index 7292efc..ea47b94 100644 --- a/pywikibot/site/_basesite.py +++ b/pywikibot/site/_basesite.py @@ -11,7 +11,7 @@ from warnings import warn
import pywikibot -from pywikibot.backports import Pattern +from pywikibot.backports import List, Pattern from pywikibot.exceptions import ( Error, FamilyMaintenanceWarning, @@ -242,26 +242,36 @@ """Return dict of valid namespaces on this wiki.""" return NamespacesDict(self._build_namespaces())
- def ns_normalize(self, value): - """ - Return canonical local form of namespace name. + def ns_normalize(self, value: str): + """Return canonical local form of namespace name.
:param value: A namespace name - :type value: str - """ index = self.namespaces.lookup_name(value) return self.namespace(index)
- def redirect(self): - """Return list of localized redirect tags for the site.""" + def redirect(self) -> str: + """Return a default redirect tag for the site. + + .. versionchanged:: 8.4 + return a single generic redirect tag instead of a list of + tags. For the list use :meth:`redirects` instead. + """ + return self.redirects()[0] + + def redirects(self) -> List[str]: + """Return list of generic redirect tags for the site. + + .. seealso:: :meth:`redirect` for the default redirect tag. + .. versionadded:: 8.4 + """ return ['REDIRECT']
- def pagenamecodes(self): + def pagenamecodes(self) -> List[str]: """Return list of localized PAGENAME tags for the site.""" return ['PAGENAME']
- def pagename2codes(self): + def pagename2codes(self) -> List[str]: """Return list of localized PAGENAMEE tags for the site.""" return ['PAGENAMEE']
@@ -337,22 +347,22 @@ linkfam, linkcode = pywikibot.Link(text, self).parse_site() return linkfam != self.family.name or linkcode != self.code
- def redirectRegex( # noqa: N802 - self, - pattern: Optional[str] = None - ) -> Pattern[str]: + @property + def redirect_regex(self) -> Pattern[str]: """Return a compiled regular expression matching on redirect pages.
Group 1 in the regex match object will be the target title.
+ A redirect starts with hash (#), followed by a keyword, then + arbitrary stuff, then a wikilink. The wikilink may contain a + label, although this is not useful. + + .. versionadded:: 8.4 + moved from class:`APISite<pywikibot.site._apisite.APISite>` """ - if pattern is None: - pattern = 'REDIRECT' - # A redirect starts with hash (#), followed by a keyword, then - # arbitrary stuff, then a wikilink. The wikilink may contain - # a label, although this is not useful. - return re.compile(r'\s*#{pattern}\s*:?\s*\[\[(.+?)(?:\|.*?)?\]\]' - .format(pattern=pattern), re.IGNORECASE | re.DOTALL) + tags = '|'.join(self.redirects()) + return re.compile(fr'\s*#(?:{tags})\s*:?\s*\[\[(.+?)(?:\|.*?)?\]\]', + re.IGNORECASE | re.DOTALL)
def sametitle(self, title1: str, title2: str) -> bool: """
pywikibot-commits@lists.wikimedia.org