Xqt submitted this change.

View Change

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
[IMPR] detach Category class to its own file (part 2)

Change-Id: Ia4a15abac6adea179b870bb8821f939a0119b796
---
M pywikibot/CONTENT.rst
M pywikibot/page/__init__.py
M pywikibot/page/_category.py
M pywikibot/page/_filepage.py
M pywikibot/page/_page.py
M pywikibot/page/_user.py
M pywikibot/page/_wikibase.py
M tox.ini
8 files changed, 24 insertions(+), 2,671 deletions(-)

diff --git a/pywikibot/CONTENT.rst b/pywikibot/CONTENT.rst
index e92d50b..477e85f 100644
--- a/pywikibot/CONTENT.rst
+++ b/pywikibot/CONTENT.rst
@@ -118,6 +118,8 @@
+============================+======================================================+
| __init__.py | Interface representing MediaWiki pages |
+----------------------------+------------------------------------------------------+
+ | _category.py | Object representing MediaWiki categories |
+ +----------------------------+------------------------------------------------------+
| _collections.py | Structures holding data for Wikibase entities |
+----------------------------+------------------------------------------------------+
| _decorators.py | Decorators used by page objects |
@@ -126,7 +128,7 @@
+----------------------------+------------------------------------------------------+
| _links.py | Objects representing link objects |
+----------------------------+------------------------------------------------------+
- | _pages.py | Objects representing MediaWiki pages |
+ | _page.py | Objects representing MediaWiki pages |
+----------------------------+------------------------------------------------------+
| _revision.py | Object representing page revision |
+----------------------------+------------------------------------------------------+
diff --git a/pywikibot/page/__init__.py b/pywikibot/page/__init__.py
index 502e923..f320c74 100644
--- a/pywikibot/page/__init__.py
+++ b/pywikibot/page/__init__.py
@@ -6,9 +6,10 @@
#
from typing import Union

+from pywikibot.page._category import Category
from pywikibot.page._filepage import FileInfo, FilePage
from pywikibot.page._links import BaseLink, Link, SiteLink, html2unicode
-from pywikibot.page._pages import BasePage, Category, Page
+from pywikibot.page._page import BasePage, Page
from pywikibot.page._revision import Revision
from pywikibot.page._user import User
from pywikibot.page._wikibase import (
diff --git a/pywikibot/page/_category.py b/pywikibot/page/_category.py
index b76c3e5..a4aff25 100644
--- a/pywikibot/page/_category.py
+++ b/pywikibot/page/_category.py
@@ -1,2347 +1,18 @@
-"""Objects representing various types of MediaWiki pages.
-
-This module includes objects:
-
-- BasePage: Base object for a MediaWiki page
-- Page: A MediaWiki page
-- Category: A page in the Category: namespace
-
-Various Wikibase pages are defined in ``page._wikibase.py``,
-various pages for Proofread Extensions are defined in
-``pywikibot.proofreadpage``.
-
-.. note:: `Link` objects represent a wiki-page's title, while
- :class:`pywikibot.Page` objects (defined here) represent the page
- itself, including its contents.
-"""
+"""Object representing a MediaWiki category page."""
#
# (C) Pywikibot team, 2008-2022
#
# Distributed under the terms of the MIT license.
#
-import itertools
-import re
-from collections import Counter, defaultdict
-from contextlib import suppress
-from itertools import islice
-from textwrap import shorten, wrap
+from collections import defaultdict
from typing import Optional, Union
-from urllib.parse import quote_from_bytes
-from warnings import warn

import pywikibot
-from pywikibot import Timestamp, config, date, i18n, textlib
-from pywikibot.backports import Generator, Iterable, Iterator, List
-from pywikibot.cosmetic_changes import CANCEL, CosmeticChangesToolkit
-from pywikibot.exceptions import (
- Error,
- InterwikiRedirectPageError,
- InvalidPageError,
- IsNotRedirectPageError,
- IsRedirectPageError,
- NoMoveTargetError,
- NoPageError,
- NoUsernameError,
- OtherPageSaveError,
- PageSaveRelatedError,
- SectionError,
- UnknownExtensionError,
-)
-from pywikibot.page._decorators import allow_asynchronous
-from pywikibot.page._links import BaseLink, Link
-from pywikibot.page._toolforge import WikiBlameMixin
-from pywikibot.site import Namespace, NamespaceArgType
-from pywikibot.tools import (
- ComparableMixin,
- cached,
- first_upper,
- issue_deprecation_warning,
- remove_last_args,
-)
+from pywikibot.backports import Generator, List
+from pywikibot.page._page import Page


-PROTOCOL_REGEX = r'\Ahttps?://'
-
-__all__ = (
- 'BasePage',
- 'Category',
- 'Page',
-)
-
-
-class BasePage(ComparableMixin):
-
- """
- BasePage: Base object for a MediaWiki page.
-
- This object only implements internally methods that do not require
- reading from or writing to the wiki. All other methods are delegated
- to the Site object.
-
- Will be subclassed by Page, WikibasePage, and FlowPage.
- """
-
- _cache_attrs = (
- '_text', '_pageid', '_catinfo', '_templates', '_protection',
- '_contentmodel', '_langlinks', '_isredir', '_coords',
- '_preloadedtext', '_timestamp', '_applicable_protections',
- '_flowinfo', '_quality', '_pageprops', '_revid', '_quality_text',
- '_pageimage', '_item', '_lintinfo',
- )
-
- def __init__(self, source, title: str = '', ns=0) -> None:
- """
- Instantiate a Page object.
-
- Three calling formats are supported:
-
- - If the first argument is a Page, create a copy of that object.
- This can be used to convert an existing Page into a subclass
- object, such as Category or FilePage. (If the title is also
- given as the second argument, creates a copy with that title;
- this is used when pages are moved.)
- - If the first argument is a Site, create a Page on that Site
- using the second argument as the title (may include a section),
- and the third as the namespace number. The namespace number is
- mandatory, even if the title includes the namespace prefix. This
- is the preferred syntax when using an already-normalized title
- obtained from api.php or a database dump. WARNING: may produce
- invalid objects if page title isn't in normal form!
- - If the first argument is a BaseLink, create a Page from that link.
- This is the preferred syntax when using a title scraped from
- wikitext, URLs, or another non-normalized source.
-
- :param source: the source of the page
- :type source: pywikibot.page.BaseLink (or subclass),
- pywikibot.page.Page (or subclass), or pywikibot.page.Site
- :param title: normalized title of the page; required if source is a
- Site, ignored otherwise
- :type title: str
- :param ns: namespace number; required if source is a Site, ignored
- otherwise
- :type ns: int
- """
- if title is None:
- raise ValueError('Title cannot be None.')
-
- if isinstance(source, pywikibot.site.BaseSite):
- self._link = Link(title, source=source, default_namespace=ns)
- self._revisions = {}
- elif isinstance(source, Page):
- # copy all of source's attributes to this object
- # without overwriting non-None values
- self.__dict__.update((k, v) for k, v in source.__dict__.items()
- if k not in self.__dict__
- or self.__dict__[k] is None)
- if title:
- # overwrite title
- self._link = Link(title, source=source.site,
- default_namespace=ns)
- elif isinstance(source, BaseLink):
- self._link = source
- self._revisions = {}
- else:
- raise Error(
- "Invalid argument type '{}' in Page initializer: {}"
- .format(type(source), source))
-
- @property
- def site(self):
- """Return the Site object for the wiki on which this Page resides.
-
- :rtype: pywikibot.Site
- """
- return self._link.site
-
- def version(self):
- """
- Return MediaWiki version number of the page site.
-
- This is needed to use @need_version() decorator for methods of
- Page objects.
- """
- return self.site.version()
-
- @property
- def image_repository(self):
- """Return the Site object for the image repository."""
- return self.site.image_repository()
-
- @property
- def data_repository(self):
- """Return the Site object for the data repository."""
- return self.site.data_repository()
-
- def namespace(self) -> Namespace:
- """
- Return the namespace of the page.
-
- :return: namespace of the page
- """
- return self._link.namespace
-
- @property
- def content_model(self):
- """
- Return the content model for this page.
-
- If it cannot be reliably determined via the API,
- None is returned.
- """
- if not hasattr(self, '_contentmodel'):
- self.site.loadpageinfo(self)
- return self._contentmodel
-
- @property
- @cached
- def depth(self) -> int:
- """Return the depth/subpage level of the page.
-
- Check if the namespace allows subpages.
- Not allowed subpages means depth is always 0.
- """
- return self.title().count('/') if self.namespace().subpages else 0
-
- @property
- def pageid(self) -> int:
- """
- Return pageid of the page.
-
- :return: pageid or 0 if page does not exist
- """
- if not hasattr(self, '_pageid'):
- self.site.loadpageinfo(self)
- return self._pageid
-
- def title(
- self,
- *,
- underscore: bool = False,
- with_ns: bool = True,
- with_section: bool = True,
- as_url: bool = False,
- as_link: bool = False,
- allow_interwiki: bool = True,
- force_interwiki: bool = False,
- textlink: bool = False,
- as_filename: bool = False,
- insite=None,
- without_brackets: bool = False
- ) -> str:
- """
- Return the title of this Page, as a string.
-
- :param underscore: (not used with as_link) if true, replace all ' '
- characters with '_'
- :param with_ns: if false, omit the namespace prefix. If this
- option is false and used together with as_link return a labeled
- link like [[link|label]]
- :param with_section: if false, omit the section
- :param as_url: (not used with as_link) if true, quote title as if in an
- URL
- :param as_link: if true, return the title in the form of a wikilink
- :param allow_interwiki: (only used if as_link is true) if true, format
- the link as an interwiki link if necessary
- :param force_interwiki: (only used if as_link is true) if true, always
- format the link as an interwiki link
- :param textlink: (only used if as_link is true) if true, place a ':'
- before Category: and Image: links
- :param as_filename: (not used with as_link) if true, replace any
- characters that are unsafe in filenames
- :param insite: (only used if as_link is true) a site object where the
- title is to be shown. Default is the current family/lang given by
- -family and -lang or -site option i.e. config.family and
- config.mylang
- :param without_brackets: (cannot be used with as_link) if true, remove
- the last pair of brackets(usually removes disambiguation brackets).
- """
- title = self._link.canonical_title()
- label = self._link.title
- if with_section and self.section():
- section = '#' + self.section()
- else:
- section = ''
- if as_link:
- if insite:
- target_code = insite.code
- target_family = insite.family.name
- else:
- target_code = config.mylang
- target_family = config.family
- if force_interwiki \
- or (allow_interwiki
- and (self.site.family.name != target_family
- or self.site.code != target_code)):
- if self.site.family.name not in (
- target_family, self.site.code):
- title = '{site.family.name}:{site.code}:{title}'.format(
- site=self.site, title=title)
- else:
- # use this form for sites like commons, where the
- # code is the same as the family name
- title = f'{self.site.code}:{title}'
- elif textlink and (self.is_filepage() or self.is_categorypage()):
- title = f':{title}'
- elif self.namespace() == 0 and not section:
- with_ns = True
- if with_ns:
- return f'[[{title}{section}]]'
- return f'[[{title}{section}|{label}]]'
- if not with_ns and self.namespace() != 0:
- title = label + section
- else:
- title += section
- if without_brackets:
- brackets_re = r'\s+\([^()]+?\)$'
- title = re.sub(brackets_re, '', title)
- if underscore or as_url:
- title = title.replace(' ', '_')
- if as_url:
- encoded_title = title.encode(self.site.encoding())
- title = quote_from_bytes(encoded_title, safe='')
- if as_filename:
- # Replace characters that are not possible in file names on some
- # systems, but still are valid in MediaWiki titles:
- # Unix: /
- # MediaWiki: /:\
- # Windows: /:\"?*
- # Spaces are possible on most systems, but are bad for URLs.
- for forbidden in ':*?/\\" ':
- title = title.replace(forbidden, '_')
- return title
-
- def section(self) -> Optional[str]:
- """
- Return the name of the section this Page refers to.
-
- The section is the part of the title following a '#' character, if
- any. If no section is present, return None.
- """
- try:
- section = self._link.section
- except AttributeError:
- section = None
- return section
-
- def __str__(self) -> str:
- """Return a string representation."""
- return self.title(as_link=True, force_interwiki=True)
-
- def __repr__(self) -> str:
- """Return a more complete string representation."""
- return f'{self.__class__.__name__}({self.title()!r})'
-
- def _cmpkey(self):
- """
- Key for comparison of Page objects.
-
- Page objects are "equal" if and only if they are on the same site
- and have the same normalized title, including section if any.
-
- Page objects are sortable by site, namespace then title.
- """
- return (self.site, self.namespace(), self.title())
-
- def __hash__(self):
- """
- A stable identifier to be used as a key in hash-tables.
-
- This relies on the fact that the string
- representation of an instance cannot change after the construction.
- """
- return hash(self._cmpkey())
-
- def full_url(self):
- """Return the full URL."""
- return self.site.base_url(
- self.site.articlepath.format(self.title(as_url=True)))
-
- @cached
- def autoFormat(self):
- """
- Return :py:obj:`date.getAutoFormat` dictName and value, if any.
-
- Value can be a year, date, etc., and dictName is 'YearBC',
- 'Year_December', or another dictionary name. Please note that two
- entries may have exactly the same autoFormat, but be in two
- different namespaces, as some sites have categories with the
- same names. Regular titles return (None, None).
- """
- return date.getAutoFormat(self.site.lang, self.title(with_ns=False))
-
- def isAutoTitle(self):
- """Return True if title of this Page is in the autoFormat dict."""
- return self.autoFormat()[0] is not None
-
- def get(self, force: bool = False, get_redirect: bool = False) -> str:
- """Return the wiki-text of the page.
-
- This will retrieve the page from the server if it has not been
- retrieved yet, or if force is True. This can raise the following
- exceptions that should be caught by the calling code:
-
- :exception pywikibot.exceptions.NoPageError: The page does not exist
- :exception pywikibot.exceptions.IsRedirectPageError: The page is a
- redirect. The argument of the exception is the title of the page
- it redirects to.
- :exception pywikibot.exceptions.SectionError: The section does not
- exist on a page with a # link
-
- :param force: reload all page attributes, including errors.
- :param get_redirect: return the redirect text, do not follow the
- redirect, do not raise an exception.
- """
- if force:
- del self.latest_revision_id
- if hasattr(self, '_bot_may_edit'):
- del self._bot_may_edit
- try:
- self._getInternals()
- except IsRedirectPageError:
- if not get_redirect:
- raise
-
- return self.latest_revision.text
-
- def has_content(self) -> bool:
- """
- Page has been loaded.
-
- Not existing pages are considered loaded.
-
- .. versionadded:: 7.6
- """
- return not self.exists() or self._latest_cached_revision() is not None
-
- def _latest_cached_revision(self):
- """Get the latest revision if cached and has text, otherwise None."""
- if (hasattr(self, '_revid') and self._revid in self._revisions
- and self._revisions[self._revid].text is not None):
- return self._revisions[self._revid]
- return None
-
- def _getInternals(self):
- """
- Helper function for get().
-
- Stores latest revision in self if it doesn't contain it, doesn't think.
- * Raises exceptions from previous runs.
- * Stores new exceptions in _getexception and raises them.
- """
- # Raise exceptions from previous runs
- if hasattr(self, '_getexception'):
- raise self._getexception
-
- # If not already stored, fetch revision
- if self._latest_cached_revision() is None:
- try:
- self.site.loadrevisions(self, content=True)
- except (NoPageError, SectionError) as e:
- self._getexception = e
- raise
-
- # self._isredir is set by loadrevisions
- if self._isredir:
- self._getexception = IsRedirectPageError(self)
- raise self._getexception
-
- @remove_last_args(['get_redirect'])
- def getOldVersion(self, oldid, force: bool = False) -> str:
- """Return text of an old revision of this page.
-
- :param oldid: The revid of the revision desired.
- """
- if force or oldid not in self._revisions \
- or self._revisions[oldid].text is None:
- self.site.loadrevisions(self, content=True, revids=oldid)
- return self._revisions[oldid].text
-
- def permalink(self, oldid=None, percent_encoded: bool = True,
- with_protocol: bool = False) -> str:
- """Return the permalink URL of an old revision of this page.
-
- :param oldid: The revid of the revision desired.
- :param percent_encoded: if false, the link will be provided
- without title uncoded.
- :param with_protocol: if true, http or https prefixes will be
- included before the double slash.
- """
- if percent_encoded:
- title = self.title(as_url=True)
- else:
- title = self.title(as_url=False).replace(' ', '_')
- return '{}//{}{}/index.php?title={}&oldid={}'.format(
- self.site.protocol() + ':' if with_protocol else '',
- self.site.hostname(),
- self.site.scriptpath(),
- title,
- oldid if oldid is not None else self.latest_revision_id)
-
- @property
- def latest_revision_id(self):
- """Return the current revision id for this page."""
- if not hasattr(self, '_revid'):
- self.revisions()
- return self._revid
-
- @latest_revision_id.deleter
- def latest_revision_id(self) -> None:
- """
- Remove the latest revision id set for this Page.
-
- All internal cached values specifically for the latest revision
- of this page are cleared.
-
- The following cached values are not cleared:
- - text property
- - page properties, and page coordinates
- - lastNonBotUser
- - isDisambig and isCategoryRedirect status
- - langlinks, templates and deleted revisions
- """
- # When forcing, we retry the page no matter what:
- # * Old exceptions do not apply any more
- # * Deleting _revid to force reload
- # * Deleting _redirtarget, that info is now obsolete.
- for attr in ['_redirtarget', '_getexception', '_revid']:
- if hasattr(self, attr):
- delattr(self, attr)
-
- @latest_revision_id.setter
- def latest_revision_id(self, value) -> None:
- """Set the latest revision for this Page."""
- del self.latest_revision_id
- self._revid = value
-
- @property
- def latest_revision(self):
- """Return the current revision for this page."""
- rev = self._latest_cached_revision()
- if rev is not None:
- return rev
-
- with suppress(StopIteration):
- return next(self.revisions(content=True, total=1))
- raise InvalidPageError(self)
-
- @property
- def text(self) -> str:
- """
- Return the current (edited) wikitext, loading it if necessary.
-
- :return: text of the page
- """
- if getattr(self, '_text', None) is not None:
- return self._text
-
- try:
- return self.get(get_redirect=True)
- except NoPageError:
- # TODO: what other exceptions might be returned?
- return ''
-
- @text.setter
- def text(self, value: Optional[str]):
- """Update the current (edited) wikitext.
-
- :param value: New value or None
- """
- try:
- self.botMayEdit() # T262136, T267770
- except Exception as e:
- # dry tests aren't able to make an API call
- # but are rejected by an Exception; ignore it then.
- if not str(e).startswith('DryRequest rejecting request:'):
- raise
-
- del self.text
- self._text = None if value is None else str(value)
-
- @text.deleter
- def text(self) -> None:
- """Delete the current (edited) wikitext."""
- if hasattr(self, '_text'):
- del self._text
- if hasattr(self, '_expanded_text'):
- del self._expanded_text
- if hasattr(self, '_raw_extracted_templates'):
- del self._raw_extracted_templates
-
- def preloadText(self) -> str:
- """
- The text returned by EditFormPreloadText.
-
- See API module "info".
-
- Application: on Wikisource wikis, text can be preloaded even if
- a page does not exist, if an Index page is present.
- """
- self.site.loadpageinfo(self, preload=True)
- return self._preloadedtext
-
- def get_parsed_page(self, force: bool = False) -> str:
- """Retrieve parsed text (via action=parse) and cache it.
-
- .. versionchanged:: 7.1
- `force` parameter was added;
- `_get_parsed_page` becomes a public method
-
- :param force: force updating from the live site
-
- .. seealso::
- :meth:`APISite.get_parsed_page()
- <pywikibot.site._apisite.APISite.get_parsed_page>`
- """
- if not hasattr(self, '_parsed_text') or force:
- self._parsed_text = self.site.get_parsed_page(self)
- return self._parsed_text
-
- def extract(self, variant: str = 'plain', *,
- lines: Optional[int] = None,
- chars: Optional[int] = None,
- sentences: Optional[int] = None,
- intro: bool = True) -> str:
- """Retrieve an extract of this page.
-
- .. versionadded:: 7.1
-
- :param variant: The variant of extract, either 'plain' for plain
- text, 'html' for limited HTML (both excludes templates and
- any text formatting) or 'wiki' for bare wikitext which also
- includes any templates for example.
- :param lines: if not None, wrap the extract into lines with
- width of 79 chars and return a string with that given number
- of lines.
- :param chars: How many characters to return. Actual text
- returned might be slightly longer.
- :param sentences: How many sentences to return
- :param intro: Return only content before the first section
- :raises NoPageError: given page does not exist
- :raises NotImplementedError: "wiki" variant does not support
- `sencence` parameter.
- :raises ValueError: `variant` parameter must be "plain", "html" or
- "wiki"
-
- .. seealso:: :meth:`APISite.extract()
- <pywikibot.site._extensions.TextExtractsMixin.extract>`.
- """
- if variant in ('plain', 'html'):
- extract = self.site.extract(self, chars=chars, sentences=sentences,
- intro=intro,
- plaintext=variant == 'plain')
- elif variant == 'wiki':
- if not self.exists():
- raise NoPageError(self)
- if sentences:
- raise NotImplementedError(
- "'wiki' variant of extract method does not support "
- "'sencence' parameter")
-
- extract = self.text[:]
- if intro:
- pos = extract.find('\n=')
- if pos:
- extract = extract[:pos]
- if chars:
- extract = shorten(extract, chars, break_long_words=False,
- placeholder='…')
- else:
- raise ValueError(
- 'variant parameter must be "plain", "html" or "wiki", not "{}"'
- .format(variant))
-
- if not lines:
- return extract
-
- text_lines = []
- for i, text in enumerate(extract.splitlines(), start=1):
- text_lines += wrap(text, width=79) or ['']
- if i >= lines:
- break
-
- return '\n'.join(text_lines[:min(lines, len(text_lines))])
-
- def properties(self, force: bool = False) -> dict:
- """
- Return the properties of the page.
-
- :param force: force updating from the live site
- """
- if not hasattr(self, '_pageprops') or force:
- self._pageprops = {} # page may not have pageprops (see T56868)
- self.site.loadpageprops(self)
- return self._pageprops
-
- def defaultsort(self, force: bool = False) -> Optional[str]:
- """
- Extract value of the {{DEFAULTSORT:}} magic word from the page.
-
- :param force: force updating from the live site
- """
- return self.properties(force=force).get('defaultsort')
-
- def expand_text(
- self,
- force: bool = False,
- includecomments: bool = False
- ) -> str:
- """Return the page text with all templates and parser words expanded.
-
- :param force: force updating from the live site
- :param includecomments: Also strip comments if includecomments
- parameter is not True.
- """
- if not hasattr(self, '_expanded_text') or (
- self._expanded_text is None) or force:
- if not self.text:
- self._expanded_text = ''
- return ''
-
- self._expanded_text = self.site.expand_text(
- self.text,
- title=self.title(with_section=False),
- includecomments=includecomments)
- return self._expanded_text
-
- def userName(self) -> str:
- """Return name or IP address of last user to edit page."""
- return self.latest_revision.user
-
- def isIpEdit(self) -> bool:
- """Return True if last editor was unregistered."""
- return self.latest_revision.anon
-
- @cached
- def lastNonBotUser(self) -> str:
- """
- Return name or IP address of last human/non-bot user to edit page.
-
- Determine the most recent human editor out of the last revisions.
- If it was not able to retrieve a human user, returns None.
-
- If the edit was done by a bot which is no longer flagged as 'bot',
- i.e. which is not returned by Site.botusers(), it will be returned
- as a non-bot edit.
- """
- for entry in self.revisions():
- if entry.user and (not self.site.isBot(entry.user)):
- return entry.user
-
- return None
-
- def editTime(self) -> pywikibot.Timestamp:
- """Return timestamp of last revision to page."""
- return self.latest_revision.timestamp
-
- def exists(self) -> bool:
- """Return True if page exists on the wiki, even if it's a redirect.
-
- If the title includes a section, return False if this section isn't
- found.
- """
- with suppress(AttributeError):
- return self.pageid > 0
- raise InvalidPageError(self)
-
- @property
- def oldest_revision(self):
- """
- Return the first revision of this page.
-
- :rtype: :py:obj:`Revision`
- """
- return next(self.revisions(reverse=True, total=1))
-
- def isRedirectPage(self):
- """Return True if this is a redirect, False if not or not existing."""
- return self.site.page_isredirect(self)
-
- def isStaticRedirect(self, force: bool = False) -> bool:
- """Determine whether the page is a static redirect.
-
- A static redirect must be a valid redirect, and contain the magic
- word __STATICREDIRECT__.
-
- .. versionchanged:: 7.0
- __STATICREDIRECT__ can be transcluded
-
- :param force: Bypass local caching
- """
- return self.isRedirectPage() \
- and 'staticredirect' in self.properties(force=force)
-
- def isCategoryRedirect(self) -> bool:
- """Return True if this is a category redirect page, False otherwise."""
- if not self.is_categorypage():
- return False
-
- if not hasattr(self, '_catredirect'):
- self._catredirect = False
- catredirs = self.site.category_redirects()
- for template, args in self.templatesWithParams():
- if template.title(with_ns=False) not in catredirs:
- continue
-
- if args:
- # Get target (first template argument)
- target_title = args[0].strip()
- p = pywikibot.Page(
- self.site, target_title, Namespace.CATEGORY)
- try:
- p.title()
- except pywikibot.exceptions.InvalidTitleError:
- target_title = self.site.expand_text(
- text=target_title, title=self.title())
- p = pywikibot.Page(self.site, target_title,
- Namespace.CATEGORY)
- if p.namespace() == Namespace.CATEGORY:
- self._catredirect = p.title()
- else:
- pywikibot.warning(
- 'Category redirect target {} on {} is not a '
- 'category'.format(p.title(as_link=True),
- self.title(as_link=True)))
- else:
- pywikibot.warning(
- 'No target found for category redirect on '
- + self.title(as_link=True))
- break
-
- return bool(self._catredirect)
-
- def getCategoryRedirectTarget(self) -> 'Category':
- """If this is a category redirect, return the target category title."""
- if self.isCategoryRedirect():
- return Category(Link(self._catredirect, self.site))
- raise IsNotRedirectPageError(self)
-
- def isTalkPage(self):
- """Return True if this page is in any talk namespace."""
- ns = self.namespace()
- return ns >= 0 and ns % 2 == 1
-
- def toggleTalkPage(self) -> Optional['Page']:
- """
- Return other member of the article-talk page pair for this Page.
-
- If self is a talk page, returns the associated content page;
- otherwise, returns the associated talk page. The returned page need
- not actually exist on the wiki.
-
- :return: Page or None if self is a special page.
- """
- ns = self.namespace()
- if ns < 0: # Special page
- return None
-
- title = self.title(with_ns=False)
- new_ns = ns + (1, -1)[self.isTalkPage()]
- return Page(self.site,
- f'{self.site.namespace(new_ns)}:{title}')
-
- def is_categorypage(self):
- """Return True if the page is a Category, False otherwise."""
- return self.namespace() == 14
-
- def is_filepage(self):
- """Return True if this is a file description page, False otherwise."""
- return self.namespace() == 6
-
- def isDisambig(self) -> bool:
- """
- Return True if this is a disambiguation page, False otherwise.
-
- By default, it uses the Disambiguator extension's result. The
- identification relies on the presence of the __DISAMBIG__ magic word
- which may also be transcluded.
-
- If the Disambiguator extension isn't activated for the given site,
- the identification relies on the presence of specific templates.
- First load a list of template names from the Family file;
- if the value in the Family file is None or no entry was made, look for
- the list on [[MediaWiki:Disambiguationspage]]. If this page does not
- exist, take the MediaWiki message. 'Template:Disambig' is always
- assumed to be default, and will be appended regardless of its
- existence.
- """
- if self.site.has_extension('Disambiguator'):
- # If the Disambiguator extension is loaded, use it
- return 'disambiguation' in self.properties()
-
- if not hasattr(self.site, '_disambigtemplates'):
- try:
- default = set(self.site.family.disambig('_default'))
- except KeyError:
- default = {'Disambig'}
- try:
- distl = self.site.family.disambig(self.site.code,
- fallback=False)
- except KeyError:
- distl = None
- if distl is None:
- disambigpages = Page(self.site,
- 'MediaWiki:Disambiguationspage')
- if disambigpages.exists():
- disambigs = {link.title(with_ns=False)
- for link in disambigpages.linkedPages()
- if link.namespace() == 10}
- elif self.site.has_mediawiki_message('disambiguationspage'):
- message = self.site.mediawiki_message(
- 'disambiguationspage').split(':', 1)[1]
- # add the default template(s) for default mw message
- # only
- disambigs = {first_upper(message)} | default
- else:
- disambigs = default
- self.site._disambigtemplates = disambigs
- else:
- # Normalize template capitalization
- self.site._disambigtemplates = {first_upper(t) for t in distl}
- templates = {tl.title(with_ns=False) for tl in self.templates()}
- disambigs = set()
- # always use cached disambig templates
- disambigs.update(self.site._disambigtemplates)
- # see if any template on this page is in the set of disambigs
- disambig_in_page = disambigs.intersection(templates)
- return self.namespace() != 10 and bool(disambig_in_page)
-
- def getReferences(self,
- follow_redirects: bool = True,
- with_template_inclusion: bool = True,
- only_template_inclusion: bool = False,
- filter_redirects: bool = False,
- namespaces=None,
- total: Optional[int] = None,
- content: bool = False):
- """
- Return an iterator all pages that refer to or embed the page.
-
- If you need a full list of referring pages, use
- ``pages = list(s.getReferences())``
-
- :param follow_redirects: if True, also iterate pages that link to a
- redirect pointing to the page.
- :param with_template_inclusion: if True, also iterate pages where self
- is used as a template.
- :param only_template_inclusion: if True, only iterate pages where self
- is used as a template.
- :param filter_redirects: if True, only iterate redirects to self.
- :param namespaces: only iterate pages in these namespaces
- :param total: iterate no more than this number of pages in total
- :param content: if True, retrieve the content of the current version
- of each referring page (default False)
- :rtype: typing.Iterable[pywikibot.Page]
- """
- # N.B.: this method intentionally overlaps with backlinks() and
- # embeddedin(). Depending on the interface, it may be more efficient
- # to implement those methods in the site interface and then combine
- # the results for this method, or to implement this method and then
- # split up the results for the others.
- return self.site.pagereferences(
- self,
- follow_redirects=follow_redirects,
- filter_redirects=filter_redirects,
- with_template_inclusion=with_template_inclusion,
- only_template_inclusion=only_template_inclusion,
- namespaces=namespaces,
- total=total,
- content=content
- )
-
- def backlinks(self,
- follow_redirects: bool = True,
- filter_redirects: Optional[bool] = None,
- namespaces=None,
- total: Optional[int] = None,
- content: bool = False):
- """
- Return an iterator for pages that link to this page.
-
- :param follow_redirects: if True, also iterate pages that link to a
- redirect pointing to the page.
- :param filter_redirects: if True, only iterate redirects; if False,
- omit redirects; if None, do not filter
- :param namespaces: only iterate pages in these namespaces
- :param total: iterate no more than this number of pages in total
- :param content: if True, retrieve the content of the current version
- of each referring page (default False)
- """
- return self.site.pagebacklinks(
- self,
- follow_redirects=follow_redirects,
- filter_redirects=filter_redirects,
- namespaces=namespaces,
- total=total,
- content=content
- )
-
- def embeddedin(self,
- filter_redirects: Optional[bool] = None,
- namespaces=None,
- total: Optional[int] = None,
- content: bool = False):
- """
- Return an iterator for pages that embed this page as a template.
-
- :param filter_redirects: if True, only iterate redirects; if False,
- omit redirects; if None, do not filter
- :param namespaces: only iterate pages in these namespaces
- :param total: iterate no more than this number of pages in total
- :param content: if True, retrieve the content of the current version
- of each embedding page (default False)
- """
- return self.site.page_embeddedin(
- self,
- filter_redirects=filter_redirects,
- namespaces=namespaces,
- total=total,
- content=content
- )
-
- def redirects(
- self,
- *,
- filter_fragments: Optional[bool] = None,
- namespaces: NamespaceArgType = None,
- total: Optional[int] = None,
- content: bool = False
- ) -> 'Iterable[pywikibot.Page]':
- """
- Return an iterable of redirects to this page.
-
- :param filter_fragments: If True, only return redirects with fragments.
- If False, only return redirects without fragments. If None, return
- both (no filtering).
- :param namespaces: only return redirects from these namespaces
- :param total: maximum number of redirects to retrieve in total
- :param content: load the current content of each redirect
-
- .. versionadded:: 7.0
- """
- return self.site.page_redirects(
- self,
- filter_fragments=filter_fragments,
- namespaces=namespaces,
- total=total,
- content=content,
- )
-
- def protection(self) -> dict:
- """Return a dictionary reflecting page protections."""
- return self.site.page_restrictions(self)
-
- def applicable_protections(self) -> set:
- """
- Return the protection types allowed for that page.
-
- If the page doesn't exist it only returns "create". Otherwise it
- returns all protection types provided by the site, except "create".
- It also removes "upload" if that page is not in the File namespace.
-
- It is possible, that it returns an empty set, but only if original
- protection types were removed.
-
- :return: set of str
- """
- # New API since commit 32083235eb332c419df2063cf966b3400be7ee8a
- if self.site.mw_version >= '1.25wmf14':
- self.site.loadpageinfo(self)
- return self._applicable_protections
-
- p_types = set(self.site.protection_types())
- if not self.exists():
- return {'create'} if 'create' in p_types else set()
- p_types.remove('create') # no existing page allows that
- if not self.is_filepage(): # only file pages allow upload
- p_types.remove('upload')
- return p_types
-
- def has_permission(self, action: str = 'edit') -> bool:
- """Determine whether the page can be modified.
-
- Return True if the bot has the permission of needed restriction level
- for the given action type.
-
- :param action: a valid restriction type like 'edit', 'move'
- :raises ValueError: invalid action parameter
- """
- return self.site.page_can_be_edited(self, action)
-
- def botMayEdit(self) -> bool:
- """
- Determine whether the active bot is allowed to edit the page.
-
- This will be True if the page doesn't contain {{bots}} or {{nobots}}
- or any other template from edit_restricted_templates list
- in x_family.py file, or it contains them and the active bot is allowed
- to edit this page. (This method is only useful on those sites that
- recognize the bot-exclusion protocol; on other sites, it will always
- return True.)
-
- The framework enforces this restriction by default. It is possible
- to override this by setting ignore_bot_templates=True in
- user cnfig file (user-config.py), or using page.put(force=True).
- """
- if not hasattr(self, '_bot_may_edit'):
- self._bot_may_edit = self._check_bot_may_edit()
- return self._bot_may_edit
-
- def _check_bot_may_edit(self, module: Optional[str] = None) -> bool:
- """A botMayEdit helper method.
-
- :param module: The module name to be restricted. Defaults to
- :func:`pywikibot.calledModuleName`.
- """
- if not hasattr(self, 'templatesWithParams'):
- return True
-
- if config.ignore_bot_templates: # Check the "master ignore switch"
- return True
-
- username = self.site.username()
- try:
- templates = self.templatesWithParams()
- except (NoPageError, IsRedirectPageError, SectionError):
- return True
-
- # go through all templates and look for any restriction
- restrictions = set(self.site.get_edit_restricted_templates())
-
- if module is None:
- module = pywikibot.calledModuleName()
-
- # also add archive templates for non-archive bots
- if module != 'archivebot':
- restrictions.update(self.site.get_archived_page_templates())
-
- # multiple bots/nobots templates are allowed
- for template, params in templates:
- title = template.title(with_ns=False)
-
- if title in restrictions:
- return False
-
- if title not in ('Bots', 'Nobots'):
- continue
-
- try:
- key, sep, value = params[0].partition('=')
- except IndexError:
- key, sep, value = '', '', ''
- names = set()
- else:
- if not sep:
- key, value = value, key
- key = key.strip()
- names = {name.strip() for name in value.split(',')}
-
- if len(params) > 1:
- pywikibot.warning(
- '{{%s|%s}} has more than 1 parameter; taking the first.'
- % (title.lower(), '|'.join(params)))
-
- if title == 'Nobots':
- if not params:
- return False
-
- if key:
- pywikibot.error(
- '%s parameter for {{nobots}} is not allowed. '
- 'Edit declined' % key)
- return False
-
- if 'all' in names or module in names or username in names:
- return False
-
- if title == 'Bots':
- if value and not key:
- pywikibot.warning(
- '{{bots|%s}} is not valid. Ignoring.' % value)
- continue
-
- if key and not value:
- pywikibot.warning(
- '{{bots|%s=}} is not valid. Ignoring.' % key)
- continue
-
- if key == 'allow':
- if not ('all' in names or username in names):
- return False
-
- elif key == 'deny':
- if 'all' in names or username in names:
- return False
-
- elif key == 'allowscript':
- if not ('all' in names or module in names):
- return False
-
- elif key == 'denyscript':
- if 'all' in names or module in names:
- return False
-
- elif key: # ignore unrecognized keys with a warning
- pywikibot.warning(
- '{{bots|%s}} is not valid. Ignoring.' % params[0])
-
- # no restricting template found
- return True
-
- def save(self,
- summary: Optional[str] = None,
- watch: Optional[str] = None,
- minor: bool = True,
- botflag: Optional[bool] = None,
- force: bool = False,
- asynchronous: bool = False,
- callback=None,
- apply_cosmetic_changes: Optional[bool] = None,
- quiet: bool = False,
- **kwargs):
- """
- Save the current contents of page's text to the wiki.
-
- .. versionchanged:: 7.0
- boolean watch parameter is deprecated
-
- :param summary: The edit summary for the modification (optional, but
- most wikis strongly encourage its use)
- :param watch: Specify how the watchlist is affected by this edit, set
- to one of "watch", "unwatch", "preferences", "nochange":
- * watch: add the page to the watchlist
- * unwatch: remove the page from the watchlist
- * preferences: use the preference settings (Default)
- * nochange: don't change the watchlist
- If None (default), follow bot account's default settings
- :param minor: if True, mark this edit as minor
- :param botflag: if True, mark this edit as made by a bot (default:
- True if user has bot status, False if not)
- :param force: if True, ignore botMayEdit() setting
- :param asynchronous: if True, launch a separate thread to save
- asynchronously
- :param callback: a callable object that will be called after the
- page put operation. This object must take two arguments: (1) a
- Page object, and (2) an exception instance, which will be None
- if the page was saved successfully. The callback is intended for
- use by bots that need to keep track of which saves were
- successful.
- :param apply_cosmetic_changes: Overwrites the cosmetic_changes
- configuration value to this value unless it's None.
- :param quiet: enable/disable successful save operation message;
- defaults to False.
- In asynchronous mode, if True, it is up to the calling bot to
- manage the output e.g. via callback.
- """
- if not summary:
- summary = config.default_edit_summary
-
- if isinstance(watch, bool): # pragma: no cover
- issue_deprecation_warning(
- 'boolean watch parameter',
- '"watch", "unwatch", "preferences" or "nochange" value',
- since='7.0.0')
- watch = ('unwatch', 'watch')[watch]
-
- if not force and not self.botMayEdit():
- raise OtherPageSaveError(
- self, 'Editing restricted by {{bots}}, {{nobots}} '
- "or site's equivalent of {{in use}} template")
- self._save(summary=summary, watch=watch, minor=minor, botflag=botflag,
- asynchronous=asynchronous, callback=callback,
- cc=apply_cosmetic_changes, quiet=quiet, **kwargs)
-
- @allow_asynchronous
- def _save(self, summary=None, watch=None, minor: bool = True, botflag=None,
- cc=None, quiet: bool = False, **kwargs):
- """Helper function for save()."""
- link = self.title(as_link=True)
- if cc or (cc is None and config.cosmetic_changes):
- summary = self._cosmetic_changes_hook(summary)
-
- done = self.site.editpage(self, summary=summary, minor=minor,
- watch=watch, bot=botflag, **kwargs)
- if not done:
- if not quiet:
- pywikibot.warning(f'Page {link} not saved')
- raise PageSaveRelatedError(self)
- if not quiet:
- pywikibot.info(f'Page {link} saved')
-
- def _cosmetic_changes_hook(self, summary: str) -> str:
- """The cosmetic changes hook.
-
- :param summary: The current edit summary.
- :return: Modified edit summary if cosmetic changes has been done,
- else the old edit summary.
- """
- if self.isTalkPage() or self.content_model != 'wikitext' or \
- pywikibot.calledModuleName() in config.cosmetic_changes_deny_script:
- return summary
-
- # check if cosmetic_changes is enabled for this page
- family = self.site.family.name
- if config.cosmetic_changes_mylang_only:
- cc = ((family == config.family and self.site.lang == config.mylang)
- or self.site.lang in config.cosmetic_changes_enable.get(
- family, []))
- else:
- cc = True
- cc = cc and self.site.lang not in config.cosmetic_changes_disable.get(
- family, [])
- cc = cc and self._check_bot_may_edit('cosmetic_changes')
- if not cc:
- return summary
-
- old = self.text
- pywikibot.log('Cosmetic changes for {}-{} enabled.'
- .format(family, self.site.lang))
- # cc depends on page directly and via several other imports
- cc_toolkit = CosmeticChangesToolkit(self, ignore=CANCEL.MATCH)
- self.text = cc_toolkit.change(old)
-
- if summary and old.strip().replace(
- '\r\n', '\n') != self.text.strip().replace('\r\n', '\n'):
- summary += i18n.twtranslate(self.site,
- 'pywikibot-cosmetic-changes')
- return summary
-
- def put(self, newtext: str,
- summary: Optional[str] = None,
- watch: Optional[str] = None,
- minor: bool = True,
- botflag: Optional[bool] = None,
- force: bool = False,
- asynchronous: bool = False,
- callback=None,
- show_diff: bool = False,
- **kwargs) -> None:
- """
- Save the page with the contents of the first argument as the text.
-
- This method is maintained primarily for backwards-compatibility.
- For new code, using Page.save() is preferred. See save() method
- docs for all parameters not listed here.
-
- .. versionadded:: 7.0
- The `show_diff` parameter
-
- :param newtext: The complete text of the revised page.
- :param show_diff: show changes between oldtext and newtext
- (default: False)
- """
- if show_diff:
- pywikibot.showDiff(self.text, newtext)
- self.text = newtext
- self.save(summary=summary, watch=watch, minor=minor, botflag=botflag,
- force=force, asynchronous=asynchronous, callback=callback,
- **kwargs)
-
- def watch(self, unwatch: bool = False) -> bool:
- """
- Add or remove this page to/from bot account's watchlist.
-
- :param unwatch: True to unwatch, False (default) to watch.
- :return: True if successful, False otherwise.
- """
- return self.site.watch(self, unwatch)
-
- def clear_cache(self) -> None:
- """Clear the cached attributes of the page."""
- self._revisions = {}
- for attr in self._cache_attrs:
- with suppress(AttributeError):
- delattr(self, attr)
-
- def purge(self, **kwargs) -> bool:
- """
- Purge the server's cache for this page.
-
- :keyword redirects: Automatically resolve redirects.
- :type redirects: bool
- :keyword converttitles: Convert titles to other variants if necessary.
- Only works if the wiki's content language supports variant
- conversion.
- :type converttitles: bool
- :keyword forcelinkupdate: Update the links tables.
- :type forcelinkupdate: bool
- :keyword forcerecursivelinkupdate: Update the links table, and update
- the links tables for any page that uses this page as a template.
- :type forcerecursivelinkupdate: bool
- """
- self.clear_cache()
- return self.site.purgepages([self], **kwargs)
-
- def touch(self, callback=None, botflag: bool = False, **kwargs):
- """
- Make a touch edit for this page.
-
- See save() method docs for all parameters.
- The following parameters will be overridden by this method:
- - summary, watch, minor, force, asynchronous
-
- Parameter botflag is False by default.
-
- minor and botflag parameters are set to False which prevents hiding
- the edit when it becomes a real edit due to a bug.
-
- .. note:: This discards content saved to self.text.
- """
- if self.exists():
- # ensure always get the page text and not to change it.
- del self.text
- summary = i18n.twtranslate(self.site, 'pywikibot-touch')
- self.save(summary=summary, watch='nochange',
- minor=False, botflag=botflag, force=True,
- asynchronous=False, callback=callback,
- apply_cosmetic_changes=False, nocreate=True, **kwargs)
- else:
- raise NoPageError(self)
-
- def linkedPages(
- self, *args, **kwargs
- ) -> Generator['pywikibot.Page', None, None]:
- """Iterate Pages that this Page links to.
-
- Only returns pages from "normal" internal links. Embedded
- templates are omitted but links within them are returned. All
- interwiki and external links are omitted.
-
- For the parameters refer
- :py:mod:`APISite.pagelinks<pywikibot.site.APISite.pagelinks>`
-
- .. versionadded:: 7.0
- the `follow_redirects` keyword argument
- .. deprecated:: 7.0
- the positional arguments
-
- .. seealso:: :api:`Links`
-
- :keyword namespaces: Only iterate pages in these namespaces
- (default: all)
- :type namespaces: iterable of str or Namespace key,
- or a single instance of those types. May be a '|' separated
- list of namespace identifiers.
- :keyword follow_redirects: if True, yields the target of any redirects,
- rather than the redirect page
- :keyword total: iterate no more than this number of pages in total
- :keyword content: if True, load the current content of each page
- """
- # Deprecate positional arguments and synchronize with Site.pagelinks
- keys = ('namespaces', 'total', 'content')
- for i, arg in enumerate(args): # pragma: no cover
- key = keys[i]
- issue_deprecation_warning(
- f'Positional argument {i + 1} ({arg})',
- f'keyword argument "{key}={arg}"',
- since='7.0.0')
- if key in kwargs:
- pywikibot.warning('{!r} is given as keyword argument {!r} '
- 'already; ignoring {!r}'
- .format(key, arg, kwargs[key]))
- else:
- kwargs[key] = arg
-
- return self.site.pagelinks(self, **kwargs)
-
- def interwiki(self, expand: bool = True):
- """
- Iterate interwiki links in the page text, excluding language links.
-
- :param expand: if True (default), include interwiki links found in
- templates transcluded onto this page; if False, only iterate
- interwiki links found in this page's own wikitext
- :return: a generator that yields Link objects
- :rtype: generator
- """
- # This function does not exist in the API, so it has to be
- # implemented by screen-scraping
- if expand:
- text = self.expand_text()
- else:
- text = self.text
- for linkmatch in pywikibot.link_regex.finditer(
- textlib.removeDisabledParts(text)):
- linktitle = linkmatch['title']
- link = Link(linktitle, self.site)
- # only yield links that are to a different site and that
- # are not language links
- try:
- # initial ":" indicates not a language link
- # link to a different family is not a language link
- if link.site != self.site \
- and (linktitle.lstrip().startswith(':')
- or link.site.family != self.site.family):
- yield link
- except Error:
- # ignore any links with invalid contents
- continue
-
- def langlinks(self, include_obsolete: bool = False) -> list:
- """
- Return a list of all inter-language Links on this page.
-
- :param include_obsolete: if true, return even Link objects whose site
- is obsolete
- :return: list of Link objects.
- """
- # Note: We preload a list of *all* langlinks, including links to
- # obsolete sites, and store that in self._langlinks. We then filter
- # this list if the method was called with include_obsolete=False
- # (which is the default)
- if not hasattr(self, '_langlinks'):
- self._langlinks = set(self.iterlanglinks(include_obsolete=True))
-
- if include_obsolete:
- return list(self._langlinks)
- return [i for i in self._langlinks if not i.site.obsolete]
-
- def iterlanglinks(self,
- total: Optional[int] = None,
- include_obsolete: bool = False):
- """Iterate all inter-language links on this page.
-
- :param total: iterate no more than this number of pages in total
- :param include_obsolete: if true, yield even Link object whose site
- is obsolete
- :return: a generator that yields Link objects.
- :rtype: generator
- """
- if hasattr(self, '_langlinks'):
- return iter(self.langlinks(include_obsolete=include_obsolete))
- # XXX We might want to fill _langlinks when the Site
- # method is called. If we do this, we'll have to think
- # about what will happen if the generator is not completely
- # iterated upon.
- return self.site.pagelanglinks(self, total=total,
- include_obsolete=include_obsolete)
-
- def data_item(self):
- """
- Convenience function to get the Wikibase item of a page.
-
- :rtype: pywikibot.page.ItemPage
- """
- return pywikibot.ItemPage.fromPage(self)
-
- def templates(self, content: bool = False) -> List['pywikibot.Page']:
- """
- Return a list of Page objects for templates used on this Page.
-
- Template parameters are ignored. This method only returns embedded
- templates, not template pages that happen to be referenced through
- a normal link.
-
- :param content: if True, retrieve the content of the current version
- of each template (default False)
- :param content: bool
- """
- # Data might have been preloaded
- # Delete cache if content is needed and elements have no content
- if (hasattr(self, '_templates')
- and content
- and not all(t.has_content() for t in self._templates)):
- del self._templates
-
- if not hasattr(self, '_templates'):
- self._templates = set(self.itertemplates(content=content))
-
- return list(self._templates)
-
- def itertemplates(self,
- total: Optional[int] = None,
- content: bool = False):
- """
- Iterate Page objects for templates used on this Page.
-
- Template parameters are ignored. This method only returns embedded
- templates, not template pages that happen to be referenced through
- a normal link.
-
- :param total: iterate no more than this number of pages in total
- :param content: if True, retrieve the content of the current version
- of each template (default False)
- :param content: bool
- """
- if hasattr(self, '_templates'):
- return itertools.islice(self.templates(content=content), total)
-
- return self.site.pagetemplates(self, total=total, content=content)
-
- def imagelinks(self, total: Optional[int] = None, content: bool = False):
- """
- Iterate FilePage objects for images displayed on this Page.
-
- :param total: iterate no more than this number of pages in total
- :param content: if True, retrieve the content of the current version
- of each image description page (default False)
- :return: a generator that yields FilePage objects.
- """
- return self.site.pageimages(self, total=total, content=content)
-
- def categories(self,
- with_sort_key: bool = False,
- total: Optional[int] = None,
- content: bool = False) -> Iterator['pywikibot.Page']:
- """
- Iterate categories that the article is in.
-
- :param with_sort_key: if True, include the sort key in each Category.
- :param total: iterate no more than this number of pages in total
- :param content: if True, retrieve the content of the current version
- of each category description page (default False)
- :return: a generator that yields Category objects.
- :rtype: generator
- """
- # FIXME: bug T75561: with_sort_key is ignored by Site.pagecategories
- if with_sort_key:
- raise NotImplementedError('with_sort_key is not implemented')
-
- # Data might have been preloaded
- # Delete cache if content is needed and elements have no content
- if hasattr(self, '_categories'):
- if (content
- and not all(c.has_content() for c in self._categories)):
- del self._categories
- else:
- return itertools.islice(self._categories, total)
-
- return self.site.pagecategories(self, total=total, content=content)
-
- def extlinks(self, total: Optional[int] = None):
- """
- Iterate all external URLs (not interwiki links) from this page.
-
- :param total: iterate no more than this number of pages in total
- :return: a generator that yields str objects containing URLs.
- :rtype: generator
- """
- return self.site.page_extlinks(self, total=total)
-
- def coordinates(self, primary_only: bool = False):
- """
- Return a list of Coordinate objects for points on the page.
-
- Uses the MediaWiki extension GeoData.
-
- :param primary_only: Only return the coordinate indicated to be primary
- :return: A list of Coordinate objects or a single Coordinate if
- primary_only is True
- :rtype: list of Coordinate or Coordinate or None
- """
- if not hasattr(self, '_coords'):
- self._coords = []
- self.site.loadcoordinfo(self)
- if primary_only:
- for coord in self._coords:
- if coord.primary:
- return coord
- return None
- return list(self._coords)
-
- def page_image(self):
- """
- Return the most appropriate image on the page.
-
- Uses the MediaWiki extension PageImages.
-
- :return: A FilePage object
- :rtype: pywikibot.page.FilePage
- """
- if not hasattr(self, '_pageimage'):
- self._pageimage = None
- self.site.loadpageimage(self)
-
- return self._pageimage
-
- def getRedirectTarget(self):
- """
- Return a Page object for the target this Page redirects to.
-
- If this page is not a redirect page, will raise an
- IsNotRedirectPageError. This method also can raise a NoPageError.
-
- :rtype: pywikibot.Page
- """
- return self.site.getredirtarget(self)
-
- def moved_target(self):
- """
- Return a Page object for the target this Page was moved to.
-
- If this page was not moved, it will raise a NoMoveTargetError.
- This method also works if the source was already deleted.
-
- :rtype: pywikibot.page.Page
- :raises pywikibot.exceptions.NoMoveTargetError: page was not moved
- """
- gen = iter(self.site.logevents(logtype='move', page=self, total=1))
- try:
- lastmove = next(gen)
- except StopIteration:
- raise NoMoveTargetError(self)
- else:
- return lastmove.target_page
-
- def revisions(self,
- reverse: bool = False,
- total: Optional[int] = None,
- content: bool = False,
- starttime=None, endtime=None):
- """Generator which loads the version history as Revision instances."""
- # TODO: Only request uncached revisions
- self.site.loadrevisions(self, content=content, rvdir=reverse,
- starttime=starttime, endtime=endtime,
- total=total)
-
- revs = self._revisions.values()
-
- if starttime or endtime:
- t_min, t_max = Timestamp.min, Timestamp.max
-
- if reverse:
- t0 = Timestamp.set_timestamp(starttime) if starttime else t_min
- t1 = Timestamp.set_timestamp(endtime) if endtime else t_max
- else:
- t0 = Timestamp.set_timestamp(endtime) if endtime else t_min
- t1 = Timestamp.set_timestamp(starttime) if starttime else t_max
-
- revs = [rev for rev in revs if t0 <= rev.timestamp <= t1]
-
- revs = sorted(revs, reverse=not reverse, key=lambda rev: rev.timestamp)
-
- return islice(revs, total)
-
- def getVersionHistoryTable(self,
- reverse: bool = False,
- total: Optional[int] = None):
- """Return the version history as a wiki table."""
- result = '{| class="wikitable"\n'
- result += '! oldid || date/time || username || edit summary\n'
- for entry in self.revisions(reverse=reverse, total=total):
- result += '|----\n'
- result += ('| {r.revid} || {r.timestamp} || {r.user} || '
- '<nowiki>{r.comment}</nowiki>\n'.format(r=entry))
- result += '|}\n'
- return result
-
- def contributors(self,
- total: Optional[int] = None,
- starttime=None, endtime=None):
- """
- Compile contributors of this page with edit counts.
-
- :param total: iterate no more than this number of revisions in total
- :param starttime: retrieve revisions starting at this Timestamp
- :param endtime: retrieve revisions ending at this Timestamp
-
- :return: number of edits for each username
- :rtype: :py:obj:`collections.Counter`
- """
- return Counter(rev.user for rev in
- self.revisions(total=total,
- starttime=starttime, endtime=endtime))
-
- def revision_count(self, contributors=None) -> int:
- """Determine number of edits from contributors.
-
- :param contributors: contributor usernames
- :type contributors: iterable of str or pywikibot.User,
- a single pywikibot.User, a str or None
- :return: number of edits for all provided usernames
- """
- cnt = self.contributors()
-
- if not contributors:
- return sum(cnt.values())
-
- if isinstance(contributors, pywikibot.User):
- contributors = contributors.username
-
- if isinstance(contributors, str):
- return cnt[contributors]
-
- return sum(cnt[user.username]
- if isinstance(user, pywikibot.User) else cnt[user]
- for user in contributors)
-
- def merge_history(self, dest, timestamp=None, reason=None) -> None:
- """
- Merge revisions from this page into another page.
-
- See :py:obj:`APISite.merge_history` for details.
-
- :param dest: Destination page to which revisions will be merged
- :type dest: pywikibot.Page
- :param timestamp: Revisions from this page dating up to this timestamp
- will be merged into the destination page (if not given or False,
- all revisions will be merged)
- :type timestamp: pywikibot.Timestamp
- :param reason: Optional reason for the history merge
- :type reason: str
- """
- self.site.merge_history(self, dest, timestamp, reason)
-
- def move(self,
- newtitle: str,
- reason: Optional[str] = None,
- movetalk: bool = True,
- noredirect: bool = False,
- movesubpages: bool = True) -> None:
- """
- Move this page to a new title.
-
- .. versionchanged:: 7.2
- The `movesubpages` parameter was added
-
- :param newtitle: The new page title.
- :param reason: The edit summary for the move.
- :param movetalk: If true, move this page's talk page (if it exists)
- :param noredirect: if move succeeds, delete the old page
- (usually requires sysop privileges, depending on wiki settings)
- :param movesubpages: Rename subpages, if applicable.
- """
- if reason is None:
- pywikibot.info(f'Moving {self} to [[{newtitle}]].')
- reason = pywikibot.input('Please enter a reason for the move:')
- return self.site.movepage(self, newtitle, reason,
- movetalk=movetalk,
- noredirect=noredirect,
- movesubpages=movesubpages)
-
- def delete(
- self,
- reason: Optional[str] = None,
- prompt: bool = True,
- mark: bool = False,
- automatic_quit: bool = False,
- *,
- deletetalk: bool = False
- ) -> int:
- """
- Delete the page from the wiki. Requires administrator status.
-
- .. versionchanged:: 7.1
- keyword only parameter *deletetalk* was added.
-
- :param reason: The edit summary for the deletion, or rationale
- for deletion if requesting. If None, ask for it.
- :param deletetalk: Also delete the talk page, if it exists.
- :param prompt: If true, prompt user for confirmation before deleting.
- :param mark: If true, and user does not have sysop rights, place a
- speedy-deletion request on the page instead. If false, non-sysops
- will be asked before marking pages for deletion.
- :param automatic_quit: show also the quit option, when asking
- for confirmation.
-
- :return: the function returns an integer, with values as follows:
- value meaning
- 0 no action was done
- 1 page was deleted
- -1 page was marked for deletion
- """
- if reason is None:
- pywikibot.info(f'Deleting {self.title(as_link=True)}.')
- reason = pywikibot.input('Please enter a reason for the deletion:')
-
- # If user has 'delete' right, delete the page
- if self.site.has_right('delete'):
- answer = 'y'
- if prompt and not hasattr(self.site, '_noDeletePrompt'):
- answer = pywikibot.input_choice(
- 'Do you want to delete {}?'.format(self.title(
- as_link=True, force_interwiki=True)),
- [('Yes', 'y'), ('No', 'n'), ('All', 'a')],
- 'n', automatic_quit=automatic_quit)
- if answer == 'a':
- answer = 'y'
- self.site._noDeletePrompt = True
- if answer == 'y':
- self.site.delete(self, reason, deletetalk=deletetalk)
- return 1
- return 0
-
- # Otherwise mark it for deletion
- if mark or hasattr(self.site, '_noMarkDeletePrompt'):
- answer = 'y'
- else:
- answer = pywikibot.input_choice(
- "Can't delete {}; do you want to mark it for deletion instead?"
- .format(self),
- [('Yes', 'y'), ('No', 'n'), ('All', 'a')],
- 'n', automatic_quit=False)
- if answer == 'a':
- answer = 'y'
- self.site._noMarkDeletePrompt = True
- if answer == 'y':
- template = '{{delete|1=%s}}\n' % reason
- # We can't add templates in a wikidata item, so let's use its
- # talk page
- if isinstance(self, pywikibot.ItemPage):
- target = self.toggleTalkPage()
- else:
- target = self
- target.text = template + target.text
- target.save(summary=reason)
- return -1
- return 0
-
- def has_deleted_revisions(self) -> bool:
- """Return True if the page has deleted revisions.
-
- .. versionadded:: 4.2
- """
- if not hasattr(self, '_has_deleted_revisions'):
- gen = self.site.deletedrevs(self, total=1, prop=['ids'])
- self._has_deleted_revisions = bool(list(gen))
- return self._has_deleted_revisions
-
- def loadDeletedRevisions(self, total: Optional[int] = None, **kwargs):
- """
- Retrieve deleted revisions for this Page.
-
- Stores all revisions' timestamps, dates, editors and comments in
- self._deletedRevs attribute.
-
- :return: iterator of timestamps (which can be used to retrieve
- revisions later on).
- :rtype: generator
- """
- if not hasattr(self, '_deletedRevs'):
- self._deletedRevs = {}
- for item in self.site.deletedrevs(self, total=total, **kwargs):
- for rev in item.get('revisions', []):
- self._deletedRevs[rev['timestamp']] = rev
- yield rev['timestamp']
-
- def getDeletedRevision(
- self,
- timestamp,
- content: bool = False,
- **kwargs
- ) -> List:
- """
- Return a particular deleted revision by timestamp.
-
- :return: a list of [date, editor, comment, text, restoration
- marker]. text will be None, unless content is True (or has
- been retrieved earlier). If timestamp is not found, returns
- empty list.
- """
- if hasattr(self, '_deletedRevs') \
- and timestamp in self._deletedRevs \
- and (not content or 'content' in self._deletedRevs[timestamp]):
- return self._deletedRevs[timestamp]
-
- for item in self.site.deletedrevs(self, start=timestamp,
- content=content, total=1, **kwargs):
- # should only be one item with one revision
- if item['title'] == self.title() and 'revisions' in item:
- return item['revisions'][0]
- return []
-
- def markDeletedRevision(self, timestamp, undelete: bool = True):
- """
- Mark the revision identified by timestamp for undeletion.
-
- :param undelete: if False, mark the revision to remain deleted.
- """
- if not hasattr(self, '_deletedRevs'):
- self.loadDeletedRevisions()
- if timestamp not in self._deletedRevs:
- raise ValueError(
- 'Timestamp {} is not a deleted revision'
- .format(timestamp))
- self._deletedRevs[timestamp]['marked'] = undelete
-
- def undelete(self, reason: Optional[str] = None) -> None:
- """
- Undelete revisions based on the markers set by previous calls.
-
- If no calls have been made since loadDeletedRevisions(), everything
- will be restored.
-
- Simplest case::
-
- Page(...).undelete('This will restore all revisions')
-
- More complex::
-
- pg = Page(...)
- revs = pg.loadDeletedRevisions()
- for rev in revs:
- if ... #decide whether to undelete a revision
- pg.markDeletedRevision(rev) #mark for undeletion
- pg.undelete('This will restore only selected revisions.')
-
- :param reason: Reason for the action.
- """
- if hasattr(self, '_deletedRevs'):
- undelete_revs = [ts for ts, rev in self._deletedRevs.items()
- if 'marked' in rev and rev['marked']]
- else:
- undelete_revs = []
- if reason is None:
- warn('Not passing a reason for undelete() is deprecated.',
- DeprecationWarning)
- pywikibot.info(f'Undeleting {self.title(as_link=True)}.')
- reason = pywikibot.input(
- 'Please enter a reason for the undeletion:')
- self.site.undelete(self, reason, revision=undelete_revs)
-
- def protect(self,
- reason: Optional[str] = None,
- protections: Optional[dict] = None,
- **kwargs) -> None:
- """
- Protect or unprotect a wiki page. Requires administrator status.
-
- Valid protection levels are '' (equivalent to 'none'),
- 'autoconfirmed', 'sysop' and 'all'. 'all' means 'everyone is allowed',
- i.e. that protection type will be unprotected.
-
- In order to unprotect a type of permission, the protection level shall
- be either set to 'all' or '' or skipped in the protections dictionary.
-
- Expiry of protections can be set via kwargs, see Site.protect() for
- details. By default there is no expiry for the protection types.
-
- :param protections: A dict mapping type of protection to protection
- level of that type. Allowed protection types for a page can be
- retrieved by Page.self.applicable_protections()
- Defaults to protections is None, which means unprotect all
- protection types.
- Example: {'move': 'sysop', 'edit': 'autoconfirmed'}
-
- :param reason: Reason for the action, default is None and will set an
- empty string.
- """
- protections = protections or {} # protections is converted to {}
- reason = reason or '' # None is converted to ''
-
- self.site.protect(self, protections, reason, **kwargs)
-
- def change_category(self, old_cat, new_cat,
- summary: Optional[str] = None,
- sort_key=None,
- in_place: bool = True,
- include: Optional[List[str]] = None,
- show_diff: bool = False) -> bool:
- """
- Remove page from oldCat and add it to newCat.
-
- .. versionadded:: 7.0
- The `show_diff` parameter
-
- :param old_cat: category to be removed
- :type old_cat: pywikibot.page.Category
- :param new_cat: category to be added, if any
- :type new_cat: pywikibot.page.Category or None
-
- :param summary: string to use as an edit summary
-
- :param sort_key: sortKey to use for the added category.
- Unused if newCat is None, or if inPlace=True
- If sortKey=True, the sortKey used for oldCat will be used.
-
- :param in_place: if True, change categories in place rather than
- rearranging them.
-
- :param include: list of tags not to be disabled by default in relevant
- textlib functions, where CategoryLinks can be searched.
- :param show_diff: show changes between oldtext and newtext
- (default: False)
-
- :return: True if page was saved changed, otherwise False.
- """
- # get list of Category objects the article is in and remove possible
- # duplicates
- cats = []
- for cat in textlib.getCategoryLinks(self.text, site=self.site,
- include=include or []):
- if cat not in cats:
- cats.append(cat)
-
- if not self.has_permission():
- pywikibot.info(f"Can't edit {self}, skipping it...")
- return False
-
- if old_cat not in cats:
- if self.namespace() != 10:
- pywikibot.error('{} is not in category {}!'
- .format(self, old_cat.title()))
- else:
- pywikibot.info('{} is not in category {}, skipping...'
- .format(self, old_cat.title()))
- return False
-
- # This prevents the bot from adding new_cat if it is already present.
- if new_cat in cats:
- new_cat = None
-
- oldtext = self.text
- if in_place or self.namespace() == 10:
- newtext = textlib.replaceCategoryInPlace(oldtext, old_cat, new_cat,
- site=self.site)
- else:
- old_cat_pos = cats.index(old_cat)
- if new_cat:
- if sort_key is True:
- # Fetch sort_key from old_cat in current page.
- sort_key = cats[old_cat_pos].sortKey
- cats[old_cat_pos] = Category(self.site, new_cat.title(),
- sort_key=sort_key)
- else:
- cats.pop(old_cat_pos)
-
- try:
- newtext = textlib.replaceCategoryLinks(oldtext, cats)
- except ValueError:
- # Make sure that the only way replaceCategoryLinks() can return
- # a ValueError is in the case of interwiki links to self.
- pywikibot.info(
- f'Skipping {self} because of interwiki link to self')
- return False
-
- if oldtext != newtext:
- try:
- self.put(newtext, summary, show_diff=show_diff)
- except PageSaveRelatedError as error:
- pywikibot.info(f'Page {self} not saved: {error}')
- except NoUsernameError:
- pywikibot.info(
- f'Page {self} not saved; sysop privileges required.')
- else:
- return True
-
- return False
-
- def is_flow_page(self) -> bool:
- """Whether a page is a Flow page."""
- return self.content_model == 'flow-board'
-
- def create_short_link(self,
- permalink: bool = False,
- with_protocol: bool = True) -> str:
- """
- Return a shortened link that points to that page.
-
- If shared_urlshortner_wiki is defined in family config, it'll use
- that site to create the link instead of the current wiki.
-
- :param permalink: If true, the link will point to the actual revision
- of the page.
- :param with_protocol: If true, and if it's not already included,
- the link will have http(s) protocol prepended. On Wikimedia wikis
- the protocol is already present.
- :return: The reduced link.
- """
- wiki = self.site
- if self.site.family.shared_urlshortner_wiki:
- wiki = pywikibot.Site(*self.site.family.shared_urlshortner_wiki)
-
- url = self.permalink() if permalink else self.full_url()
-
- link = wiki.create_short_link(url)
- if re.match(PROTOCOL_REGEX, link):
- if not with_protocol:
- return re.sub(PROTOCOL_REGEX, '', link)
- elif with_protocol:
- return f'{wiki.protocol()}://{link}'
- return link
-
-
-class Page(BasePage, WikiBlameMixin):
-
- """Page: A MediaWiki page."""
-
- def __init__(self, source, title: str = '', ns=0) -> None:
- """Instantiate a Page object."""
- if isinstance(source, pywikibot.site.BaseSite) and not title:
- raise ValueError('Title must be specified and not empty '
- 'if source is a Site.')
- super().__init__(source, title, ns)
-
- @property
- @cached
- def raw_extracted_templates(self):
- """Extract templates and parameters.
-
- This method is using
- :func:`textlib.extract_templates_and_params`.
- Disabled parts and whitespace are stripped, except for
- whitespace in anonymous positional arguments.
-
- :rtype: list of (str, OrderedDict)
- """
- return textlib.extract_templates_and_params(self.text, True, True)
-
- def templatesWithParams(self):
- """Return templates used on this Page.
-
- The templates are extracted by :meth:`raw_extracted_templates`,
- with positional arguments placed first in order, and each named
- argument appearing as 'name=value'.
-
- All parameter keys and values for each template are stripped of
- whitespace.
-
- :return: a list of tuples with one tuple for each template invocation
- in the page, with the template Page as the first entry and a list
- of parameters as the second entry.
- :rtype: list of (pywikibot.page.Page, list)
- """
- # WARNING: may not return all templates used in particularly
- # intricate cases such as template substitution
- titles = {t.title() for t in self.templates()}
- templates = self.raw_extracted_templates
- # backwards-compatibility: convert the dict returned as the second
- # element into a list in the format used by old scripts
- result = []
- for template in templates:
- try:
- link = pywikibot.Link(template[0], self.site,
- default_namespace=10)
- if link.canonical_title() not in titles:
- continue
- except Error:
- # this is a parser function or magic word, not template name
- # the template name might also contain invalid parts
- continue
- args = template[1]
- intkeys = {}
- named = {}
- positional = []
- for key in sorted(args):
- try:
- intkeys[int(key)] = args[key]
- except ValueError:
- named[key] = args[key]
-
- for i in range(1, len(intkeys) + 1):
- # only those args with consecutive integer keys can be
- # treated as positional; an integer could also be used
- # (out of order) as the key for a named argument
- # example: {{tmp|one|two|5=five|three}}
- if i in intkeys:
- positional.append(intkeys[i])
- continue
-
- for k in intkeys:
- if k < 1 or k >= i:
- named[str(k)] = intkeys[k]
- break
-
- for item in named.items():
- positional.append('{}={}'.format(*item))
- result.append((pywikibot.Page(link, self.site), positional))
- return result
-
- def set_redirect_target(
- self,
- target_page,
- create: bool = False,
- force: bool = False,
- keep_section: bool = False,
- save: bool = True,
- **kwargs
- ):
- """
- Change the page's text to point to the redirect page.
-
- :param target_page: target of the redirect, this argument is required.
- :type target_page: pywikibot.Page or string
- :param create: if true, it creates the redirect even if the page
- doesn't exist.
- :param force: if true, it set the redirect target even the page
- doesn't exist or it's not redirect.
- :param keep_section: if the old redirect links to a section
- and the new one doesn't it uses the old redirect's section.
- :param save: if true, it saves the page immediately.
- :param kwargs: Arguments which are used for saving the page directly
- afterwards, like 'summary' for edit summary.
- """
- if isinstance(target_page, str):
- target_page = pywikibot.Page(self.site, target_page)
- elif self.site != target_page.site:
- raise InterwikiRedirectPageError(self, target_page)
- if not self.exists() and not (create or force):
- raise NoPageError(self)
- if self.exists() and not self.isRedirectPage() and not force:
- raise IsNotRedirectPageError(self)
- redirect_regex = self.site.redirect_regex
- if self.exists():
- old_text = self.get(get_redirect=True)
- else:
- old_text = ''
- result = redirect_regex.search(old_text)
- if result:
- oldlink = result[1]
- if (keep_section and '#' in oldlink
- and target_page.section() is None):
- sectionlink = oldlink[oldlink.index('#'):]
- target_page = pywikibot.Page(
- self.site,
- target_page.title() + sectionlink
- )
- prefix = self.text[:result.start()]
- suffix = self.text[result.end():]
- else:
- prefix = ''
- suffix = ''
-
- target_link = target_page.title(as_link=True, textlink=True,
- allow_interwiki=False)
- target_link = f'#{self.site.redirect()} {target_link}'
- self.text = prefix + target_link + suffix
- if save:
- self.save(**kwargs)
-
- def get_best_claim(self, prop: str):
- """
- Return the first best Claim for this page.
-
- Return the first 'preferred' ranked Claim specified by Wikibase
- property or the first 'normal' one otherwise.
-
- .. versionadded:: 3.0
-
- :param prop: property id, "P###"
- :return: Claim object given by Wikibase property number
- for this page object.
- :rtype: pywikibot.Claim or None
-
- :raises UnknownExtensionError: site has no Wikibase extension
- """
- def find_best_claim(claims):
- """Find the first best ranked claim."""
- index = None
- for i, claim in enumerate(claims):
- if claim.rank == 'preferred':
- return claim
- if index is None and claim.rank == 'normal':
- index = i
- if index is None:
- index = 0
- return claims[index]
-
- if not self.site.has_data_repository:
- raise UnknownExtensionError(
- f'Wikibase is not implemented for {self.site}.')
-
- def get_item_page(func, *args):
- try:
- item_p = func(*args)
- item_p.get()
- return item_p
- except NoPageError:
- return None
- except IsRedirectPageError:
- return get_item_page(item_p.getRedirectTarget)
-
- item_page = get_item_page(pywikibot.ItemPage.fromPage, self)
- if item_page and prop in item_page.claims:
- return find_best_claim(item_page.claims[prop])
- return None
+__all__ = ('Category', )


class Category(Page):
@@ -2567,12 +238,12 @@
if total == 0:
return

- def isEmptyCategory(self) -> bool:
+ def isEmptyCategory(self) -> bool: # noqa: N802
"""Return True if category has no members (including subcategories)."""
ci = self.categoryinfo
return sum(ci[k] for k in ['files', 'pages', 'subcats']) == 0

- def isHiddenCategory(self) -> bool:
+ def isHiddenCategory(self) -> bool: # noqa: N802
"""Return True if the category is hidden."""
return 'hiddencat' in self.properties()

diff --git a/pywikibot/page/_filepage.py b/pywikibot/page/_filepage.py
index bd627e4..2da1649 100644
--- a/pywikibot/page/_filepage.py
+++ b/pywikibot/page/_filepage.py
@@ -16,7 +16,7 @@
import pywikibot
from pywikibot.comms import http
from pywikibot.exceptions import NoPageError
-from pywikibot.page._pages import Page
+from pywikibot.page._page import Page
from pywikibot.tools import compute_file_hash, deprecated


diff --git a/pywikibot/page/_page.py b/pywikibot/page/_page.py
index b76c3e5..94239b8 100644
--- a/pywikibot/page/_page.py
+++ b/pywikibot/page/_page.py
@@ -21,11 +21,11 @@
#
import itertools
import re
-from collections import Counter, defaultdict
+from collections import Counter
from contextlib import suppress
from itertools import islice
from textwrap import shorten, wrap
-from typing import Optional, Union
+from typing import Optional
from urllib.parse import quote_from_bytes
from warnings import warn

@@ -62,11 +62,7 @@

PROTOCOL_REGEX = r'\Ahttps?://'

-__all__ = (
- 'BasePage',
- 'Category',
- 'Page',
-)
+__all__ = ('BasePage', 'Page')


class BasePage(ComparableMixin):
@@ -809,10 +805,10 @@

return bool(self._catredirect)

- def getCategoryRedirectTarget(self) -> 'Category':
+ def getCategoryRedirectTarget(self) -> 'pywikibot.Category':
"""If this is a category redirect, return the target category title."""
if self.isCategoryRedirect():
- return Category(Link(self._catredirect, self.site))
+ return pywikibot.Category(Link(self._catredirect, self.site))
raise IsNotRedirectPageError(self)

def isTalkPage(self):
@@ -2088,8 +2084,9 @@
if sort_key is True:
# Fetch sort_key from old_cat in current page.
sort_key = cats[old_cat_pos].sortKey
- cats[old_cat_pos] = Category(self.site, new_cat.title(),
- sort_key=sort_key)
+ cats[old_cat_pos] = pywikibot.Category(self.site,
+ new_cat.title(),
+ sort_key=sort_key)
else:
cats.pop(old_cat_pos)

@@ -2342,321 +2339,3 @@
if item_page and prop in item_page.claims:
return find_best_claim(item_page.claims[prop])
return None
-
-
-class Category(Page):
-
- """A page in the Category: namespace."""
-
- def __init__(self, source, title: str = '', sort_key=None) -> None:
- """
- Initializer.
-
- All parameters are the same as for Page() Initializer.
- """
- self.sortKey = sort_key
- super().__init__(source, title, ns=14)
- if self.namespace() != 14:
- raise ValueError("'{}' is not in the category namespace!"
- .format(self.title()))
-
- def aslink(self, sort_key: Optional[str] = None) -> str:
- """
- Return a link to place a page in this Category.
-
- Use this only to generate a "true" category link, not for interwikis
- or text links to category pages.
-
- :param sort_key: The sort key for the article to be placed in this
- Category; if omitted, default sort key is used.
- """
- key = sort_key or self.sortKey
- if key is not None:
- title_with_sort_key = self.title(with_section=False) + '|' + key
- else:
- title_with_sort_key = self.title(with_section=False)
- return f'[[{title_with_sort_key}]]'
-
- def subcategories(self,
- recurse: Union[int, bool] = False,
- total: Optional[int] = None,
- content: bool = False):
- """
- Iterate all subcategories of the current category.
-
- :param recurse: if not False or 0, also iterate subcategories of
- subcategories. If an int, limit recursion to this number of
- levels. (Example: recurse=1 will iterate direct subcats and
- first-level sub-sub-cats, but no deeper.)
- :param total: iterate no more than this number of
- subcategories in total (at all levels)
- :param content: if True, retrieve the content of the current version
- of each category description page (default False)
- """
-
- def is_cache_valid(cache: dict, content: bool) -> bool:
- return cache['content'] or not content
-
- if not self.categoryinfo['subcats']:
- return
-
- if not isinstance(recurse, bool) and recurse:
- recurse = recurse - 1
-
- if (not hasattr(self, '_subcats')
- or not is_cache_valid(self._subcats, content)):
- cache = {'data': [], 'content': content}
-
- for subcat in self.site.categorymembers(
- self, member_type='subcat', total=total, content=content):
- cache['data'].append(subcat)
- yield subcat
- if total is not None:
- total -= 1
- if total == 0:
- return
-
- if recurse:
- for item in subcat.subcategories(
- recurse, total=total, content=content):
- yield item
- if total is None:
- continue
-
- total -= 1
- if total == 0:
- return
- else:
- # cache is valid only if all subcategories are fetched (T88217)
- self._subcats = cache
- else:
- for subcat in self._subcats['data']:
- yield subcat
- if total is not None:
- total -= 1
- if total == 0:
- return
-
- if recurse:
- for item in subcat.subcategories(
- recurse, total=total, content=content):
- yield item
- if total is None:
- continue
-
- total -= 1
- if total == 0:
- return
-
- def articles(self,
- recurse: Union[int, bool] = False,
- total: Optional[int] = None,
- content: bool = False,
- namespaces: Union[int, List[int]] = None,
- sortby: Optional[str] = None,
- reverse: bool = False,
- starttime=None, endtime=None,
- startprefix: Optional[str] = None,
- endprefix: Optional[str] = None):
- """
- Yield all articles in the current category.
-
- By default, yields all *pages* in the category that are not
- subcategories!
-
- :param recurse: if not False or 0, also iterate articles in
- subcategories. If an int, limit recursion to this number of
- levels. (Example: recurse=1 will iterate articles in first-level
- subcats, but no deeper.)
- :param total: iterate no more than this number of pages in
- total (at all levels)
- :param namespaces: only yield pages in the specified namespaces
- :param content: if True, retrieve the content of the current version
- of each page (default False)
- :param sortby: determines the order in which results are generated,
- valid values are "sortkey" (default, results ordered by category
- sort key) or "timestamp" (results ordered by time page was
- added to the category). This applies recursively.
- :param reverse: if True, generate results in reverse order
- (default False)
- :param starttime: if provided, only generate pages added after this
- time; not valid unless sortby="timestamp"
- :type starttime: pywikibot.Timestamp
- :param endtime: if provided, only generate pages added before this
- time; not valid unless sortby="timestamp"
- :type endtime: pywikibot.Timestamp
- :param startprefix: if provided, only generate pages >= this title
- lexically; not valid if sortby="timestamp"
- :param endprefix: if provided, only generate pages < this title
- lexically; not valid if sortby="timestamp"
- :rtype: typing.Iterable[pywikibot.Page]
- """
- seen = set()
- for member in self.site.categorymembers(self,
- namespaces=namespaces,
- total=total,
- content=content,
- sortby=sortby,
- reverse=reverse,
- starttime=starttime,
- endtime=endtime,
- startprefix=startprefix,
- endprefix=endprefix,
- member_type=['page', 'file']):
- if recurse:
- seen.add(hash(member))
- yield member
- if total is not None:
- total -= 1
- if total == 0:
- return
-
- if recurse:
- if not isinstance(recurse, bool) and recurse:
- recurse -= 1
- for subcat in self.subcategories():
- for article in subcat.articles(recurse=recurse,
- total=total,
- content=content,
- namespaces=namespaces,
- sortby=sortby,
- reverse=reverse,
- starttime=starttime,
- endtime=endtime,
- startprefix=startprefix,
- endprefix=endprefix):
- hash_value = hash(article)
- if hash_value in seen:
- continue
-
- seen.add(hash_value)
- yield article
- if total is None:
- continue
-
- total -= 1
- if total == 0:
- return
-
- def members(self, recurse: bool = False,
- namespaces=None,
- total: Optional[int] = None,
- content: bool = False):
- """Yield all category contents (subcats, pages, and files).
-
- :rtype: typing.Iterable[pywikibot.Page]
- """
- for member in self.site.categorymembers(
- self, namespaces=namespaces, total=total, content=content):
- yield member
- if total is not None:
- total -= 1
- if total == 0:
- return
- if recurse:
- if not isinstance(recurse, bool) and recurse:
- recurse = recurse - 1
- for subcat in self.subcategories():
- for article in subcat.members(
- recurse, namespaces, total=total, content=content):
- yield article
- if total is None:
- continue
-
- total -= 1
- if total == 0:
- return
-
- def isEmptyCategory(self) -> bool:
- """Return True if category has no members (including subcategories)."""
- ci = self.categoryinfo
- return sum(ci[k] for k in ['files', 'pages', 'subcats']) == 0
-
- def isHiddenCategory(self) -> bool:
- """Return True if the category is hidden."""
- return 'hiddencat' in self.properties()
-
- @property
- def categoryinfo(self) -> dict:
- """
- Return a dict containing information about the category.
-
- The dict contains values for:
-
- Numbers of pages, subcategories, files, and total contents.
- """
- return self.site.categoryinfo(self)
-
- def newest_pages(
- self,
- total: Optional[int] = None
- ) -> Generator[Page, None, None]:
- """
- Return pages in a category ordered by the creation date.
-
- If two or more pages are created at the same time, the pages are
- returned in the order they were added to the category. The most
- recently added page is returned first.
-
- It only allows to return the pages ordered from newest to oldest, as it
- is impossible to determine the oldest page in a category without
- checking all pages. But it is possible to check the category in order
- with the newly added first and it yields all pages which were created
- after the currently checked page was added (and thus there is no page
- created after any of the cached but added before the currently
- checked).
-
- :param total: The total number of pages queried.
- :return: A page generator of all pages in a category ordered by the
- creation date. From newest to oldest.
-
- .. note:: It currently only returns Page instances and not a
- subclass of it if possible. This might change so don't
- expect to only get Page instances.
- """
- def check_cache(latest):
- """Return the cached pages in order and not more than total."""
- cached = []
- for timestamp in sorted((ts for ts in cache if ts > latest),
- reverse=True):
- # The complete list can be removed, it'll either yield all of
- # them, or only a portion but will skip the rest anyway
- cached += cache.pop(timestamp)[:None if total is None else
- total - len(cached)]
- if total and len(cached) >= total:
- break # already got enough
- assert total is None or len(cached) <= total, \
- 'Number of caches is more than total number requested'
- return cached
-
- # all pages which have been checked but where created before the
- # current page was added, at some point they will be created after
- # the current page was added. It saves all pages via the creation
- # timestamp. Be prepared for multiple pages.
- cache = defaultdict(list)
- # TODO: Make site.categorymembers is usable as it returns pages
- # There is no total defined, as it's not known how many pages need to
- # be checked before the total amount of new pages was found. In worst
- # case all pages of a category need to be checked.
- for member in pywikibot.data.api.QueryGenerator(
- site=self.site, parameters={
- 'list': 'categorymembers', 'cmsort': 'timestamp',
- 'cmdir': 'older', 'cmprop': 'timestamp|title',
- 'cmtitle': self.title()}):
- # TODO: Upcast to suitable class
- page = pywikibot.Page(self.site, member['title'])
- assert page.namespace() == member['ns'], \
- 'Namespace of the page is not consistent'
- cached = check_cache(pywikibot.Timestamp.fromISOformat(
- member['timestamp']))
- yield from cached
- if total is not None:
- total -= len(cached)
- if total <= 0:
- break
- cache[page.oldest_revision.timestamp] += [page]
- else:
- # clear cache
- assert total is None or total > 0, \
- 'As many items as given in total already returned'
- yield from check_cache(pywikibot.Timestamp.min)
diff --git a/pywikibot/page/_user.py b/pywikibot/page/_user.py
index 465474b..f12b507 100644
--- a/pywikibot/page/_user.py
+++ b/pywikibot/page/_user.py
@@ -15,7 +15,7 @@
UserRightsError,
)
from pywikibot.page._links import Link
-from pywikibot.page._pages import Page
+from pywikibot.page._page import Page
from pywikibot.page._revision import Revision
from pywikibot.tools import deprecated, is_ip_address

diff --git a/pywikibot/page/_wikibase.py b/pywikibot/page/_wikibase.py
index 77ac917..12752fd 100644
--- a/pywikibot/page/_wikibase.py
+++ b/pywikibot/page/_wikibase.py
@@ -44,7 +44,7 @@
)
from pywikibot.page._decorators import allow_asynchronous
from pywikibot.page._filepage import FilePage
-from pywikibot.page._pages import BasePage
+from pywikibot.page._page import BasePage
from pywikibot.site import DataSite, Namespace
from pywikibot.tools import cached

diff --git a/tox.ini b/tox.ini
index 2ab6d82..ee96c4a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -145,7 +145,7 @@
pywikibot/interwiki_graph.py: N802, N803, N806
pywikibot/login.py: N802, N816
pywikibot/page/_collections.py: N802
- pywikibot/page/_pages.py: N802
+ pywikibot/page/_page.py: N802
pywikibot/page/_wikibase.py: N802
pywikibot/pagegenerators/__init__.py: N802
pywikibot/pagegenerators/_filters.py: N802

To view, visit change 839630. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ia4a15abac6adea179b870bb8821f939a0119b796
Gerrit-Change-Number: 839630
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged