[IMPR] Make textlib._get_regexes a public function

- Make textlib._get_regexes a public function as get_regexes
- keys parameter may be a plain str
- site may be optional because it is only used for several keys
- use plain str if only one key is used
- remove site parameter if it is not used
- remove unused site parameter of _extract_headings function
- replace _get_regexes with get_regexes for all its usage
- add _create_default_regexes to documentation because it is used in
  replace.py

Bug: T336144
Change-Id: Ic9f673e508228998b28375563027d6e9631f4e4a
---
M pywikibot/textlib.py
M pywikibot/cosmetic_changes.py
M scripts/replace.py
M scripts/category.py
4 files changed, 64 insertions(+), 23 deletions(-)

diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 003d670..9235462 100644
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -51,7 +51,7 @@
                                      'your_script_name_2']
 """
 #
-# (C) Pywikibot team, 2006-2022
+# (C) Pywikibot team, 2006-2023
 #
 # Distributed under the terms of the MIT license.
 #
@@ -68,7 +68,7 @@
 from pywikibot.textlib import (
     FILE_LINK_REGEX,
     MultiTemplateMatchBuilder,
-    _get_regexes,
+    get_regexes,
 )
 from pywikibot.tools import first_lower, first_upper
 from pywikibot.tools.chars import url2string
@@ -682,7 +682,7 @@
             return text
 
         skippings = ['comment', 'category']
-        skip_regexes = _get_regexes(skippings, self.site)
+        skip_regexes = get_regexes(skippings, self.site)
         # site defined templates
         skip_templates = {
             'cs': ('Pahýl[ _]část',),  # stub section
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index ce63410..3c73b34 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -46,7 +46,7 @@
 ETPType = List[Tuple[str, OrderedDictType[str, str]]]
 
 # cache for replaceExcept to avoid recompile or regexes each call
-_regex_cache = {}
+_regex_cache: Dict[str, Pattern[str]] = {}
 
 # The regex below collects nested templates, providing simpler
 # identification of templates used at the top-level of wikitext.
@@ -253,7 +253,15 @@
 
 
 def _create_default_regexes() -> None:
-    """Fill (and possibly overwrite) _regex_cache with default regexes."""
+    """Fill (and possibly overwrite) ``_regex_cache`` with default regexes.
+
+    The following keys are provided: ``category``, ``comment``, ``file``,
+    ``header``, ``hyperlink``, ``interwiki``, ``invoke``, ``link``,
+    ``pagelist``, ``property``, ``startcolon``, ``startspace``, ``table``,
+    ``template``.
+
+    :meta public:
+    """
     _regex_cache.update({
         # categories
         'category': (r'\[\[ *(?:%s)\s*:.*?\]\]',
@@ -304,16 +312,29 @@
     })
 
 
-def _get_regexes(keys: Iterable, site) -> List[Pattern[str]]:
+def get_regexes(
+    keys: Union[str, Iterable[str]],
+    site: Optional['pywikibot.site.BaseSite'] = None
+) -> List[Pattern[str]]:
     """Fetch compiled regexes.
 
-    :meta public:
+    .. versionchanged:: 8.2
+       ``_get_regexes`` becomes a public function.
+       *keys* may be a single string; *site* is optional.
+
+    :param keys: a single key or an iterable of keys whose regex pattern
+        should be given
+    :param site: a BaseSite object needed for ``category``, ``file``,
+        ``interwiki``, ``invoke`` and ``property`` keys
+    :raises ValueError: site cannot be None.
     """
     if not _regex_cache:
         _create_default_regexes()
 
-    result = []
+    if isinstance(keys, str):
+        keys = [keys]
 
+    result = []
     for exc in keys:
         if not isinstance(exc, str):
             # assume it's a regular expression
@@ -332,7 +353,7 @@
         else:
             if not site and exc in ('interwiki', 'property', 'invoke',
                                     'category', 'file'):
-                raise ValueError(f'Site cannot be None for the {exc!r} regex')
+                raise ValueError(f'site cannot be None for the {exc!r} regex')
 
             if (exc, site) not in _regex_cache:
                 re_text, re_var = _regex_cache[exc]
@@ -396,7 +417,7 @@
     if not old.search(text):
         return text + marker
 
-    dontTouchRegexes = _get_regexes(exceptions, site)
+    dontTouchRegexes = get_regexes(exceptions, site)
 
     index = 0
     replaced = 0
@@ -500,7 +521,7 @@
        if provided as an ordered collection (list, tuple)
 
     :param tags: The exact set of parts which should be removed using
-        keywords from textlib._get_regexes().
+        keywords from :func:`get_regexes`.
     :param include: Or, in alternative, default parts that shall not
         be removed.
     :param site: Site to be used for site-dependent regexes. Default
@@ -518,7 +539,7 @@
     # ("Note" at the end of the section)
     if include:
         tags = [tag for tag in tags if tag not in include]
-    regexes = _get_regexes(tags, site)
+    regexes = get_regexes(tags, site)
     for regex in regexes:
         text = regex.sub('', text)
     return text
@@ -917,10 +938,10 @@
 _Content = namedtuple('_Content', ('header', 'sections', 'footer'))
 
 
-def _extract_headings(text: str, site) -> list:
+def _extract_headings(text: str) -> list:
     """Return _Heading objects."""
     headings = []
-    heading_regex = _get_regexes(['header'], site)[0]
+    heading_regex = get_regexes('header')[0]
     for match in heading_regex.finditer(text):
         start, end = match.span()
         if not isDisabled(text, start) and not isDisabled(text, end):
@@ -981,11 +1002,11 @@
 
     .. versionadded:: 3.0
     """
-    headings = _extract_headings(text, site)
+    headings = _extract_headings(text)
     sections = _extract_sections(text, headings)
     # Find header and footer contents
     header = text[:headings[0].start] if headings else text
-    cat_regex, interwiki_regex = _get_regexes(('category', 'interwiki'), site)
+    cat_regex, interwiki_regex = get_regexes(['category', 'interwiki'], site)
     langlink_pattern = interwiki_regex.pattern.replace(':?', '')
     last_section_content = sections[-1].content if sections else header
     footer = re.search(
@@ -1251,7 +1272,7 @@
         above_interwiki.append(comment)
 
     if above_interwiki:
-        interwiki = _get_regexes(['interwiki'], site)[0]
+        interwiki = get_regexes('interwiki', site)[0]
         first_interwiki = interwiki.search(newtext)
         for reg in above_interwiki:
             special = reg.search(newtext)
@@ -1565,7 +1586,7 @@
         under_categories.append(stub)
 
     if under_categories:
-        category = _get_regexes(['category'], site)[0]
+        category = get_regexes('category', site)[0]
         for last_category in category.finditer(newtext):
             pass
         for reg in under_categories:
diff --git a/scripts/category.py b/scripts/category.py
index 6df0aea..4c3a7a9 100755
--- a/scripts/category.py
+++ b/scripts/category.py
@@ -143,7 +143,7 @@
    :mod:`pagegenerators` are supported with "move" and "remove" action.
 """
 #
-# (C) Pywikibot team, 2004-2022
+# (C) Pywikibot team, 2004-2023
 #
 # Distributed under the terms of the MIT license.
 #
@@ -1184,7 +1184,7 @@
         # skip initial templates, images and comments for articles.
         if member.namespace() == member.site.namespaces.MAIN:
             excludes = ('template', 'file', 'comment')
-            regexes = textlib._get_regexes(excludes, member.site)
+            regexes = textlib.get_regexes(excludes, member.site)
             i = 0
             while i < 3:
                 i = 0
diff --git a/scripts/replace.py b/scripts/replace.py
index 2a19090..b9d2b66 100755
--- a/scripts/replace.py
+++ b/scripts/replace.py
@@ -142,7 +142,7 @@
 the top of the help.
 """
 #
-# (C) Pywikibot team, 2004-2022
+# (C) Pywikibot team, 2004-2023
 #
 # Distributed under the terms of the MIT license.
 #
@@ -504,8 +504,8 @@
             regular expressions.
         inside-tags
             A list of strings. These strings must be keys from the
-            dictionary in textlib._create_default_regexes() or must be
-            accepted by textlib._get_regexes().
+            dictionary in :func:`textlib._create_default_regexes` or must be
+            accepted by :func:`textlib.get_regexes`.
 
     :keyword allowoverlap: when matches overlap, all of them are replaced.
     :type allowoverlap: bool