jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/762414 )
Change subject: [pg] restore removed pagegenerators functions ......................................................................
[pg] restore removed pagegenerators functions
This partly reverts: Ib4247ae2d9b302f26d2e7d08141ba789b5ad3a19 I17c55e3eb22c393497e12b38f04291e804a76f80 See also 3f72af3
Change-Id: I4b250ffce9ec90ba612dfa5d1af92b665e8e525d --- M ROADMAP.rst M pywikibot/pagegenerators.py M tests/pagegenerators_tests.py 3 files changed, 427 insertions(+), 5 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/ROADMAP.rst b/ROADMAP.rst index cbd48ab..67dcdd7 100644 --- a/ROADMAP.rst +++ b/ROADMAP.rst @@ -74,7 +74,7 @@
* BaseBot.stop() method were removed in favour of BaseBot.generator.close() * showHelp() function was remove in favour of show_help -* Remove AllpagesPageGenerator, UnconnectedPageGenerator, CombinedPageGenerator, WantedPagesPageGenerator pagegenerators +* CombinedPageGenerator pagegenerator was removed in favour of itertools.chain * Remove deprecated echo.Notification.id * Remove APISite.newfiles() method (T168339) * Remove APISite.page_exists() method diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index 7889c4f..072247e 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -1325,6 +1325,35 @@ return None if not v else int(v)
+def AllpagesPageGenerator( + start: str = '!', + namespace: int = 0, + includeredirects: Union[str, bool] = True, + site: OPT_SITE_TYPE = None, + total: Optional[int] = None, content: bool = False +) -> Iterable['pywikibot.page.Page']: + """Iterate Page objects for all titles in a single namespace. + + If includeredirects is False, redirects are not included. If + includeredirects equals the string 'only', only redirects are added. + + :param total: Maximum number of pages to retrieve in total + :param content: If True, load current version of each page (default False) + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + + filterredir = None # type: Optional[bool] + if not includeredirects: + filterredir = False + elif includeredirects == 'only': + filterredir = True + + return site.allpages(start=start, namespace=namespace, + filterredir=filterredir, total=total, content=content) + + def PrefixingPageGenerator(prefix: str, namespace: NAMESPACE_OR_INT_TYPE = None, includeredirects: Union[None, bool, str] = True, @@ -1445,6 +1474,40 @@ return gen
+def UnconnectedPageGenerator( + site: OPT_SITE_TYPE = None, + total: Optional[int] = None +) -> Iterable['pywikibot.page.Page']: + """Iterate Page objects for all unconnected pages to a Wikibase repository. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + if not site.data_repository(): + raise ValueError('The given site does not have Wikibase repository.') + return site.unconnected_pages(total=total) + + +def FileLinksGenerator( + referredFilePage: 'pywikibot.page.FilePage', + total: Optional[int] = None, + content: bool = False +) -> Iterable['pywikibot.page.Page']: + """Yield Pages on which referredFilePage file is displayed.""" + return referredFilePage.usingPages(total=total, content=content) + + +def ImagesPageGenerator( + pageWithImages: 'pywikibot.page.Page', + total: Optional[int] = None, + content: bool = False +) -> Iterable['pywikibot.page.Page']: + """Yield FilePages displayed on pageWithImages.""" + return pageWithImages.imagelinks(total=total, content=content) + + def InterwikiPageGenerator(page: 'pywikibot.page.Page' ) -> Iterable['pywikibot.page.Page']: """Iterate over all interwiki (non-language) links on a page.""" @@ -1514,6 +1577,24 @@ yield s
+def LinkedPageGenerator( + linkingPage: 'pywikibot.page.Page', + total: Optional[int] = None, + content: bool = False +) -> Iterable['pywikibot.page.Page']: + """Yield all pages linked from a specific page. + + See :py:obj:`pywikibot.page.BasePage.linkedPages` for details. + + :param linkingPage: the page that links to the pages we want + :param total: the total number of pages to iterate + :param content: if True, retrieve the current content of each linked page + :return: a generator that yields Page objects of pages linked to + linkingPage + """ + return linkingPage.linkedPages(total=total, content=content) + + def _yield_titles(f: Union[codecs.StreamReaderWriter, io.StringIO], site: pywikibot.site.BaseSite ) -> Iterable['pywikibot.page.Page']: @@ -1590,6 +1671,26 @@ yield pywikibot.Page(pywikibot.Link(title, site))
+def PagesFromPageidGenerator( + pageids: Iterable[str], + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.Page']: + """Return a page generator from pageids. + + Pages are iterated in the same order than in the underlying pageids. + Pageids are filtered and only one page is returned in case of + duplicate pageid. + + :param pageids: an iterable that returns pageids, or a comma-separated + string of pageids (e.g. '945097,1483753,956608') + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + + return site.load_pages_from_pageids(pageids) + + def UserContributionsGenerator(username: str, namespaces: Optional[List[int]] = None, site: OPT_SITE_TYPE = None, @@ -1966,7 +2067,7 @@
msg = '{prefix} edit on {page} was on {time}.\n' \ 'Too {{when}}. Skipping.' \ - .format(prefix=edit.__class__.__name__, # prefix = Class name. + .format(prefix=type(edit).__name__, page=page, time=edit_time.isoformat())
@@ -2258,9 +2359,10 @@ yield page
-def AncientPagesPageGenerator(total: int = 100, # pragma: no cover - site: OPT_SITE_TYPE = None - ) -> Iterator['pywikibot.page.Page']: +def AncientPagesPageGenerator( + total: int = 100, + site: OPT_SITE_TYPE = None +) -> Iterator['pywikibot.page.Page']: """ Ancient page generator.
@@ -2272,6 +2374,162 @@ return (page for page, _ in site.ancientpages(total=total))
+def UnusedFilesGenerator( + total: Optional[int] = None, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.FilePage']: + """Unused files generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.unusedfiles(total=total) + + +def WithoutInterwikiPageGenerator( + total: Optional[int] = None, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.Page']: + """Page lacking interwikis generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.withoutinterwiki(total=total) + + +def UnCategorizedCategoryGenerator( + total: Optional[int] = 100, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.Category']: + """Uncategorized category generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.uncategorizedcategories(total=total) + + +def UnCategorizedImageGenerator( + total: int = 100, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.FilePage']: + """Uncategorized file generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.uncategorizedimages(total=total) + + +def UnCategorizedPageGenerator( + total: int = 100, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.Page']: + """Uncategorized page generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. 
+ """ + if site is None: + site = pywikibot.Site() + return site.uncategorizedpages(total=total) + + +def UnCategorizedTemplateGenerator( + total: int = 100, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.Page']: + """Uncategorized template generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.uncategorizedtemplates(total=total) + + +def LonelyPagesPageGenerator( + total: Optional[int] = None, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.Page']: + """Lonely page generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.lonelypages(total=total) + + +def UnwatchedPagesPageGenerator( + total: Optional[int] = None, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.Page']: + """Unwatched page generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.unwatchedpages(total=total) + + +def page_with_property_generator( + name: str, + total: Optional[int] = None, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.Page']: + """Special:PagesWithProperty page generator. + + :param name: Property name of pages to be retrieved + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.pages_with_property(name, total=total) + + +def WantedPagesPageGenerator( + total: int = 100, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.Page']: + """Wanted page generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. 
+ """ + if site is None: + site = pywikibot.Site() + return site.wantedpages(total=total) + + +def DeadendPagesPageGenerator( + total: int = 100, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.Page']: + """Dead-end page generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.deadendpages(total=total) + + def LongPagesPageGenerator(total: int = 100, site: OPT_SITE_TYPE = None ) -> Iterator['pywikibot.page.Page']: @@ -2300,6 +2558,78 @@ return (page for page, _ in site.shortpages(total=total))
+def RandomPageGenerator( + total: Optional[int] = None, + site: OPT_SITE_TYPE = None, + namespaces: Optional[Sequence[NAMESPACE_OR_STR_TYPE]] = None +) -> Iterable['pywikibot.page.Page']: + """Random page generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.randompages(total=total, namespaces=namespaces) + + +def RandomRedirectPageGenerator( + total: Optional[int] = None, + site: OPT_SITE_TYPE = None, + namespaces: Optional[ + Sequence[NAMESPACE_OR_STR_TYPE]] = None +) -> Iterable['pywikibot.page.Page']: + """Random redirect generator. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.randompages(total=total, namespaces=namespaces, + redirects=True) + + +def LinksearchPageGenerator( + url: str, + namespaces: Optional[List[int]] = None, + total: Optional[int] = None, + site: OPT_SITE_TYPE = None, + protocol: Optional[str] = None +) -> Iterable['pywikibot.page.Page']: + """Yield all pages that link to a certain URL. + + :param url: The URL to search for (with ot without the protocol prefix); + this may include a '*' as a wildcard, only at the start of the + hostname + :param namespaces: list of namespace numbers to fetch contribs from + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results + :param protocol: Protocol to search for, likely http or https, http by + default. 
Full list shown on Special:LinkSearch wikipage + """ + if site is None: + site = pywikibot.Site() + return site.exturlusage(url, namespaces=namespaces, protocol=protocol, + total=total, content=False) + + +def SearchPageGenerator( + query: str, + total: Optional[int] = None, + namespaces: Optional[Sequence[NAMESPACE_OR_STR_TYPE]] = None, + site: OPT_SITE_TYPE = None +) -> Iterable['pywikibot.page.Page']: + """Yield pages from the MediaWiki internal search engine. + + :param total: Maximum number of pages to retrieve in total + :param site: Site for generator results. + """ + if site is None: + site = pywikibot.Site() + return site.search(query, total=total, namespaces=namespaces) + + def LiveRCPageGenerator(site: OPT_SITE_TYPE = None, total: Optional[int] = None ) -> Iterator['pywikibot.page.Page']: diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py index f195ffe..2455530 100644 --- a/tests/pagegenerators_tests.py +++ b/tests/pagegenerators_tests.py @@ -202,6 +202,19 @@ self.titles = [self.base_title.format(i) for i in range(1, 11)]
+class TestPagesFromPageidGenerator(BasetitleTestCase): + + """Test PagesFromPageidGenerator method.""" + + def test_PagesFromPageidGenerator(self): + """Test PagesFromPageidGenerator.""" + gen_pages = pagegenerators.PagesFromTitlesGenerator(self.titles, + self.site) + pageids = (page.pageid for page in gen_pages) + gen = pagegenerators.PagesFromPageidGenerator(pageids, self.site) + self.assertPageTitlesEqual(gen, self.titles) + + class TestCategoryFilterPageGenerator(BasetitleTestCase):
"""Test CategoryFilterPageGenerator method.""" @@ -1649,6 +1662,85 @@ self.assertIn(key, rcinfo.keys())
+class TestUnconnectedPageGenerator(DefaultSiteTestCase): + + """Test UnconnectedPageGenerator.""" + + cached = True + + def test_unconnected_with_repo(self): + """Test UnconnectedPageGenerator.""" + if not self.site.data_repository(): + self.skipTest('Site is not using a Wikibase repository') + upgen = pagegenerators.UnconnectedPageGenerator(self.site, 3) + self.assertDictEqual( + upgen.request._params, { + 'gqppage': ['UnconnectedPages'], + 'prop': ['info', 'imageinfo', 'categoryinfo'], + 'inprop': ['protection'], + 'iilimit': ['max'], + 'iiprop': ['timestamp', 'user', 'comment', 'url', 'size', + 'sha1', 'metadata'], + 'generator': ['querypage'], 'action': ['query'], + 'indexpageids': [True], 'continue': [True]}) + self.assertLessEqual(len(tuple(upgen)), 3) + + def test_unconnected_without_repo(self): + """Test that it raises a ValueError on sites without repository.""" + if self.site.data_repository(): + self.skipTest('Site is using a Wikibase repository') + with self.assertRaises(ValueError): + for _ in pagegenerators.UnconnectedPageGenerator(self.site, + total=5): + raise AssertionError("this shouldn't be reached") + + +class TestLinksearchPageGenerator(TestCase): + + """Tests for pagegenerators.LinksearchPageGenerator.""" + + family = 'wikipedia' + code = 'en' + + def test_weblink(self): + """Test -weblink.""" + cases = (('wikipedia.org', 'http://wikipedia.org'), + ('en.wikipedia.org', 'http://en.wikipedia.org'), + ('https://fr.wikipedia.org', 'https://fr.wikipedia.org'), + ('ftp://*', 'ftp://')) + + for search, expected in cases: + gf = pagegenerators.GeneratorFactory(site=self.site) + gf.handle_arg('-weblink:{}'.format(search)) + gf.handle_arg('-ns:2') + gf.handle_arg('-limit:1') + gen = gf.getCombinedGenerator() + genlist = list(gen) + self.assertLength(genlist, 1) + + page = genlist[0] + self.assertIsInstance(page, pywikibot.Page) + self.assertTrue(page.exists()) + self.assertEqual(page.namespace(), 2) + self.assertIn(expected, page.text) + + def 
test_double_opposite_protocols(self): + """Test LinksearchPageGenerator with two opposite protocols.""" + with self.assertRaises(ValueError): + pagegenerators.LinksearchPageGenerator('http://w.wiki', + protocol='https', + site=self.site) + + def test_double_same_protocols(self): + """Test LinksearchPageGenerator with two same protocols.""" + gen = pagegenerators.LinksearchPageGenerator('https://w.wiki', + protocol='https', + site=self.site, + total=1) + self.assertIsInstance(gen, pywikibot.data.api.PageGenerator) + self.assertLength(list(gen), 1) + + if __name__ == '__main__': # pragma: no cover with suppress(SystemExit): unittest.main()
pywikibot-commits@lists.wikimedia.org