jenkins-bot submitted this change.

View Change

Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
[pg] restore removed pagegenerators functions

This partly reverts:
Ib4247ae2d9b302f26d2e7d08141ba789b5ad3a19
I17c55e3eb22c393497e12b38f04291e804a76f80
See also 3f72af3

Change-Id: I4b250ffce9ec90ba612dfa5d1af92b665e8e525d
---
M ROADMAP.rst
M pywikibot/pagegenerators.py
M tests/pagegenerators_tests.py
3 files changed, 427 insertions(+), 5 deletions(-)

diff --git a/ROADMAP.rst b/ROADMAP.rst
index cbd48ab..67dcdd7 100644
--- a/ROADMAP.rst
+++ b/ROADMAP.rst
@@ -74,7 +74,7 @@

* BaseBot.stop() method were removed in favour of BaseBot.generator.close()
* showHelp() function was remove in favour of show_help
-* Remove AllpagesPageGenerator, UnconnectedPageGenerator, CombinedPageGenerator, WantedPagesPageGenerator pagegenerators
+* CombinedPageGenerator pagegenerator was removed in favour of itertools.chain
* Remove deprecated echo.Notification.id
* Remove APISite.newfiles() method (T168339)
* Remove APISite.page_exists() method
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 7889c4f..072247e 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -1325,6 +1325,35 @@
return None if not v else int(v)


+def AllpagesPageGenerator(
+ start: str = '!',
+ namespace: int = 0,
+ includeredirects: Union[str, bool] = True,
+ site: OPT_SITE_TYPE = None,
+ total: Optional[int] = None, content: bool = False
+) -> Iterable['pywikibot.page.Page']:
+ """Iterate Page objects for all titles in a single namespace.
+
+ If includeredirects is False, redirects are not included. If
+ includeredirects equals the string 'only', only redirects are added.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param content: If True, load current version of each page (default False)
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+
+ filterredir = None # type: Optional[bool]
+ if not includeredirects:
+ filterredir = False
+ elif includeredirects == 'only':
+ filterredir = True
+
+ return site.allpages(start=start, namespace=namespace,
+ filterredir=filterredir, total=total, content=content)
+
+
def PrefixingPageGenerator(prefix: str,
namespace: NAMESPACE_OR_INT_TYPE = None,
includeredirects: Union[None, bool, str] = True,
@@ -1445,6 +1474,40 @@
return gen


+def UnconnectedPageGenerator(
+ site: OPT_SITE_TYPE = None,
+ total: Optional[int] = None
+) -> Iterable['pywikibot.page.Page']:
+ """Iterate Page objects for all unconnected pages to a Wikibase repository.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ if not site.data_repository():
+ raise ValueError('The given site does not have Wikibase repository.')
+ return site.unconnected_pages(total=total)
+
+
+def FileLinksGenerator(
+ referredFilePage: 'pywikibot.page.FilePage',
+ total: Optional[int] = None,
+ content: bool = False
+) -> Iterable['pywikibot.page.Page']:
+ """Yield Pages on which referredFilePage file is displayed."""
+ return referredFilePage.usingPages(total=total, content=content)
+
+
+def ImagesPageGenerator(
+ pageWithImages: 'pywikibot.page.Page',
+ total: Optional[int] = None,
+ content: bool = False
+) -> Iterable['pywikibot.page.Page']:
+ """Yield FilePages displayed on pageWithImages."""
+ return pageWithImages.imagelinks(total=total, content=content)
+
+
def InterwikiPageGenerator(page: 'pywikibot.page.Page'
) -> Iterable['pywikibot.page.Page']:
"""Iterate over all interwiki (non-language) links on a page."""
@@ -1514,6 +1577,24 @@
yield s


+def LinkedPageGenerator(
+ linkingPage: 'pywikibot.page.Page',
+ total: Optional[int] = None,
+ content: bool = False
+) -> Iterable['pywikibot.page.Page']:
+ """Yield all pages linked from a specific page.
+
+ See :py:obj:`pywikibot.page.BasePage.linkedPages` for details.
+
+ :param linkingPage: the page that links to the pages we want
+ :param total: the total number of pages to iterate
+ :param content: if True, retrieve the current content of each linked page
+ :return: a generator that yields Page objects of pages linked from
+ linkingPage
+ """
+ return linkingPage.linkedPages(total=total, content=content)
+
+
def _yield_titles(f: Union[codecs.StreamReaderWriter, io.StringIO],
site: pywikibot.site.BaseSite
) -> Iterable['pywikibot.page.Page']:
@@ -1590,6 +1671,26 @@
yield pywikibot.Page(pywikibot.Link(title, site))


+def PagesFromPageidGenerator(
+ pageids: Iterable[str],
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.Page']:
+ """Return a page generator from pageids.
+
+ Pages are iterated in the same order as in the underlying pageids.
+ Pageids are filtered and only one page is returned in case of
+ duplicate pageid.
+
+ :param pageids: an iterable that returns pageids, or a comma-separated
+ string of pageids (e.g. '945097,1483753,956608')
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+
+ return site.load_pages_from_pageids(pageids)
+
+
def UserContributionsGenerator(username: str,
namespaces: Optional[List[int]] = None,
site: OPT_SITE_TYPE = None,
@@ -1966,7 +2067,7 @@

msg = '{prefix} edit on {page} was on {time}.\n' \
'Too {{when}}. Skipping.' \
- .format(prefix=edit.__class__.__name__, # prefix = Class name.
+ .format(prefix=type(edit).__name__,
page=page,
time=edit_time.isoformat())

@@ -2258,9 +2359,10 @@
yield page


-def AncientPagesPageGenerator(total: int = 100, # pragma: no cover
- site: OPT_SITE_TYPE = None
- ) -> Iterator['pywikibot.page.Page']:
+def AncientPagesPageGenerator(
+ total: int = 100,
+ site: OPT_SITE_TYPE = None
+) -> Iterator['pywikibot.page.Page']:
"""
Ancient page generator.

@@ -2272,6 +2374,162 @@
return (page for page, _ in site.ancientpages(total=total))


+def UnusedFilesGenerator(
+ total: Optional[int] = None,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.FilePage']:
+ """Unused files generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.unusedfiles(total=total)
+
+
+def WithoutInterwikiPageGenerator(
+ total: Optional[int] = None,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.Page']:
+ """Page lacking interwikis generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.withoutinterwiki(total=total)
+
+
+def UnCategorizedCategoryGenerator(
+ total: Optional[int] = 100,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.Category']:
+ """Uncategorized category generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.uncategorizedcategories(total=total)
+
+
+def UnCategorizedImageGenerator(
+ total: int = 100,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.FilePage']:
+ """Uncategorized file generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.uncategorizedimages(total=total)
+
+
+def UnCategorizedPageGenerator(
+ total: int = 100,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.Page']:
+ """Uncategorized page generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.uncategorizedpages(total=total)
+
+
+def UnCategorizedTemplateGenerator(
+ total: int = 100,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.Page']:
+ """Uncategorized template generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.uncategorizedtemplates(total=total)
+
+
+def LonelyPagesPageGenerator(
+ total: Optional[int] = None,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.Page']:
+ """Lonely page generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.lonelypages(total=total)
+
+
+def UnwatchedPagesPageGenerator(
+ total: Optional[int] = None,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.Page']:
+ """Unwatched page generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.unwatchedpages(total=total)
+
+
+def page_with_property_generator(
+ name: str,
+ total: Optional[int] = None,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.Page']:
+ """Special:PagesWithProperty page generator.
+
+ :param name: Property name of pages to be retrieved
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.pages_with_property(name, total=total)
+
+
+def WantedPagesPageGenerator(
+ total: int = 100,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.Page']:
+ """Wanted page generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.wantedpages(total=total)
+
+
+def DeadendPagesPageGenerator(
+ total: int = 100,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.Page']:
+ """Dead-end page generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.deadendpages(total=total)
+
+
def LongPagesPageGenerator(total: int = 100,
site: OPT_SITE_TYPE = None
) -> Iterator['pywikibot.page.Page']:
@@ -2300,6 +2558,78 @@
return (page for page, _ in site.shortpages(total=total))


+def RandomPageGenerator(
+ total: Optional[int] = None,
+ site: OPT_SITE_TYPE = None,
+ namespaces: Optional[Sequence[NAMESPACE_OR_STR_TYPE]] = None
+) -> Iterable['pywikibot.page.Page']:
+ """Random page generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.randompages(total=total, namespaces=namespaces)
+
+
+def RandomRedirectPageGenerator(
+ total: Optional[int] = None,
+ site: OPT_SITE_TYPE = None,
+ namespaces: Optional[
+ Sequence[NAMESPACE_OR_STR_TYPE]] = None
+) -> Iterable['pywikibot.page.Page']:
+ """Random redirect generator.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.randompages(total=total, namespaces=namespaces,
+ redirects=True)
+
+
+def LinksearchPageGenerator(
+ url: str,
+ namespaces: Optional[List[int]] = None,
+ total: Optional[int] = None,
+ site: OPT_SITE_TYPE = None,
+ protocol: Optional[str] = None
+) -> Iterable['pywikibot.page.Page']:
+ """Yield all pages that link to a certain URL.
+
+ :param url: The URL to search for (with or without the protocol prefix);
+ this may include a '*' as a wildcard, only at the start of the
+ hostname
+ :param namespaces: list of namespace numbers to limit the search to
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results
+ :param protocol: Protocol to search for, likely http or https, http by
+ default. Full list shown on Special:LinkSearch wikipage
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.exturlusage(url, namespaces=namespaces, protocol=protocol,
+ total=total, content=False)
+
+
+def SearchPageGenerator(
+ query: str,
+ total: Optional[int] = None,
+ namespaces: Optional[Sequence[NAMESPACE_OR_STR_TYPE]] = None,
+ site: OPT_SITE_TYPE = None
+) -> Iterable['pywikibot.page.Page']:
+ """Yield pages from the MediaWiki internal search engine.
+
+ :param total: Maximum number of pages to retrieve in total
+ :param site: Site for generator results.
+ """
+ if site is None:
+ site = pywikibot.Site()
+ return site.search(query, total=total, namespaces=namespaces)
+
+
def LiveRCPageGenerator(site: OPT_SITE_TYPE = None,
total: Optional[int] = None
) -> Iterator['pywikibot.page.Page']:
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index f195ffe..2455530 100644
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -202,6 +202,19 @@
self.titles = [self.base_title.format(i) for i in range(1, 11)]


+class TestPagesFromPageidGenerator(BasetitleTestCase):
+
+ """Test PagesFromPageidGenerator method."""
+
+ def test_PagesFromPageidGenerator(self):
+ """Test PagesFromPageidGenerator."""
+ gen_pages = pagegenerators.PagesFromTitlesGenerator(self.titles,
+ self.site)
+ pageids = (page.pageid for page in gen_pages)
+ gen = pagegenerators.PagesFromPageidGenerator(pageids, self.site)
+ self.assertPageTitlesEqual(gen, self.titles)
+
+
class TestCategoryFilterPageGenerator(BasetitleTestCase):

"""Test CategoryFilterPageGenerator method."""
@@ -1649,6 +1662,85 @@
self.assertIn(key, rcinfo.keys())


+class TestUnconnectedPageGenerator(DefaultSiteTestCase):
+
+ """Test UnconnectedPageGenerator."""
+
+ cached = True
+
+ def test_unconnected_with_repo(self):
+ """Test UnconnectedPageGenerator."""
+ if not self.site.data_repository():
+ self.skipTest('Site is not using a Wikibase repository')
+ upgen = pagegenerators.UnconnectedPageGenerator(self.site, 3)
+ self.assertDictEqual(
+ upgen.request._params, {
+ 'gqppage': ['UnconnectedPages'],
+ 'prop': ['info', 'imageinfo', 'categoryinfo'],
+ 'inprop': ['protection'],
+ 'iilimit': ['max'],
+ 'iiprop': ['timestamp', 'user', 'comment', 'url', 'size',
+ 'sha1', 'metadata'],
+ 'generator': ['querypage'], 'action': ['query'],
+ 'indexpageids': [True], 'continue': [True]})
+ self.assertLessEqual(len(tuple(upgen)), 3)
+
+ def test_unconnected_without_repo(self):
+ """Test that it raises a ValueError on sites without repository."""
+ if self.site.data_repository():
+ self.skipTest('Site is using a Wikibase repository')
+ with self.assertRaises(ValueError):
+ for _ in pagegenerators.UnconnectedPageGenerator(self.site,
+ total=5):
+ raise AssertionError("this shouldn't be reached")
+
+
+class TestLinksearchPageGenerator(TestCase):
+
+ """Tests for pagegenerators.LinksearchPageGenerator."""
+
+ family = 'wikipedia'
+ code = 'en'
+
+ def test_weblink(self):
+ """Test -weblink."""
+ cases = (('wikipedia.org', 'http://wikipedia.org'),
+ ('en.wikipedia.org', 'http://en.wikipedia.org'),
+ ('https://fr.wikipedia.org', 'https://fr.wikipedia.org'),
+ ('ftp://*', 'ftp://'))
+
+ for search, expected in cases:
+ gf = pagegenerators.GeneratorFactory(site=self.site)
+ gf.handle_arg('-weblink:{}'.format(search))
+ gf.handle_arg('-ns:2')
+ gf.handle_arg('-limit:1')
+ gen = gf.getCombinedGenerator()
+ genlist = list(gen)
+ self.assertLength(genlist, 1)
+
+ page = genlist[0]
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertTrue(page.exists())
+ self.assertEqual(page.namespace(), 2)
+ self.assertIn(expected, page.text)
+
+ def test_double_opposite_protocols(self):
+ """Test LinksearchPageGenerator with two opposite protocols."""
+ with self.assertRaises(ValueError):
+ pagegenerators.LinksearchPageGenerator('http://w.wiki',
+ protocol='https',
+ site=self.site)
+
+ def test_double_same_protocols(self):
+ """Test LinksearchPageGenerator with two same protocols."""
+ gen = pagegenerators.LinksearchPageGenerator('https://w.wiki',
+ protocol='https',
+ site=self.site,
+ total=1)
+ self.assertIsInstance(gen, pywikibot.data.api.PageGenerator)
+ self.assertLength(list(gen), 1)
+
+
if __name__ == '__main__': # pragma: no cover
with suppress(SystemExit):
unittest.main()

To view, visit change 762414. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I4b250ffce9ec90ba612dfa5d1af92b665e8e525d
Gerrit-Change-Number: 762414
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Multichill <maarten@mdammers.nl>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged