jenkins-bot has submitted this change and it was merged.
Change subject: Preloading tests
......................................................................
Preloading tests
Adds tests for:
- PropertyGenerator
- APISite.preloadpages
- PreloadingGenerator

Several tests are marked as expected failures due to known bugs, with
FIXME notes.
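
For reference, a minimal sketch of the preloading APIs exercised by these
tests; the site and the page titles below are illustrative only and are not
taken from the test data:

    import pywikibot
    from pywikibot.pagegenerators import PreloadingGenerator

    site = pywikibot.Site('en', 'wikipedia')
    # Hypothetical page titles, used only for illustration.
    pages = [pywikibot.Page(site, title) for title in ('Foo', 'Bar')]

    # Preload text and the latest revision of each page in batches,
    # issuing one API query per batch of up to 'step' pages.
    for page in PreloadingGenerator(pages, step=50):
        assert isinstance(page, pywikibot.Page)

    # The same batching, going through the site object directly;
    # 'groupsize' plays the role that 'step' plays above.
    for page in site.preloadpages(pages, groupsize=50):
        assert isinstance(page, pywikibot.Page)
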
Bug: 73461
Change-Id: I11df5361bd2b67cce132faba252552bfa5053827
---
M pywikibot/site.py
M tests/api_tests.py
M tests/pagegenerators_tests.py
M tests/site_tests.py
4 files changed, 467 insertions(+), 20 deletions(-)
Approvals:
John Vandenberg: Looks good to me, but someone else must approve
Mpaa: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/site.py b/pywikibot/site.py
index 799d99c..1242c70 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -2457,8 +2457,8 @@
break
else:
pywikibot.warning(
- u"preloadpages: Query returned unexpected title"
- u"'%s'" % pagedata['title'])
+ u"preloadpages: Query returned unexpected "
+ u"title '%s'" % pagedata['title'])
continue
except KeyError:
pywikibot.debug(u"No 'title' in %s" % pagedata, _logger)
diff --git a/tests/api_tests.py b/tests/api_tests.py
index 5d6a07b..7c6c273 100644
--- a/tests/api_tests.py
+++ b/tests/api_tests.py
@@ -239,9 +239,9 @@
self.assertIn('revisions', pi.prefixes)
-class TestPageGenerator(TestCase):
+class TestDryPageGenerator(TestCase):
- """API PageGenerator object test class."""
+ """Dry API PageGenerator object test class."""
family = 'wikipedia'
code = 'en'
@@ -249,7 +249,7 @@
dry = True
def setUp(self):
- super(TestPageGenerator, self).setUp()
+ super(TestDryPageGenerator, self).setUp()
mysite = self.get_site()
self.gen = api.PageGenerator(site=mysite,
generator="links",
@@ -328,6 +328,134 @@
self.assertEqual(len(results), 4) # total=-1 but 4 expected
+class TestPropertyGenerator(TestCase):
+
+ """API PropertyGenerator object test class."""
+
+ family = 'wikipedia'
+ code = 'en'
+
+ def test_info(self):
+ mainpage = self.get_mainpage()
+ links = list(self.site.pagelinks(mainpage, total=10))
+ titles = [l.title(withSection=False)
+ for l in links]
+ gen = api.PropertyGenerator(site=self.site,
+ prop="info",
+ titles='|'.join(titles))
+
+ count = 0
+ for pagedata in gen:
+ self.assertIsInstance(pagedata, dict)
+ self.assertIn('pageid', pagedata)
+ self.assertIn('lastrevid', pagedata)
+ count += 1
+ self.assertEqual(len(links), count)
+
+ def test_one_continuation(self):
+ mainpage = self.get_mainpage()
+ links = list(self.site.pagelinks(mainpage, total=10))
+ titles = [l.title(withSection=False)
+ for l in links]
+ gen = api.PropertyGenerator(site=self.site,
+ prop="revisions",
+ titles='|'.join(titles))
+ gen.set_maximum_items(-1) # suppress use of "rvlimit" parameter
+
+ count = 0
+ for pagedata in gen:
+ self.assertIsInstance(pagedata, dict)
+ self.assertIn('pageid', pagedata)
+ self.assertIn('revisions', pagedata)
+ self.assertIn('revid', pagedata['revisions'][0])
+ count += 1
+ self.assertEqual(len(links), count)
+
+ def test_two_continuations(self):
+ mainpage = self.get_mainpage()
+ links = list(self.site.pagelinks(mainpage, total=10))
+ titles = [l.title(withSection=False)
+ for l in links]
+ gen = api.PropertyGenerator(site=self.site,
+ prop="revisions|coordinates",
+ titles='|'.join(titles))
+ gen.set_maximum_items(-1) # suppress use of "rvlimit" parameter
+
+ count = 0
+ for pagedata in gen:
+ self.assertIsInstance(pagedata, dict)
+ self.assertIn('pageid', pagedata)
+ self.assertIn('revisions', pagedata)
+ self.assertIn('revid', pagedata['revisions'][0])
+ count += 1
+ self.assertEqual(len(links), count)
+
+ @unittest.expectedFailure
+ def test_many_continuations_limited(self):
+ mainpage = self.get_mainpage()
+ links = list(self.site.pagelinks(mainpage, total=30))
+ titles = [l.title(withSection=False)
+ for l in links]
+ gen = api.PropertyGenerator(site=self.site,
+ prop="revisions|info|categoryinfo|langlinks|templates",
+ rvprop="ids|flags|timestamp|user|comment|content",
+ titles='|'.join(titles))
+
+ # An APIError is raised if set_maximum_items is not called.
+ gen.set_maximum_items(-1) # suppress use of "rvlimit" parameter
+ # Force the generator into continuation mode
+ gen.set_query_increment(5)
+
+ count = 0
+ for pagedata in gen:
+ self.assertIsInstance(pagedata, dict)
+ self.assertIn('pageid', pagedata)
+ count += 1
+ self.assertEqual(len(links), count)
+ # FIXME: AssertionError: 30 != 6150
+
+ @unittest.expectedFailure
+ def test_two_continuations_limited(self):
+ # FIXME: test fails
+ mainpage = self.get_mainpage()
+ links = list(self.site.pagelinks(mainpage, total=30))
+ titles = [l.title(withSection=False)
+ for l in links]
+ gen = api.PropertyGenerator(site=self.site,
+ prop="info|categoryinfo|langlinks|templates",
+ titles='|'.join(titles))
+ # Force the generator into continuation mode
+ gen.set_query_increment(5)
+
+ count = 0
+ for pagedata in gen:
+ self.assertIsInstance(pagedata, dict)
+ self.assertIn('pageid', pagedata)
+ count += 1
+ self.assertEqual(len(links), count)
+ # FIXME: AssertionError: 30 != 11550
+
+ # FIXME: test disabled as it takes longer than 10 minutes
+ def _test_two_continuations_limited_long_test(self):
+ """Long duration test, with total & step values that reflect a real scenario."""
+ mainpage = self.get_mainpage()
+ links = list(mainpage.backlinks(total=300))
+ titles = [l.title(withSection=False)
+ for l in links]
+ gen = api.PropertyGenerator(site=self.site,
+ prop="info|categoryinfo|langlinks|templates",
+ titles='|'.join(titles))
+ # Force the generator into continuation mode
+ gen.set_query_increment(50)
+
+ count = 0
+ for pagedata in gen:
+ self.assertIsInstance(pagedata, dict)
+ self.assertIn('pageid', pagedata)
+ count += 1
+ self.assertEqual(len(links), count)
+
+
class TestCachedRequest(DefaultSiteTestCase):
"""Test API Request caching.
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 81720d1..9e19bde 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -14,7 +14,10 @@
import pywikibot
from pywikibot import pagegenerators
-from pywikibot.pagegenerators import PagesFromTitlesGenerator
+from pywikibot.pagegenerators import (
+ PagesFromTitlesGenerator,
+ PreloadingGenerator,
+)
from tests import _data_dir
from tests.aspects import (
@@ -270,6 +273,41 @@
self.assertPagelistTitles(titles, self.expected_titles[site.case()])
+class TestPreloadingGenerator(DefaultSiteTestCase):
+
+ """Test preloading generator on lists."""
+
+ def test_basic(self):
+ """Test PreloadingGenerator with a list of pages."""
+ mainpage = self.get_mainpage()
+ links = list(self.site.pagelinks(mainpage, total=10))
+ count = 0
+ for page in PreloadingGenerator(links, step=20):
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ count += 1
+ self.assertEqual(len(links), count)
+
+ def test_low_step(self):
+ """Test PreloadingGenerator with a step lower than the number of pages."""
+ mainpage = self.get_mainpage()
+ links = list(self.site.pagelinks(mainpage, total=20))
+ count = 0
+ for page in PreloadingGenerator(links, step=10):
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ count += 1
+ self.assertEqual(len(links), count)
+
+
class TestDequePreloadingGenerator(DefaultSiteTestCase):
"""Test preloading generator on lists."""
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 408c60b..262bfd7 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -280,20 +280,6 @@
if a:
self.assertEqual(a[0], mainpage)
- def testPreload(self):
- """Test that preloading works."""
- mysite = self.get_site()
- mainpage = self.get_mainpage()
- count = 0
- for page in mysite.preloadpages(mysite.pagelinks(mainpage, total=10)):
- self.assertIsInstance(page, pywikibot.Page)
- self.assertIsInstance(page.exists(), bool)
- if page.exists():
- self.assertTrue(hasattr(page, "_text"))
- count += 1
- if count >= 5:
- break
-
def testLinkMethods(self):
"""Test site methods for getting links to and from a page."""
mysite = self.get_site()
@@ -1562,6 +1548,301 @@
self.assertTrue(site.is_uploaddisabled())
+class TestPagePreloading(DefaultSiteTestCase):
+
+ """Test site.preloadpages()."""
+
+ def test_pageids(self):
+ """Test basic preloading with pageids."""
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+ links = mysite.pagelinks(mainpage, total=10)
+ # preloadpages will send the page ids,
+ # as they have already been loaded by pagelinks
+ for page in mysite.preloadpages(links):
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ count += 1
+ if count >= 5:
+ break
+
+ def test_titles(self):
+ """Test basic preloading with titles."""
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+ links = mysite.pagelinks(mainpage, total=10)
+
+ # remove the pageids that have already been loaded above by pagelinks
+ # so that preloadpages will use the titles instead
+ for page in links:
+ del page._pageid
+
+ for page in mysite.preloadpages(links):
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ count += 1
+ if count >= 5:
+ break
+
+ def test_preload_continuation(self):
+ """Test preloading continuation works."""
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+ links = mysite.pagelinks(mainpage, total=10)
+ for page in mysite.preloadpages(links, groupsize=5):
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ count += 1
+ if count >= 6:
+ break
+
+ def test_preload_high_groupsize(self):
+ """Test preloading continuation with groupsize greater than total."""
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+
+ # Determine if there are enough links on the main page,
+ # for the test to be useful.
+ link_count = len(list(mysite.pagelinks(mainpage, total=10)))
+ if link_count < 2:
+ raise unittest.SkipTest('insufficient links on main page')
+
+ # get a fresh generator; we now know how many results it will have,
+ # if it is less than 10.
+ links = mysite.pagelinks(mainpage, total=10)
+ for page in mysite.preloadpages(links, groupsize=50):
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ count += 1
+ self.assertEqual(count, link_count)
+
+ def test_preload_low_groupsize(self):
+ """Test preloading continuation with groupsize less than total."""
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+
+ # Determine if there are enough links on the main page,
+ # for the test to be useful.
+ link_count = len(list(mysite.pagelinks(mainpage, total=10)))
+ if link_count < 2:
+ raise unittest.SkipTest('insufficient links on main page')
+
+ # get a fresh generator; we now know how many results it will have,
+ # if it is less than 10.
+ links = mysite.pagelinks(mainpage, total=10)
+ for page in mysite.preloadpages(links, groupsize=5):
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ count += 1
+ self.assertEqual(count, link_count)
+
+ def test_preload_unexpected_titles_using_pageids(self):
+ """Test sending pageids with unnormalized titles, causing warnings."""
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+ links = list(mysite.pagelinks(mainpage, total=10))
+ if len(links) < 2:
+ raise unittest.SkipTest('insufficient links on main page')
+
+ # change the title of the page, to test sametitle().
+ # preloadpages will send the page ids, as they have already been loaded
+ # by pagelinks, and preloadpages should complain the returned titles
+ # do not match any title in the pagelist.
+ for page in links:
+ page._link._text += ' '
+
+ gen = mysite.preloadpages(links, groupsize=5)
+ for page in gen:
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertFalse(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 0)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ count += 1
+ if count > 5:
+ break
+
+ def test_preload_unexpected_titles_using_titles(self):
+ """Test sending unnormalized titles, causing warnings."""
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+ links = list(mysite.pagelinks(mainpage, total=10))
+ if len(links) < 2:
+ raise unittest.SkipTest('insufficient links on main page')
+
+ # change the title of the page _and_ delete the pageids.
+ # preloadpages can only send the titles, and preloadpages should
+ # complain the returned titles do not match any title in the pagelist.
+ for page in links:
+ page._link._text += ' '
+ del page._pageid
+
+ gen = mysite.preloadpages(links, groupsize=5)
+ for page in gen:
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertFalse(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 0)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ count += 1
+ if count > 5:
+ break
+
+ def test_preload_invalid_titles_without_pageids(self):
+ """Test sending invalid titles. No warnings are issued, although they should be."""
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+ links = list(mysite.pagelinks(mainpage, total=10))
+ if len(links) < 2:
+ raise unittest.SkipTest('insufficient links on main page')
+
+ for page in links:
+ page._link._text += ' foobar'
+ del page._pageid
+
+ gen = mysite.preloadpages(links, groupsize=5)
+ for page in gen:
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ self.assertFalse(page.exists())
+ count += 1
+ if count > 5:
+ break
+
+ @unittest.expectedFailure
+ def test_preload_langlinks_normal(self):
+ """Test preloading continuation with langlinks."""
+ # FIXME: test fails
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+ links = mysite.pagelinks(mainpage, total=10)
+ for page in mysite.preloadpages(links, groupsize=5, langlinks=True):
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ self.assertTrue(hasattr(page, '_langlinks'))
+ count += 1
+ if count >= 6:
+ break
+
+ @unittest.expectedFailure
+ def test_preload_langlinks_count(self):
+ """Test that preloading with langlinks yields the expected number of pages."""
+ # FIXME: test fails
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+ links = mysite.pagelinks(mainpage, total=20)
+ pages = list(mysite.preloadpages(links, groupsize=5,
+ langlinks=True))
+ for page in pages:
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ count += 1
+
+ self.assertEqual(len(links), count)
+
+ def _test_preload_langlinks_long(self):
+ """Test preloading continuation with langlinks (long duration)."""
+ # FIXME: test fails. It is disabled as it takes more
+ # than 10 minutes on travis for English Wikipedia
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+ links = mainpage.backlinks(total=100)
+ for page in mysite.preloadpages(links, groupsize=50,
+ langlinks=True):
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ self.assertTrue(hasattr(page, '_langlinks'))
+ count += 1
+
+ self.assertEqual(len(links), count)
+
+ @unittest.expectedFailure
+ def test_preload_templates(self):
+ """Test preloading templates works."""
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+ # Use backlinks, as any backlink has at least one link
+ links = mysite.pagelinks(mainpage, total=10)
+ for page in mysite.preloadpages(links, templates=True):
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ self.assertTrue(hasattr(page, '_templates'))
+ count += 1
+ if count >= 6:
+ break
+
+ @unittest.expectedFailure
+ def test_preload_templates_and_langlinks(self):
+ """Test preloading templates and langlinks works."""
+ mysite = self.get_site()
+ mainpage = self.get_mainpage()
+ count = 0
+ # Use backlinks, as any backlink has at least one link
+ links = mysite.pagebacklinks(mainpage, total=10)
+ for page in mysite.preloadpages(links, langlinks=True, templates=True):
+ self.assertIsInstance(page, pywikibot.Page)
+ self.assertIsInstance(page.exists(), bool)
+ if page.exists():
+ self.assertTrue(hasattr(page, "_text"))
+ self.assertEqual(len(page._revisions), 1)
+ self.assertFalse(hasattr(page, '_pageprops'))
+ self.assertTrue(hasattr(page, '_templates'))
+ self.assertTrue(hasattr(page, '_langlinks'))
+ count += 1
+ if count >= 6:
+ break
+
+
class TestDataSitePreloading(WikidataTestCase):
"""Test DataSite.preloaditempages for repo pages."""
--
To view, visit https://gerrit.wikimedia.org/r/173499
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I11df5361bd2b67cce132faba252552bfa5053827
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>