jenkins-bot has submitted this change and it was merged.
Change subject: proofreadpage.py: add IndexPage.get_page_from_number()
......................................................................
proofreadpage.py: add IndexPage.get_page_from_number()
Add:
- an additional function to retrieve mapping info between page and page
numbers
- property num_pages to retrieve total number of pages in IndexPage
Also add related tests.
Rename internal function from _get_page_labels() to _get_page_mappings() as
its purpose is not restricted to labels only.
Change-Id: I9eed8035b02b7c7893eebaf68ed480a468f69293
---
M pywikibot/proofreadpage.py
M tests/proofreadpage_tests.py
2 files changed, 44 insertions(+), 5 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index a14e2f2..13c7d1e 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -374,7 +374,7 @@
self._labels_from_page_number = {}
self._labels_from_page = {}
- def _get_page_labels(self):
+ def _get_page_mappings(self):
"""Associate label and number for each page linked to the
index."""
self._parsed_text = self._get_parsed_page()
self._soup = BeautifulSoup(self._parsed_text, 'html.parser')
@@ -423,6 +423,17 @@
# Sanity check: all links to Page: ns must have been considered.
assert set(self._labels_from_page) == set(self._all_page_links)
+ @property
+ def num_pages(self):
+ """Return total number of pages in Index.
+
+ @return: total number of pages in Index
+ @rtype: int
+ """
+ if not self._page_from_numbers:
+ self._get_page_mappings()
+ return len(self._page_from_numbers)
+
def get_label_from_page(self, page):
"""Return 'page label' for page.
@@ -433,7 +444,7 @@
@rtype: unicode string
"""
if not self._labels_from_page:
- self._get_page_labels()
+ self._get_page_mappings()
try:
return self._labels_from_page[page]
@@ -450,7 +461,7 @@
@rtype: unicode string
"""
if not self._labels_from_page_number:
- self._get_page_labels()
+ self._get_page_mappings()
try:
return self._labels_from_page_number[page_number]
@@ -462,7 +473,7 @@
"""Helper function to get info from label."""
# Convert label to string if an integer is passed.
if not mapping_dict:
- self._get_page_labels()
+ self._get_page_mappings()
if isinstance(label, int):
label = str(label)
@@ -491,3 +502,18 @@
@return: list containing pages corresponding to page label.
"""
return self._get_from_label(self._pages_from_label, label)
+
+ def get_page_from_number(self, page_number):
+ """Return a page object from page number.
+
+ @param page_number: int
+ @return: page
+ @rtype: page object
+ """
+ if not self._page_from_numbers:
+ self._get_page_mappings()
+
+ try:
+ return self._page_from_numbers[page_number]
+ except KeyError:
+ raise KeyError('Invalid page number: %s.' % page_number)
diff --git a/tests/proofreadpage_tests.py b/tests/proofreadpage_tests.py
index b16ec0e..8711016 100644
--- a/tests/proofreadpage_tests.py
+++ b/tests/proofreadpage_tests.py
@@ -279,6 +279,7 @@
'family': 'wikisource',
'code': 'en',
'index': 'Popular Science Monthly Volume 1.djvu',
+ 'num_pages': 804,
'page': 'Popular Science Monthly Volume 1.djvu/{0}',
'get_label': [11, 11, '1'],
'get_number': [[1, set([11])],
@@ -290,6 +291,7 @@
'family': 'wikisource',
'code': 'de',
'index': 'Musen-Almanach für das Jahr 1799',
+ 'num_pages': 272,
'page': 'Schiller_Musenalmanach_1799_{0:3d}.jpg',
'get_label': [120, 120, '120'], # page no, title no, label
'get_number': [[120, set([120])],
@@ -300,6 +302,7 @@
'family': 'wikisource',
'code': 'fr',
'index': 'Segard - Hymnes profanes, 1894.djvu',
+ 'num_pages': 107,
'page': 'Segard - Hymnes profanes, 1894.djvu/{0}',
'get_label': [11, 11, '8'],
'get_number': [[8, set([11])],
@@ -326,6 +329,11 @@
for i in page_numbers)
site_def['get_page'].append([label, page_set])
+ def test_num_pages(self, key):
+ """Test num_pages property."""
+ index_page = IndexPage(self.site, self.sites[key]['index'])
+ self.assertEqual(index_page.num_pages, self.sites[key]['num_pages'])
+
def test_get_labels(self, key):
"""Test IndexPage page get_label_from_*
functions."""
data = self.sites[key]
@@ -345,7 +353,7 @@
# Error if page does not exists.
self.assertRaises(KeyError, index_page.get_label_from_page, None)
- def test_get_page_number(self, key):
+ def test_get_page_and_number(self, key):
"""Test IndexPage page get_page_number
functions."""
data = self.sites[key]
index_page = IndexPage(self.site, self.sites[key]['index'])
@@ -383,6 +391,11 @@
n = index_page._page_from_numbers[p]
self.assertEqual(index_page._numbers_from_page[p], n)
+ # Test get_page_from_number.
+ for n in num_set:
+ p = index_page.get_page_from_number(n)
+ self.assertEqual(index_page._numbers_from_page[p], n)
+
def test_page_number_mapping(self, key):
"""Test consistency of page <-> mapping."""
data = self.sites[key]
--
To view, visit
https://gerrit.wikimedia.org/r/242347
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9eed8035b02b7c7893eebaf68ed480a468f69293
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>