jenkins-bot has submitted this change and it was merged.
Change subject: proofreadpage.py: add ProofreadPage.index property and other methods
......................................................................
proofreadpage.py: add ProofreadPage.index property and other methods
Add:
- ProofreadPage.index property to get Index page containing the page
- IndexPage.page_gen() to load pages related to an Index page in
specified page range (filters are available for quality level and
page existence)
- IndexPage.get_number() to get page number of a page
- IndexPage.pages() to get the list of pages in Index
Rename:
- IndexPage.get_page_from_number() to get_page()
bs4 is now mandatory for ProofreadPage if ProofreadPage.index is
used.
Added and cleaned up docstrings.
Added related tests.
Change-Id: I9dab8c2e75dc27fe87500eac3202f14553525a82
---
M pywikibot/proofreadpage.py
M tests/proofreadpage_tests.py
2 files changed, 238 insertions(+), 22 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index c04951c..29458b3 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -7,6 +7,7 @@
This module includes objects:
* ProofreadPage(Page)
* FullHeader
+* IndexPage(Page)
"""
#
@@ -69,6 +70,12 @@
PROBLEMATIC = 2
PROOFREAD = 3
VALIDATED = 4
+ PROOFREAD_LEVELS = [WITHOUT_TEXT,
+ NOT_PROOFREAD,
+ PROBLEMATIC,
+ PROOFREAD,
+ VALIDATED,
+ ]
open_tag = '<noinclude>'
close_tag = '</noinclude>'
@@ -78,7 +85,7 @@
def __init__(self, source, title=''):
"""Instantiate a ProofreadPage object.
- Raises UnknownExtension if source Site has no ProofreadPage Extension.
+ @raise UnknownExtension: source Site has no ProofreadPage Extension.
"""
if not isinstance(source, pywikibot.site.BaseSite):
site = source.site
@@ -89,6 +96,67 @@
if self.namespace() != site.proofread_page_ns:
raise ValueError('Page %s must belong to %s namespace'
% (self.title(), ns))
+ # Ensure that constants are in line with Extension values.
+ if list(self.site.proofread_levels.keys()) != self.PROOFREAD_LEVELS:
+ raise ValueError('QLs do not match site values: %s != %s'
+ % (self.site.proofread_levels.keys(),
+ self.PROOFREAD_LEVELS))
+
+ @property
+ def index(self):
+ """Get the Index page which contains ProofreadPage.
+
+ To force reload, delete index and call it again.
+
+ Returns:
+ None: if ProofreadPage is linked to no or several Index pages
+ and no inerence can be done from titles.
+ IndexPage: if ProofreadPage is linked to one Index page.
+ """
+ if not hasattr(self, '_index'):
+ index_ns = self.site.proofread_index_ns
+ what_links_here = [IndexPage(page) for
+ page in self.getReferences(namespaces=index_ns)]
+
+ if not what_links_here:
+ self._index = (None, [])
+ elif len(what_links_here) == 1:
+ self._index = (what_links_here[0], [])
+ else:
+ self._index = (None, what_links_here)
+ # Try to infer names form page titles.
+ base, sep, num = self.title(withNamespace=False).rpartition('/')
+ if sep == '/':
+ for page in what_links_here:
+ if page.title(withNamespace=False) == base:
+ what_links_here.remove(page)
+ self._index = (page, what_links_here)
+ break
+
+ page, others = self._index
+ if others:
+ pywikibot.warning('Page %s is linked to several Index pages: %s.'
+ % (self, others))
+ if page:
+ pywikibot.warning(' %s selected as Index.' % page)
+ pywikibot.warning(' %s remaining.' % others)
+ elif not page:
+ pywikibot.warning('Page %s is not linked to any Index page.'
+ % self)
+
+ return page
+
+ @index.setter
+ def index(self, value):
+ if not isinstance(value, IndexPage):
+ raise ValueError('value %s must be a IndexPage object.'
+ % value)
+ self._index = (value, None)
+
+ @index.deleter
+ def index(self):
+ if hasattr(self, "_index"):
+ del self._index
def decompose(fn):
"""Decorator.
@@ -347,7 +415,15 @@
on de wikisource).
page label is the label associated with a page in the Index page.
- Raises UnknownExtension if source Site has no ProofreadPage Extension.
+ This class provides methods to get pages contained in Index page,
+ and relative page numbers and labels by means of several helper
+ functions.
+
+ It also providesa generator to pages contained in Index page, with
+ possibility to define range, filter by quality levels and page existance.
+
+ @raise UnknownExtension: source Site has no ProofreadPage Extension.
+ @raise ImportError: bs4 is not installed.
"""
# Check if BeautifulSoup is imported.
if isinstance(BeautifulSoup, ImportError):
@@ -415,6 +491,7 @@
title = a_tag.get('title')
page = ProofreadPage(self.site, title)
+ page.index = self # set index property for page
if page not in self._all_page_links:
raise pywikibot.Error('Page %s not recognised.' % page)
@@ -458,6 +535,55 @@
"""
return len(self._page_from_numbers)
+ def page_gen(self, start=1, end=None, filter_ql=None,
+ only_existing=False, content=True):
+ """Return a page generator which yields pages contained in Index page.
+
+ Range is [start ... end], extremes included.
+
+ @param start: first page, defaults to 1
+ @type start: int
+ @param end: num_pages if end is None
+ @type end: int
+ @param filter_ql: filters quality levels
+ if None: all but 'Without Text'.
+ @type filter_ql: list of ints (corresponding to ql constants
+ defined in ProofreadPage).
+ @param only_existing: yields only existing pages.
+ @type only_existing: bool
+ @param content: preload content.
+ @type content: bool
+ """
+ if end is None:
+ end = self.num_pages
+
+ if not ((1 <= start <= self.num_pages) and
+ (1 <= end <= self.num_pages) and
+ (start <= end)):
+ raise ValueError('start=%s, end=%s are not in valid range (%s, %s)'
+ % (start, end, 1, self.num_pages))
+
+ # All but 'Without Text'
+ if filter_ql is None:
+ filter_ql = list(self.site.proofread_levels.keys())
+ filter_ql.remove(ProofreadPage.WITHOUT_TEXT)
+
+ gen = (self.get_page(i) for i in range(start, end + 1))
+ if content:
+ gen = self.site.preloadpages(gen)
+ # Decorate and sort by page number because preloadpages does not
+ # guarantee order.
+ # TODO: remove if preloadpages will guarantee order.
+ gen = ((p, self.get_number(p)) for p in gen)
+ gen = (p[0] for p in sorted(gen, key=lambda x: x[1]))
+ # Filter by QL.
+ gen = (p for p in gen if p.ql in filter_ql)
+ # Yield only existing.
+ if only_existing:
+ gen = (p for p in gen if p.exists())
+
+ return gen
+
@check_if_cached
def get_label_from_page(self, page):
"""Return 'page label' for page.
@@ -486,7 +612,7 @@
try:
return self._labels_from_page_number[page_number]
except KeyError:
- raise KeyError('Page number ".../%s" not range.'
+ raise KeyError('Page number ".../%s" not in range.'
% page_number)
def _get_from_label(self, mapping_dict, label):
@@ -523,14 +649,26 @@
return self._get_from_label(self._pages_from_label, label)
@check_if_cached
- def get_page_from_number(self, page_number):
- """Return a page object from page number.
-
- @param page_number: int
- @return: page
- @rtype: page object
- """
+ def get_page(self, page_number):
+ """Return a page object from page number."""
try:
return self._page_from_numbers[page_number]
except KeyError:
raise KeyError('Invalid page number: %s.' % page_number)
+
+ @check_if_cached
+ def pages(self):
+ """Return the list of pages in Index, sorted by page number.
+
+ @return: list of pages
+ @rtype: list
+ """
+ return [self._page_from_numbers[i] for i in range(1, self.num_pages + 1)]
+
+ @check_if_cached
+ def get_number(self, page):
+ """Return a page number from page object."""
+ try:
+ return self._numbers_from_page[page]
+ except KeyError:
+ raise KeyError('Invalid page: %s.' % page)
diff --git a/tests/proofreadpage_tests.py b/tests/proofreadpage_tests.py
index 28dfb5f..e28d9a7 100644
--- a/tests/proofreadpage_tests.py
+++ b/tests/proofreadpage_tests.py
@@ -86,6 +86,7 @@
valid = {
'title': 'Page:Popular Science Monthly Volume 1.djvu/12',
+ 'index': 'Index:Popular Science Monthly Volume 1.djvu',
'ql': 4,
'user': 'T. Mazzei',
'header': u"{{rh|2|''THE POPULAR SCIENCE MONTHLY.''}}",
@@ -94,6 +95,10 @@
existing_invalid = {
'title': 'Main Page',
+ }
+
+ existing_unlinked = {
+ 'title': 'Page:Pywikibot unlinked test page',
}
not_existing_invalid = {
@@ -203,6 +208,61 @@
@require_modules('bs4')
+class TestProofreadPageIndexProperty(TestCase):
+
+ """Test ProofreadPage index property."""
+
+ family = 'wikisource'
+ code = 'en'
+
+ cached = True
+
+ valid = {
+ 'title': 'Page:Popular Science Monthly Volume 1.djvu/12',
+ 'index': 'Index:Popular Science Monthly Volume 1.djvu',
+ }
+
+ existing_multilinked = {
+ 'title': 'Page:Pywikibot test page 1/1',
+ 'index_1': 'Index:Pywikibot test page 1',
+ 'index_2': 'Index:Pywikibot test page 2',
+ }
+
+ existing_unlinked = {
+ 'title': 'Page:Pywikibot unlinked test page',
+ }
+
+ def test_index(self):
+ """Test index property."""
+ # Page with Index.
+ page = ProofreadPage(self.site, self.valid['title'])
+ index_page = IndexPage(self.site, self.valid['index'])
+
+ # Test propery.
+ self.assertEqual(page.index, index_page)
+
+ # Test deleter
+ del page.index
+ self.assertFalse(hasattr(page, '_index'))
+ # Test setter
+ page.index = index_page
+ self.assertEqual(page.index, index_page)
+
+ # Page without Index.
+ page = ProofreadPage(self.site, self.existing_multilinked['title'])
+ index_page_1 = IndexPage(self.site, self.existing_multilinked['index_1'])
+ index_page_2 = IndexPage(self.site, self.existing_multilinked['index_2'])
+ self.assertEqual(page.index, index_page_1)
+ self.assertNotEqual(page.index, index_page_2)
+ self.assertEqual(page._index, (index_page_1, [index_page_2]))
+
+ # Page without Index.
+ page = ProofreadPage(self.site, self.existing_unlinked['title'])
+ self.assertIs(page.index, None)
+ self.assertEqual(page._index, (None, []))
+
+
+@require_modules('bs4')
class IndexPageTestCase(TestCase):
"""Run tests related to IndexPage ProofreadPage extension."""
@@ -270,7 +330,8 @@
self.assertEqual(page.namespace(), source.namespace)
-class TestBasePageMethodsIndexPage(IndexPageTestCase, BasePageMethodsTestBase):
+@require_modules('bs4')
+class TestBasePageMethodsIndexPage(BasePageMethodsTestBase):
"""Test behavior of ProofreadPage methods inherited from BasePage."""
@@ -439,19 +500,36 @@
# Error if label does not exists.
self.assertRaises(KeyError, index_page.get_page_from_label, 'dummy label')
- # Test consistency of page <-> numbers mapping on last page_set and
- # num_set used.
- for p in page_set:
- n = index_page._numbers_from_page[p]
- self.assertEqual(index_page._page_from_numbers[n], p)
+ # Test get_page.
for n in num_set:
- p = index_page._page_from_numbers[n]
- self.assertEqual(index_page._numbers_from_page[p], n)
+ p = index_page.get_page(n)
+ self.assertEqual(index_page.get_number(p), n)
- # Test get_page_from_number.
- for n in num_set:
- p = index_page.get_page_from_number(n)
- self.assertEqual(index_page._numbers_from_page[p], n)
+ # Test get_number.
+ for p in page_set:
+ n = index_page.get_number(p)
+ self.assertEqual(index_page.get_page(n), p)
+
+ def test_page_gen(self, key):
+ """Test Index page generator."""
+ data = self.sites[key]
+ num, title_num, label = data['get_label']
+
+ index_page = IndexPage(self.site, self.sites[key]['index'])
+ page_title = self.sites[key]['page'].format(title_num)
+ proofread_page = ProofreadPage(self.site, page_title)
+
+ # Check start/end limits.
+ self.assertRaises(ValueError, index_page.page_gen, -1, 2)
+ self.assertRaises(ValueError, index_page.page_gen, 1, -1)
+ self.assertRaises(ValueError, index_page.page_gen, 2, 1)
+
+ # Check quality filters.
+ gen = index_page.page_gen(num, num, filter_ql=range(5))
+ self.assertEqual(list(gen), [proofread_page])
+
+ gen = index_page.page_gen(num, num, filter_ql=[0])
+ self.assertEqual(list(gen), [])
if __name__ == '__main__':
--
To view, visit https://gerrit.wikimedia.org/r/243489
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9dab8c2e75dc27fe87500eac3202f14553525a82
Gerrit-PatchSet: 19
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: [IMPROV] cosmetic_changes: Dynamic header fixer
......................................................................
[IMPROV] cosmetic_changes: Dynamic header fixer
Instead of iterating over the text for each of the allowed levels it could just
dynamically react on each header instance via a replacement method.
Also instead of using the %-notation which doesn't allow reusing indexed
parameters it can use `str.format` to generate the header wiki text.
Change-Id: I04a627e2d3f99ca093c1970a7ffa7d9b4f1630a1
---
M pywikibot/cosmetic_changes.py
1 file changed, 9 insertions(+), 8 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 485769d..6b4af85 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -731,6 +731,10 @@
return text
def fixHtml(self, text):
+ def replace_header(match):
+ depth = int(match.group(1))
+ return r'{0} {1} {0}'.format('=' * depth, match.group(2))
+
# Everything case-insensitive (?i)
# Keep in mind that MediaWiki automatically converts <br> to <br />
exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
@@ -748,14 +752,11 @@
r'<hr \1 />',
exceptions)
# a header where only spaces are in the same line
- for level in range(1, 7):
- equals = '\\1%s \\2 %s\\3' % ("=" * level, "=" * level)
- text = textlib.replaceExcept(
- text,
- r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])'
- % (level, level),
- r'%s' % equals,
- exceptions)
+ text = textlib.replaceExcept(
+ text,
+ r'(?i)(?<=[\r\n]) *<h([1-7])> *([^<]+?) *</h\1> *(?=[\r\n])',
+ replace_header,
+ exceptions)
# TODO: maybe we can make the bot replace <p> tags with \r\n's.
return text
--
To view, visit https://gerrit.wikimedia.org/r/244877
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I04a627e2d3f99ca093c1970a7ffa7d9b4f1630a1
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: pagegenerators.py: titleregex as filter of other generators
......................................................................
pagegenerators.py: titleregex as filter of other generators
Implement -titleregex as a filter, applying a regex to titles of
pages returned by the other page generators.
-start now defaults to ! if nothing is specified, instead of
asking for a page as it does today.
Functionality before this patch shall now be obtained using:
-start -titleregex:my_regex
Warning is emitted if grep/titleregex filters are specified but no
generators are requested.
Bug:T114015
Change-Id: I249c4ee61b89ea4042b08fff0a3dc4557170e6f4
---
M pywikibot/pagegenerators.py
M tests/pagegenerators_tests.py
2 files changed, 40 insertions(+), 30 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
XZise: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index eb6f583..ecffb9a 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -134,8 +134,9 @@
before -newpages.
If used with -recentchanges, efficiency is improved if
-namepace/ns is provided before -recentchanges.
- If used with -titleregex, -namepace/ns must be provided
- before -titleregex and shall contain only one value.
+
+ If used with -start, -namepace/ns shall contain only one
+ value.
-interwiki Work on the given page and all equivalent pages in other
languages. This can, for example, be used to fight
@@ -181,13 +182,21 @@
"-start:Template:!" will make the bot work on all pages
in the template namespace.
+ default value is start:!
+
-prefixindex Work on pages commencing with a common prefix.
-step:n When used with any other argument that specifies a set
of pages, only retrieve n pages at a time from the wiki
server.
--titleregex Work on titles that match the given regular expression.
+-titleregex A regular expression that needs to match the article title
+ otherwise the page won't be returned.
+ Multiple -titleregex:regexpr can be provided and the page will
+ be returned if title is matched by any of the regexpr
+ provided.
+ Case insensitive regular expressions will be used and
+ dot matches any character.
-transcludes Work on all pages that use a certain template.
Argument can also be given as "-transcludes:Title".
@@ -327,6 +336,7 @@
self.step = None
self.limit = None
self.articlefilter_list = []
+ self.titlefilter_list = []
self.claimfilter_list = []
self.intersect = False
self._site = site
@@ -396,6 +406,9 @@
if self.limit:
self.gens[i] = itertools.islice(self.gens[i], self.limit)
if len(self.gens) == 0:
+ if self.titlefilter_list or self.articlefilter_list:
+ pywikibot.warning(
+ 'grep/titleregex filters specified but no generators.')
return None
elif len(self.gens) == 1:
gensList = self.gens[0]
@@ -419,11 +432,15 @@
claim[0], claim[1],
claim[2], claim[3])
+ if self.titlefilter_list:
+ dupfiltergen = RegexFilterPageGenerator(
+ dupfiltergen, self.titlefilter_list)
+
if self.articlefilter_list:
- return RegexBodyFilterPageGenerator(
+ dupfiltergen = RegexBodyFilterPageGenerator(
PreloadingGenerator(dupfiltergen), self.articlefilter_list)
- else:
- return dupfiltergen
+
+ return dupfiltergen
def getCategoryGen(self, arg, recurse=False, content=False,
gen_func=None):
@@ -672,8 +689,7 @@
elif arg.startswith('-start'):
firstPageTitle = arg[7:]
if not firstPageTitle:
- firstPageTitle = pywikibot.input(
- u'At which page do you want to start?')
+ firstPageTitle = '!'
firstpagelink = pywikibot.Link(firstPageTitle,
self.site)
namespace = firstpagelink.namespace
@@ -739,18 +755,11 @@
gen = GoogleSearchPageGenerator(arg[8:])
elif arg.startswith('-titleregex'):
if len(arg) == 11:
- regex = pywikibot.input(u'What page names are you looking for?')
+ self.titlefilter_list.append(pywikibot.input(
+ 'What page names are you looking for?'))
else:
- regex = arg[12:]
- # partial workaround for bug T85389
- # to use -namespace/ns with -newpages, -ns must be given
- # before -titleregex, otherwise default namespace is 0.
- # allpages only accepts a single namespace, and will raise a
- # TypeError if self.namespaces contains more than one namespace.
- namespaces = self.namespaces or 0
- gen = RegexFilterPageGenerator(
- self.site.allpages(namespace=namespaces),
- regex)
+ self.titlefilter_list.append(arg[12:])
+ return True
elif arg.startswith('-grep'):
if len(arg) == 5:
self.articlefilter_list.append(pywikibot.input(
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 1e8cd20..3f9cb4f 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -592,6 +592,7 @@
def test_regexfilter_default(self):
gf = pagegenerators.GeneratorFactory()
# Matches titles with the same two or more continous characters
+ self.assertTrue(gf.handleArg('-start'))
self.assertTrue(gf.handleArg('-titleregex:(.)\\1+'))
gf.handleArg('-limit:10')
gen = gf.getCombinedGenerator()
@@ -603,39 +604,39 @@
self.assertRegex(page.title().lower(), '(.)\\1+')
def test_regexfilter_ns_after(self):
- """Bug: T85389: -ns after -titleregex is ignored with a warning."""
gf = pagegenerators.GeneratorFactory()
+ self.assertTrue(gf.handleArg('-start'))
self.assertTrue(gf.handleArg('-titleregex:.*'))
gf.handleArg('-ns:1')
gf.handleArg('-limit:10')
gen = gf.getCombinedGenerator()
pages = list(gen)
- self.assertGreater(len(pages), 0)
self.assertLessEqual(len(pages), 10)
- self.assertPagesInNamespaces(pages, 0)
+ self.assertPagesInNamespaces(pages, 1)
- def test_regexfilter_ns_first(self):
+ def test_regexfilter_ns_before(self):
gf = pagegenerators.GeneratorFactory()
- # Workaround for Bug: T85389
- # Give -ns before -titleregex (as for -newpages)
+ self.assertTrue(gf.handleArg('-start'))
gf.handleArg('-ns:1')
self.assertTrue(gf.handleArg('-titleregex:.*'))
gf.handleArg('-limit:10')
gen = gf.getCombinedGenerator()
self.assertIsNotNone(gen)
pages = list(gen)
- self.assertGreater(len(pages), 0)
self.assertLessEqual(len(pages), 10)
self.assertPagesInNamespaces(pages, 1)
- def test_regexfilter_two_ns_first(self):
+ def test_allpages_with_two_ns(self):
+ """Test that allpages fails with two ns as parameter."""
gf = pagegenerators.GeneratorFactory()
+ self.assertTrue(gf.handleArg('-start'))
gf.handleArg('-ns:3,1')
- self.assertRaisesRegex(
+ # allpages only accepts a single namespace, and will raise a
+ # TypeError if self.namespaces contains more than one namespace.
+ self.assertRaises(
TypeError,
'allpages module does not support multiple namespaces',
- gf.handleArg,
- '-titleregex:.*')
+ gf.getCombinedGenerator)
def test_prefixing_default(self):
gf = pagegenerators.GeneratorFactory()
--
To view, visit https://gerrit.wikimedia.org/r/244817
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I249c4ee61b89ea4042b08fff0a3dc4557170e6f4
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: [IMPROV] cosmetic_changes: merge similar regexes
......................................................................
[IMPROV] cosmetic_changes: merge similar regexes
Instead of having two pretty similar regexes it's in most cases possible to
combine them easily. This is also fixing “dash” into “pipe” in the comment
explaining how the external link fixer works.
It also adds the site parameter to each changed call.
Change-Id: I8513e34f873730ee9c92abb75c499f67cb3d86fc
(cherry picked from commit 20b2b6bcd7be359fa7007a0a5c224a8692eb88e6)
---
M pywikibot/cosmetic_changes.py
1 file changed, 14 insertions(+), 26 deletions(-)
Approvals:
Mpaa: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 57a2c78..31de09d 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -712,17 +712,13 @@
# r'\[https?://%s\.%s\.org/wiki/(?P<link>\S+)\s+(?P<title>.+?)\s?\]'
# % (self.site.code, self.site.family.name),
# r'[[\g<link>|\g<title>]]', exceptions)
- # external link in double brackets
+ # external link in/starting with double brackets
text = textlib.replaceExcept(
text,
- r'\[\[(?P<url>https?://[^\]]+?)\]\]',
- r'[\g<url>]', exceptions)
- # external link starting with double bracket
- text = textlib.replaceExcept(text,
- r'\[\[(?P<url>https?://.+?)\]',
- r'[\g<url>]', exceptions)
- # external link and description separated by a dash, with
- # whitespace in front of the dash, so that it is clear that
+ r'\[\[(?P<url>https?://[^\]]+?)\]\]?',
+ r'[\g<url>]', exceptions, site=self.site)
+ # external link and description separated by a pipe, with
+ # whitespace in front of the pipe, so that it is clear that
# the dash is not a legitimate part of the URL.
text = textlib.replaceExcept(
text,
@@ -742,14 +738,10 @@
# Keep in mind that MediaWiki automatically converts <br> to <br />
exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
'startspace']
- text = textlib.replaceExcept(text, r'(?i)<b>(.*?)</b>', r"'''\1'''",
- exceptions)
- text = textlib.replaceExcept(text, r'(?i)<strong>(.*?)</strong>',
- r"'''\1'''", exceptions)
- text = textlib.replaceExcept(text, r'(?i)<i>(.*?)</i>', r"''\1''",
- exceptions)
- text = textlib.replaceExcept(text, r'(?i)<em>(.*?)</em>', r"''\1''",
- exceptions)
+ text = textlib.replaceExcept(text, r'(?i)<(b|strong)>(.*?)</\1>',
+ r"'''\2'''", exceptions, site=self.site)
+ text = textlib.replaceExcept(text, r'(?i)<(i|em)>(.*?)</\1>',
+ r"''\2''", exceptions, site=self.site)
# horizontal line without attributes in a single line
text = textlib.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])',
r'\1----\2', exceptions)
@@ -800,19 +792,15 @@
exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
'startspace', 'gallery', 'hyperlink', 'interwiki', 'link']
# change <number> ccm -> <number> cm³
- text = textlib.replaceExcept(text, r'(\d)\s* ccm',
- r'\1 ' + u'cm³', exceptions)
- text = textlib.replaceExcept(text,
- r'(\d)\s*ccm', r'\1 ' + u'cm³',
- exceptions)
+ text = textlib.replaceExcept(text, r'(\d)\s*(?: )?ccm',
+ r'\1 cm³', exceptions,
+ site=self.site)
# Solve wrong Nº sign with °C or °F
# additional exception requested on fr-wiki for this stuff
pattern = re.compile(u'«.*?»', re.UNICODE)
exceptions.append(pattern)
- text = textlib.replaceExcept(text, r'(\d)\s* ' + u'[º°]([CF])',
- r'\1 ' + u'°' + r'\2', exceptions)
- text = textlib.replaceExcept(text, r'(\d)\s*' + u'[º°]([CF])',
- r'\1 ' + u'°' + r'\2', exceptions)
+ text = textlib.replaceExcept(text, r'(\d)\s*(?: )?[º°]([CF])',
+ r'\1 °\2', exceptions, site=self.site)
text = textlib.replaceExcept(text, u'º([CF])', u'°' + r'\1',
exceptions)
return text
--
To view, visit https://gerrit.wikimedia.org/r/244660
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I8513e34f873730ee9c92abb75c499f67cb3d86fc
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: 2.0
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>