jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/407889 )
Change subject: Revert "[IMPR] Exclude links in disambiguation templates from possibilities"
......................................................................
Revert "[IMPR] Exclude links in disambiguation templates from possibilities"
This reverts commit 49a0b0b55483e5dc11eed7ec27e3610afa2e0128.
Bug: T186316
Bug: T118719
Change-Id: I557efe3ad4f2ac403abacc72d01091ce7d4daa38
---
M scripts/solve_disambiguation.py
D tests/solve_disambiguation_tests.py
2 files changed, 7 insertions(+), 128 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py index 7169f6d..54efe23 100755 --- a/scripts/solve_disambiguation.py +++ b/scripts/solve_disambiguation.py @@ -71,7 +71,7 @@ # (C) Daniel Herding, 2004 # (C) Andre Engels, 2003-2004 # (C) WikiWichtel, 2004 -# (C) Pywikibot team, 2003-2018 +# (C) Pywikibot team, 2003-2017 # # Distributed under the terms of the MIT license. # @@ -85,7 +85,7 @@ import pywikibot from pywikibot import editor as editarticle from pywikibot.tools import first_lower, first_upper as firstcap -from pywikibot import pagegenerators, config, i18n, textlib +from pywikibot import pagegenerators, config, i18n from pywikibot.bot import ( Bot, QuitKeyboardInterrupt, StandardOption, HighlightContextOption, ListOption, OutputProxyOption, @@ -96,53 +96,6 @@ dn_template = { 'en': u'{{dn}}', 'fr': u'{{Lien vers un homonyme}}', -} - -# Regexes of disambiguation template titles to exclude links from -disamb_templates = { - 'wikipedia': { - 'bs': [r'[Čč]vor', r'[Dd]isambig'], - 'cs': [r'[Rr]ozcestník', r'[Rr]ozcestník[ _]-[ _][^}]+'], - 'en': [r'[Dd]isambig-plants', r'[Dd]isambig(uation)?', - r'[Dd]isambiguation[ _]cleanup', r'[Gg]eodis', - r'[Hh]ndis-cleanup', - r'[Ll]etter-Number[ _]Combination[ _]Disambiguation', - r'[Mm]il-unit-dis', r'[Nn]umberdis', r'.+?[ _]disambiguation'], - 'haw': [r'[Hh]uaʻōlelo[ _]puana[ _]like'], - 'hr': [r'[Rr]azdvojba', r'[Dd]isambig'], - 'no': [r'[Pp]eker', r'[Ee]tternavn', r'[Dd]isambig', - r'[Tt]obokstavsforkortelse', r'[Tt]rebokstavsforkortelse', - r'[Ff]lertydig', r'[Pp]ekerside'], - 'nov': [r'[Dd]esambig'], - 'qr': [r"[Ss]ut'ichana[ _]qillqa", r'[Dd]isambig', r'SJM'], - 'rmy': [r'[Dd]udalipen'], - 'sk': [r'[Dd]isambig', r'[Rr]ozlišovacia[ _]stránka', - r'[Dd]isambiguation'], - 'sr': [r'[Dd]isambig(uation)?', r'ВЗО', r'[Вв]зо', r'[Вв]ишезначна', - r'[Вв]ишезначна[ _]одредница', r'[Вв]ишезначност', - r'[Vv]išeznačna[ _]odrednica-lat'], - 'tg': [r'Ибҳомзудоӣ', r'[Dd]isambig', r'Рафъи[ _]ибҳом', - 
r'[Dd]isambiguation'], - 'tr': [r'[Aa]nlam[ _]ayrım', r'[Dd]isambig', r'[Aa]nlam[ _]ayrımı', - r'[Kk]işi[ _]adları[ _](anlam[ _]ayrımı)', - r'[Yy]erleşim[ _]yerleri[ _](anlam[ _]ayrımı)', - r'[Kk]ısaltmalar[ _](anlam[ _]ayrımı)', - r'[Cc]oğrafya[ _](anlam[ _]ayrımı)', - r'[Yy]erleşim[ _]yerleri[ _](anlam[ _]ayrımı)', - r'[Ss]ayılar[ _](anlam[ _]ayrımı)', - r"ABD'deki[ _]iller[ _](anlam[ _]ayrımı)"], - 'wo': [r'[Bb]okktekki'], - 'yi': [r'באדייטען'], - 'zea': [r'[Dd]p', r'[Dd]eurverwiespagina'], - 'zh-classical': [r'釋義', r'消歧義', r'[Dd]isambig'], - }, - 'loveto': { - '1911': [r'[Dd]isamb'], - }, - 'wowwiki': { - 'en': [r'[Dd]isambig', r'[Dd]isambig/quest', r'[Dd]isambig/quest2', - r'[Dd]isambig/achievement2'], - }, }
# disambiguation page name format for "primary topic" disambiguations @@ -1036,35 +989,6 @@ pywikibot.output(u'Page not saved: %s' % error.args) return 'done'
- def get_disambiguation_links(self, disambPage): - """Get links from disambPage excluding links from disamb_templates. - - @param disambPage: the disambiguation page - @type disambPage: pywikibot.Page - @return: list of processed links - @rtype: list of str - - """ - site_disamb_templates = i18n.translate(self.site, disamb_templates) - if site_disamb_templates: - exceptions = ['nowiki', 'comment', 'category', 'file', 'interwiki'] - stripped_text = disambPage.text - exc_regexes = textlib._get_regexes(exceptions, self.site) - for exc in exc_regexes: - stripped_text = exc.sub(r'', stripped_text) - for template in site_disamb_templates: - template_regex = re.compile( - r'{{ *(?:' + r':|'.join(self.site.namespaces[10]) + - r':)?' + template + r'\s*(|[^}]*)?}}' - ) - stripped_text = template_regex.sub(r'', stripped_text) - disambPage.text = stripped_text - full_text = disambPage.expand_text() - links = re.findall(r'[[([^]|]+)(?:|[^]]*|)]]', full_text) - else: - links = disambPage.linkedPages() - return links - def findAlternatives(self, disambPage): """Extend self.alternatives using correctcap of disambPage.linkedPages.
@@ -1089,12 +1013,12 @@ try: disambPage2 = pywikibot.Page( pywikibot.Link(disambTitle, self.mysite)) - links = self.get_disambiguation_links(disambPage2) + links = disambPage2.linkedPages() links = [correctcap(l, disambPage2.get()) for l in links] except pywikibot.NoPage: pywikibot.output(u"No page at %s, using redirect target." % disambTitle) - links = self.get_disambiguation_links(disambPage)[:1] + links = disambPage.linkedPages()[:1] links = [correctcap(l, disambPage.get(get_redirect=True)) for l in links] self.alternatives += links @@ -1125,19 +1049,19 @@ primary_topic_format[self.mylang] % disambPage.title(), self.mysite)) - links = self.get_disambiguation_links(disambPage2) + links = disambPage2.linkedPages() links = [correctcap(l, disambPage2.get()) for l in links] except pywikibot.NoPage: pywikibot.output( 'Page does not exist; using first link in page %s.' % disambPage.title()) - links = self.get_disambiguation_links(disambPage)[:1] + links = disambPage.linkedPages()[:1] links = [correctcap(l, disambPage.get()) for l in links] else: try: - links = self.get_disambiguation_links(disambPage) + links = disambPage.linkedPages() links = [correctcap(l, disambPage.get()) for l in links] except pywikibot.NoPage: diff --git a/tests/solve_disambiguation_tests.py b/tests/solve_disambiguation_tests.py deleted file mode 100644 index 0ef9a45..0000000 --- a/tests/solve_disambiguation_tests.py +++ /dev/null @@ -1,45 +0,0 @@ -# -*- coding: utf-8 -*- -"""Test solve_disambiguation bot module.""" -# -# (C) Pywikibot team, 2018 -# -# Distributed under the terms of the MIT license. 
-# -from __future__ import absolute_import, unicode_literals - -import pywikibot - -from scripts.solve_disambiguation import DisambiguationRobot - -from tests.aspects import TestCase, unittest - - -class TestGettingDisambigLinks(TestCase): - """Test getting disambiguation links.""" - - family = 'wikipedia' - code = 'en' - - def test_get(self): - """Test getting disambiguation links.""" - page = pywikibot.Page(self.site, 'foo') - bot = DisambiguationRobot(None, [], True, False, None, False, False, - minimum=0) - page.text = '* [[Link1]]\n* [[Link2]]' - newlinks = bot.get_disambiguation_links(page) - links = ['Link1', 'Link2'] - self.assertEqual(newlinks, links) - - def test_get_without_templates(self): - """Test excluding links from disamb_templates.""" - page = pywikibot.Page(self.site, 'foo') - bot = DisambiguationRobot(None, [], True, False, None, False, False, - minimum=0) - page.text = '* [[Link1]]\n{{Disambig}}' - newlinks = bot.get_disambiguation_links(page) - links = ['Link1'] - self.assertEqual(newlinks, links) - - -if __name__ == '__main__': - unittest.main()
pywikibot-commits@lists.wikimedia.org