http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9874
Revision: 9874
Author:   binbot
Date:     2012-02-07 20:45:24 +0000 (Tue, 07 Feb 2012)

Log Message:
-----------
Oops, that was a merged version, will come later, this is the right one.
Modified Paths:
--------------
    trunk/pywikipedia/solve_disambiguation.py
Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py 2012-02-07 20:40:12 UTC (rev 9873)
+++ trunk/pywikipedia/solve_disambiguation.py 2012-02-07 20:45:24 UTC (rev 9874)
@@ -364,21 +364,6 @@
     else:
         return linkupper
 
-def firstlinks(page):
-    #Returns a list of first links of every line beginning with *
-    #When a disambpage is full of unnecessary links, this may be useful
-    #to sort out the relevant links. E.g. from line
-    #*[[Jim Smith (smith)|Jim Smith]] ([[1832]]-[[1932]]) [[English]] [[smith]]
-    #it returns only Jim Smith (smith)
-    #No check for page existence, it has already been done.
-    list = []
-    reg = re.compile(r'\*.*?\[\[(.*?)(\||\]\])')
-    for line in page.get().splitlines():
-        found = reg.match(line)
-        if found:
-            list.append(found.group(1))
-    return list
-
 class ReferringPageGeneratorWithIgnore:
     def __init__(self, disambPage, primary=False, minimum = 0):
         self.disambPage = disambPage
@@ -538,20 +523,6 @@
         # note that the definition of 'letter' varies from language to language.
         self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
 
-    def firstize(self, page, links):
-        #duma
-        #check param
-        titles = [t.capitalize() for t in firstlinks(page)]
-        pywikibot.output('\t'.join(titles))
-        print len (titles), len(links)
-        pywikibot.output('\t'.join(l.title() for l in links))
-        for l in links[:]:
-            pywikibot.output(l.title())
-            if l.title() not in titles:
-                links.remove(l)
-                print 'meghalt'
-        return links
-
     def treat(self, refPage, disambPage):
         """
         Parameters:
@@ -903,7 +874,6 @@
                             primary_topic_format[self.mylang] % disambPage.title())
                     links = disambPage2.linkedPages()
-                    links = self.firstize(disambPage2, links)
                     links = [correctcap(l, disambPage2.get()) for l in links]
                 except pywikibot.NoPage: