Revision: 4416 Author: cosoleto Date: 2007-10-04 15:36:58 +0000 (Thu, 04 Oct 2007)
Log Message: ----------- Fixed the non-working code in the site.search() function added yesterday. The MediaWiki API has a nice and suitable 'list=search' parameter, but it isn't working or is disabled, so I cannot use it.
Modified Paths: -------------- trunk/pywikipedia/family.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py =================================================================== --- trunk/pywikipedia/family.py 2007-10-04 07:28:42 UTC (rev 4415) +++ trunk/pywikipedia/family.py 2007-10-04 15:36:58 UTC (rev 4416) @@ -2631,7 +2631,7 @@ if i > 0: namespace_params = namespace_params + '&ns%d=1' % i
- return "%s?title=%s:Search&search=%s&limit=%d%s" % (self.path(code), + return "%s?title=%s:Search&search=%s&limit=%d%s&fulltext=1" % (self.path(code), self.special_namespace_url(code), query, limit,
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2007-10-04 07:28:42 UTC (rev 4415) +++ trunk/pywikipedia/wikipedia.py 2007-10-04 15:36:58 UTC (rev 4416) @@ -505,7 +505,7 @@ If change_edit_time is False, do not check this version for changes before saving. This should be used only if the page has been loaded previously. - + """ # NOTE: The following few NoPage exceptions could already be thrown at # the Page() constructor. They are raised here instead for convenience, @@ -3666,44 +3666,38 @@ except KeyError: return False
- def search(self, query, number = 10, repeat = False, namespaces = None): + def search(self, query, number = 10, namespaces = None): """ Generator which yields search results """ - seen = set() throttle = True - while True: - path = self.search_address(query, n=number, ns = namespaces) - get_throttle() - html = self.getUrl(path) - entryR = re.compile(ur'<li[^>]*><a href=".+?" title="(?P<title>.+?)">.+?</a>' - '(?P<match>.*?)<br ?/><span[^>]*>Relevance: ' - '(?P<relevance>[0-9.]+)% - ' - '(?P<size>[0-9.]+) ' - '(?P<sizeunit>[A-Za-z]+) ' - '((?P<words>.+?) words) - ' - '(?P<date>.+?)</span></li>', re.DOTALL) + path = self.search_address(urllib.quote_plus(query), n=number, ns = namespaces) + get_throttle() + html = self.getUrl(path)
- for m in entryR.finditer(html): - title = m.group('title') + entryR = re.compile(ur'<li[^>]*><a href=".+?" title="(?P<title>.+?)">.+?</a>' + '<br />(?P<match>.*?)<span style="color[^>]*>.+?: ' + '(?P<relevance>[0-9.]+)% - ' +# '(?P<size>[0-9.]*) ' +# '(?P<sizeunit>[A-Za-z]) ' +# '((?P<words>.+?) \w+) - ' +# '(?P<date>.+?)</span></li>' + , re.DOTALL)
- if title not in seen: - seen.add(title) - page = Page(self, title) + for m in entryR.finditer(html): + page = Page(self, m.group('title')) + match = m.group('match') + relevance = m.group('relevance') + #size = m.group('size') + ## sizeunit appears to always be "KB" + #words = m.group('words') + #date = m.group('date')
- match = m.group('match') - relevance = m.group('relevance') - size = m.group('size') - # sizeunit appears to always be "KB" - words = m.group('words') - date = m.group('date') + #print "%s - %s %s (%s words) - %s" % (relevance, size, sizeunit, words, date)
- #print "%s - %s %s (%s words) - %s" % (relevance, size, sizeunit, words, date) + #yield page, match, relevance, size, words, date + yield page, match, relevance, '', '', ''
- yield page, match, relevance, size, words, date - if not repeat: - break - # TODO: avoid code duplication for the following methods def newpages(self, number = 10, get_redirect = False, repeat = False): """Generator which yields new articles subsequently.