Revision: 6823 Author: philip Date: 2009-05-05 03:48:13 +0000 (Tue, 05 May 2009)
Log Message: ----------- * Modified Site.search. We can't get a page title through '-search' due to an HTML-code change in MediaWiki. * Fixed a bug in Site.search. The query string should be encoded to UTF-8; otherwise we can't search for a Unicode string with this function. * Fixed a bug in Family.search_address. The namespace index i (ns) must start from 0 because the main namespace's id is 0.
Modified Paths: -------------- trunk/pywikipedia/family.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py =================================================================== --- trunk/pywikipedia/family.py 2009-05-04 22:04:07 UTC (rev 6822) +++ trunk/pywikipedia/family.py 2009-05-05 03:48:13 UTC (rev 6823) @@ -3580,7 +3580,7 @@ # add all namespaces namespaces = self.namespaces.keys() for i in namespaces: - if i > 0: + if i >= 0: namespace_params = namespace_params + '&ns%d=1' % i
return "%s?title=%s:Search&search=%s&limit=%d%s&fulltext=1&useskin=monobook" % (self.path(code),
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-05-04 22:04:07 UTC (rev 6822) +++ trunk/pywikipedia/wikipedia.py 2009-05-05 03:48:13 UTC (rev 6823) @@ -5052,14 +5052,14 @@ def search(self, query, number = 10, namespaces = None): """Yield search results (using Special:Search page) for query.""" throttle = True - path = self.search_address(urllib.quote_plus(query), + path = self.search_address(urllib.quote_plus(query.encode('utf-8')), n=number, ns=namespaces) get_throttle() html = self.getUrl(path)
- entryR = re.compile(ur'<li[^>]*><a href=".+?" title="(?P<title>.+?)">.+?</a>' - '<br />(?P<match>.*?)<span style="color[^>]*>.+?: ' - '(?P<relevance>[0-9.]+)% - ' + entryR = re.compile(ur'<li><a href=".+?" title="(?P<title>.+?)">.+?</a>' +# '<br />(?P<match>.*?)<span style="color[^>]*>.+?: ' +# '(?P<relevance>[0-9.]+)% - ' # '(?P<size>[0-9.]*) ' # '(?P<sizeunit>[A-Za-z]) ' # '((?P<words>.+?) \w+) - ' @@ -5068,8 +5068,8 @@
for m in entryR.finditer(html): page = Page(self, m.group('title')) - match = m.group('match') - relevance = m.group('relevance') + #match = m.group('match') + #relevance = m.group('relevance') #size = m.group('size') ## sizeunit appears to always be "KB" #words = m.group('words') @@ -5078,7 +5078,7 @@ #print "%s - %s %s (%s words) - %s" % (relevance, size, sizeunit, words, date)
#yield page, match, relevance, size, words, date - yield page, match, relevance, '', '', '' + yield page, '', '', '', '', ''
# TODO: avoid code duplication for the following methods def newpages(self, number = 10, get_redirect = False, repeat = False, namespace = 0):