[Pywikipedia-l] SVN: [4416] trunk/pywikipedia - pywikibot

4 Oct 2007

Revision: 4416
Author:   cosoleto
Date:     2007-10-04 15:36:58 +0000 (Thu, 04 Oct 2007)

Log Message:
-----------
Fixed the non-working code in the site.search() function added yesterday. The MediaWiki
API has a nice and suitable 'list=search' parameter, but it isn't working or is disabled,
so I cannot use it.

Modified Paths:
--------------
    trunk/pywikipedia/family.py
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/family.py
===================================================================

--- trunk/pywikipedia/family.py	2007-10-04 07:28:42 UTC (rev 4415)
+++ trunk/pywikipedia/family.py	2007-10-04 15:36:58 UTC (rev 4416)
@@ -2631,7 +2631,7 @@
                     if i > 0:
                         namespace_params = namespace_params + '&ns%d=1' % i
 
-        return "%s?title=%s:Search&search=%s&limit=%d%s" % (self.path(code),
+        return "%s?title=%s:Search&search=%s&limit=%d%s&fulltext=1" % (self.path(code),
                                                             self.special_namespace_url(code),
                                                             query,
                                                             limit,

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2007-10-04 07:28:42 UTC (rev 4415)
+++ trunk/pywikipedia/wikipedia.py	2007-10-04 15:36:58 UTC (rev 4416)
@@ -505,7 +505,7 @@
         If change_edit_time is False, do not check this version for changes
         before saving. This should be used only if the page has been loaded
         previously.
-        
+
         """
         # NOTE: The following few NoPage exceptions could already be thrown at
         # the Page() constructor. They are raised here instead for convenience,
@@ -3666,44 +3666,38 @@
         except KeyError:
             return False
 
-    def search(self, query, number = 10, repeat = False, namespaces = None):
+    def search(self, query, number = 10, namespaces = None):
         """
         Generator which yields search results
         """
-        seen = set()
         throttle = True
-        while True:
-            path = self.search_address(query, n=number, ns = namespaces)
-            get_throttle()
-            html = self.getUrl(path)
-            entryR = re.compile(ur'<li[^>]*><a href=".+?" title="(?P<title>.+?)">.+?</a>'
-                                  '(?P<match>.*?)<br ?/><span[^>]*>Relevance: '
-                                  '(?P<relevance>[0-9.]+)% - '
-                                  '(?P<size>[0-9.]+) '
-                                  '(?P<sizeunit>[A-Za-z]+) '
-                                  '\((?P<words>.+?) words\) - '
-                                  '(?P<date>.+?)</span></li>', re.DOTALL)
+        path = self.search_address(urllib.quote_plus(query), n=number, ns = namespaces)
+        get_throttle()
+        html = self.getUrl(path)
 
-            for m in entryR.finditer(html):
-                title = m.group('title')
+        entryR = re.compile(ur'<li[^>]*><a href=".+?" title="(?P<title>.+?)">.+?</a>'
+                              '<br />(?P<match>.*?)<span style="color[^>]*>.+?: '
+                              '(?P<relevance>[0-9.]+)% - '
+#                              '(?P<size>[0-9.]*) '
+#                              '(?P<sizeunit>[A-Za-z]) '
+#                              '\((?P<words>.+?) \w+\) - '
+#                              '(?P<date>.+?)</span></li>'
+                              , re.DOTALL)
 
-                if title not in seen:
-                    seen.add(title)
-                    page = Page(self, title)
+        for m in entryR.finditer(html):
+            page = Page(self, m.group('title'))
+            match = m.group('match')
+            relevance = m.group('relevance')
+            #size = m.group('size')
+            ## sizeunit appears to always be "KB"
+            #words = m.group('words')
+            #date = m.group('date')
 
-                    match = m.group('match')
-                    relevance = m.group('relevance')
-                    size = m.group('size')
-                    # sizeunit appears to always be "KB"
-                    words = m.group('words')
-                    date = m.group('date')
+            #print "%s - %s %s (%s words) - %s" % (relevance, size, sizeunit, words, date)
 
-                    #print "%s - %s %s (%s words) - %s" % (relevance, size, sizeunit, words, date)
+            #yield page, match, relevance, size, words, date
+            yield page, match, relevance, '', '', ''
 
-                    yield page, match, relevance, size, words, date
-            if not repeat:
-                break
-
     # TODO: avoid code duplication for the following methods
     def newpages(self, number = 10, get_redirect = False, repeat = False):
         """Generator which yields new articles subsequently.