[Pywikipedia-l] SVN: [4416] trunk/pywikipedia - pywikibot

4 Oct 2007

Revision: 4416
Author:   cosoleto
Date:     2007-10-04 15:36:58 +0000 (Thu, 04 Oct 2007)
Log Message:
-----------
Fixed the non-working code in the site.search() function added yesterday. The MediaWiki API has a nice and suitable 'list=search' parameter, but it isn't working or is disabled, so I cannot use it.
Modified Paths:
--------------
    trunk/pywikipedia/family.py
    trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py
===================================================================

--- trunk/pywikipedia/family.py	2007-10-04 07:28:42 UTC (rev 4415)
+++ trunk/pywikipedia/family.py	2007-10-04 15:36:58 UTC (rev 4416)
@@ -2631,7 +2631,7 @@
                     if i > 0:
                         namespace_params = namespace_params + '&ns%d=1' % i
-        return "%s?title=%s:Search&search=%s&limit=%d%s" % (self.path(code),
+        return "%s?title=%s:Search&search=%s&limit=%d%s&fulltext=1" % (self.path(code),
                                                             self.special_namespace_url(code),
                                                             query,
                                                             limit,
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2007-10-04 07:28:42 UTC (rev 4415)
+++ trunk/pywikipedia/wikipedia.py	2007-10-04 15:36:58 UTC (rev 4416)
@@ -505,7 +505,7 @@
         If change_edit_time is False, do not check this version for changes
         before saving. This should be used only if the page has been loaded
         previously.
-        
+
         """
         # NOTE: The following few NoPage exceptions could already be thrown at
         # the Page() constructor. They are raised here instead for convenience,
@@ -3666,44 +3666,38 @@
         except KeyError:
             return False
-    def search(self, query, number = 10, repeat = False, namespaces = None):
+    def search(self, query, number = 10, namespaces = None):
         """
         Generator which yields search results
         """
-        seen = set()
         throttle = True
-        while True:
-            path = self.search_address(query, n=number, ns = namespaces)
-            get_throttle()
-            html = self.getUrl(path)
-            entryR = re.compile(ur'<li[^>]*><a href=".+?" title="(?P<title>.+?)">.+?</a>'
-                                  '(?P<match>.*?)<br ?/><span[^>]*>Relevance: '
-                                  '(?P<relevance>[0-9.]+)% - '
-                                  '(?P<size>[0-9.]+) '
-                                  '(?P<sizeunit>[A-Za-z]+) '
-                                  '((?P<words>.+?) words) - '
-                                  '(?P<date>.+?)</span></li>', re.DOTALL)
+        path = self.search_address(urllib.quote_plus(query), n=number, ns = namespaces)
+        get_throttle()
+        html = self.getUrl(path)
-            for m in entryR.finditer(html):
-                title = m.group('title')
+        entryR = re.compile(ur'<li[^>]*><a href=".+?" title="(?P<title>.+?)">.+?</a>'
+                              '<br />(?P<match>.*?)<span style="color[^>]*>.+?: '
+                              '(?P<relevance>[0-9.]+)% - '
+#                              '(?P<size>[0-9.]*) '
+#                              '(?P<sizeunit>[A-Za-z]) '
+#                              '((?P<words>.+?) \w+) - '
+#                              '(?P<date>.+?)</span></li>'
+                              , re.DOTALL)
-                if title not in seen:
-                    seen.add(title)
-                    page = Page(self, title)
+        for m in entryR.finditer(html):
+            page = Page(self, m.group('title'))
+            match = m.group('match')
+            relevance = m.group('relevance')
+            #size = m.group('size')
+            ## sizeunit appears to always be "KB"
+            #words = m.group('words')
+            #date = m.group('date')
-                    match = m.group('match')
-                    relevance = m.group('relevance')
-                    size = m.group('size')
-                    # sizeunit appears to always be "KB"
-                    words = m.group('words')
-                    date = m.group('date')
+            #print "%s - %s %s (%s words) - %s" % (relevance, size, sizeunit, words, date)
-                    #print "%s - %s %s (%s words) - %s" % (relevance, size, sizeunit, words, date)
+            #yield page, match, relevance, size, words, date
+            yield page, match, relevance, '', '', ''
-                    yield page, match, relevance, size, words, date
-            if not repeat:
-                break
-
     # TODO: avoid code duplication for the following methods
     def newpages(self, number = 10, get_redirect = False, repeat = False):
         """Generator which yields new articles subsequently.