Revision: 5371
Author:   russblau
Date:     2008-05-14 16:11:13 +0000 (Wed, 14 May 2008)
Log Message:
-----------
site: search, usercontribs, watchlist_revs methods (the watchlist_revs name
leaves room for a possible future method that just iterates the Pages in the
user's watchlist)
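For orientation, a rough sketch of how a bot script might drive the three new
generator methods. The Site constructor form, the limits, and the result-dict
keys shown here are illustrative assumptions, not part of this commit:

    import pywikibot

    site = pywikibot.Site("en", "wikipedia")   # constructor form assumed for illustration

    # Full-text search in the main namespace, at most 20 Page objects.
    for page in site.search(u"interwiki bot", namespaces=[0], limit=20):
        print page.title()

    # Recent edits by one user, newest first; iterated values are dicts in the
    # same shape as recentchanges results (key names assumed).
    for contrib in site.usercontribs(user=u"Example user", limit=10):
        print contrib["title"], contrib["timestamp"]

    # Non-bot, non-minor revisions to pages on the bot's watchlist.
    for rev in site.watchlist_revs(showBot=False, showMinor=False):
        print rev["title"], rev["user"]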
Modified Paths:
--------------
    branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py	2008-05-14 14:10:43 UTC (rev 5370)
+++ branches/rewrite/pywikibot/site.py	2008-05-14 16:11:13 UTC (rev 5371)
@@ -1090,6 +1090,16 @@
             acgen.request["gacdir"] = "descending"
         return acgen
+    def categories(self, number=10, repeat=False):
+        """Deprecated; retained for backwards-compatibility"""
+        logging.debug(
+            "Site.categories() method is deprecated; use .allcategories()")
+        if repeat:
+            limit = None
+        else:
+            limit = number
+        return self.allcategories(limit=limit)
+
     def allusers(self, start="!", prefix="", limit=None, group=None):
         """Iterate registered users, ordered by username.
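The shim above simply forwards the old interface to the new API-based
allcategories() generator. For example (a sketch, assuming a Site object named
site and that both generators yield objects with a title() method):

    # Deprecated spelling: logs a debug message, then delegates.
    for cat in site.categories(number=25):
        print cat.title()

    # Preferred spelling on the rewrite branch.
    for cat in site.allcategories(limit=25):
        print cat.title()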
@@ -1326,9 +1336,9 @@
                     "recentchanges: start must be later than end with reverse=False")
         rcgen = api.ListGenerator("recentchanges", site=self)
         if start is not None:
-            rcgen.request["start"] = start
+            rcgen.request["rcstart"] = start
         if end is not None:
-            rcgen.request["end"] = end
+            rcgen.request["rcend"] = end
         if reverse:
             rcgen.request["rcdir"] = "newer"
         if isinstance(limit, int):
@@ -1341,8 +1351,10 @@
                           for p in pagelist)
         if changetype:
             rcgen.request["rctype"] = changetype
-        filters = {'minor': showMinor, 'bot':showBot,
-                   'anon': showAnon, 'redirects': showRedirects,
+        filters = {'minor': showMinor,
+                   'bot': showBot,
+                   'anon': showAnon,
+                   'redirects': showRedirects,
                    'patrolled': showPatrolled}
         rcshow = []
         for item in filters:
@@ -1352,7 +1364,156 @@
             rcgen.request["rcshow"] = "|".join(rcshow)
         return rcgen
+    def search(self, searchstring, number=None, namespaces=[0], where="text",
+               getredirects=False, limit=None):
+        """Iterate Pages that contain the searchstring.
+
+        Note that this may include non-existing Pages if the wiki's database
+        table contains outdated entries.
+
+        @param searchstring: the text to search for
+        @type searchstring: unicode
+        @param where: Where to search; value must be "text" or "titles" (many
+            wikis do not support title search)
+        @param namespaces: search only in these namespaces (default: 0)
+        @type namespaces: list of ints
+        @param getredirects: if True, include redirects in results
+        @param limit: maximum number of results to iterate
+        @param number: deprecated, synonym for 'limit'
+
+        """
+        if number is not None:
+            logging.debug("search: number parameter is deprecated; use limit")
+            limit = number
+        if not searchstring:
+            raise Error("search: searchstring cannot be empty")
+        if where not in ("text", "titles"):
+            raise Error("search: unrecognized 'where' value: %s" % where)
+        srgen = api.PageGenerator("search", gsrsearch=searchstring,
+                                  gsrwhat=where, site=self)
+        if not namespaces:
+            logging.warning("search: namespaces cannot be empty; using [0].")
+            namespaces = [0]
+        if isinstance(namespaces, basestring):
+            srgen.request["gsrnamespace"] = namespaces
+        else:
+            srgen.request["gsrnamespace"] = u"|".join(unicode(ns)
+                                                      for ns in namespaces)
+        if getredirects:
+            srgen.request["gsrredirects"] = ""
+        if isinstance(limit, int):
+            srgen.limit = limit
+        return srgen
+
+    def usercontribs(self, user=None, userprefix=None, start=None, end=None,
+                     reverse=False, limit=None, namespaces=None,
+                     showMinor=None):
+        """Iterate contributions by a particular user.
+
+        Iterated values are in the same format as recentchanges.
+
+        @param user: Iterate contributions by this user (name or IP)
+        @param userprefix: Iterate contributions by all users whose names
+            or IPs start with this substring
+        @param start: Iterate contributions starting at this timestamp
+        @param end: Iterate contributions ending at this timestamp
+        @param reverse: Iterate oldest contributions first (default: newest)
+        @param limit: Maximum number of contributions to iterate
+        @param namespaces: Only iterate contributions in these namespaces
+        @type namespaces: list of ints
+        @param showMinor: if True, iterate only minor edits; if False and
+            not None, iterate only non-minor edits (default: iterate both)
+
+        """
+        if not (user or userprefix):
+            raise Error(
+                "usercontribs: either user or userprefix must be non-empty")
+        if start and end:
+            if reverse:
+                if end < start:
+                    raise Error(
+                        "usercontribs: end must be later than start with reverse=True")
+            else:
+                if start < end:
+                    raise Error(
+                        "usercontribs: start must be later than end with reverse=False")
+        ucgen = api.ListGenerator("usercontribs", site=self,
+                                  ucprop="ids|title|timestamp|comment|flags")
+        if user:
+            ucgen.request["ucuser"] = user
+        if userprefix:
+            ucgen.request["ucuserprefix"] = userprefix
+        if start is not None:
+            ucgen.request["ucstart"] = start
+        if end is not None:
+            ucgen.request["ucend"] = end
+        if reverse:
+            ucgen.request["ucdir"] = "newer"
+        if isinstance(limit, int):
+            ucgen.limit = limit
+        if namespaces is not None:
+            ucgen.request["ucnamespace"] = u"|".join(unicode(ns)
+                                                     for ns in namespaces)
+        if showMinor is not None:
+            ucgen.request["ucshow"] = showMinor and "minor" or "!minor"
+        return ucgen
+
+    def watchlist_revs(self, start=None, end=None, reverse=False,
+                       namespaces=None, showMinor=None, showBot=None,
+                       showAnon=None, limit=None):
+        """Iterate revisions to pages on the bot user's watchlist.
+
+        Iterated values will be in the same format as recentchanges.
+
+        @param start: Iterate revisions starting at this timestamp
+        @param end: Iterate revisions ending at this timestamp
+        @param reverse: Iterate oldest revisions first (default: newest)
+        @param namespaces: only iterate revisions to pages in these
+            namespaces (default: all)
+        @type namespaces: list of ints
+        @param showMinor: if True, only list minor edits; if False (and not
+            None), only list non-minor edits
+        @param showBot: if True, only list bot edits; if False (and not
+            None), only list non-bot edits
+        @param showAnon: if True, only list anon edits; if False (and not
+            None), only list non-anon edits
+        @param limit: maximum number of revisions to iterate
+
+        """
+        if start and end:
+            if reverse:
+                if end < start:
+                    raise Error(
+                        "watchlist_revs: end must be later than start with reverse=True")
+            else:
+                if start < end:
+                    raise Error(
+                        "watchlist_revs: start must be later than end with reverse=False")
+        wlgen = api.ListGenerator("watchlist", wlallrev="", site=self,
+                                  wlprop="user|comment|timestamp|title|ids|flags")
+        #TODO: allow users to ask for "patrol" as well?
+        if start is not None:
+            wlgen.request["wlstart"] = start
+        if end is not None:
+            wlgen.request["wlend"] = end
+        if reverse:
+            wlgen.request["wldir"] = "newer"
+        if isinstance(limit, int):
+            wlgen.limit = limit
+        if namespaces is not None:
+            wlgen.request["wlnamespace"] = u"|".join(unicode(ns)
+                                                     for ns in namespaces)
+        filters = {'minor': showMinor,
+                   'bot': showBot,
+                   'anon': showAnon}
+        wlshow = []
+        for item in filters:
+            if filters[item] is not None:
+                wlshow.append(filters[item] and item or ("!"+item))
+        if wlshow:
+            wlgen.request["wlshow"] = "|".join(wlshow)
+        return wlgen
+
+
 #### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
 class NotImplementedYet:
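All three new methods follow the same pattern as the recentchanges fix above:
every request key carries its API module's prefix (gsr*, uc*, wl*), and the
show-style filters are assembled with the old Python 2 and/or idiom. A
standalone illustration of what that filter loop produces (values made up):

    # Mirrors the rcshow/wlshow construction above, outside the Site class.
    filters = {'minor': True, 'bot': False, 'anon': None}
    show = []
    for item in filters:
        if filters[item] is not None:
            # True -> keep the flag name, False -> negate it, None -> no filter
            show.append(filters[item] and item or ("!" + item))
    print "|".join(show)   # e.g. "minor|!bot" (dict ordering may vary)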
@@ -1863,37 +2024,6 @@
         # Parse data
         self._getUserData(text, sysop = sysop)
-    def search(self, query, number = 10, namespaces = None):
-        """Yield search results (using Special:Search page) for query."""
-        throttle = True
-        path = self.search_address(urllib.quote_plus(query),
-                                   n=number, ns=namespaces)
-        get_throttle()
-        html = self.getUrl(path)
-
-        entryR = re.compile(ur'<li[^>]*><a href=".+?" title="(?P<title>.+?)">.+?</a>'
-                            '<br />(?P<match>.*?)<span style="color[^>]*>.+?: '
-                            '(?P<relevance>[0-9.]+)% - '
-#                            '(?P<size>[0-9.]*) '
-#                            '(?P<sizeunit>[A-Za-z]) '
-#                            '((?P<words>.+?) \w+) - '
-#                            '(?P<date>.+?)</span></li>'
-                            , re.DOTALL)
-
-        for m in entryR.finditer(html):
-            page = Page(self, m.group('title'))
-            match = m.group('match')
-            relevance = m.group('relevance')
-            #size = m.group('size')
-            ## sizeunit appears to always be "KB"
-            #words = m.group('words')
-            #date = m.group('date')
-
-            #print "%s - %s %s (%s words) - %s" % (relevance, size, sizeunit, words, date)
-
-            #yield page, match, relevance, size, words, date
-            yield page, match, relevance, '', '', ''
-
     # TODO: avoid code duplication for the following methods
     def newpages(self, number = 10, get_redirect = False, repeat = False):
         """Yield new articles (as Page objects) from Special:Newpages.
@@ -1987,25 +2117,6 @@
             if not repeat:
                 break
-    def categories(self, number=10, repeat=False):
-        """Yield Category objects from Special:Categories"""
-        import catlib
-        seen = set()
-        while True:
-            path = self.categories_address(n=number)
-            get_throttle()
-            html = self.getUrl(path)
-            entryR = re.compile(
-                '<li><a href=".+?" title="(?P<title>.+?)">.+?</a>.*?</li>')
-            for m in entryR.finditer(html):
-                title = m.group('title')
-                if title not in seen:
-                    seen.add(title)
-                    page = catlib.Category(self, title)
-                    yield page
-            if not repeat:
-                break
-
     def deadendpages(self, number = 10, repeat = False):
         """Yield Page objects retrieved from Special:Deadendpages."""
         seen = set()
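Scripts that still expect the six-tuple yielded by the removed screen-scraping
search() above can be bridged to the API-based method with a thin wrapper
along these lines (a hypothetical helper, not part of this commit; the API
result carries no match snippet or relevance score here, so those slots are
padded with blanks, just as size/words/date already were):

    def search_compat(site, query, number=10, namespaces=None):
        """Hypothetical shim mimicking the old (page, match, relevance, ...) tuples."""
        for page in site.search(query, namespaces=namespaces or [0], limit=number):
            yield page, u'', u'', u'', u'', u''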