[Pywikipedia-l] SVN: [5363] branches/rewrite/pywikibot/site.py

russblau at svn.wikimedia.org russblau at svn.wikimedia.org
Tue May 13 15:25:56 UTC 2008


Revision: 5363
Author:   russblau
Date:     2008-05-13 15:25:55 +0000 (Tue, 13 May 2008)

Log Message:
-----------
site methods: exturlusage, imageusage, logevents, recentchanges

Modified Paths:
--------------
    branches/rewrite/pywikibot/site.py

Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py	2008-05-13 14:40:48 UTC (rev 5362)
+++ branches/rewrite/pywikibot/site.py	2008-05-13 15:25:55 UTC (rev 5363)
@@ -522,7 +522,7 @@
         if not hasattr(page, "_pageid"):
             self.loadpageinfo(page)
         return page._pageid > 0
-   
+
     def page_restrictions(self, page):
         """Returns a dictionary reflecting page protections"""
         if not self.page_exists(page):
@@ -957,7 +957,7 @@
 
         Note: parameters includeRedirects and throttle are deprecated and
         included only for backwards compatibility.
-        
+
         @param start: Start at this title (page need not exist).
         @param prefix: Only yield pages starting with this string.
         @param namespace: Iterate pages from this (single) namespace
@@ -996,7 +996,7 @@
                     filterredirs = None
             else:
                 filterredirs = False
-                    
+
         apgen = api.PageGenerator("allpages", gapnamespace=str(namespace),
                                   gapfrom=start, site=self)
         if prefix:
@@ -1068,7 +1068,9 @@
                       reverse=False):
         """Iterate categories used (which need not have a Category page).
 
-        Iterator yields Category objects.
+        Iterator yields Category objects. Note that, in practice, links that
+        were found on pages that have been deleted may not have been removed
+        from the database table, so this method can return false positives.
 
         @param start: Start at this category title (category need not exist).
         @param prefix: Only yield categories starting with this string.
@@ -1078,7 +1080,8 @@
             order (default: iterate in forward order)
 
         """
-        acgen = api.CategoryGenerator("allcategories", gapfrom=start, site=self)
+        acgen = api.CategoryPageGenerator("allcategories",
+                                          gapfrom=start, site=self)
         if prefix:
             acgen.request["gacprefix"] = prefix
         if isinstance(limit, int):
@@ -1095,7 +1098,7 @@
         present only if the user is a member of at least 1 group, and will
         be a list of unicodes; all the other values are unicodes and should
         always be present.
-        
+
         @param start: start at this username (name need not exist)
         @param prefix: only iterate usernames starting with this substring
         @param limit: maximum number of users to iterate (default: all)
@@ -1194,7 +1197,162 @@
             bkgen.limit = limit
         return bkgen
 
+    def exturlusage(self, url, protocol="http", namespaces=None,
+                    limit=None):
+        """Return a generator of Pages that link to the given external URL.
 
+        @param url: URL to look for, given without its protocol prefix; a
+            '*' wildcard is allowed, but only at the start of the hostname
+        @param protocol: protocol prefix of the URL (default: "http")
+        @param namespaces: restrict results to these namespaces (default: all)
+        @type namespaces: list of ints
+        @param limit: yield at most this many linking pages (default: all)
+        @return: the api.PageGenerator configured for this query
+
+        """
+        linkgen = api.PageGenerator("exturlusage", site=self,
+                                    geuquery=url, geuprotocol=protocol)
+        if namespaces is not None:
+            ns_ids = [unicode(ns) for ns in namespaces]
+            linkgen.request["geunamespace"] = u"|".join(ns_ids)
+        # a non-integer limit (including None) leaves the generator unbounded
+        if isinstance(limit, int):
+            linkgen.limit = limit
+        return linkgen
+
+    def imageusage(self, image, namespaces=None, filterredir=None,
+                   limit=None):
+        """Return a generator of Pages that link to the given ImagePage.
+
+        @param image: image whose usage is sought (need not exist on the wiki)
+        @type image: ImagePage
+        @param namespaces: restrict results to these namespaces (default: all)
+        @type namespaces: list of ints
+        @param filterredir: if True, yield only redirects; if False (and not
+            None), yield only non-redirects (default: yield both)
+        @param limit: yield at most this many linking pages (default: all)
+
+        """
+        usegen = api.PageGenerator("imageusage", site=self,
+                                   giutitle=image.title(withSection=False))
+        if namespaces is not None:
+            ns_ids = [unicode(ns) for ns in namespaces]
+            usegen.request["giunamespace"] = u"|".join(ns_ids)
+        if isinstance(limit, int):
+            usegen.limit = limit
+        if filterredir is not None:
+            redir_mode = {True: "redirects", False: "nonredirects"}
+            usegen.request["giufilterredir"] = redir_mode[bool(filterredir)]
+        return usegen
+
+    def logevents(self, logtype=None, user=None, page=None,
+                  start=None, end=None, reverse=False, limit=None):
+        """Return a generator of log entries, optionally filtered.
+
+        @param logtype: restrict to entries of this type; the wiki
+            documentation lists the available types, which will include
+            "block", "protect", "rights", "delete", "upload", "move",
+            "import", "patrol", "merge"
+        @param user: restrict to entries matching this user name
+        @param page: restrict to entries affecting this page
+        @param start: iteration begins at this timestamp (inclusive)
+        @param end: iteration stops at this timestamp (inclusive)
+        @param reverse: if True, yield oldest entries first (default: newest)
+        @param limit: yield at most this many entries
+        @raise Error: start and end are inconsistent with the direction
+
+        """
+        # start and end must follow the direction of iteration
+        if start and end:
+            if reverse and end < start:
+                raise Error(
+                  "logevents: end must be later than start with reverse=True")
+            if not reverse and start < end:
+                raise Error(
+                  "logevents: start must be later than end with reverse=False")
+        loggen = api.ListGenerator("logevents", site=self)
+        if logtype is not None:
+            loggen.request["letype"] = logtype
+        if user is not None:
+            loggen.request["leuser"] = user
+        if page is not None:
+            loggen.request["letitle"] = page.title(withSection=False)
+        if start is not None:
+            loggen.request["lestart"] = start
+        if end is not None:
+            loggen.request["leend"] = end
+        if reverse:
+            loggen.request["ledir"] = "newer"
+        if isinstance(limit, int):
+            loggen.limit = limit
+        return loggen
+
+    def recentchanges(self, start=None, end=None, reverse=False, limit=None,
+                      namespaces=None, pagelist=None, changetype=None,
+                      showMinor=None, showBot=None, showAnon=None,
+                      showRedirects=None, showPatrolled=None):
+        """Iterate recent changes.
+
+        @param start: timestamp to start listing from
+        @param end: timestamp to end listing at
+        @param reverse: if True, start with oldest changes (default: newest)
+        @param limit: iterate no more than this number of entries
+        @param namespaces: iterate changes to pages in these namespaces only
+        @type namespaces: list of ints
+        @param pagelist: iterate changes to pages in this list only
+        @type pagelist: list of Pages
+        @param changetype: only iterate changes of this type ("edit" for
+            edits to existing pages, "new" for new pages, "log" for log
+            entries)
+        @param showMinor: if True, only list minor edits; if False (and not
+            None), only list non-minor edits
+        @param showBot: if True, only list bot edits; if False (and not
+            None), only list non-bot edits
+        @param showAnon: if True, only list anon edits; if False (and not
+            None), only list non-anon edits
+        @param showRedirects: if True, only list edits to redirect pages; if
+            False (and not None), only list edits to non-redirect pages
+        @param showPatrolled: if True, only list patrolled edits; if False
+            (and not None), only list non-patrolled edits
+        @raise Error: start and end are inconsistent with the direction
+
+        """
+        # start and end must follow the direction of iteration
+        if start and end:
+            if reverse and end < start:
+                raise Error(
+            "recentchanges: end must be later than start with reverse=True")
+            if not reverse and start < end:
+                raise Error(
+            "recentchanges: start must be later than end with reverse=False")
+        rcgen = api.ListGenerator("recentchanges", site=self)
+        if start is not None:
+            rcgen.request["rcstart"] = start
+        if end is not None:
+            rcgen.request["rcend"] = end
+        if reverse:
+            rcgen.request["rcdir"] = "newer"
+        if isinstance(limit, int):
+            rcgen.limit = limit
+        if namespaces is not None:
+            rcgen.request["rcnamespace"] = u"|".join(unicode(ns)
+                                                     for ns in namespaces)
+        if pagelist:
+            rcgen.request["rctitles"] = u"|".join(p.title(withSection=False)
+                                                 for p in pagelist)
+        if changetype:
+            rcgen.request["rctype"] = changetype
+        filters = {'minor': showMinor, 'bot': showBot,
+                   'anon': showAnon, 'redirect': showRedirects,
+                   'patrolled': showPatrolled}
+        rcshow = []  # "flag" requires the property, "!flag" excludes it
+        for item in filters:
+            if filters[item] is not None:
+                rcshow.append(filters[item] and item or ("!"+item))
+        if rcshow:
+            rcgen.request["rcshow"] = "|".join(rcshow)
+        return rcgen
+
+
 #### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
 class NotImplementedYet:
 





More information about the Pywikipedia-l mailing list