[Pywikipedia-l] SVN: [5310] branches/rewrite/pywikibot

russblau at svn.wikimedia.org russblau at svn.wikimedia.org
Mon May 5 21:47:19 UTC 2008


Revision: 5310
Author:   russblau
Date:     2008-05-05 21:47:18 +0000 (Mon, 05 May 2008)

Log Message:
-----------
debugging

Modified Paths:
--------------
    branches/rewrite/pywikibot/login.py
    branches/rewrite/pywikibot/site.py
    branches/rewrite/pywikibot/throttle.py

Modified: branches/rewrite/pywikibot/login.py
===================================================================
--- branches/rewrite/pywikibot/login.py	2008-05-05 21:46:16 UTC (rev 5309)
+++ branches/rewrite/pywikibot/login.py	2008-05-05 21:47:18 UTC (rev 5310)
@@ -44,6 +44,7 @@
 #
 __version__='$Id$'
 
+import logging
 import re
 import urllib2
 import config

Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py	2008-05-05 21:46:16 UTC (rev 5309)
+++ branches/rewrite/pywikibot/site.py	2008-05-05 21:47:18 UTC (rev 5310)
@@ -578,6 +578,9 @@
     def preloadpages(self, pagelist, groupsize=60):
         """Return a generator to a list of preloaded pages.
 
+        Note that [at least in current implementation] pages may be iterated
+        in a different order than in the underlying pagelist.
+
         @param pagelist: an iterable that returns Page objects
         @param groupsize: how many Pages to query at a time
         @type groupsize: int
@@ -585,7 +588,9 @@
         """
         from pywikibot.tools import itergroup
         for sublist in itergroup(pagelist, groupsize):
-            pageids = [str(p._pageid) for p in sublist if hasattr(p, "_pageid")]
+            pageids = [str(p._pageid) for p in sublist
+                                      if hasattr(p, "_pageid")
+                                         and p._pageid > 0]
             cache = dict((p.title(withSection=False), p) for p in sublist)
             rvgen = api.PropertyGenerator("revisions|info")
             if len(pageids) == len(sublist):
@@ -595,12 +600,21 @@
                 rvgen.request["titles"] = "|".join(cache.keys())
             rvgen.request[u"rvprop"] = \
                     u"ids|flags|timestamp|user|comment|content"
+            logging.info(u"Retrieving %s pages from %s."
+                           % (len(cache), self)
+                        )
             for pagedata in rvgen:
-                if pagedata['title'] not in cache:
-                    raise Error(
+                try:
+                    if pagedata['title'] not in cache:
+                        raise Error(
                         u"preloadpages: Query returned unexpected title '%s'"
-                         % pagedata['title']
-                    )
+                             % pagedata['title']
+                        )
+                except KeyError:
+                    logging.debug("No 'title' in %s" % pagedata)
+                    logging.debug("pageids=%s" % pageids)
+                    logging.debug("titles=%s" % cache.keys())
+                    continue
                 page = cache[pagedata['title']]
                 api.update_page(page, pagedata)
                 if 'revisions' in pagedata: # true if page exists

Modified: branches/rewrite/pywikibot/throttle.py
===================================================================
--- branches/rewrite/pywikibot/throttle.py	2008-05-05 21:46:16 UTC (rev 5309)
+++ branches/rewrite/pywikibot/throttle.py	2008-05-05 21:47:18 UTC (rev 5310)
@@ -245,6 +245,12 @@
             # account for any time we waited while acquiring the lock
             wait = delay - (time.time() - started)
             if wait > 0:
+                if wait > config.noisysleep:
+                    logging.warn(u"Sleeping for %.1f seconds, %s"
+                                  % (wait,
+                                     time.strftime("%Y-%m-%d %H:%M:%S",
+                                                   time.localtime()))
+                                     )
                 time.sleep(wait)
         finally:
             self.lock.release()





More information about the Pywikipedia-l mailing list