[Pywikipedia-l] SVN: [5310] branches/rewrite/pywikibot
russblau at svn.wikimedia.org
russblau at svn.wikimedia.org
Mon May 5 21:47:19 UTC 2008
Revision: 5310
Author: russblau
Date: 2008-05-05 21:47:18 +0000 (Mon, 05 May 2008)
Log Message:
-----------
debugging
Modified Paths:
--------------
branches/rewrite/pywikibot/login.py
branches/rewrite/pywikibot/site.py
branches/rewrite/pywikibot/throttle.py
Modified: branches/rewrite/pywikibot/login.py
===================================================================
--- branches/rewrite/pywikibot/login.py 2008-05-05 21:46:16 UTC (rev 5309)
+++ branches/rewrite/pywikibot/login.py 2008-05-05 21:47:18 UTC (rev 5310)
@@ -44,6 +44,7 @@
#
__version__='$Id$'
+import logging
import re
import urllib2
import config
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-05-05 21:46:16 UTC (rev 5309)
+++ branches/rewrite/pywikibot/site.py 2008-05-05 21:47:18 UTC (rev 5310)
@@ -578,6 +578,9 @@
def preloadpages(self, pagelist, groupsize=60):
"""Return a generator to a list of preloaded pages.
+ Note that [at least in current implementation] pages may be iterated
+ in a different order than in the underlying pagelist.
+
@param pagelist: an iterable that returns Page objects
@param groupsize: how many Pages to query at a time
@type groupsize: int
@@ -585,7 +588,9 @@
"""
from pywikibot.tools import itergroup
for sublist in itergroup(pagelist, groupsize):
- pageids = [str(p._pageid) for p in sublist if hasattr(p, "_pageid")]
+ pageids = [str(p._pageid) for p in sublist
+ if hasattr(p, "_pageid")
+ and p._pageid > 0]
cache = dict((p.title(withSection=False), p) for p in sublist)
rvgen = api.PropertyGenerator("revisions|info")
if len(pageids) == len(sublist):
@@ -595,12 +600,21 @@
rvgen.request["titles"] = "|".join(cache.keys())
rvgen.request[u"rvprop"] = \
u"ids|flags|timestamp|user|comment|content"
+ logging.info(u"Retrieving %s pages from %s."
+ % (len(cache), self)
+ )
for pagedata in rvgen:
- if pagedata['title'] not in cache:
- raise Error(
+ try:
+ if pagedata['title'] not in cache:
+ raise Error(
u"preloadpages: Query returned unexpected title '%s'"
- % pagedata['title']
- )
+ % pagedata['title']
+ )
+ except KeyError:
+ logging.debug("No 'title' in %s" % pagedata)
+ logging.debug("pageids=%s" % pageids)
+ logging.debug("titles=%s" % cache.keys())
+ continue
page = cache[pagedata['title']]
api.update_page(page, pagedata)
if 'revisions' in pagedata: # true if page exists
Modified: branches/rewrite/pywikibot/throttle.py
===================================================================
--- branches/rewrite/pywikibot/throttle.py 2008-05-05 21:46:16 UTC (rev 5309)
+++ branches/rewrite/pywikibot/throttle.py 2008-05-05 21:47:18 UTC (rev 5310)
@@ -245,6 +245,12 @@
# account for any time we waited while acquiring the lock
wait = delay - (time.time() - started)
if wait > 0:
+ if wait > config.noisysleep:
+ logging.warn(u"Sleeping for %.1f seconds, %s"
+ % (wait,
+ time.strftime("%Y-%m-%d %H:%M:%S",
+ time.localtime()))
+ )
time.sleep(wait)
finally:
self.lock.release()
More information about the Pywikipedia-l mailing list