Revision: 6159
Author: russblau
Date: 2008-12-17 19:57:39 +0000 (Wed, 17 Dec 2008)
Log Message:
-----------
Improve output and logfile formatting.
Modified Paths:
--------------
branches/rewrite/pywikibot/__init__.py
branches/rewrite/pywikibot/bot.py
branches/rewrite/pywikibot/comms/http.py
branches/rewrite/pywikibot/config2.py
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/pagegenerators.py
branches/rewrite/pywikibot/scripts/touch.py
branches/rewrite/pywikibot/throttle.py
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py 2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/__init__.py 2008-12-17 19:57:39 UTC (rev 6159)
@@ -105,7 +105,13 @@
# User interface functions (kept extremely simple for debugging)
def output(text, toStdout=False):
- print text.encode(config.console_encoding, "xmlcharrefreplace")
+ if toStdout:
+ level = STDOUT
+ else:
+ level = logging.INFO
+ logging.getLogger().log(level,
+ text.encode(config.console_encoding,
+ "xmlcharrefreplace"))
def input(prompt, password=False):
if isinstance(prompt, unicode):
@@ -169,7 +175,7 @@
# only need one drop() call because all throttles use the same global pid
try:
_sites[_sites.keys()[0]].throttle.drop()
- logger.log("VERBOSE", "Dropped throttle(s).")
+ logger.log(pywikibot.VERBOSE, "Dropped throttle(s).")
except IndexError:
pass
Modified: branches/rewrite/pywikibot/bot.py
===================================================================
--- branches/rewrite/pywikibot/bot.py 2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/bot.py 2008-12-17 19:57:39 UTC (rev 6159)
@@ -21,6 +21,13 @@
from pywikibot import config2 as config
+# logging levels
+
+STDOUT = 16
+VERBOSE = 18
+INPUT = 25
+
+
def calledModuleName():
"""Return the name of the module calling this function.
@@ -35,6 +42,18 @@
return os.path.basename(called)
+class LevelFilter(logging.Filter):
+ """Filter that only passes records at a specific level."""
+ def __init__(self, level=None):
+ self.level = level
+
+ def filter(self, record):
+ if self.level:
+ return record.levelno == self.level
+ else:
+ return True
+
+
def _decodeArg(arg):
if sys.platform=='win32':
if config.console_encoding == 'cp850':
@@ -77,7 +96,7 @@
for arg in args:
arg = _decodeArg(arg)
if arg == '-help':
- showHelp(moduleName)
+ showHelp()
sys.exit(0)
elif arg.startswith('-family:'):
config.family = arg[8:]
@@ -100,10 +119,8 @@
elif arg == "-debug":
if moduleName not in config.log:
config.log.append(moduleName)
- config.log.debug_log = True
+ config.debug_log = True
elif arg == '-verbose' or arg == "-v":
- pywikibot.output(u'Pywikipediabot %s' % (version.getversion()))
- pywikibot.output(u'Python %s' % (sys.version))
config.verbose_output += 1
elif arg == '-daemonize':
import daemonize
@@ -118,38 +135,60 @@
# initialize logging system for terminal-based bots
- logging.addLevelName(18, "VERBOSE") # for messages to be displayed on
- # terminal at "verbose" setting
- # use INFO for messages to be displayed
- # even on non-verbose setting
- logging.addLevelName(24, "STDOUT") # for messages to be displayed to stdout
- logging.addLevelName(26, "INPUT") # for prompts requiring user response
+ logging.addLevelName(VERBOSE, "VERBOSE")
+ # for messages to be displayed on terminal at "verbose" setting
+ # use INFO for messages to be displayed even on non-verbose setting
+ logging.addLevelName(STDOUT, "STDOUT")
+ # for messages to be displayed to stdout
+ logging.addLevelName(INPUT, "INPUT")
+ # for prompts requiring user response
- logging.basicConfig() # initializes root logger
+ logging.basicConfig(format="%(message)s") # initialize root logger
root_logger = logging.getLogger()
+ default_handler = root_logger.handlers[0]
+ root_logger.setLevel(logging.DEBUG) # all records go to logger
+ # handlers filter separately by level
if config.verbose_output:
- root_logger.setLevel("VERBOSE")
+ default_handler.setLevel(VERBOSE)
else:
- root_logger.setLevel(logging.INFO)
- if moduleName in config.log:
+ default_handler.setLevel(logging.INFO)
+ if moduleName in config.log or '*' in config.log:
if config.logfilename:
logfile = config.datafilepath(config.logfilename)
else:
- logfile = config.datafilepath("%s.log" % moduleName)
+ logfile = config.datafilepath("%s-bot.log" % moduleName)
file_handler = logging.handlers.RotatingFileHandler(
filename=logfile, maxBytes=2 << 20, backupCount=5)
if config.debug_log:
file_handler.setLevel(logging.DEBUG)
else:
- file_handler.setLevel("VERBOSE")
- logging.addHandler(file_handler)
+ file_handler.setLevel(VERBOSE)
+ form = logging.Formatter(
+ fmt="%(asctime)s %(filename)-18s:%(lineno)-4d "
+ "%(levelname)-8s %(message)s",
+ datefmt="%Y-%m-%d %H:%M:%S"
+ )
+ file_handler.setFormatter(form)
+ root_logger.addHandler(file_handler)
+ output_handler = logging.StreamHandler(strm=sys.stdout)
+ output_handler.setLevel(STDOUT)
+ output_handler.addFilter(LevelFilter(STDOUT))
+ root_logger.addHandler(output_handler)
+
+ if config.verbose_output:
+ import re
+ ver = pywikibot.__version__ # probably can be improved on
+ m = re.search(r"\$Id: .* (\d+ \d+-\d+-\d+ \d+:\d+:\d+Z) .*\$", ver)
+ pywikibot.output(u'Pywikipediabot r%s' % m.group(1))
+ pywikibot.output(u'Python %s' % sys.version)
+
return nonGlobalArgs
def showHelp(name=""):
# argument, if given, is ignored
- module = calledModuleName()
+ modname = calledModuleName()
globalHelp =u'''\
Global arguments available for all bots:
@@ -170,28 +209,30 @@
-help Shows this help text.
--log Enable the logfile. Logs will be stored in the logs
- subdirectory.
+-log Enable the logfile, using the default filename
+ '%s-bot.log'
--log:xyz Enable the logfile, using xyz as the filename.
+-log:xyz Enable the logfile, using 'xyz' as the filename.
-nolog Disable the logfile (if it is enabled by default).
+-debug Enable the logfile and include extensive debugging data.
+
-putthrottle:n Set the minimum time (in seconds) the bot will wait between
-pt:n saving pages.
-verbose Have the bot provide additional output that may be useful in
-v debugging.
-'''
+''' % modname
try:
- exec('import %s as module' % module)
+ exec('import %s as module' % modname)
helpText = module.__doc__.decode('utf-8')
if hasattr(module, 'docuReplacements'):
for key, value in module.docuReplacements.iteritems():
helpText = helpText.replace(key, value.strip('\n\r'))
pywikibot.output(helpText)
except:
- if module:
- pywikibot.output(u'Sorry, no help available for %s' % module)
+ if modname:
+ pywikibot.output(u'Sorry, no help available for %s' % modname)
logging.exception('showHelp:')
pywikibot.output(globalHelp)
Modified: branches/rewrite/pywikibot/comms/http.py
===================================================================
--- branches/rewrite/pywikibot/comms/http.py 2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/comms/http.py 2008-12-17 19:57:39 UTC (rev 6159)
@@ -27,7 +27,7 @@
import logging
import atexit
-from pywikibot import config
+from pywikibot import config, VERBOSE
import cookielib
import threadedhttp
@@ -57,7 +57,7 @@
# Build up HttpProcessors
-logger.info('Starting %(numthreads)i threads...' % locals())
+logger.log(VERBOSE, 'Starting %(numthreads)i threads...', locals())
for i in range(numthreads):
proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool)
proc.setDaemon(True)
@@ -68,7 +68,7 @@
def _flush():
for i in threads:
http_queue.put(None)
- logger.info('Waiting for threads to finish... ')
+ logger.log(VERBOSE, 'Waiting for threads to finish... ')
for i in threads:
i.join()
logger.debug('All threads finished.')
Modified: branches/rewrite/pywikibot/config2.py
===================================================================
--- branches/rewrite/pywikibot/config2.py 2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/config2.py 2008-12-17 19:57:39 UTC (rev 6159)
@@ -214,10 +214,13 @@
# log = []
# Per default, logging of interwiki.py is enabled because its logfiles can
# be used to generate so-called warnfiles.
-# This setting can be overridden by the -log or -nolog command-line arguments.
log = ['interwiki']
-logfilename = None # defaults to modulename.log
+# filename defaults to modulename-bot.log
+logfilename = None
+# set to 1 (or higher) to generate "informative" messages to terminal
verbose_output = 0
+# if True, include a lot of debugging info in logfile
+# (overrides log setting above)
debug_log = False
############## INTERWIKI SETTINGS ##############
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/data/api.py 2008-12-17 19:57:39 UTC (rev 6159)
@@ -195,7 +195,7 @@
rawdata = http.request(self.site, uri)
except Exception, e: #TODO: what exceptions can occur here?
logger.warning(traceback.format_exc())
- print uri, params
+ logger.warning("%s, %s", uri, params)
self.wait()
continue
if not isinstance(rawdata, unicode):
@@ -621,7 +621,7 @@
from pywikibot import Site
logger.setLevel(pywikibot.logging.DEBUG)
mysite = Site("en", "wikipedia")
- print "starting test...."
+ pywikibot.output("starting test....")
def _test():
import doctest
doctest.testmod()
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/page.py 2008-12-17 19:57:39 UTC (rev 6159)
@@ -37,8 +37,8 @@
"""
- @deprecate_arg("insite", None)
- @deprecate_arg("defaultNamespace", None)
+# @deprecate_arg("insite", None)
+# @deprecate_arg("defaultNamespace", None)
def __init__(self, source, title=u"", ns=0):
"""Instantiate a Page object.
Modified: branches/rewrite/pywikibot/pagegenerators.py
===================================================================
--- branches/rewrite/pywikibot/pagegenerators.py 2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/pagegenerators.py 2008-12-17 19:57:39 UTC (rev 6159)
@@ -328,92 +328,6 @@
return gen
-class ThreadedGenerator(threading.Thread):
- """Look-ahead generator class.
-
- Runs a generator in a separate thread and queues the results; can
- be called like a regular generator.
-
- Subclasses should override self.generator, _not_ self.run
-
- Important: the generator thread will stop itself if the generator's
- internal queue is exhausted; but, if the calling program does not use
- all the generated values, it must call the generator's stop() method to
- stop the background thread. Example usage:
-
- >>> gen = ThreadedGenerator(target=foo)
- >>> try:
- ... for data in gen:
- ... do_work(data)
- ... finally:
- ... gen.stop()
-
- """ #NOT CURRENTLY USED: Intended for future development
-
- def __init__(self, group=None, target=None, name="GeneratorThread",
- args=(), kwargs=None, qsize=65536):
- """Constructor. Takes same keyword arguments as threading.Thread.
-
- target must be a generator function (or other callable that returns
- an iterable object).
-
- @param qsize: The size of the lookahead queue. The larger the qsize,
- the more values will be computed in advance of use (which can eat
- up memory and processor time).
- @type qsize: int
-
- """
- if kwargs is None:
- kwargs = {}
- if target:
- self.generator = target
- if not hasattr(self, "generator"):
- raise RuntimeError("No generator for ThreadedGenerator to run.")
- self.args, self.kwargs = args, kwargs
- threading.Thread.__init__(self, group=group, name=name)
- self.queue = Queue.Queue(qsize)
- self.finished = threading.Event()
-
- def __iter__(self):
- """Iterate results from the queue."""
- if not self.isAlive() and not self.finished.isSet():
- self.start()
- # if there is an item in the queue, yield it, otherwise wait
- while not self.finished.isSet():
- try:
- yield self.queue.get(True, 0.25)
- except Queue.Empty:
- pass
- except KeyboardInterrupt:
- self.stop()
-
- def stop(self):
- """Stop the background thread."""
-## if not self.finished.isSet():
-## pywikibot.output("DEBUG: signalling %s to stop." % self)
- self.finished.set()
-
- def run(self):
- """Run the generator and store the results on the queue."""
- self.__gen = self.generator(*self.args, **self.kwargs)
- for result in self.__gen:
- while True:
- if self.finished.isSet():
-## pywikibot.output("DEBUG: %s received stop signal." % self)
- return
- try:
- self.queue.put_nowait(result)
- except Queue.Full:
- time.sleep(0.25)
- continue
- break
- # wait for queue to be emptied, then kill the thread
- while not self.finished.isSet() and not self.queue.empty():
- time.sleep(0.25)
- self.stop()
-## pywikibot.output("DEBUG: %s stopped because generator exhausted." % self)
-
-
def AllpagesPageGenerator(start ='!', namespace=None, includeredirects=True,
site=None):
"""
@@ -761,6 +675,8 @@
for page in site.search(query, number=number, namespaces = namespaces):
yield page[0]
+# following classes just ported from version 1 without revision; not tested
+
class YahooSearchPageGenerator:
'''
To use this generator, install pYsearch
@@ -785,7 +701,8 @@
def __iter__(self):
# restrict query to local site
localQuery = '%s site:%s' % (self.query, self.site.hostname())
- base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address(''))
+ base = 'http://%s%s' % (self.site.hostname(),
+ self.site.nice_get_address(''))
for url in self.queryYahoo(localQuery):
if url[:len(base)] == base:
title = url[len(base):]
@@ -826,7 +743,8 @@
google.LICENSE_KEY = config.google_key
offset = 0
estimatedTotalResultsCount = None
- while not estimatedTotalResultsCount or offset < estimatedTotalResultsCount:
+ while not estimatedTotalResultsCount \
+ or offset < estimatedTotalResultsCount:
while (True):
# Google often yields 502 errors.
try:
@@ -887,7 +805,8 @@
def __iter__(self):
# restrict query to local site
localQuery = '%s site:%s' % (self.query, self.site.hostname())
- base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address(''))
+ base = 'http://%s%s' % (self.site.hostname(),
+ self.site.nice_get_address(''))
for url in self.queryGoogle(localQuery):
if url[:len(base)] == base:
title = url[len(base):]
Modified: branches/rewrite/pywikibot/scripts/touch.py
===================================================================
--- branches/rewrite/pywikibot/scripts/touch.py 2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/scripts/touch.py 2008-12-17 19:57:39 UTC (rev 6159)
@@ -39,11 +39,12 @@
text = page.get(get_redirect = self.touch_redirects)
page.save("Pywikibot touch script")
except pywikibot.NoPage:
- print "Page %s does not exist?!" % page.aslink()
+ pywikibot.output("Page %s does not exist?!" % page.aslink())
except pywikibot.IsRedirectPage:
- print "Page %s is a redirect; skipping." % page.aslink()
+ pywikibot.output("Page %s is a redirect; skipping."
+ % page.aslink())
except pywikibot.LockedPage:
- print "Page %s is locked?!" % page.aslink()
+ pywikibot.output("Page %s is locked?!" % page.aslink())
def main(*args):
Modified: branches/rewrite/pywikibot/throttle.py
===================================================================
--- branches/rewrite/pywikibot/throttle.py 2008-12-17 16:11:20 UTC (rev 6158)
+++ branches/rewrite/pywikibot/throttle.py 2008-12-17 19:57:39 UTC (rev 6159)
@@ -39,7 +39,7 @@
multiplydelay=True, verbosedelay=False):
self.lock = threading.RLock()
self.mysite = str(site)
- self.logfn = config.datafilepath('throttle.log')
+ self.ctrlfilename = config.datafilepath('throttle.ctrl')
self.mindelay = mindelay
if self.mindelay is None:
self.mindelay = config.minthrottle
@@ -73,7 +73,7 @@
count = 1
# open throttle.log
try:
- f = open(self.logfn, 'r')
+ f = open(self.ctrlfilename, 'r')
except IOError:
if not pid:
pass
@@ -110,7 +110,7 @@
processes.append({'pid': pid,
'time': self.checktime,
'site': mysite})
- f = open(self.logfn, 'w')
+ f = open(self.ctrlfilename, 'w')
processes.sort(key=lambda p:(p['pid'], p['site']))
for p in processes:
f.write("%(pid)s %(time)s %(site)s\n" % p)
@@ -187,7 +187,7 @@
self.checktime = 0
processes = []
try:
- f = open(self.logfn, 'r')
+ f = open(self.ctrlfilename, 'r')
except IOError:
return
else:
@@ -206,15 +206,14 @@
processes.append({'pid': this_pid,
'time': ptime,
'site': this_site})
- f = open(self.logfn, 'w')
+ f = open(self.ctrlfilename, 'w')
processes.sort(key=lambda p:p['pid'])
for p in processes:
f.write("%(pid)s %(time)s %(site)s\n" % p)
f.close()
def __call__(self, requestsize=1, write=False):
- """
- Block the calling program if the throttle time has not expired.
+ """Block the calling program if the throttle time has not expired.
Parameter requestsize is the number of Pages to be read/written;
multiply delay time by an appropriate factor.
@@ -248,8 +247,7 @@
self.lock.release()
def lag(self, lagtime):
- """
- Seize the throttle lock due to server lag.
+ """Seize the throttle lock due to server lag.
This will prevent any thread from accessing this site.