Revision: 6159 Author: russblau Date: 2008-12-17 19:57:39 +0000 (Wed, 17 Dec 2008)
Log Message: ----------- Improve output and logfile formatting.
Modified Paths: -------------- branches/rewrite/pywikibot/__init__.py branches/rewrite/pywikibot/bot.py branches/rewrite/pywikibot/comms/http.py branches/rewrite/pywikibot/config2.py branches/rewrite/pywikibot/data/api.py branches/rewrite/pywikibot/page.py branches/rewrite/pywikibot/pagegenerators.py branches/rewrite/pywikibot/scripts/touch.py branches/rewrite/pywikibot/throttle.py
Modified: branches/rewrite/pywikibot/__init__.py =================================================================== --- branches/rewrite/pywikibot/__init__.py 2008-12-17 16:11:20 UTC (rev 6158) +++ branches/rewrite/pywikibot/__init__.py 2008-12-17 19:57:39 UTC (rev 6159) @@ -105,7 +105,13 @@ # User interface functions (kept extremely simple for debugging)
def output(text, toStdout=False): - print text.encode(config.console_encoding, "xmlcharrefreplace") + if toStdout: + level = STDOUT + else: + level = logging.INFO + logging.getLogger().log(level, + text.encode(config.console_encoding, + "xmlcharrefreplace"))
def input(prompt, password=False): if isinstance(prompt, unicode): @@ -169,7 +175,7 @@ # only need one drop() call because all throttles use the same global pid try: _sites[_sites.keys()[0]].throttle.drop() - logger.log("VERBOSE", "Dropped throttle(s).") + logger.log(pywikibot.VERBOSE, "Dropped throttle(s).") except IndexError: pass
Modified: branches/rewrite/pywikibot/bot.py =================================================================== --- branches/rewrite/pywikibot/bot.py 2008-12-17 16:11:20 UTC (rev 6158) +++ branches/rewrite/pywikibot/bot.py 2008-12-17 19:57:39 UTC (rev 6159) @@ -21,6 +21,13 @@ from pywikibot import config2 as config
+# logging levels + +STDOUT = 16 +VERBOSE = 18 +INPUT = 25 + + def calledModuleName(): """Return the name of the module calling this function.
@@ -35,6 +42,18 @@ return os.path.basename(called)
+class LevelFilter(logging.Filter): + """Filter that only passes records at a specific level.""" + def __init__(self, level=None): + self.level = level + + def filter(self, record): + if self.level: + return record.levelno == self.level + else: + return True + + def _decodeArg(arg): if sys.platform=='win32': if config.console_encoding == 'cp850': @@ -77,7 +96,7 @@ for arg in args: arg = _decodeArg(arg) if arg == '-help': - showHelp(moduleName) + showHelp() sys.exit(0) elif arg.startswith('-family:'): config.family = arg[8:] @@ -100,10 +119,8 @@ elif arg == "-debug": if moduleName not in config.log: config.log.append(moduleName) - config.log.debug_log = True + config.debug_log = True elif arg == '-verbose' or arg == "-v": - pywikibot.output(u'Pywikipediabot %s' % (version.getversion())) - pywikibot.output(u'Python %s' % (sys.version)) config.verbose_output += 1 elif arg == '-daemonize': import daemonize @@ -118,38 +135,60 @@
# initialize logging system for terminal-based bots
- logging.addLevelName(18, "VERBOSE") # for messages to be displayed on - # terminal at "verbose" setting - # use INFO for messages to be displayed - # even on non-verbose setting - logging.addLevelName(24, "STDOUT") # for messages to be displayed to stdout - logging.addLevelName(26, "INPUT") # for prompts requiring user response + logging.addLevelName(VERBOSE, "VERBOSE") + # for messages to be displayed on terminal at "verbose" setting + # use INFO for messages to be displayed even on non-verbose setting + logging.addLevelName(STDOUT, "STDOUT") + # for messages to be displayed to stdout + logging.addLevelName(INPUT, "INPUT") + # for prompts requiring user response
- logging.basicConfig() # initializes root logger + logging.basicConfig(format="%(message)s") # initialize root logger root_logger = logging.getLogger() + default_handler = root_logger.handlers[0] + root_logger.setLevel(logging.DEBUG) # all records go to logger + # handlers filter separately by level if config.verbose_output: - root_logger.setLevel("VERBOSE") + default_handler.setLevel(VERBOSE) else: - root_logger.setLevel(logging.INFO) - if moduleName in config.log: + default_handler.setLevel(logging.INFO) + if moduleName in config.log or '*' in config.log: if config.logfilename: logfile = config.datafilepath(config.logfilename) else: - logfile = config.datafilepath("%s.log" % moduleName) + logfile = config.datafilepath("%s-bot.log" % moduleName) file_handler = logging.handlers.RotatingFileHandler( filename=logfile, maxBytes=2 << 20, backupCount=5) if config.debug_log: file_handler.setLevel(logging.DEBUG) else: - file_handler.setLevel("VERBOSE") - logging.addHandler(file_handler) + file_handler.setLevel(VERBOSE) + form = logging.Formatter( + fmt="%(asctime)s %(filename)-18s:%(lineno)-4d " + "%(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + file_handler.setFormatter(form) + root_logger.addHandler(file_handler)
+ output_handler = logging.StreamHandler(strm=sys.stdout) + output_handler.setLevel(STDOUT) + output_handler.addFilter(LevelFilter(STDOUT)) + root_logger.addHandler(output_handler) + + if config.verbose_output: + import re + ver = pywikibot.__version__ # probably can be improved on + m = re.search(r"$Id: .* (\d+ \d+-\d+-\d+ \d+:\d+:\d+Z) .*$", ver) + pywikibot.output(u'Pywikipediabot r%s' % m.group(1)) + pywikibot.output(u'Python %s' % sys.version) + return nonGlobalArgs
def showHelp(name=""): # argument, if given, is ignored - module = calledModuleName() + modname = calledModuleName() globalHelp =u'''\ Global arguments available for all bots:
@@ -170,28 +209,30 @@
-help Shows this help text.
--log Enable the logfile. Logs will be stored in the logs - subdirectory. +-log Enable the logfile, using the default filename + '%s-bot.log'
--log:xyz Enable the logfile, using xyz as the filename. +-log:xyz Enable the logfile, using 'xyz' as the filename.
-nolog Disable the logfile (if it is enabled by default).
+-debug Enable the logfile and include extensive debugging data. + -putthrottle:n Set the minimum time (in seconds) the bot will wait between -pt:n saving pages.
-verbose Have the bot provide additional output that may be useful in -v debugging. -''' +''' % modname try: - exec('import %s as module' % module) + exec('import %s as module' % modname) helpText = module.__doc__.decode('utf-8') if hasattr(module, 'docuReplacements'): for key, value in module.docuReplacements.iteritems(): helpText = helpText.replace(key, value.strip('\n\r')) pywikibot.output(helpText) except: - if module: - pywikibot.output(u'Sorry, no help available for %s' % module) + if modname: + pywikibot.output(u'Sorry, no help available for %s' % modname) logging.exception('showHelp:') pywikibot.output(globalHelp)
Modified: branches/rewrite/pywikibot/comms/http.py =================================================================== --- branches/rewrite/pywikibot/comms/http.py 2008-12-17 16:11:20 UTC (rev 6158) +++ branches/rewrite/pywikibot/comms/http.py 2008-12-17 19:57:39 UTC (rev 6159) @@ -27,7 +27,7 @@ import logging import atexit
-from pywikibot import config +from pywikibot import config, VERBOSE import cookielib import threadedhttp
@@ -57,7 +57,7 @@
# Build up HttpProcessors -logger.info('Starting %(numthreads)i threads...' % locals()) +logger.log(VERBOSE, 'Starting %(numthreads)i threads...', locals()) for i in range(numthreads): proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool) proc.setDaemon(True) @@ -68,7 +68,7 @@ def _flush(): for i in threads: http_queue.put(None) - logger.info('Waiting for threads to finish... ') + logger.log(VERBOSE, 'Waiting for threads to finish... ') for i in threads: i.join() logger.debug('All threads finished.')
Modified: branches/rewrite/pywikibot/config2.py =================================================================== --- branches/rewrite/pywikibot/config2.py 2008-12-17 16:11:20 UTC (rev 6158) +++ branches/rewrite/pywikibot/config2.py 2008-12-17 19:57:39 UTC (rev 6159) @@ -214,10 +214,13 @@ # log = [] # Per default, logging of interwiki.py is enabled because its logfiles can # be used to generate so-called warnfiles. -# This setting can be overridden by the -log or -nolog command-line arguments. log = ['interwiki'] -logfilename = None # defaults to modulename.log +# filename defaults to modulename-bot.log +logfilename = None +# set to 1 (or higher) to generate "informative" messages to terminal verbose_output = 0 +# if True, include a lot of debugging info in logfile +# (overrides log setting above) debug_log = False
############## INTERWIKI SETTINGS ##############
Modified: branches/rewrite/pywikibot/data/api.py =================================================================== --- branches/rewrite/pywikibot/data/api.py 2008-12-17 16:11:20 UTC (rev 6158) +++ branches/rewrite/pywikibot/data/api.py 2008-12-17 19:57:39 UTC (rev 6159) @@ -195,7 +195,7 @@ rawdata = http.request(self.site, uri) except Exception, e: #TODO: what exceptions can occur here? logger.warning(traceback.format_exc()) - print uri, params + logger.warning("%s, %s", uri, params) self.wait() continue if not isinstance(rawdata, unicode): @@ -621,7 +621,7 @@ from pywikibot import Site logger.setLevel(pywikibot.logging.DEBUG) mysite = Site("en", "wikipedia") - print "starting test...." + pywikibot.output("starting test....") def _test(): import doctest doctest.testmod()
Modified: branches/rewrite/pywikibot/page.py =================================================================== --- branches/rewrite/pywikibot/page.py 2008-12-17 16:11:20 UTC (rev 6158) +++ branches/rewrite/pywikibot/page.py 2008-12-17 19:57:39 UTC (rev 6159) @@ -37,8 +37,8 @@
"""
- @deprecate_arg("insite", None) - @deprecate_arg("defaultNamespace", None) +# @deprecate_arg("insite", None) +# @deprecate_arg("defaultNamespace", None) def __init__(self, source, title=u"", ns=0): """Instantiate a Page object.
Modified: branches/rewrite/pywikibot/pagegenerators.py =================================================================== --- branches/rewrite/pywikibot/pagegenerators.py 2008-12-17 16:11:20 UTC (rev 6158) +++ branches/rewrite/pywikibot/pagegenerators.py 2008-12-17 19:57:39 UTC (rev 6159) @@ -328,92 +328,6 @@ return gen
-class ThreadedGenerator(threading.Thread): - """Look-ahead generator class. - - Runs a generator in a separate thread and queues the results; can - be called like a regular generator. - - Subclasses should override self.generator, _not_ self.run - - Important: the generator thread will stop itself if the generator's - internal queue is exhausted; but, if the calling program does not use - all the generated values, it must call the generator's stop() method to - stop the background thread. Example usage: - - >>> gen = ThreadedGenerator(target=foo) - >>> try: - ... for data in gen: - ... do_work(data) - ... finally: - ... gen.stop() - - """ #NOT CURRENTLY USED: Intended for future development - - def __init__(self, group=None, target=None, name="GeneratorThread", - args=(), kwargs=None, qsize=65536): - """Constructor. Takes same keyword arguments as threading.Thread. - - target must be a generator function (or other callable that returns - an iterable object). - - @param qsize: The size of the lookahead queue. The larger the qsize, - the more values will be computed in advance of use (which can eat - up memory and processor time). - @type qsize: int - - """ - if kwargs is None: - kwargs = {} - if target: - self.generator = target - if not hasattr(self, "generator"): - raise RuntimeError("No generator for ThreadedGenerator to run.") - self.args, self.kwargs = args, kwargs - threading.Thread.__init__(self, group=group, name=name) - self.queue = Queue.Queue(qsize) - self.finished = threading.Event() - - def __iter__(self): - """Iterate results from the queue.""" - if not self.isAlive() and not self.finished.isSet(): - self.start() - # if there is an item in the queue, yield it, otherwise wait - while not self.finished.isSet(): - try: - yield self.queue.get(True, 0.25) - except Queue.Empty: - pass - except KeyboardInterrupt: - self.stop() - - def stop(self): - """Stop the background thread.""" -## if not self.finished.isSet(): -## pywikibot.output("DEBUG: signalling %s to stop." % self) - self.finished.set() - - def run(self): - """Run the generator and store the results on the queue.""" - self.__gen = self.generator(*self.args, **self.kwargs) - for result in self.__gen: - while True: - if self.finished.isSet(): -## pywikibot.output("DEBUG: %s received stop signal." % self) - return - try: - self.queue.put_nowait(result) - except Queue.Full: - time.sleep(0.25) - continue - break - # wait for queue to be emptied, then kill the thread - while not self.finished.isSet() and not self.queue.empty(): - time.sleep(0.25) - self.stop() -## pywikibot.output("DEBUG: %s stopped because generator exhausted." % self) - - def AllpagesPageGenerator(start ='!', namespace=None, includeredirects=True, site=None): """ @@ -761,6 +675,8 @@ for page in site.search(query, number=number, namespaces = namespaces): yield page[0]
+# following classes just ported from version 1 without revision; not tested + class YahooSearchPageGenerator: ''' To use this generator, install pYsearch @@ -785,7 +701,8 @@ def __iter__(self): # restrict query to local site localQuery = '%s site:%s' % (self.query, self.site.hostname()) - base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address('')) + base = 'http://%s%s' % (self.site.hostname(), + self.site.nice_get_address('')) for url in self.queryYahoo(localQuery): if url[:len(base)] == base: title = url[len(base):] @@ -826,7 +743,8 @@ google.LICENSE_KEY = config.google_key offset = 0 estimatedTotalResultsCount = None - while not estimatedTotalResultsCount or offset < estimatedTotalResultsCount: + while not estimatedTotalResultsCount \ + or offset < estimatedTotalResultsCount: while (True): # Google often yields 502 errors. try: @@ -887,7 +805,8 @@ def __iter__(self): # restrict query to local site localQuery = '%s site:%s' % (self.query, self.site.hostname()) - base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address('')) + base = 'http://%s%s' % (self.site.hostname(), + self.site.nice_get_address('')) for url in self.queryGoogle(localQuery): if url[:len(base)] == base: title = url[len(base):]
Modified: branches/rewrite/pywikibot/scripts/touch.py =================================================================== --- branches/rewrite/pywikibot/scripts/touch.py 2008-12-17 16:11:20 UTC (rev 6158) +++ branches/rewrite/pywikibot/scripts/touch.py 2008-12-17 19:57:39 UTC (rev 6159) @@ -39,11 +39,12 @@ text = page.get(get_redirect = self.touch_redirects) page.save("Pywikibot touch script") except pywikibot.NoPage: - print "Page %s does not exist?!" % page.aslink() + pywikibot.output("Page %s does not exist?!" % page.aslink()) except pywikibot.IsRedirectPage: - print "Page %s is a redirect; skipping." % page.aslink() + pywikibot.output("Page %s is a redirect; skipping." + % page.aslink()) except pywikibot.LockedPage: - print "Page %s is locked?!" % page.aslink() + pywikibot.output("Page %s is locked?!" % page.aslink())
def main(*args):
Modified: branches/rewrite/pywikibot/throttle.py =================================================================== --- branches/rewrite/pywikibot/throttle.py 2008-12-17 16:11:20 UTC (rev 6158) +++ branches/rewrite/pywikibot/throttle.py 2008-12-17 19:57:39 UTC (rev 6159) @@ -39,7 +39,7 @@ multiplydelay=True, verbosedelay=False): self.lock = threading.RLock() self.mysite = str(site) - self.logfn = config.datafilepath('throttle.log') + self.ctrlfilename = config.datafilepath('throttle.ctrl') self.mindelay = mindelay if self.mindelay is None: self.mindelay = config.minthrottle @@ -73,7 +73,7 @@ count = 1 # open throttle.log try: - f = open(self.logfn, 'r') + f = open(self.ctrlfilename, 'r') except IOError: if not pid: pass @@ -110,7 +110,7 @@ processes.append({'pid': pid, 'time': self.checktime, 'site': mysite}) - f = open(self.logfn, 'w') + f = open(self.ctrlfilename, 'w') processes.sort(key=lambda p:(p['pid'], p['site'])) for p in processes: f.write("%(pid)s %(time)s %(site)s\n" % p) @@ -187,7 +187,7 @@ self.checktime = 0 processes = [] try: - f = open(self.logfn, 'r') + f = open(self.ctrlfilename, 'r') except IOError: return else: @@ -206,15 +206,14 @@ processes.append({'pid': this_pid, 'time': ptime, 'site': this_site}) - f = open(self.logfn, 'w') + f = open(self.ctrlfilename, 'w') processes.sort(key=lambda p:p['pid']) for p in processes: f.write("%(pid)s %(time)s %(site)s\n" % p) f.close()
def __call__(self, requestsize=1, write=False): - """ - Block the calling program if the throttle time has not expired. + """Block the calling program if the throttle time has not expired.
Parameter requestsize is the number of Pages to be read/written; multiply delay time by an appropriate factor. @@ -248,8 +247,7 @@ self.lock.release()
def lag(self, lagtime): - """ - Seize the throttle lock due to server lag. + """Seize the throttle lock due to server lag.
This will prevent any thread from accessing this site.