Revision: 5946
Author: russblau
Date: 2008-10-10 12:44:44 +0000 (Fri, 10 Oct 2008)

Log Message:
-----------
Get rid of unused threading cruft, and put all logs on one page instead of littering wiki with daily logs

Modified Paths:
--------------
trunk/pywikipedia/category_redirect.py
Modified: trunk/pywikipedia/category_redirect.py =================================================================== --- trunk/pywikipedia/category_redirect.py 2008-10-10 11:16:07 UTC (rev 5945) +++ trunk/pywikipedia/category_redirect.py 2008-10-10 12:44:44 UTC (rev 5946) @@ -13,7 +13,6 @@ import math import re import sys, traceback -import threading, Queue import time from datetime import datetime, timedelta
@@ -29,55 +28,11 @@ return "%(code)s: %(info)s" % self.errors
-class ThreadList(list): - """A simple threadpool class to limit the number of simultaneous threads. - - Any threading.Thread object can be added to the pool using the append() - method. If the maximum number of simultaneous threads has not been - reached, the Thread object will be started immediately; if not, the - append() call will block until the thread is able to start. - - >>> pool = ThreadList(limit=10) - >>> def work(): - ... time.sleep(1) - ... - >>> for x in xrange(20): - ... pool.append(threading.Thread(target=work)) - ... - - """ - def __init__(self, limit=sys.maxint, *args): - self.limit = limit - list.__init__(self, *args) - for item in list(self): - if not isinstance(threading.Thread, item): - raise TypeError("Cannot add '%s' to ThreadList" % type(item)) - - def active_count(self): - """Return the number of alive threads, and delete all non-alive ones.""" - count = 0 - for item in list(self): - if item.isAlive(): - count += 1 - else: - self.remove(item) - return count - - def append(self, thd): - if not isinstance(thd, threading.Thread): - raise TypeError("Cannot append '%s' to ThreadList" % type(thd)) - while self.active_count() >= self.limit: - time.sleep(2) - list.append(self, thd) - thd.start() - - class CategoryRedirectBot(object): def __init__(self): self.cooldown = 7 # days self.site = wikipedia.getSite() self.catprefix = self.site.namespace(14)+":" - self.result_queue = Queue.Queue() self.log_text = [] self.edit_requests = []
@@ -270,15 +225,13 @@ if found: wikipedia.output(u"%s: %s found, %s moved" % (oldCat.title(), found, moved)) - self.result_queue.put((oldCatTitle, found, moved)) - return + return (found, moved) except wikipedia.ServerError: wikipedia.output(u"Server error: retrying in 5 seconds...") time.sleep(5) continue except: - self.result_queue.put((oldCatTitle, None, None)) - raise + return (None, None)
def readyToEdit(self, cat): """Return True if cat not edited during cooldown period, else False.""" @@ -364,8 +317,7 @@ l = time.localtime() today = "%04d-%02d-%02d" % l[:3] log_page = wikipedia.Page(self.site, - u"User:%(user)s/category redirect logs/%(today)s" - % locals()) + u"User:%(user)s/category redirect log" % locals()) problem_page = wikipedia.Page(self.site, u"User:%(user)s/category redirect problems" % locals()) edit_request_page = wikipedia.Page(self.site, @@ -579,7 +531,7 @@ wikipedia.output(u"Moving pages out of %s redirected categories." % len(cats_to_empty)) # thread_limit = int(math.log(len(cats_to_empty), 8) + 1) - threadpool = ThreadList(limit=1) # disabling multi-threads +# threadpool = ThreadList(limit=1) # disabling multi-threads
for cat in cats_to_empty: cat_title = cat.titleWithoutNamespace() @@ -589,33 +541,32 @@ u"* Skipping %s; in cooldown period." % cat.aslink(textlink=True)) continue - threadpool.append( - threading.Thread(target=self.move_contents, - args=(cat_title, catmap[cat]), - kwargs=dict(editSummary=comment))) - while len(counts) < len(cats_to_empty): - title, found, moved = self.result_queue.get() + found, moved = self.move_contents(cat_title, catmap[cat], + editSummary=comment) if found is None: self.log_text.append( - u"* [[:%s%s]]: error in move_contents thread" - % (self.catprefix, title)) - else: - if found: - self.log_text.append( - u"* [[:%s%s]]: %d found, %d moved" - % (self.catprefix, title, found, moved)) - counts[title] = found - record[title][today] = found + u"* [[:%s%s]]: error in move_contents" + % (self.catprefix, cat_title)) + elif found: + record[cat_title][today] = found + self.log_text.append( + u"* [[:%s%s]]: %d found, %d moved" + % (self.catprefix, cat_title, found, moved)) + counts[cat_title] = found
cPickle.dump(record, open(datafile, "wb"))
wikipedia.setAction(wikipedia.translate(self.site.lang, self.maint_comment)) - log_page.put("\n".join(self.log_text)) + try: + log_text = log_page.get() + except wikipedia.NoPage: + log_text = u"" + log_page.put(log_text + u"\n==~~~~~==\n"+ u"\n".join(self.log_text)) problem_page.put("\n".join(problems)) if self.edit_requests: edit_request_page.put(self.edit_request_text - % "\n".join((self.edit_request_item % item) + % u"\n".join((self.edit_request_item % item) for item in self.edit_requests))