Revision: 5946
Author: russblau
Date: 2008-10-10 12:44:44 +0000 (Fri, 10 Oct 2008)
Log Message:
-----------
Get rid of unused threading cruft, and put all logs on one page instead of littering the wiki with daily logs
Modified Paths:
--------------
trunk/pywikipedia/category_redirect.py
Modified: trunk/pywikipedia/category_redirect.py
===================================================================
--- trunk/pywikipedia/category_redirect.py 2008-10-10 11:16:07 UTC (rev 5945)
+++ trunk/pywikipedia/category_redirect.py 2008-10-10 12:44:44 UTC (rev 5946)
@@ -13,7 +13,6 @@
import math
import re
import sys, traceback
-import threading, Queue
import time
from datetime import datetime, timedelta
@@ -29,55 +28,11 @@
return "%(code)s: %(info)s" % self.errors
-class ThreadList(list):
- """A simple threadpool class to limit the number of simultaneous threads.
-
- Any threading.Thread object can be added to the pool using the append()
- method. If the maximum number of simultaneous threads has not been
- reached, the Thread object will be started immediately; if not, the
- append() call will block until the thread is able to start.
-
- >>> pool = ThreadList(limit=10)
- >>> def work():
- ... time.sleep(1)
- ...
- >>> for x in xrange(20):
- ... pool.append(threading.Thread(target=work))
- ...
-
- """
- def __init__(self, limit=sys.maxint, *args):
- self.limit = limit
- list.__init__(self, *args)
- for item in list(self):
- if not isinstance(threading.Thread, item):
- raise TypeError("Cannot add '%s' to ThreadList" % type(item))
-
- def active_count(self):
- """Return the number of alive threads, and delete all non-alive ones."""
- count = 0
- for item in list(self):
- if item.isAlive():
- count += 1
- else:
- self.remove(item)
- return count
-
- def append(self, thd):
- if not isinstance(thd, threading.Thread):
- raise TypeError("Cannot append '%s' to ThreadList" % type(thd))
- while self.active_count() >= self.limit:
- time.sleep(2)
- list.append(self, thd)
- thd.start()
-
-
class CategoryRedirectBot(object):
def __init__(self):
self.cooldown = 7 # days
self.site = wikipedia.getSite()
self.catprefix = self.site.namespace(14)+":"
- self.result_queue = Queue.Queue()
self.log_text = []
self.edit_requests = []
@@ -270,15 +225,13 @@
if found:
wikipedia.output(u"%s: %s found, %s moved"
% (oldCat.title(), found, moved))
- self.result_queue.put((oldCatTitle, found, moved))
- return
+ return (found, moved)
except wikipedia.ServerError:
wikipedia.output(u"Server error: retrying in 5 seconds...")
time.sleep(5)
continue
except:
- self.result_queue.put((oldCatTitle, None, None))
- raise
+ return (None, None)
def readyToEdit(self, cat):
"""Return True if cat not edited during cooldown period, else False."""
@@ -364,8 +317,7 @@
l = time.localtime()
today = "%04d-%02d-%02d" % l[:3]
log_page = wikipedia.Page(self.site,
- u"User:%(user)s/category redirect logs/%(today)s"
- % locals())
+ u"User:%(user)s/category redirect log" % locals())
problem_page = wikipedia.Page(self.site,
u"User:%(user)s/category redirect problems" % locals())
edit_request_page = wikipedia.Page(self.site,
@@ -579,7 +531,7 @@
wikipedia.output(u"Moving pages out of %s redirected categories."
% len(cats_to_empty))
# thread_limit = int(math.log(len(cats_to_empty), 8) + 1)
- threadpool = ThreadList(limit=1) # disabling multi-threads
+# threadpool = ThreadList(limit=1) # disabling multi-threads
for cat in cats_to_empty:
cat_title = cat.titleWithoutNamespace()
@@ -589,33 +541,32 @@
u"* Skipping %s; in cooldown period."
% cat.aslink(textlink=True))
continue
- threadpool.append(
- threading.Thread(target=self.move_contents,
- args=(cat_title, catmap[cat]),
- kwargs=dict(editSummary=comment)))
- while len(counts) < len(cats_to_empty):
- title, found, moved = self.result_queue.get()
+ found, moved = self.move_contents(cat_title, catmap[cat],
+ editSummary=comment)
if found is None:
self.log_text.append(
- u"* [[:%s%s]]: error in move_contents thread"
- % (self.catprefix, title))
- else:
- if found:
- self.log_text.append(
- u"* [[:%s%s]]: %d found, %d moved"
- % (self.catprefix, title, found, moved))
- counts[title] = found
- record[title][today] = found
+ u"* [[:%s%s]]: error in move_contents"
+ % (self.catprefix, cat_title))
+ elif found:
+ record[cat_title][today] = found
+ self.log_text.append(
+ u"* [[:%s%s]]: %d found, %d moved"
+ % (self.catprefix, cat_title, found, moved))
+ counts[cat_title] = found
cPickle.dump(record, open(datafile, "wb"))
wikipedia.setAction(wikipedia.translate(self.site.lang,
self.maint_comment))
- log_page.put("\n".join(self.log_text))
+ try:
+ log_text = log_page.get()
+ except wikipedia.NoPage:
+ log_text = u""
+ log_page.put(log_text + u"\n==~~~~~==\n"+ u"\n".join(self.log_text))
problem_page.put("\n".join(problems))
if self.edit_requests:
edit_request_page.put(self.edit_request_text
- % "\n".join((self.edit_request_item % item)
+ % u"\n".join((self.edit_request_item % item)
for item in self.edit_requests))