http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9053
Revision: 9053
Author:   xqt
Date:     2011-03-13 12:24:49 +0000 (Sun, 13 Mar 2011)

Log Message:
-----------
stripped trailing whitespace
Modified Paths:
--------------
    trunk/pywikipedia/commonsdelinker/checkusage.py
    trunk/pywikipedia/commonsdelinker/delinker.py
    trunk/pywikipedia/commonsdelinker/image_replacer.py
    trunk/pywikipedia/commonsdelinker/threadpool.py
Modified: trunk/pywikipedia/commonsdelinker/checkusage.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/checkusage.py	2011-03-13 12:19:20 UTC (rev 9052)
+++ trunk/pywikipedia/commonsdelinker/checkusage.py	2011-03-13 12:24:49 UTC (rev 9053)
@@ -1,32 +1,32 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 """
-This module provides a way for users of the Wikimedia toolserver to check the 
+This module provides a way for users of the Wikimedia toolserver to check the
 use of images from Commons on other Wikimedia wikis. It supports both running
-checkusage against the database and against the live wikis. It is very 
-efficient as it only creates one HTTP connection and one MySQL connection 
+checkusage against the database and against the live wikis. It is very
+efficient as it only creates one HTTP connection and one MySQL connection
 during its life time. It is not suitable for multithreading!
- 
+
 The CheckUsage class' constructor accept as parameters the maximum number of
 wikis that should be checked, an option to use it only live and the parameters
-to connect to the MySQL database. The top wikis in size will be checked. The 
+to connect to the MySQL database. The top wikis in size will be checked. The
 class provides multiple methods:
- 
+
 get_usage(image)
-This method will return a generator object that generates the usage of the 
+This method will return a generator object that generates the usage of the
 image, returned as the following tuple: (page_namespace, page_title,
 full_title). page_namespace is the numeric namespace, page_title the page
 title without namespace, full_title the page title including localized
 namespace.
- 
+
 get_usage_db(dbname, image), get_usage_live(domain, image)
 Those methods allow querying a specific wiki, respectively against the database
 and against the live wiki. They accept respectively the database name and the
-domain name. The return a generator which generates the same results as 
+domain name. The return a generator which generates the same results as
 get_usage().
- 
+
 get_usage_multi(images)
 Calls get_usage for each image and returns a dictionary with usages.
- 
+
 get_replag(dbname)
 Returns the time in seconds since the latest known edit of dbname.
 """
@@ -37,13 +37,13 @@
 #
 __version__ = '$Id$'
 #
- 
+
 import httplib, urlparse, socket, time
 from urllib import urlencode
 import simplejson
 import wikipedia, family
- 
+
 try:
     import MySQLdb
 except ImportError:
@@ -53,7 +53,7 @@
 except ImportError:
     pass
 __ver__ = '0.4c'
- 
+
 def strip_ns(title):
     title = title.replace(' ', '_')
     if title.find(':') != -1:
@@ -63,14 +63,14 @@
     if title.startswith('Image:'):
         return strip_ns(title)
     return title
- 
+
 def family(domain):
     if domain is None:
         raise RuntimeError('None is not a valid family')
- 
+
     wiki = domain.split('.')
     # Standard family
-    if wiki[1] in ('wikipedia', 'wiktionary', 'wikibooks', 
+    if wiki[1] in ('wikipedia', 'wiktionary', 'wikibooks',
             'wikiquote', 'wikisource', 'wikinews', 'wikiversity'):
         return wiki[0], wiki[1]
     # Family on own domain
@@ -92,7 +92,7 @@
         #self._conn.set_debuglevel(100)
         self._conn.connect()
 
-    def request(self, method, path, headers, data): 
+    def request(self, method, path, headers, data):
         if not headers: headers = {}
         if not data: data = ''
         headers['Connection'] = 'Keep-Alive'
@@ -143,28 +143,28 @@
             data = simplejson.load(res)
         finally:
             res.close()
- 
+
         if 'error' in data:
             if data['error']['code'] == u'internal_api_error_DBConnectionError':
                 return self.query_api(host, path, **kwargs)
-            raise wikipedia.Error(data['error']['code'], 
+            raise wikipedia.Error(data['error']['code'],
                 data['error']['info'])
- 
+
         return data
     def close(self):
         self._conn.close()
 
 class HTTPPool(list):
-    def __init__(self, retry_timeout = 10, max_retries = -1, 
+    def __init__(self, retry_timeout = 10, max_retries = -1,
             callback = lambda *args: None):
- 
+
         self.retry_timeout = retry_timeout
         self.max_retries = -1
         self.callback = callback
         self.current_retry = 0
- 
+
         list.__init__(self, ())
- 
+
     def query_api(self, host, path, **kwargs):
         conn = self.find_conn(host)
         while True:
@@ -180,7 +180,7 @@
             self.wait()
             conn = self.find_conn(host)
- 
+
     def find_conn(self, host):
         for conn in self:
             if host in conn.hosts:
@@ -199,37 +199,37 @@
         conn.hosts = []
         self.append(conn)
         return self
- 
+
     def wait(self):
         if self.current_retry > self.max_retries and self.max_retries != -1:
             raise RuntimeError('Maximum retries exceeded')
         if self.current_retry:
             self.callback(self)
-            time.sleep(self.current_retry * self.retry_timeout) 
+            time.sleep(self.current_retry * self.retry_timeout)
         self.current_retry += 1
- 
+
     def close(self):
         for conn in self:
            conn.close()
        del self[:]
- 
+
 class CheckUsage(object):
-    def __init__(self, limit = 100, 
+    def __init__(self, limit = 100,
        mysql_default_server = 3, mysql_host_prefix = 'sql-s', mysql_host_suffix = '',
-        mysql_kwargs = {}, no_db = False, use_autoconn = False, 
- 
-        http_retry_timeout = 30, http_max_retries = -1, 
+        mysql_kwargs = {}, no_db = False, use_autoconn = False,
+
+        http_retry_timeout = 30, http_max_retries = -1,
        http_callback = lambda *args: None,
- 
+
        mysql_retry_timeout = 60, mysql_max_retries = -1,
        mysql_callback = lambda *args: None):
- 
-        self.http = None 
+
+        self.http = None
        self.http_retry_timeout = http_retry_timeout
        self.http_max_retries = http_max_retries
        self.http_callback = http_callback
- 
+
        if no_db: return
        self.mysql_host_prefix = mysql_host_prefix
@@ -239,18 +239,18 @@
        self.mysql_retry_timeout = mysql_retry_timeout
        self.mysql_max_retries = mysql_max_retries
        self.mysql_callback = mysql_callback
- 
+
        self.connections = []
- 
+
        # Mapping database name -> mysql connection
        self.databases = {}
        # Mapping server id -> mysql connection
        self.servers = {}
        # Mapping database name -> (lang, family)
        self.sites = {}
- 
+
        self.domains = {}
- 
+
        self.unknown_families = []
        # Mapping family name -> family object
        self.known_families = {}
@@ -263,7 +263,7 @@
        for dbname, domain, server in cursor.fetchall():
            if server not in self.servers:
                self.servers[server] = self.connect_mysql(mysql_host_prefix + str(server) + mysql_host_suffix)
- 
+
            # FIXME: wikimediafoundation!
            # TODO: This is one big mess
            try:
@@ -275,7 +275,7 @@
            else:
                self.sites[dbname] = (lang, fam)
            self.databases[dbname] = self.servers[server]
- 
+
            self.domains[dbname] = domain
 
@@ -286,8 +286,8 @@
        if self.use_autoconn:
            database = mysql_autoconnection.connect(
                use_unicode = False, host = host,
-                retry_timeout = self.mysql_retry_timeout, 
-                max_retries = self.mysql_max_retries, 
+                retry_timeout = self.mysql_retry_timeout,
+                max_retries = self.mysql_max_retries,
                callback = self.mysql_callback,
                **self.mysql_kwargs)
        else:
@@ -298,7 +298,7 @@
        return database, cursor
    def connect_http(self):
        if not self.http:
-            self.http = HTTPPool(retry_timeout = self.http_retry_timeout, 
+            self.http = HTTPPool(retry_timeout = self.http_retry_timeout,
                max_retries = self.http_max_retries,
                callback = self.http_callback)
 
    def get_usage(self, image):
@@ -311,14 +311,14 @@
        #image = strip_image(image)
        lang, family_name = self.sites[dbname]
        family = self.known_families[family_name]
- 
+
        if family.shared_image_repository(lang) != (lang, family_name) and shared:
            left_join = 'LEFT JOIN %s.image ON (il_to = img_name) WHERE img_name IS NULL AND' % dbname
        else:
            left_join = 'WHERE';
        query = """SELECT page_namespace, page_title FROM %s.page, %s.imagelinks
            %s page_id = il_from AND il_to = %%s"""
-        self.databases[dbname][1].execute(query % (dbname, dbname, left_join), 
+        self.databases[dbname][1].execute(query % (dbname, dbname, left_join),
            (image.encode('utf-8', 'ignore'), ))
        for page_namespace, page_title in self.databases[dbname][1]:
            stripped_title = page_title.decode('utf-8', 'ignore')
@@ -330,32 +330,32 @@
    def get_usage_live(self, site, image, shared = False):
        self.connect_http()
- 
+
        if type(site) is str:
            hostname = site
            apipath = '/w/api.php'
        else:
            hostname = site.hostname()
            apipath = site.apipath()
- 
+
        # FIXME: Use continue
        kwargs = {'action': 'query', 'iutitle': u'Image:' + image,
            'titles': u'Image:' + image, 'prop': 'info'}
        kwargs['list'] = 'imageusage'
        kwargs['iulimit'] = '500'
- 
+
        res = self.http.query_api(hostname, apipath, **kwargs)
        if '-1' not in res['query']['pages'] and shared:
            return
- 
+
        usages = res['query'].get('imageusage')
        if not usages: return
- 
+
        # Apparently this someday changed from dict to list?
        if type(usages) is dict:
            usages = usages.values()
- 
+
        for usage in usages:
            title = usage['title'].replace(' ', '_')
            namespace = usage['ns']
@@ -365,7 +365,7 @@
                stripped_title = title
            yield namespace, stripped_title, title
- 
+
    def exists(self, site, image):
        self.connect_http()
        # Check whether the image still is deleted on Commons.
@@ -375,8 +375,8 @@
        # BUG: This is ugly.
        return '-1' not in self.http.query_api(site.hostname(), site.apipath(),
            action = 'query', titles = 'Image:' + image)['query']['pages']
- 
- 
+
+
    def close(self):
        if getattr(self, 'http'):
            self.http.close()
@@ -384,7 +384,6 @@
        for connection, cursor in self.databases.itervalues():
            try:
                connection.close()
-            except: 
+            except:
                pass
- 
\ No newline at end of file
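For orientation, the checkusage API documented in the module docstring above can be driven roughly as follows. This is an illustrative sketch, not part of the commit; the hostname and image title are made-up examples, and a toolserver run would additionally pass real MySQL credentials via mysql_kwargs instead of no_db:

    import checkusage

    # no_db = True queries the live wikis over HTTP and skips MySQL entirely.
    cu = checkusage.CheckUsage(limit = 100, no_db = True)
    # get_usage_live() accepts a plain hostname string and yields
    # (namespace, stripped_title, full_title) tuples, per the code above.
    for namespace, stripped_title, full_title in cu.get_usage_live(
            'commons.wikimedia.org', u'Example.jpg'):
        print namespace, full_title
    cu.close()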
Modified: trunk/pywikipedia/commonsdelinker/delinker.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/delinker.py	2011-03-13 12:19:20 UTC (rev 9052)
+++ trunk/pywikipedia/commonsdelinker/delinker.py	2011-03-13 12:24:49 UTC (rev 9053)
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 """
-This script keeps track of image deletions and delinks removed files 
+This script keeps track of image deletions and delinks removed files
 from (any) wiki. Usage on protected pages or pages containing blacklisted
 external links cannot be processed.
 
@@ -15,7 +15,7 @@
 Please refer to delinker.txt for full documentation.
 """
 #
-# 
+#
 # (C) Kyle/Orgullomoore, 2006-2007
 # (C) Siebrand Mazeland, 2006-2007
 # (C) Bryan Tong Minh, 2007-2008
@@ -55,7 +55,7 @@
        output(u'%s Connection has been lost in %s. Attempting reconnection.' % (threading.currentThread(), repr(object)), False)
    if hasattr(object, 'error'):
        output(u'Error was %s: %s' % tuple(object.error))
- 
+
 def universal_unicode(s):
    if type(s) is str:
        return s.decode('utf-8', 'ignore')
@@ -75,11 +75,11 @@
        # the standard MySQL character set.
        kwargs['use_unicode'] = False
        kwargs['callback'] = wait_callback
- 
+
        return mysql_autoconnection.connect(**kwargs)
    # TODO: Add support for sqlite3
    raise RuntimeError('Unsupported database engine %s' % engine)
- 
+
 class ImmutableByReference(object):
    def __init__(self, data):
        self.data = data
@@ -100,30 +100,30 @@
        threadpool.Thread.__init__(self, pool)
        self.CommonsDelinker = CommonsDelinker
        self.sql_layout = self.CommonsDelinker.config.get('sql_layout', 'new')
- 
+
    def delink_image(self, image, usage, timestamp, admin, reason, replacement = None):
        """ Performs the delink for image on usage. """
        output(u'%s Usage of %s: %s' % (self, image, usage))
        if self.CommonsDelinker.exec_hook('before_delink',
                (image, usage, timestamp, admin, reason, replacement)) is False:
            return
- 
+
        skipped_images = {}
        for (lang, family), pages in usage.iteritems():
            site = self.CommonsDelinker.get_site(lang, family)
            if not site:
                output(u'%s Warning! Unknown site %s:%s' % (self, family, lang))
                continue
- 
+
            try:
                summary = self.get_summary(site, image, admin, reason, replacement)
- 
+
                for page_namespace, page_title, title in pages:
                    if (site.lang, site.family.name) == (self.CommonsDelinker.site.lang,
                            self.CommonsDelinker.site.family.name) and \
                            (page_namespace, page_title) == (6, image):
                        continue
- 
+
                    if self.CommonsDelinker.set_edit(str(site), title):
                        # The page is currently being editted. Postpone.
                        if (lang, family) not in skipped_images:
@@ -133,7 +133,7 @@
                    else:
                        # Delink the image
                        output(u'%s Delinking %s from %s' % (self, image, site))
- 
+
                        try:
                            try:
                                result = self.replace_image(image, site, title, summary, replacement)
@@ -147,14 +147,14 @@
                                    (page_namespace, page_title, title))
                        finally:
                            self.CommonsDelinker.unset_edit(str(site), title)
- 
+
                        # Add to logging queue
                        if self.sql_layout == 'new':
-                            self.CommonsDelinker.Loggers.append((timestamp, image, 
+                            self.CommonsDelinker.Loggers.append((timestamp, image,
                                site.lang, site.family.name, page_namespace, page_title, result, replacement))
                        else:
-                            self.CommonsDelinker.Loggers.append((timestamp, image, site.hostname(), 
+                            self.CommonsDelinker.Loggers.append((timestamp, image, site.hostname(),
                                page_namespace, page_title, result, replacement))
            finally:
                self.CommonsDelinker.unlock_site(site)
@@ -168,14 +168,14 @@
        elif replacement:
            # Let them know that we are done replacing.
            self.CommonsDelinker.Loggers.append((timestamp, image, replacement))
- 
+
    def replace_image(self, image, site, page_title, summary, replacement = None):
        """ The actual replacement. Giving None as argument for replacement
        will delink instead of replace."""
- 
+
        page = wikipedia.Page(site, page_title)
        hook = None
- 
+
        # TODO: Per site config.
        if page.namespace() in self.CommonsDelinker.config['delink_namespaces']:
            try:
@@ -183,25 +183,25 @@
            except wikipedia.NoPage:
                return 'failed'
            new_text = text
- 
+
            m_image = ImmutableByReference(image)
            m_replacement = ImmutableByReference(replacement)
-            self.CommonsDelinker.exec_hook('before_replace', 
+            self.CommonsDelinker.exec_hook('before_replace',
                (page, summary, m_image, m_replacement))
            image = m_image.get()
            replacement = m_replacement.get()
- 
+
            def create_regex(s):
                first, other = re.escape(s[0]), re.escape(s[1:])
                return ur'(?:[%s%s]%s)' % (first.upper(), first.lower(), other)
            def create_regex_i(s):
                return ur'(?:%s)' % u''.join([u'[%s%s]' % (c.upper(), c.lower()) for c in s])
- 
+
            namespaces = site.namespace(6, all = True) + site.namespace(-2, all = True)
            r_namespace = ur'\s*(?:%s)\s*:\s*' % u'|'.join(map(create_regex_i, namespaces))
            # Note that this regex creates a group!
            r_image = u'(%s)' % create_regex(image).replace(r'_', '[ _]')
- 
+
            def simple_replacer(match):
                m_replacement = ImmutableByReference(replacement)
                groups = list(match.groups())
@@ -209,21 +209,21 @@
                if False is self.CommonsDelinker.exec_hook('%s_replace' % hook,
                        (page, summary, image, m_replacement, match, groups)):
                    return u''.join(groups)
- 
+
                if m_replacement.get() is None:
                    return u''
                else:
                    groups[1] = m_replacement.get()
                    return u''.join(groups)
- 
-            # Previously links in image descriptions will cause 
+
+            # Previously links in image descriptions will cause
            # unexpected behaviour: [[Image:image.jpg|thumb|[[link]] in description]]
            # will truncate at the first occurence of ]]. This cannot be
            # fixed using one regular expression.
            # This means that all ]] after the start of the image
            # must be located. If it then does not have an associated
            # [[, this one is the closure of the image.
- 
+
            r_simple_s = u'(\[\[%s)%s' % (r_namespace, r_image)
            r_s = '\[\['
            r_e = '\]\]'
@@ -231,25 +231,25 @@
            image_starts = [match.start() for match in re.finditer(r_simple_s, text)]
            link_starts = [match.start() for match in re.finditer(r_s, text)]
            link_ends = [match.end() for match in re.finditer(r_e, text)]
- 
+
            r_simple = u'(\[\[%s)%s(.*)' % (r_namespace, r_image)
            hook = 'simple'
            replacements = []
            for image_start in image_starts:
-                current_link_starts = [link_start for link_start in link_starts 
+                current_link_starts = [link_start for link_start in link_starts
                    if link_start > image_start]
-                current_link_ends = [link_end for link_end in link_ends 
+                current_link_ends = [link_end for link_end in link_ends
                    if link_end > image_start]
                end = image_start
                if current_link_ends: end = current_link_ends[0]
- 
+
                while current_link_starts and current_link_ends:
                    start = current_link_starts.pop(0)
                    end = current_link_ends.pop(0)
                    if end <= start and end > image_start:
                        # Found the end of the image
                        break
- 
+
                # Check whether this image is the first one on the line
                if image_start == 0:
                    prev = ''
@@ -262,38 +262,38 @@
                        end += 1
                    else:
                        break
- 
+
                # Add the replacement to the todo list. Doing the
                # replacement right know would alter the indices.
                replacements.append((new_text[image_start:end],
-                    re.sub(r_simple, simple_replacer, 
+                    re.sub(r_simple, simple_replacer,
                    new_text[image_start:end])))
- 
+
            # Perform the replacements
            for old, new in replacements:
                if old: new_text = new_text.replace(old, new)
- 
+
            # Remove the image from galleries
            hook = 'gallery'
-            r_galleries = ur'(?s)(<%s>)(.*?)(</%s>)' % (create_regex_i('gallery'), 
+            r_galleries = ur'(?s)(<%s>)(.*?)(</%s>)' % (create_regex_i('gallery'),
                create_regex_i('gallery'))
            r_gallery = ur'(?m)^((?:%s)?)%s(\s*(?:\|.*?)?\s*$)' % (r_namespace, r_image)
            def gallery_replacer(match):
-                return ur'%s%s%s' % (match.group(1), re.sub(r_gallery, 
+                return ur'%s%s%s' % (match.group(1), re.sub(r_gallery,
                    simple_replacer, match.group(2)), match.group(3))
            new_text = re.sub(r_galleries, gallery_replacer, new_text)
- 
+
            if text == new_text or self.CommonsDelinker.config.get('force_complex', False):
                # All previous steps did not work, so the image is
                # likely embedded in a complicated template.
                hook = 'complex'
                r_templates = ur'(?s)({{.*?}})'
                r_complicated = u'(?s)(?<=[|{=])[\s\u200E\uFEFF\u200B\u200C]*((?:%s)?)%s[\u200E\uFEFF\u200B\u200C]*' % (r_namespace, r_image)
- 
+
                def template_replacer(match):
                    return re.sub(r_complicated, simple_replacer, match.group(1))
                new_text = re.sub(r_templates, template_replacer, text)
- 
+
            if text != new_text:
                # Save to the wiki
                # Code for checking user page existance has been moved
@@ -304,7 +304,7 @@
                if False is self.CommonsDelinker.exec_hook('before_save',
                        (page, text, new_text, m_summary)):
                    return 'skipped'
- 
+
                is_retry = False
                while True:
                    try:
@@ -330,18 +330,18 @@
                else:
                    return 'skipped'
        return 'skipped'
- 
- 
- 
+
+
+
    def do(self, args):
        try:
            self.delink_image(*args)
        except:
            output(u'An exception occured in %s' % self, False)
            traceback.print_exc(file = sys.stderr)
- 
+
    def get_summary(self, site, image, admin, reason, replacement):
-        """ Get the summary template and substitute the 
+        """ Get the summary template and substitute the
        correct values."""
        # FIXME: Hardcode is EVIL, but now only the global bot uses this
        if (site.lang != 'commons' and self.CommonsDelinker.config['global']):
@@ -350,7 +350,7 @@
            tlp = self.CommonsDelinker.SummaryCache.get(site, 'replace-I18n')
        else:
            tlp = self.CommonsDelinker.SummaryCache.get(site, 'summary-I18n')
- 
+
        tlp = tlp.replace('$1', image)
        if replacement:
            tlp = tlp.replace('$2', replacement)
@@ -359,23 +359,23 @@
        else:
            tlp = tlp.replace('$2', unicode(admin))
        tlp = tlp.replace('$3', unicode(reason))
- 
+
        return tlp
- 
+
 class SummaryCache(object):
    """ Object to thread-safe cache summary templates. """
    def __init__(self, CommonsDelinker):
        self.summaries = {}
        self.lock = threading.Lock()
        self.CommonsDelinker = CommonsDelinker
- 
+
    def get(self, site, type, key = None, default = None):
-        # This can probably also provide something for 
-        # localised settings, but then it first needs to 
+        # This can probably also provide something for
+        # localised settings, but then it first needs to
        # check whether the page is sysop only.
        if not key:
            key = str(site)
- 
+
        self.lock.acquire()
        try:
            if type not in self.summaries:
@@ -385,9 +385,9 @@
                    self.CommonsDelinker.config['summary_cache']:
                # Return cached result
                return self.summaries[type][key][0]
- 
+
            output(u'%s Fetching new summary for %s' % (self, site))
- 
+
            # FIXME: evil
            if self.CommonsDelinker.config['global']:
                self.check_user_page(site)
@@ -402,25 +402,25 @@
            pass
        finally:
            self.lock.release()
- 
+
        # No i18n available, but it may be available in the wikipedia
        # of that language. Only do so for wiktionary, wikibooks,
        # wikiquote, wikisource, wikinews, wikiversity
        # This will cause the bot to function even on special wikis
        # like mediawiki.org and meta and species.
        output(u'%s Using default summary for %s' % (self, site))
- 
+
        if default:
            return default
- 
+
        if site.family.name != 'wikipedia' and self.CommonsDelinker.config['global']:
-            if site.family.name in ('wiktionary', 'wikibooks', 'wikiquote', 
+            if site.family.name in ('wiktionary', 'wikibooks', 'wikiquote',
                    'wikisource', 'wikinews', 'wikiversity'):
                if site.lang in config.usernames['wikipedia']:
-                    newsite = self.CommonsDelinker.get_site(site.lang, 
+                    newsite = self.CommonsDelinker.get_site(site.lang,
                        wikipedia.Family('wikipedia'))
                    return self.get(newsite, type, key = key)
        return self.CommonsDelinker.config['default_settings'].get(type, '')
- 
+
    def check_user_page(self, site):
        "Check whether a userpage exists. Only used for CommonsDelinker."
        try:
@@ -435,24 +435,24 @@
            ftxt = f.read()
            f.close()
            if not '#' + str(site) in ftxt:
-                username = config.usernames[site.family.name][site.lang] 
- 
+                username = config.usernames[site.family.name][site.lang]
+
                userpage = wikipedia.Page(site, 'User:' + username)
-                # Removed check for page existence. If it is not in our 
+                # Removed check for page existence. If it is not in our
                # database we can safely assume that we have no user page
                # there. In case there is, we will just overwrite it once.
-                # It causes no real problems, but it is one call to the 
+                # It causes no real problems, but it is one call to the
                # servers less.
                # TODO: Config setting?
                userpage.put('#REDIRECT [[m:User:CommonsDelinker]]', '')
- 
+
                f = open(filename, 'a')
                f.write('#' + str(site))
                f.close()
        except wikipedia.LockedPage:
            # User page is protected, continue anyway
-            pass 
- 
+            pass
+
 class CheckUsage(threadpool.Thread):
    timeout = 120
    def __init__(self, pool, CommonsDelinker):
@@ -460,14 +460,14 @@
        self.CommonsDelinker = CommonsDelinker
        # Not really thread safe, but we should only do read operations...
        self.site = CommonsDelinker.site
- 
+
    def run(self):
        try:
            self.connect()
        except:
            return self.exit()
        threadpool.Thread.run(self)
- 
+
    def connect(self):
        output(u'%s Connecting to databases' % self)
        config = self.CommonsDelinker.config
@@ -475,22 +475,22 @@
            # Note: global use requires MySQL
            self.CheckUsage = checkusage.CheckUsage(limit = sys.maxint,
                mysql_kwargs = config['sql_config'],
-                use_autoconn = True, 
+                use_autoconn = True,
                http_callback = wait_callback,
                mysql_callback = wait_callback,
                mysql_host_suffix = '-fast')
        else:
            self.CheckUsage = checkusage.CheckUsage(sys.maxint,
                http_callback = wait_callback, no_db = True)
- 
- 
+
+
    def check_usage(self, image, timestamp, admin, reason, replacement):
        """ Check whether this image needs to be delinked. """
- 
+
        # Check whether the image still is deleted on Commons.
        # BUG: This also returns true for images with a page, but
        # without the image itself. Can be fixed by querying query.php
-        # instead of api.php. Also should this be made as an exits() 
+        # instead of api.php. Also should this be made as an exits()
        # method of checkusage.CheckUsage?
        if self.site.shared_image_repository() != (None, None):
            shared_image_repository = self.CommonsDelinker.get_site(*self.site.shared_image_repository())
@@ -505,12 +505,12 @@
                not bool(replacement):
            output(u'%s %s exists again!' % (self, image))
            return
- 
- 
+
+
        if self.CommonsDelinker.config['global']:
            usage = self.CheckUsage.get_usage(image)
            usage_domains = {}
- 
+
            count = 0
            # Sort usage per domain
            for (lang, family), (page_namespace, page_title, title) in usage:
@@ -520,21 +520,21 @@
                count += 1
        else:
            #FIX!
-            usage_domains = {(self.site.lang, self.site.family.name): 
-                list(self.CheckUsage.get_usage_live(self.site, 
+            usage_domains = {(self.site.lang, self.site.family.name):
+                list(self.CheckUsage.get_usage_live(self.site,
                image))}
            count = len(usage_domains[(self.site.lang, self.site.family.name)])
- 
+
        output(u'%s %s used on %s pages' % (self, image, count))
- 
+
        if count:
            # Pass the usage to the Delinker pool along with other arguments
-            self.CommonsDelinker.Delinkers.append((image, usage_domains, 
+            self.CommonsDelinker.Delinkers.append((image, usage_domains,
                timestamp, admin, reason, replacement))
        elif replacement:
            # Record replacement done
            self.CommonsDelinker.Loggers.append((timestamp, image, replacement))
- 
+
    def do(self, args):
        try:
            self.check_usage(*args)
@@ -544,12 +544,12 @@
            traceback.print_exc(file = sys.stderr)
            self.exit()
            self.CommonsDelinker.thread_died()
- 
+
    def starve(self):
        self.pool.jobLock.acquire()
        try:
            if self.pool[id(self)].isSet(): return False
- 
+
            output(u'%s Starving' % self)
            self.CheckUsage.close()
            del self.pool[id(self)]
@@ -557,66 +557,66 @@
            return True
        finally:
            self.pool.jobLock.release()
- 
+
 class Logger(threadpool.Thread):
    timeout = 360
- 
+
    def __init__(self, pool, CommonsDelinker):
        threadpool.Thread.__init__(self, pool)
        self.CommonsDelinker = CommonsDelinker
        self.sql_layout = self.CommonsDelinker.config.get('sql_layout', 'new')
        self.enabled = self.CommonsDelinker.config.get('enable_logging', True)
- 
+
    def run(self):
        self.connect()
        threadpool.Thread.run(self)
- 
+
    def connect(self):
        output(u'%s Connecting to log database' % self)
        self.database = connect_database()
        self.cursor = self.database.cursor()
- 
- 
+
+
    def log_result_legacy(self, timestamp, image, domain, namespace, page,
            status = "ok", newimage = None):
        # TODO: Make sqlite3 ready
- 
+
        # The original delinker code cached log results,
        # in order to limit the number of connections.
        # However, since we are now using persistent
        # connections, we can safely insert the result
        # on the fly.
        output(u'%s Logging %s for %s on %s' % (self, repr(status), image, page))
- 
+
        # There is no need to escape each parameter if
-        # a parametrized call is made. 
+        # a parametrized call is made.
        self.cursor.execute("""INSERT INTO %s (timestamp, img, wiki, page_title,
            namespace, status, newimg) VALUES
            (%%s, %%s, %%s, %%s, %%s, %%s, %%s)""" % self.CommonsDelinker.config['log_table'],
            (timestamp, image, domain, page, namespace, status, newimage))
        self.database.commit()
- 
-    def log_result_new(self, timestamp, image, site_lang, site_family, 
+
+    def log_result_new(self, timestamp, image, site_lang, site_family,
            page_namespace, page_title, status = 'ok', new_image = None):
- 
+
        output(u'%s Logging %s for %s on %s' % (self, repr(status), image, page_title))
        self.cursor.execute("""INSERT INTO %s (timestamp, image, site_lang,
            site_family, page_namespace, page_title, status, new_image) VALUES
            (%%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s)""" % self.CommonsDelinker.config['log_table'],
-            (timestamp, image, site_lang, site_family, page_namespace, page_title, 
+            (timestamp, image, site_lang, site_family, page_namespace, page_title,
            status, new_image))
        self.database.commit()
- 
+
    def log_replacement(self, timestamp, old_image, new_image):
        # TODO: Same as above
- 
+
        output(u'Replacing %s by %s done' % (old_image, new_image))
-        self.cursor.execute("""UPDATE %s SET status = 'done' WHERE 
-            timestamp = %%s AND old_image = %%s AND 
+        self.cursor.execute("""UPDATE %s SET status = 'done' WHERE
+            timestamp = %%s AND old_image = %%s AND
            new_image = %%s""" % self.CommonsDelinker.config['replacer_table'],
            (timestamp, old_image, new_image))
        self.database.commit()
- 
+
    def do(self, args):
        if not self.enabled: return
        try:
@@ -633,12 +633,12 @@
            traceback.print_exc(file = sys.stderr)
            self.exit()
            self.CommonsDelinker.thread_died()
- 
+
    def starve(self):
        self.pool.jobLock.acquire()
        try:
            if self.pool[id(self)].isSet(): return False
- 
+
            output(u'%s Starving' % self)
            self.database.close()
            del self.pool[id(self)]
@@ -653,7 +653,7 @@
        self.config = config.CommonsDelinker
        self.site = wikipedia.getSite()
        self.site.forceLogin()
- 
+
        # Initialize workers
        self.CheckUsages = threadpool.ThreadPool(CheckUsage, self.config['checkusage_instances'], self)
        self.Delinkers = threadpool.ThreadPool(Delinker, self.config['delinker_instances'], self)
@@ -661,34 +661,34 @@
            self.Loggers = threadpool.ThreadPool(Logger, self.config['logger_instances'], self)
        else:
            self.Loggers = threadpool.ThreadPool(Logger, 1, self)
- 
+
        self.http = checkusage.HTTP(self.site.hostname())
- 
+
        self.edit_list = []
        self.editLock = threading.Lock()
- 
+
        self.sites = {}
        self.siteLock = threading.Lock()
- 
+
        self.SummaryCache = SummaryCache(self)
- 
+
        if self.config.get('enable_replacer', False):
            self.connect_mysql()
- 
+
        if self.config.get('no_sysop', False):
            # Don't edit as sysop
            if hasattr(config, 'sysopnames'):
                config.sysopnames = dict([(fam, {}) for fam in config.sysopnames.keys()])
- 
+
        self.last_check = time.time()
- 
+
        #if 'bot' in self.site.userGroups:
        #    self.log_limit = '5000'
        #else:
        #    self.log_limit = '500'
        self.log_limit = '500'
        self.init_plugins()
- 
+
    def init_plugins(self, do_reload = False):
        import plugins
        self.hooks = {}
@@ -705,7 +705,7 @@
                    self.hooks[plugin.hook].append(plugin)
                    output(u"%s Loaded plugin %s for hook '%s'" % \
                        (self, plugin, plugin.hook))
- 
+
    def exec_hook(self, name, args):
        # TODO: Threadsafety!
        if name in self.hooks:
@@ -729,16 +729,16 @@
                    self.hooks[name].remove(plugin)
        finally:
            self.siteLock.release()
- 
+
    def reload_plugins(signalnum, stack):
        pass
- 
+
    def connect_mysql(self):
        self.database = connect_database()
        self.cursor = self.database.cursor()
- 
+
    def set_edit(self, domain, page):
-        """ Make sure the bot does not create edit 
+        """ Make sure the bot does not create edit
        conflicts with itself."""
        self.editLock.acquire()
        being_editted = (domain, page) in self.edit_list
@@ -751,9 +751,9 @@
        self.editLock.acquire()
        self.edit_list.remove((domain, page))
        self.editLock.release()
- 
+
    def get_site(self, code, fam):
-        # Threadsafe replacement of wikipedia.getSite 
+        # Threadsafe replacement of wikipedia.getSite
        key = '%s:%s' % (code, fam)
        self.siteLock.acquire()
        try:
@@ -779,34 +779,34 @@
                self.sites[key][self.sites[key].index((site, True))] = (site, False)
        finally:
            self.siteLock.release()
- 
- 
+
+
    def read_deletion_log(self):
        ts_format = '%Y-%m-%dT%H:%M:%SZ'
        wait = self.config['delink_wait']
        exclusion = self.config['exclude_string']
- 
+
        ts_from = self.last_check
        # Truncate -> int()
        ts_end = int(time.time())
        self.last_check = ts_end
- 
+
        # Format as a Mediawiki timestamp and substract a
        # certain wait period.
        ts_from_s = time.strftime(ts_format, time.gmtime(ts_from - wait + 1))
        ts_end_s = time.strftime(ts_format, time.gmtime(ts_end - wait))
- 
+
        try:
            # Assume less than 500 deletion have been made between
-            # this and the previous check of the log. If this is not 
+            # this and the previous check of the log. If this is not
            # the case, timeout should be set lower.
            result = self.http.query_api(self.site.hostname(), self.site.apipath(),
-                action = 'query', list = 'logevents', letype = 'delete', 
-                lelimit = self.log_limit, lestart = ts_from_s, leend = ts_end_s, 
+                action = 'query', list = 'logevents', letype = 'delete',
+                lelimit = self.log_limit, lestart = ts_from_s, leend = ts_end_s,
                ledir = 'newer')
            logevents = result['query']['logevents']
        except Exception, e:
-            if type(e) in (SystemError, KeyboardInterrupt): raise 
+            if type(e) in (SystemError, KeyboardInterrupt): raise
            # Something happened, but since it is a network error,
            # it will not be critical. In order to prevent data loss
            # the last_check timestamp has to be set correctly.
@@ -814,7 +814,7 @@
            output('Warning! Unable to read deletion logs', False)
            output('%s: %s' % (e.__class__.__name__, str(e)), False)
            return time.sleep(self.config['timeout'])
- 
+
        for logevent in logevents:
            if logevent['ns'] == 6 and logevent['action'] == 'delete':
                if exclusion not in logevent.get('comment', ''):
@@ -823,14 +823,14 @@
                    timestamp = timestamp.replace(':', '')
                    timestamp = timestamp.replace('T', '')
                    timestamp = timestamp.replace('Z', '')
- 
+
                    output(u'Deleted image: %s' % logevent['title'])
                    self.CheckUsages.append((checkusage.strip_ns(logevent['title']),
                        timestamp, logevent['user'],
                        logevent.get('comment', ''), None))
                else:
                    output(u'Skipping deleted image: %s' % logevent['title'])
- 
+
    def read_replacement_log(self):
        # TODO: Make sqlite3 ready
        # TODO: Single process replacer
@@ -845,22 +845,22 @@
                self.CheckUsages.append((old_image, timestamp, user, comment, new_image))
                output(u'Replacing %s by %s' % (old_image, new_image))
                self.cursor.execute(update, ('ok', id))
- 
+
        self.database.commit()
- 
+
    def start(self):
        # Gracefully exit all threads on SIG_INT or SIG_TERM
        threadpool.catch_signals()
- 
+
        # Start threads
        self.Loggers.start()
        self.Delinkers.start()
        self.CheckUsages.start()
- 
+
        # Give threads some time to initialize
        time.sleep(self.config['timeout'])
        output(u'All workers started')
- 
+
        # Main loop
        while True:
            if self.config.get('enable_delinker', True):
@@ -871,17 +871,17 @@
                self.read_deletion_log()
            if self.config.get('enable_replacer', False):
                self.read_replacement_log()
- 
+
            time.sleep(self.config['timeout'])
- 
+
    def thread_died(self):
        # Obsolete
        return
- 
+
    @staticmethod
    def output(*args):
        return output(*args)
- 
+
 def output(message, toStdout = True):
    message = time.strftime('[%Y-%m-%d %H:%M:%S] ') + message
    wikipedia.output(message, toStdout = toStdout)
@@ -895,16 +895,16 @@
    output(u'Running ' + __version__)
    CD = CommonsDelinker()
    output(u'This bot runs from: ' + str(CD.site))
- 
+
    re._MAXCACHE = 4
- 
+
    args = wikipedia.handleArgs()
    if '-since' in args:
        # NOTE: Untested
        ts_format = '%Y-%m-%d %H:%M:%S'
        try:
            since = time.strptime(
-                args[args.index('-since') + 1], 
+                args[args.index('-since') + 1],
                ts_format)
        except ValueError:
            if args[args.index('-since') + 1][0] == '[' and \
@@ -917,7 +917,7 @@
        output(u'Reading deletion log since [%s]' %\
            time.strftime(ts_format, since))
        CD.last_check = time.mktime(since)
- 
+
    try:
        try:
            CD.start()
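The heart of the delinker above is the regex machinery in replace_image(): the first letter of the image name matches case-insensitively, spaces and underscores are treated as interchangeable, and separate passes handle inline links, galleries and templates. A simplified standalone sketch of the inline-link case follows; the image title and sample text are made up, and the real code additionally resolves nested [[...]] links before substituting:

    # -*- coding: utf-8 -*-
    import re

    def create_regex(s):
        # First character matches either case, the rest is literal.
        first, other = re.escape(s[0]), re.escape(s[1:])
        return ur'(?:[%s%s]%s)' % (first.upper(), first.lower(), other)

    image = u'Example_image.jpg'
    # Spaces and underscores in titles are equivalent on MediaWiki.
    r_image = create_regex(image).replace('_', '[ _]')
    r_link = ur'\[\[\s*(?:[Ii]mage|[Ff]ile)\s*:\s*%s.*?\]\]' % r_image

    text = u'Caption: [[Image:example image.jpg|thumb|demo]] remains.'
    print re.sub(r_link, u'', text)  # prints: Caption:  remains.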
Modified: trunk/pywikipedia/commonsdelinker/image_replacer.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/image_replacer.py	2011-03-13 12:19:20 UTC (rev 9052)
+++ trunk/pywikipedia/commonsdelinker/image_replacer.py	2011-03-13 12:24:49 UTC (rev 9053)
@@ -4,7 +4,7 @@
 Please refer to delinker.txt for full documentation.
 """
 #
-# 
+#
 # (C) Bryan Tong Minh, 2007
 #
 # Distributed under the terms of the MIT license.
@@ -43,44 +43,44 @@
        self.config.update(getattr(config, 'Replacer', ()))
        self.template = re.compile(r'{{%s|([^|]*?)|([^|]*?)(?:(?:|reason=(.*?))?)}}' % \
            self.config['replace_template'])
-        self.disallowed_replacements = [(re.compile(i[0], re.I), re.compile(i[1], re.I)) 
+        self.disallowed_replacements = [(re.compile(i[0], re.I), re.compile(i[1], re.I))
            for i in self.config.get('disallowed_replacements', ())]
- 
+
        self.site = wikipedia.getSite(persistent_http = True)
        self.site.forceLogin()
- 
+
        self.database = connect_database()
        self.cursor = self.database.cursor()
- 
+
        self.first_revision = 0
        if self.config.get('replacer_report_replacements', False):
            self.reporters = threadpool.ThreadPool(Reporter, 1, self.site, self.config)
            self.reporters.start()
- 
- 
+
+
    def read_replace_log(self):
        """ The actual worker method """
- 
+
        # FIXME: Make sqlite3 compatible
-        insert = """INSERT INTO %s (timestamp, old_image, new_image, 
+        insert = """INSERT INTO %s (timestamp, old_image, new_image,
            status, user, comment) VALUES
            (%%s, %%s, %%s, 'pending', %%s, %%s)""" % self.config['replacer_table']
- 
+
        page = wikipedia.Page(self.site, self.config['command_page'])
- 
+
        # Get last revision date
-        if self.cursor.execute("""SELECT timestamp FROM %s 
+        if self.cursor.execute("""SELECT timestamp FROM %s
                ORDER BY timestamp DESC LIMIT 1""" % \
                self.config['replacer_table']):
            since = mw_timestamp(self.cursor.fetchone()[0])
        else:
            since = None
- 
+
        if self.config.get('clean_list', False):
            username = config.sysopnames[self.site.family.name][self.site.lang]
        else:
            username = None
- 
+
        try:
            # Fetch revision history
            revisions = self.get_history(page.title(), since, username)
@@ -95,18 +95,18 @@
            #self.site.conn.close()
            #self.site.conn.connect()
            return time.sleep(self.config['timeout'])
- 
+
        # We're being killed
        if '{{stop}}' in text.lower():
            output(u'Found {{stop}} on command page. Not replacing anything.')
            return time.sleep(self.config['timeout'])
- 
+
        # Sort oldest first
        revisions.sort(key = lambda rev: rev['timestamp'])
- 
+
        # Find all commands
        replacements = self.template.finditer(text)
- 
+
        remove_from_list = []
        count = 0
        for replacement in replacements:
@@ -122,10 +122,10 @@
                remove_from_list.append(replacement.group(0))
                output('Replacing %s by %s: %s' % replacement.groups())
                count += 1
- 
+
        # Save all replaces to database
        self.database.commit()
- 
+
        if remove_from_list and self.config.get('clean_list', False):
            # Cleanup the command page
            while True:
@@ -144,10 +144,10 @@
            except wikipedia.EditConflict:
                # Try again
                text = page.get()
- 
+
    def get_history(self, title, since, username):
        """ Fetch the last 50 revisions using the API """
- 
+
        address = self.site.api_address()
        predata = [
            ('action', 'query'),
@@ -170,10 +170,10 @@
        if 'missing' in page:
            raise Exception('Missing page!')
        return page.get('revisions', [])
- 
+
    def examine_revision_history(self, revisions, replacement, username):
        """ Find out who is to blame for a replacement """
- 
+
        for revision in revisions:
            if replacement.group(0) in revision['*']:
                db_time = db_timestamp(revision['timestamp'])
@@ -182,52 +182,52 @@
                return (db_time, strip_image(replacement.group(1)),
                    strip_image(replacement.group(2)),
                    revision['user'], replacement.group(3))
- 
+
        output('Warning! Could not find out who did %s' % \
            repr(replacement.group(0)), False)
        return
- 
+
    def read_finished_replacements(self):
-        """ Find out which replacements have been completed and add them to 
+        """ Find out which replacements have been completed and add them to
        the reporters queue. """
- 
+
        self.cursor.execute('START TRANSACTION WITH CONSISTENT SNAPSHOT')
        self.cursor.execute("""SELECT old_image, new_image, user, comment
            FROM %s WHERE status = 'done' AND timestamp >= %i""" % \
            (self.config['replacer_table'], self.first_revision))
        finished_images = list(self.cursor)
-        self.cursor.execute("""UPDATE %s SET status = 'reported' 
+        self.cursor.execute("""UPDATE %s SET status = 'reported'
            WHERE status = 'done' AND timestamp >= %i""" % \
            (self.config['replacer_table'], self.first_revision))
        self.cursor.commit()
- 
+
        for old_image, new_image, user, comment in finished_images:
-            self.cursor.execute("""SELECT wiki, namespace, page_title 
-                FROM %s WHERE img = %%s AND status <> 'ok'""" % 
+            self.cursor.execute("""SELECT wiki, namespace, page_title
+                FROM %s WHERE img = %%s AND status <> 'ok'""" %
                self.config['log_table'], (old_image, ))
            not_ok = [(wiki, namespace, page_title.decode('utf-8', 'ignore'))
                for wiki, namespace, page_title in self.cursor]
- 
+
            if not comment: comment = ''
- 
+
            self.reporters.append((old_image.decode('utf-8', 'ignore'),
-                new_image.decode('utf-8', 'ignore'), 
-                user.decode('utf-8', 'ignore'), 
+                new_image.decode('utf-8', 'ignore'),
+                user.decode('utf-8', 'ignore'),
                comment.decode('utf-8', 'ignore'), not_ok))
- 
- 
+
+
    def start(self):
        while True:
            self.read_replace_log()
            if self.config.get('replacer_report_replacements', False):
                self.read_finished_replacements()
- 
+
            # Replacer should not loop as often as delinker
            time.sleep(self.config['timeout'] * 2)
- 
+
    def allowed_replacement(self, replacement):
        """ Method to prevent World War III """
- 
+
        for source, target in self.disallowed_replacements:
            if source.search(replacement.group(1)) and \
                    target.search(replacement.group(2)):
@@ -236,14 +236,14 @@
 class Reporter(threadpool.Thread):
    """ Asynchronous worker to report finished replacements to
    file pages. """
- 
+
    def __init__(self, pool, site, config):
        self.site = wikipedia.getSite(site.lang, site.family, site.user, True)
        self.config = config
- 
+
        threadpool.Thread.__init__(self, pool)
- 
+
    def do(self, args):
        try:
            self.report(args)
@@ -254,7 +254,7 @@
            sys.stderr.flush()
            self.exit()
            os.kill(0, signal.SIGTERM)
- 
+
    def report(self, (old_image, new_image, user, comment, not_ok)):
        not_ok_items = []
        for wiki, namespace, page_title in not_ok:
@@ -265,7 +265,7 @@
                namespace_name = namespace_name + u':'
            else:
                namespace_name = u''
- 
+
            if unicode(site) == unicode(self.site):
                if (namespace, page_title) != (6, old_image):
                    not_ok_items.append(u'[[:%s%s]]' % \
@@ -273,13 +273,13 @@
            else:
                not_ok_items.append(u'[[:%s:%s%s]]' %
                    (site_prefix(site), namespace_name, page_title))
- 
+
        template = u'{{%s|new_image=%s|user=%s|comment=%s|not_ok=%s}}' % \
            (self.config['replacer_report_template'],
-            new_image, user, comment, 
+            new_image, user, comment,
            self.config.get('replacer_report_seperator', u', ').join(not_ok_items))
        page = wikipedia.Page(self.site, u'Image:' + old_image)
- 
+
        try:
            text = page.get()
        except wikipedia.NoPage:
@@ -289,7 +289,7 @@
            output(u'Warning! %s is a redirect; not reporting replacement!' % old_image)
            return
        try:
-            page.put(u'%s\n%s' % (template, text), 
+            page.put(u'%s\n%s' % (template, text),
                comment = u'This image has been replaced by ' + new_image)
        except wikipedia.PageNotSaved, e:
            output(u'Warning! Unable to report replacement to %s.' % old_image, False)
@@ -301,11 +301,11 @@
        else:
            output(u'Reporting replacement of %s by %s.' % \
                (old_image, new_image))
- 
+
 def main():
    global R
- 
+
    import sys, traceback
    wikipedia.handleArgs()
    output(u'Running ' + __version__)
@@ -327,5 +327,5 @@
        except: pass
        wikipedia.stopme()
- 
+
 if __name__ == '__main__':
    main()
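The replacer above watches a command page for template invocations of the form {{<replace_template>|old image|new image|reason=...}} (see self.template in __init__). A toy version of that command parsing, using a made-up template name in place of config['replace_template'] and a cleaned-up variant of the pattern:

    # -*- coding: utf-8 -*-
    import re

    # 'universal replace' is a hypothetical name; the real one is configured.
    template = re.compile(r'\{\{universal replace\|([^|]*?)\|([^|]*?)'
        r'(?:\|reason=(.*?))?\}\}')

    text = u'{{universal replace|Old.jpg|New.jpg|reason=duplicate file}}'
    for match in template.finditer(text):
        old_image, new_image, reason = match.groups()
        print '%s -> %s (%s)' % (old_image, new_image, reason)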
Modified: trunk/pywikipedia/commonsdelinker/threadpool.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/threadpool.py	2011-03-13 12:19:20 UTC (rev 9052)
+++ trunk/pywikipedia/commonsdelinker/threadpool.py	2011-03-13 12:24:49 UTC (rev 9053)
@@ -1,19 +1,19 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 """
-This module implements a threadpool which allows scripts that require 
+This module implements a threadpool which allows scripts that require
 performing concurrent jobs, an efficient and thread safe way to do this.
- 
-The two classes available are ThreadPool and Thread. ThreadPool is the 
+
+The two classes available are ThreadPool and Thread. ThreadPool is the
 controller class and contains a collection of Thread objects, which must be
 subclassed. Any thread can add a job to the ThreadPool by calling its append()
 method. The pool will add this task to the jobqueue and activate a sleeping
 thread, if available. In case no thread is directly available, the job will
 be handled by the first free thread.
- 
+
 The Thread class must be subclassed and passed to the ThreadPool's constructor.
-The subclass should implement a do(args) method, which will receive as its 
+The subclass should implement a do(args) method, which will receive as its
 argument the job. Please note that providing mutable variables to the jobqueue
 may cause thread unsafety!
 """
@@ -24,9 +24,9 @@
 #
 __version__ = '$Id$'
 #
- 
+
 import sys, threading, os
- 
+
 class ThreadPool(dict):
    pools = []
    def __init__(self, worker, max_threads, *args, **kwargs):
@@ -36,7 +36,7 @@
        self.jobQueue = []
        self.worker = worker
        self.threads = []
- 
+
        self.max_threads = max_threads
        self.args = args
        self.kwargs = kwargs
@@ -50,7 +50,7 @@
            self.jobQueue.append(job)
            # The amount of workers needed to be unlocked
            unlock_workers = len(self.jobQueue)
- 
+
            for event in self.itervalues():
                if not event.isSet():
                    event.set()
@@ -62,7 +62,7 @@
            if counter == 0 and len(self.threads) < self.max_threads:
                self.add_thread()
                self.start()
- 
+
    def add_thread(self):
        self.jobLock.acquire()
        try:
@@ -71,7 +71,7 @@
                self[id(thread)] = threading.Event()
        finally:
            self.jobLock.release()
- 
+
    def start(self):
        for thread in self.threads:
            if not thread.isAlive():
@@ -92,23 +92,23 @@
        threading.Thread.__init__(self)
        self.pool = pool
        self.quit = False
- 
+
    def run(self):
        while True:
            # No try..finally: lock.release() here:
            # The lock might be released twice, in case
-            # the thread waits for an event, a race 
+            # the thread waits for an event, a race
            # condition might occur where a lock is released
-            # that is acquired by another thread. 
+            # that is acquired by another thread.
            self.pool.jobLock.acquire()
- 
+
            if self.quit and not self.pool.jobQueue:
                # Only return once the jobQueue is empty.
                self.pool.jobLock.release()
                return
- 
+
            if not self.pool.jobQueue:
-                # In case no job is available, wait for the pool 
+                # In case no job is available, wait for the pool
                # to call and do not start a busy while loop.
                event = self.pool[id(self)]
                self.pool.jobLock.release()
@@ -119,9 +119,9 @@
                continue
            job = self.pool.jobQueue.pop(0)
            self.pool.jobLock.release()
- 
+
            self.do(job)
- 
+
    def exit(self):
        self.pool.jobLock.acquire()
        try:
@@ -133,7 +133,7 @@
                self.pool.threads.remove(self)
        finally:
            self.pool.jobLock.release()
- 
+
    def starve(self):
        pass
@@ -146,12 +146,12 @@
    import signal
    for pool in ThreadPool.pools:
        pool.exit()
- 
+
    if signalnum == signal.SIGINT:
        raise KeyboardInterrupt
    if signalnum == signal.SIGTERM:
        raise SystemExit
- 
+
 def terminate():
    # Maybe not a good idea, will also kill child processes
    import signal
@@ -160,24 +160,24 @@
 if __name__ == '__main__':
    import time
    # Test cases
- 
+
    class Worker(Thread):
        def do(self, args):
            print 'Working', self
            time.sleep(10)
            print 'Done', self
- 
+
    pool = ThreadPool(Worker)
    print 'Spawning 5 threads'
    [pool.add_thread() for i in xrange(5)]
    pool.start()
- 
+
    print 'Doing 25 jobs'
    for i in xrange(25):
        print 'Job', i
        pool.append(i)
        time.sleep(i % 6)
- 
+
    for thread in pool.threads:
        thread.exit()
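In use, the contract described in the threadpool docstring (subclass Thread, implement do(job), hand the class to ThreadPool, feed jobs with append()) looks roughly like the sketch below. Note, as an aside, that the in-file test case visible above still calls ThreadPool(Worker) without the max_threads argument the constructor now takes, so it would need something like ThreadPool(Worker, 5) to run:

    import time
    from threadpool import ThreadPool, Thread

    class Printer(Thread):
        def do(self, job):
            # do() receives one job object previously passed to pool.append()
            print 'processing', job
            time.sleep(1)

    pool = ThreadPool(Printer, 4)  # spawn at most 4 worker threads
    pool.start()
    for i in xrange(10):
        pool.append(i)  # wakes a sleeping worker or spawns a new one

    # Ask the workers to drain the queue and exit; copy the list because
    # exit() may remove a thread from pool.threads while we iterate.
    for thread in list(pool.threads):
        thread.exit()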