Revision: 4039
Author: btongminh
Date: 2007-08-13 21:05:24 +0000 (Mon, 13 Aug 2007)
Log Message:
-----------
Cosmetic fixes; Documentation; New reporting feature.
Modified Paths:
--------------
trunk/pywikipedia/delinker.py
trunk/pywikipedia/delinker.txt
trunk/pywikipedia/image_replacer.py
Modified: trunk/pywikipedia/delinker.py
===================================================================
--- trunk/pywikipedia/delinker.py 2007-08-13 19:50:49 UTC (rev 4038)
+++ trunk/pywikipedia/delinker.py 2007-08-13 21:05:24 UTC (rev 4039)
@@ -317,7 +317,7 @@
self.summaries[type] = {}
if domain in self.summaries[type]:
if (time.time() - self.summaries[type][domain][1]) < \
- self.CommonsDelinker.config['summary_cache']:
+ self.CommonsDelinker.config['summary_cache']:
# Return cached result
return self.summaries[type][domain][0]
Modified: trunk/pywikipedia/delinker.txt
===================================================================
--- trunk/pywikipedia/delinker.txt 2007-08-13 19:50:49 UTC (rev 4038)
+++ trunk/pywikipedia/delinker.txt 2007-08-13 21:05:24 UTC (rev 4039)
@@ -106,6 +106,13 @@
actually has edit permissions to the list.
* ''disallowed_replacements = [(r'\.png$', r'\.svg$')]'':
List of regular expressions
of refused replacements.
+
+==== Reporting replacements ====
+The replacer can insert a report on replaced images.
+* ''replacer_report_replacements = False'': Set to True to enable
reporting.
+* ''replacer_report_template = universally replaced'': The template to
insert. The
+ template will be called with the following parameters: ''new_image, user,
comment,
+ not_ok''.
=== SQL settings ===
* ''sql_engine = "mysql"'': Database engine to use. Currently
supported:
@@ -132,6 +139,7 @@
status ENUM('ok', 'skipped', 'failed'),
newimg VARBINARY(255)
);
+
CREATE TABLE replacer (
id INT NOT NULL AUTO_INCREMENT,
timestamp VARBINARY(14),
@@ -146,7 +154,7 @@
);
</code>
-==== Edit and debugging settings ====
+=== Edit and debugging settings ===
* ''save_diff = False'': Save all changes to a diff. Create a directory
diff/
before running.
* ''edit = True'': Actually edit to the wiki.
Modified: trunk/pywikipedia/image_replacer.py
===================================================================
--- trunk/pywikipedia/image_replacer.py 2007-08-13 19:50:49 UTC (rev 4038)
+++ trunk/pywikipedia/image_replacer.py 2007-08-13 21:05:24 UTC (rev 4039)
@@ -12,8 +12,9 @@
__version__ = '$Id$'
import config, wikipedia
import re, time
+import threadpool
-from delinker import wait_callback, output, connect_database
+from delinker import wait_callback, output, connect_database, family
def mw_timestamp(ts):
return '%s%s%s%s-%s%s-%s%sT%s%s:%s%s:%s%sZ' % tuple(ts)
@@ -27,6 +28,14 @@
img = img[0].upper() + img[1:]
return img.strip()
+def site_prefix(site):
+ if site.lang == site.family.name:
+ return site.lang
+ # TODO: fix
+ #if (site.lang, site.family.name) == ('-', 'wikisource'):
+ # return 'wikisource'
+ return '%s:%s' % (site.family.name, site.lang)
+
class Replacer(object):
def __init__(self):
self.config = config.CommonsDelinker
@@ -41,6 +50,12 @@
self.database = connect_database()
self.cursor = self.database.cursor()
+ self.first_revision = 0
+ if self.config.get('replacer_report_replacements', False):
+ self.reporters = threadpool.ThreadPool(self.reporter)
+ self.reporters.add_thread(self.site, self.config)
+
+
def read_replace_log(self):
# FIXME: Make sqlite3 compatible
insert = """INSERT INTO %s (timestamp, old_image, new_image,
@@ -97,18 +112,44 @@
for timestamp, user, text in revisions[1:]:
if replacement.group(0) in text and user != username:
- return (db_timestamp(timestamp),
- strip_image(replacement.group(1)),
+ db_time = db_timestamp(timestamp)
+ if db_time < self.first_revision or not revision:
+ self.first_revision = db_time
+ return (db_time, strip_image(replacement.group(1)),
strip_image(replacement.group(2)),
user, replacement.group(3))
output('Warning! Could not find out who did %s' % \
repr(replacement.group(0)), False)
return
+
+ def read_finished_replacements(self):
+ self.cursor.execute('START TRANSACTION WITH CONSISTENT SNAPSHOT')
+ self.cursor.execute("""SELECT old_image, new_image, user, comment FROM
+ %s WHERE status = 'done' AND timestamp >= %i""" % \
+ (self.config['replacer_table'], self.first_revision))
+ finished_images = list(self.cursor)
+ self.cursor.execute("""UPDATE %s SET status = 'reported'
+ WHERE status = 'done' AND timestamp >= %i""" % \
+ (self.config['replacer_table'], self.first_revision))
+ self.cursor.commit()
+
+ for old_image, new_image, user, comment in finished_images:
+ self.cursor.execute("""SELECT wiki, namespace, page_title
+ FROM %s WHERE img = %%s AND status <> 'ok'""" %
+ self.config['delinker_table'], (old_image, ))
+ not_ok = list(self.cursor)
+ self.reporters.append((old_image, new_image, user,
+ comment, not_ok))
+
+
def start(self):
while True:
self.read_replace_log()
+ if self.config.get('replacer_report_replacements', False):
+ self.read_finished_replacements()
+
# Replacer should not loop as often as delinker
time.sleep(self.config['timeout'] * 2)
@@ -119,6 +160,36 @@
return False
return True
+class Reporter(threadpool.Thread):
+ def __init__(self, site, config):
+ self.site = site
+ self.config = config
+
+ threadpool.Thread.__init__(self)
+ def do(self, (old_image, new_image, user, comment, not_ok)):
+ not_ok_items = []
+ for wiki, namespace, page_title in not_ok:
+ site = family(wiki)
+ if unicode(site) == unicode(self.site):
+ title = u'%s:%s' % (site.namespace(namespace), page_title)
+ else:
+ title = u'%s:%s:%s' % (site_prefix(site),
+ site.namespace(namespace), page_title)
+ not_ok_items.append(title)
+
+ page = wikipedia.Page(self.site, u'Image:' + old_image)
+ text = page.get()
+ template = u'{{%s|new_image=%s|user=%s|comment=%s|not_ok=%}}' % \
+ (self.config['replacer_report_template'],
+ new_image, user, comment,
+ self.config.get('replacer_report_seperator', u', ').join(not_ok))
+ page.put(u'%s\n%s' % (template, text),
+ comment = u'This image has been replaced by ' + new_image)
+
+ output(u'Reporting replacement of %s by %s to %s' % \
+ (old_image, new_image))
+
+
if __name__ == '__main__':
import sys, cgitb
try: