Revision: 4120
Author: btongminh
Date: 2007-08-27 10:08:12 +0000 (Mon, 27 Aug 2007)
Log Message:
-----------
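Fixed all bugs related to local usage. NOTE: The database layout of the log table has changed. If you are upgrading and want to keep using your existing (old-layout) table, you need to set CommonsDelinker['sql_layout'] = 'legacy' in user-config.py; otherwise the new layout is assumed.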
Modified Paths:
--------------
trunk/pywikipedia/checkusage.py
trunk/pywikipedia/delinker.py
trunk/pywikipedia/delinker.txt
Modified: trunk/pywikipedia/checkusage.py
===================================================================
--- trunk/pywikipedia/checkusage.py 2007-08-26 21:49:19 UTC (rev 4119)
+++ trunk/pywikipedia/checkusage.py 2007-08-27 10:08:12 UTC (rev 4120)
@@ -300,18 +300,18 @@
def get_usage(self, image):
for dbname in self.databases:
- for link in self.get_usage_db(dbname, image):
+ for link in self.get_usage_db(dbname, image, True):
yield self.sites[dbname], link
- def get_usage_db(self, dbname, image):
+ def get_usage_db(self, dbname, image, shared = False):
#image = strip_image(image)
lang, family_name = self.sites[dbname]
family = self.known_families[family_name]
- if family.shared_image_repository(lang) == (lang, family_name):
+ if family.shared_image_repository(lang) != (lang, family_name) and shared:
+ left_join = 'LEFT JOIN %s.image ON (il_to = img_name) WHERE img_name IS NULL AND' % dbname
+ else:
left_join = 'WHERE';
- else:
- left_join = 'LEFT JOIN %s.image ON (il_to = img_name) WHERE img_name IS NULL AND' % dbname
query = """SELECT page_namespace, page_title FROM %s.page, %s.imagelinks
%s page_id = il_from AND il_to = %%s"""
self.databases[dbname][1].execute(query % (dbname, dbname, left_join),
@@ -324,24 +324,39 @@
title = stripped_title
yield page_namespace, stripped_title, title
- def get_usage_live(self, site, image):
+ def get_usage_live(self, site, image, shared = False):
self.connect_http()
- #image = strip_image(image)
- # BUG: This is ugly.
+ # FIXME: Use continue
+ kwargs = {'action': 'query', 'titles': u'Image:' + image,
+ 'prop': 'info'}
+ if site.live_version()[:2] > (1, 10):
+ kwargs['list'] = 'imageusage'
+ kwargs['iulimit'] = '500'
+ else:
+ kwargs['list'] = 'imagelinks'
+ kwargs['illimit'] = '500'
+
res = self.http.query_api(site.hostname(), site.apipath(),
- action = 'query', list = 'imageusage',
- prop = 'info', iulimit = '500', titles = 'Image:' + image)
- if '-1' in res['query']['pages']:
- for usage in res['query'].get('imageusage', ()):
- title = usage['title'].replace(' ', '_')
- namespace = usage['ns']
- if namespace != 0:
- stripped_title = strip_ns(title)
- else:
- stripped_title = title
- yield namespace, stripped_title, title
+ **kwargs)
+ if '-1' not in res['query']['pages'] and shared:
+ return
+
+ if site.live_version()[:2] > (1, 10):
+ usages = res['query'].get('imageusage', ())
+ else:
+ usages = res['query'].get('imagelinks', {}).itervalues()
+
+ for usage in usages:
+ title = usage['title'].replace(' ', '_')
+ namespace = usage['ns']
+ if namespace != 0:
+ stripped_title = strip_ns(title)
+ else:
+ stripped_title = title
+ yield namespace, stripped_title, title
+
def exists(self, site, image):
self.connect_http()
Modified: trunk/pywikipedia/delinker.py
===================================================================
--- trunk/pywikipedia/delinker.py 2007-08-26 21:49:19 UTC (rev 4119)
+++ trunk/pywikipedia/delinker.py 2007-08-27 10:08:12 UTC (rev 4120)
@@ -77,7 +77,7 @@
def __init__(self, pool, CommonsDelinker):
threadpool.Thread.__init__(self, pool)
self.CommonsDelinker = CommonsDelinker
- self.summaries = {}
+ self.sql_layout = self.CommonsDelinker.config.get('sql_layout', 'new')
def delink_image(self, image, usage, timestamp, admin, reason, replacement = None):
""" Performs the delink for image on usage. """
@@ -105,9 +105,15 @@
result = self.replace_image(image, site, title, summary, replacement)
finally:
self.CommonsDelinker.unset_edit(str(site), title)
+
# Add to logging queue
- self.CommonsDelinker.Loggers.append((timestamp, image, site.hostname(),
- page_namespace, page_title, result, replacement))
+ if self.sql_layout == 'new':
+ self.CommonsDelinker.Loggers.append((timestamp, image,
+ site.lang, site.family.name, page_namespace, page_title,
+ result, replacement))
+ else:
+ self.CommonsDelinker.Loggers.append((timestamp, image, site.hostname(),
+ page_namespace, page_title, result, replacement))
finally:
self.CommonsDelinker.unlock_site(site)
@@ -426,10 +432,10 @@
count += 1
else:
#FIX!
- usage_domains = {self.site.hostname(): list(
- self.CheckUsage.get_usage_live(self.site.hostname(),
- image))}
- count = len(usage_domains[self.site.hostname()])
+ usage_domains = {(self.site.lang, self.site.family.name):
+ list(self.CheckUsage.get_usage_live(self.site,
+ image))}
+ count = len(usage_domains[(self.site.lang, self.site.family.name)])
output(u'%s %s used on %s pages' % (self, image, count))
@@ -455,6 +461,7 @@
def __init__(self, pool, CommonsDelinker):
threadpool.Thread.__init__(self, pool)
self.CommonsDelinker = CommonsDelinker
+ self.sql_layout = self.CommonsDelinker.config.get('sql_layout', 'new')
def run(self):
self.connect()
@@ -465,9 +472,8 @@
self.cursor = self.database.cursor()
- def log_result(self, timestamp, image, domain, namespace, page, status = "ok", newimage = None):
+ def log_result_legacy(self, timestamp, image, domain, namespace, page, status = "ok", newimage = None):
# TODO: Make sqlite3 ready
- # FIXME: domain is BAD, BAD, BAD!!!
# The original delinker code cached log results,
# in order to limit the number of connections.
@@ -482,9 +488,20 @@
namespace, status, newimg) VALUES
(%%s, %%s, %%s, %%s, %%s, %%s, %%s)""" % self.CommonsDelinker.config['log_table'],
(timestamp, image, domain, page, namespace, status, newimage))
+ self.database.commit()
+ def log_result_new(self, timestamp, image, site_lang, site_family,
+ page_namespace, page_title, status = 'ok', new_image = None):
+
+ output(u'%s Logging %s for %s on %s' % (self, repr(status), image, page_title))
+
+ self.cursor.execute("""INSERT INTO %s (timestamp, image, site_lang, site_family,
+ page_namespace, page_title, status, new_image) VALUES
+ (%%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s)""" % self.CommonsDelinker.config['log_table'],
+ (timestamp, image, site_lang, site_family, page_namespace, page_title,
+ status, new_image))
+ self.database.commit()
- self.database.commit()
def log_replacement(self, timestamp, old_image, new_image):
# TODO: Same as above
@@ -500,7 +517,10 @@
if len(args) == 3:
self.log_replacement(*args)
else:
- self.log_result(*args)
+ if self.sql_layout == 'new':
+ self.log_result_new(*args)
+ else:
+ self.log_result_legacy(*args)
except:
# Something unexpected happened. Report and die.
output('An exception occured in %s' % self, False)
Modified: trunk/pywikipedia/delinker.txt
===================================================================
--- trunk/pywikipedia/delinker.txt 2007-08-26 21:49:19 UTC (rev 4119)
+++ trunk/pywikipedia/delinker.txt 2007-08-27 10:08:12 UTC (rev 4120)
@@ -124,6 +124,7 @@
}'': The configuration variables to be passed to the connect method of
the database engine. Please refer to your database api manual for a full
overview of the options.
+* ''sql_layout = "new"'': Set to "legacy" for the old table layout.
* ''log_table = "database.delinker"'': The database.table to log to.
* ''replacer_table = "database.replacer"'': The database.table for the
replacer. Only required if the replacer is activated.
@@ -132,12 +133,13 @@
<code lang="sql">
CREATE TABLE delinker (
timestamp CHAR(14),
- img VARBINARY(255),
- wiki VARBINARY(255),
+ image VARBINARY(255),
+ site_lang VARBINARY(31),
+ site_family VARBINARY(255),
+ page_namespace INT,
page_title VARBINARY(255),
- namespace INT,
status ENUM('ok', 'skipped', 'failed'),
- newimg VARBINARY(255)
+ new_image VARBINARY(255)
);
CREATE TABLE replacer (
@@ -154,6 +156,20 @@
INDEX(status)
);
</code>
+
+However, if you use CommonsDelinker with CommonsTicker, you will need to have
+a different table layout:
+<code lang="sql">
+CREATE TABLE delinker (
+ timestamp CHAR(14),
+ img VARBINARY(255),
+ wiki VARBINARY(255),
+ page_title VARBINARY(255),
+ namespace INT,
+ status ENUM('ok', 'skipped', 'failed'),
+ newimg VARBINARY(255)
+);
+</code>
=== Edit and debugging settings ===
* ''save_diff = False'': Save all changes to a diff. Create a directory diff/