Revision: 4120 Author: btongminh Date: 2007-08-27 10:08:12 +0000 (Mon, 27 Aug 2007)
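Log Message: ----------- Fixed all bugs related to local usage. NOTE: This changes the database layout of the delinker log table. The new layout is used by default (sql_layout = 'new'). If you are upgrading and want to keep logging to an existing table with the old layout, you need to set CommonsDelinker['sql_layout'] = 'legacy' in user-config.py.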
Modified Paths: -------------- trunk/pywikipedia/checkusage.py trunk/pywikipedia/delinker.py trunk/pywikipedia/delinker.txt
Modified: trunk/pywikipedia/checkusage.py =================================================================== --- trunk/pywikipedia/checkusage.py 2007-08-26 21:49:19 UTC (rev 4119) +++ trunk/pywikipedia/checkusage.py 2007-08-27 10:08:12 UTC (rev 4120) @@ -300,18 +300,18 @@
def get_usage(self, image): for dbname in self.databases: - for link in self.get_usage_db(dbname, image): + for link in self.get_usage_db(dbname, image, True): yield self.sites[dbname], link
- def get_usage_db(self, dbname, image): + def get_usage_db(self, dbname, image, shared = False): #image = strip_image(image) lang, family_name = self.sites[dbname] family = self.known_families[family_name] - if family.shared_image_repository(lang) == (lang, family_name): + if family.shared_image_repository(lang) != (lang, family_name) and shared: + left_join = 'LEFT JOIN %s.image ON (il_to = img_name) WHERE img_name IS NULL AND' % dbname + else: left_join = 'WHERE'; - else: - left_join = 'LEFT JOIN %s.image ON (il_to = img_name) WHERE img_name IS NULL AND' % dbname query = """SELECT page_namespace, page_title FROM %s.page, %s.imagelinks %s page_id = il_from AND il_to = %%s""" self.databases[dbname][1].execute(query % (dbname, dbname, left_join), @@ -324,24 +324,39 @@ title = stripped_title yield page_namespace, stripped_title, title
- def get_usage_live(self, site, image): + def get_usage_live(self, site, image, shared = False): self.connect_http() - #image = strip_image(image) - # BUG: This is ugly. + # FIXME: Use continue + kwargs = {'action': 'query', 'titles': u'Image:' + image, + 'prop': 'info'} + if site.live_version()[:2] > (1, 10): + kwargs['list'] = 'imageusage' + kwargs['iulimit'] = '500' + else: + kwargs['list'] = 'imagelinks' + kwargs['illimit'] = '500' + res = self.http.query_api(site.hostname(), site.apipath(), - action = 'query', list = 'imageusage', - prop = 'info', iulimit = '500', titles = 'Image:' + image) - if '-1' in res['query']['pages']: - for usage in res['query'].get('imageusage', ()): - title = usage['title'].replace(' ', '_') - namespace = usage['ns'] - if namespace != 0: - stripped_title = strip_ns(title) - else: - stripped_title = title - yield namespace, stripped_title, title + **kwargs) + if '-1' not in res['query']['pages'] and shared: + return + + if site.live_version()[:2] > (1, 10): + usages = res['query'].get('imageusage', ()) + else: + usages = res['query'].get('imagelinks', {}).itervalues()
+ + for usage in usages: + title = usage['title'].replace(' ', '_') + namespace = usage['ns'] + if namespace != 0: + stripped_title = strip_ns(title) + else: + stripped_title = title + yield namespace, stripped_title, title + def exists(self, site, image): self.connect_http()
Modified: trunk/pywikipedia/delinker.py =================================================================== --- trunk/pywikipedia/delinker.py 2007-08-26 21:49:19 UTC (rev 4119) +++ trunk/pywikipedia/delinker.py 2007-08-27 10:08:12 UTC (rev 4120) @@ -77,7 +77,7 @@ def __init__(self, pool, CommonsDelinker): threadpool.Thread.__init__(self, pool) self.CommonsDelinker = CommonsDelinker - self.summaries = {} + self.sql_layout = self.CommonsDelinker.config.get('sql_layout', 'new') def delink_image(self, image, usage, timestamp, admin, reason, replacement = None): """ Performs the delink for image on usage. """ @@ -105,9 +105,15 @@ result = self.replace_image(image, site, title, summary, replacement) finally: self.CommonsDelinker.unset_edit(str(site), title) + # Add to logging queue - self.CommonsDelinker.Loggers.append((timestamp, image, site.hostname(), - page_namespace, page_title, result, replacement)) + if self.sql_layout == 'new': + self.CommonsDelinker.Loggers.append((timestamp, image, + site.lang, site.family.name, page_namespace, page_title, + result, replacement)) + else: + self.CommonsDelinker.Loggers.append((timestamp, image, site.hostname(), + page_namespace, page_title, result, replacement)) finally: self.CommonsDelinker.unlock_site(site) @@ -426,10 +432,10 @@ count += 1 else: #FIX! 
- usage_domains = {self.site.hostname(): list( - self.CheckUsage.get_usage_live(self.site.hostname(), - image))} - count = len(usage_domains[self.site.hostname()]) + usage_domains = {(self.site.lang, self.site.family.name): + list(self.CheckUsage.get_usage_live(self.site, + image))} + count = len(usage_domains[(self.site.lang, self.site.family.name)]) output(u'%s %s used on %s pages' % (self, image, count)) @@ -455,6 +461,7 @@ def __init__(self, pool, CommonsDelinker): threadpool.Thread.__init__(self, pool) self.CommonsDelinker = CommonsDelinker + self.sql_layout = self.CommonsDelinker.config.get('sql_layout', 'new') def run(self): self.connect() @@ -465,9 +472,8 @@ self.cursor = self.database.cursor() - def log_result(self, timestamp, image, domain, namespace, page, status = "ok", newimage = None): + def log_result_legacy(self, timestamp, image, domain, namespace, page, status = "ok", newimage = None): # TODO: Make sqlite3 ready - # FIXME: domain is BAD, BAD, BAD!!! # The original delinker code cached log results, # in order to limit the number of connections. 
@@ -482,9 +488,20 @@ namespace, status, newimg) VALUES (%%s, %%s, %%s, %%s, %%s, %%s, %%s)""" % self.CommonsDelinker.config['log_table'], (timestamp, image, domain, page, namespace, status, newimage)) + self.database.commit() + def log_result_new(self, timestamp, image, site_lang, site_family, + page_namespace, page_title, status = 'ok', new_image = None): + + output(u'%s Logging %s for %s on %s' % (self, repr(status), image, page_title)) + + self.cursor.execute("""INSERT INTO %s (timestamp, image, site_lang, site_family, + page_namespace, page_title, status, new_image) VALUES + (%%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s)""" % self.CommonsDelinker.config['log_table'], + (timestamp, image, site_lang, site_family, page_namespace, page_title, + status, new_image)) + self.database.commit() - self.database.commit() def log_replacement(self, timestamp, old_image, new_image): # TODO: Same as above @@ -500,7 +517,10 @@ if len(args) == 3: self.log_replacement(*args) else: - self.log_result(*args) + if self.sql_layout == 'new': + self.log_result_new(*args) + else: + self.log_result_legacy(*args) except: # Something unexpected happened. Report and die. output('An exception occured in %s' % self, False)
Modified: trunk/pywikipedia/delinker.txt =================================================================== --- trunk/pywikipedia/delinker.txt 2007-08-26 21:49:19 UTC (rev 4119) +++ trunk/pywikipedia/delinker.txt 2007-08-27 10:08:12 UTC (rev 4120) @@ -124,6 +124,7 @@ }'': The configuration variables to be passed to the connect method of the database engine. Please refer to your database api manual for a full overview of the options. +* ''sql_layout = "new"'': Set to "legacy" for the old table layout. * ''log_table = "database.delinker"'': The database.table to log to. * ''replacer_table = "database.replacer"'': The database.table for the replacer. Only required if the replacer is activated. @@ -132,12 +133,13 @@ <code lang="sql"> CREATE TABLE delinker ( timestamp CHAR(14), - img VARBINARY(255), - wiki VARBINARY(255), + image VARBINARY(255), + site_lang VARBINARY(31), + site_family VARBINARY(255), + page_namespace INT, page_title VARBINARY(255), - namespace INT, status ENUM('ok', 'skipped', 'failed'), - newimg VARBINARY(255) + new_image VARBINARY(255) );
CREATE TABLE replacer ( @@ -154,6 +156,20 @@ INDEX(status) ); </code> + +However, if you use CommonsDelinker with CommonsTicker, you will need to have +a different table layout: +<code lang="sql"> +CREATE TABLE delinker ( + timestamp CHAR(14), + img VARBINARY(255), + wiki VARBINARY(255), + page_title VARBINARY(255), + namespace INT, + status ENUM('ok', 'skipped', 'failed'), + newimg VARBINARY(255) +); +</code>
=== Edit and debugging settings === * ''save_diff = False'': Save all changes to a diff. Create a directory diff/
pywikipedia-l@lists.wikimedia.org