Revision: 4130 Author: btongminh Date: 2007-08-29 15:14:32 +0000 (Wed, 29 Aug 2007)
Log Message: ----------- Reverting wrong commit.
Modified Paths: -------------- trunk/pywikipedia/checkusage.py trunk/pywikipedia/delinker.py trunk/pywikipedia/image_replacer.py
Modified: trunk/pywikipedia/checkusage.py =================================================================== --- trunk/pywikipedia/checkusage.py 2007-08-29 15:09:47 UTC (rev 4129) +++ trunk/pywikipedia/checkusage.py 2007-08-29 15:14:32 UTC (rev 4130) @@ -3,7 +3,7 @@ """ This module provides a way for users of the Wikimedia toolserver to check the use of images from Commons on other Wikimedia wikis. It supports both running -checkusage against the database and but also against the live wikis. It is very +checkusage against the database and against the live wikis. It is very efficient as it only creates one HTTP connection and one MySQL connection during its life time. It is not suitable for multithreading!
@@ -12,6 +12,23 @@ to connect to the MySQL database. The top wikis in size will be checked. The class provides multiple methods:
+get_usage(image) +This method will return a generator object that generates the usage of the +image, returned as the following tuple: (page_namespace, page_title, +full_title). page_namespace is the numeric namespace, page_title the page title +without namespace, full_title the page title including localized namespace. + +get_usage_db(dbname, image), get_usage_live(domain, image) +Those methods allow querying a specific wiki, respectively against the database +and against the live wiki. They accept respectively the database name and the +domain name. They return a generator which generates the same results as +get_usage(). + +get_usage_multi(images) +Calls get_usage for each image and returns a dictionary with usages. + +get_replag(dbname) +Returns the time in seconds since the latest known edit of dbname. """ # # (C) Bryan Tong Minh, 2007
Modified: trunk/pywikipedia/delinker.py =================================================================== --- trunk/pywikipedia/delinker.py 2007-08-29 15:09:47 UTC (rev 4129) +++ trunk/pywikipedia/delinker.py 2007-08-29 15:14:32 UTC (rev 4130) @@ -2,11 +2,12 @@ # -*- coding: utf-8 -*- """ This script keeps track of image deletions and delinks removed files -from (any) wiki. Usage on protected pages or pages containing blacklisted -external links cannot be processed. +from (any) wiki. Usage +on protected pages or pages containing blacklisted external links cannot +be processed.
This script is run by [[commons:User:Siebrand]] on the toolserver. It should -not be run on Commons by other users without prior contact. +not be run by other users without prior contact.
Although the classes are called CommonsDelinker and Delinker, it is in fact a general delinker/replacer, also suitable for local use. @@ -28,7 +29,6 @@ # TODO: # * Don't replace within <nowiki /> tags # * Make as many config settings site dependend -# * Implement sqlite3 mode # BUGS: # * There is a problem with images in the es.wikisource project namespace. # The exact problem is described somewhere in Bryan's IRC logs, but it is @@ -69,10 +69,7 @@ kwargs['callback'] = wait_callback return mysql_autoconnection.connect(**kwargs) - elif engine == 'sqlite3': - import sqlite3 - return sqlite3.connect(**kwargs) - + # TODO: Add support for sqlite3 raise RuntimeError('Unsupported database engine %s' % engine)
class Delinker(threadpool.Thread): @@ -309,10 +306,6 @@ self.CommonsDelinker.config['summary_cache']: # Return cached result return self.summaries[type][key][0] - else: - self.summaries[type][key] = \ - (self.CommonsDelinker.config['default_settings'].get(type, ''), - time.time()) output(u'%s Fetching new summary for %s' % (self, site)) @@ -343,10 +336,9 @@ 'wikisource', 'wikinews', 'wikiversity'): newsite = self.CommonsDelinker.get_site(site.lang, wikipedia.Family('wikipedia')) - return self.get(newsite, type, key) + return self.get(newsite, type, key = key) + return self.CommonsDelinker.config['default_settings'].get(type, '') - return self.get(site, type, key) - def check_user_page(self, site): "Check whether a userpage exists. Only used for CommonsDelinker." try: @@ -361,8 +353,7 @@ ftxt = f.read() f.close() if not '#' + str(site) in ftxt: - # BUG: Username does not exist - username = config.usernames[site.family.name][site.lang] + username = config.usernames[site.family.name][site.lang] userpage = wikipedia.Page(site, 'User:' + username) # Removed check for page existence. If it is not in our @@ -409,6 +400,10 @@ """ Check whether this image needs to be delinked. """ # Check whether the image still is deleted on Commons. + # BUG: This also returns true for images with a page, but + # without the image itself. Can be fixed by querying query.php + # instead of api.php. Also should this be made as an exits() + # method of checkusage.CheckUsage? if self.site.shared_image_repository() != (None, None): shared_image_repository = self.CommonsDelinker.get_site(*self.site.shared_image_repository()) try: @@ -533,9 +528,6 @@ self.exit() self.CommonsDelinker.thread_died() - def format_query(self, query): - - class CommonsDelinker(object): def __init__(self): self.config = config.CommonsDelinker
Modified: trunk/pywikipedia/image_replacer.py =================================================================== --- trunk/pywikipedia/image_replacer.py 2007-08-29 15:09:47 UTC (rev 4129) +++ trunk/pywikipedia/image_replacer.py 2007-08-29 15:14:32 UTC (rev 4130) @@ -241,6 +241,4 @@ if type(e) not in (SystemExit, KeyboardInterrupt): output('A critical error has occured! Aborting!') print >>sys.stderr, cgitb.text(sys.exc_info()) - r.reporters.exit() - output(u'Stopping ImageReplacer') wikipedia.stopme() \ No newline at end of file