http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10830
Revision: 10830
Author: btongminh
Date: 2012-12-24 13:50:34 +0000 (Mon, 24 Dec 2012)
Log Message:
-----------
Add experimental support for globalusage extension

Modified Paths:
--------------
    trunk/pywikipedia/commonsdelinker/checkusage.py
    trunk/pywikipedia/commonsdelinker/delinker.py
Modified: trunk/pywikipedia/commonsdelinker/checkusage.py =================================================================== --- trunk/pywikipedia/commonsdelinker/checkusage.py 2012-12-24 11:07:46 UTC (rev 10829) +++ trunk/pywikipedia/commonsdelinker/checkusage.py 2012-12-24 13:50:34 UTC (rev 10830) @@ -327,7 +327,44 @@ else: title = stripped_title yield page_namespace, stripped_title, title + + def get_globalusage(self, site, image, shared = False): + self.connect_http() + if type(site) is str: + hostname = site + apipath = '/w/api.php' + else: + hostname = site.hostname() + apipath = site.apipath()
+ kwargs = {'action': 'query', 'titles': u'File:' + image, + 'prop': 'globalusage|imageinfo', + 'iiprop': '', 'guprop': 'namespace', 'gulimit': '500'} + + while True: + res = self.http.query_api(hostname, apipath, **kwargs) + if not res or not res['query'] or not res['query']['pages']: + return + if res['query']['pages'].values()[0].get('imagerepository') == 'local' and shared: + return + + usages = res['query']['pages'].values()[0].get('globalusage', ()) + for usage in usages: + title = usage['title'].replace(' ', '_') + namespace = int(usage['ns']) + site = family(usage['wiki']) + + if namespace != 0: + yield site, (namespace, strip_ns(title), title) + else: + yield site, (namespace, title, title) + + if 'globalusage' in res.get('query-continue', ()): + kwargs.update(res['query-continue']['globalusage']) + else: + return + + def get_usage_live(self, site, image, shared = False): self.connect_http()
@@ -369,14 +406,14 @@ def exists(self, site, image): self.connect_http() # Check whether the image still is deleted on Commons. - # BUG: This also returns true for images with a page, but - # without the image itself. Can be fixed by querying query.php - # instead of api.php. - # BUG: This is ugly. - return '-1' not in self.http.query_api(site.hostname(), site.apipath(), - action = 'query', titles = 'Image:' + image)['query']['pages'] + res = self.http.query_api(site.hostname(), site.apipath(), + action = 'query', titles = u'Image:' + image, + prop = 'imageinfo', iiprop = '') + + if not res or not res['query'] or not res['query']['pages']: + return + return res['query']['pages'].values()[0].get('imagerepository') == 'local'
- def close(self): if getattr(self, 'http'): self.http.close()
Modified: trunk/pywikipedia/commonsdelinker/delinker.py =================================================================== --- trunk/pywikipedia/commonsdelinker/delinker.py 2012-12-24 11:07:46 UTC (rev 10829) +++ trunk/pywikipedia/commonsdelinker/delinker.py 2012-12-24 13:50:34 UTC (rev 10830) @@ -18,7 +18,7 @@ # # (C) Kyle/Orgullomoore, 2006-2007 # (C) Siebrand Mazeland, 2006-2007 -# (C) Bryan Tong Minh, 2007-2008 +# (C) Bryan Tong Minh, 2007-2008, 2012 # # Distributed under the terms of the MIT license. # @@ -478,7 +478,8 @@ use_autoconn = True, http_callback = wait_callback, mysql_callback = wait_callback, - mysql_host_suffix = '-fast') + mysql_host_suffix = '-fast', + no_db = config['global'] == 'live') else: self.CheckUsage = checkusage.CheckUsage(sys.maxint, http_callback = wait_callback, no_db = True) @@ -508,7 +509,10 @@
if self.CommonsDelinker.config['global']: - usage = self.CheckUsage.get_usage(image) + if self.CommonsDelinker.config['global'] == 'live': + usage = self.CheckUsage.get_globalusage(self.site, image, True) + else: + usage = self.CheckUsage.get_usage(image) usage_domains = {}
count = 0