Revision: 4619
Author: yurik
Date: 2007-11-30 05:31:33 +0000 (Fri, 30 Nov 2007)
Log Message:
-----------
Updated query library & casechecker script to use the new API
Modified Paths:
--------------
trunk/pywikipedia/casechecker.py
trunk/pywikipedia/query.py
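
For context: the old query.php interface took a combined 'what' parameter, while
api.php uses action=query with a generator module. A minimal sketch of the two
parameter styles (the limit value here is illustrative, not taken from this commit):

# Old query.php style (before this revision)
params = {'what': 'allpages|links|categories',
          'aplimit': 500,
          'apfilterredir': 'nonredirects',
          'noprofile': ''}

# New api.php style (this revision)
params = {'action': 'query',
          'generator': 'allpages',
          'gaplimit': 500,
          'gapfilterredir': 'nonredirects',
          'prop': 'links|categories'}
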
Modified: trunk/pywikipedia/casechecker.py
===================================================================
--- trunk/pywikipedia/casechecker.py 2007-11-29 16:16:45 UTC (rev 4618)
+++ trunk/pywikipedia/casechecker.py 2007-11-30 05:31:33 UTC (rev 4619)
@@ -74,9 +74,9 @@
langs = {
'ru': {
- 'alphabet' : u'АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя',
- 'localsuspects': u'АаВЕеКкМНОоРрСсТуХх',
- 'latinsuspects': u'AaBEeKkMHOoPpCcTyXx',
+ 'alphabet' : u'АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯяІі',
+ 'localsuspects': u'АаВЕеКкМНОоРрСсТуХхІі',
+ 'latinsuspects': u'AaBEeKkMHOoPpCcTyXxIi',
},
'uk': {
'alphabet' : u'АаБбВвГ㥴ДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЮюЯяЬь',
@@ -111,7 +111,6 @@
title = None
replace = False
stopAfter = 0
- verbose = False
wikilog = None
wikilogfile = 'wikilog.txt'
autonomous = False
@@ -136,9 +135,7 @@
self.replace = True
elif arg.startswith('-limit:'):
self.stopAfter = int(arg[7:])
- elif arg == '-verbose':
- self.verbose = True
- elif arg == '-autonomous':
+ elif arg == '-autonomous' or arg == '-a':
self.autonomous = True
elif arg.startswith('-ns:'):
self.namespaces.append( int(arg[4:]) )
@@ -155,14 +152,15 @@
else:
self.namespaces = [0]
- self.params = {'what' : 'allpages',
- 'aplimit' : self.aplimit,
- 'apfilterredir' : 'nonredirects',
- 'noprofile' : '' }
-
+ self.params = { 'action' : 'query',
+ 'generator' : 'allpages',
+ 'gaplimit' : self.aplimit,
+ 'gapfilterredir': 'nonredirects'}
+
if self.links:
- self.params['what'] += '|links|categories';
+ self.params['prop'] = 'links|categories'
+
self.site = wikipedia.getSite()
if self.site.lang in self.langs:
@@ -200,46 +198,53 @@
count = 0
lastLetter = ''
for namespace in self.namespaces:
- self.params['apnamespace'] = namespace
+ self.params['gapnamespace'] = namespace
title = None
while True:
# Get data
- self.params['apfrom'] = self.apfrom
- data = query.GetData(self.site.lang, self.params, self.verbose)
+ self.params['gapfrom'] = self.apfrom
+ data = query.GetData(self.site.lang, self.params, wikipedia.verbose, True)
try:
- self.apfrom = data['query']['allpages']['next']
+ self.apfrom = data['query-continue']['allpages']['gapfrom']
except:
self.apfrom = None
# Process received data
- if 'pages' in data:
+ if 'query' in data and 'pages' in data['query']:
firstItem = True
- for pageID, page in data['pages'].iteritems():
+ for pageID, page in data['query']['pages'].iteritems():
printed = False
title = page['title']
if firstItem:
if lastLetter != title[0]:
- print 'Processing ' + title
+ try:
+ print 'Processing ' + title
+ except:
+ print 'Processing unprintable title'
lastLetter = title[0]
firstItem = False
if self.titles:
err = self.ProcessTitle(title)
if err:
- if page['ns'] == 14:
- self.WikiLog(u"* Move category content: " + err[0])
- else:
- changed = False
- if self.replace:
- newTitle = self.PickTarget(False, title, title, err[1])
- if newTitle:
- src = wikipedia.Page(self.site, title)
- src.move( newTitle, wikipedia.translate(self.site, self.msgRename))
- changed = True
+ changed = False
+ if self.replace:
+ newTitle = self.PickTarget(False, title, title, err[1])
+ if newTitle:
+ editSummary = wikipedia.translate(self.site, self.msgRename)
+ src = wikipedia.Page(self.site, title)
+ if page['ns'] == 14:
+ import category
+ dst = wikipedia.Page(self.site, newTitle)
+ bot = category.CategoryMoveRobot(src.titleWithoutNamespace(), dst.titleWithoutNamespace(), self.autonomous, editSummary, True)
+ bot.run()
+ else:
+ src.move(newTitle, editSummary)
+ changed = True
- if not changed:
- self.WikiLog(u"* " + err[0])
- printed = True
+ if not changed:
+ self.WikiLog(u"* " + err[0])
+ printed = True
if self.links:
allLinks = None
@@ -257,7 +262,7 @@
msg = []
for l in allLinks:
- ltxt = l['*']
+ ltxt = l['title']
err = self.ProcessTitle(ltxt)
if err:
newTitle = None
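
The casechecker loop now paginates with the API's query-continue block instead of
query.php's 'next' field. A minimal standalone sketch of that pattern, using the
GetData helper from query.py below (the language code and limits are illustrative):

import query

params = {'action': 'query',
          'generator': 'allpages',
          'gaplimit': 100,
          'gapnamespace': 0,
          'gapfilterredir': 'nonredirects'}
apfrom = ''
while apfrom is not None:
    params['gapfrom'] = apfrom
    data = query.GetData('ru', params, useAPI = True)
    try:
        # query-continue carries the value to resume from, keyed by module name
        apfrom = data['query-continue']['allpages']['gapfrom']
    except KeyError:
        apfrom = None  # no continuation block means this was the last batch
    for pageID, page in data.get('query', {}).get('pages', {}).iteritems():
        print page['title']
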
Modified: trunk/pywikipedia/query.py
===================================================================
--- trunk/pywikipedia/query.py 2007-11-29 16:16:45 UTC (rev 4618)
+++ trunk/pywikipedia/query.py 2007-11-30 05:31:33 UTC (rev 4619)
@@ -4,8 +4,9 @@
import wikipedia
import simplejson
import urllib
+import time
-def GetData( lang, params, verbose = False ):
+def GetData( lang, params, verbose = False, useAPI = False, retryCount = 5 ):
"""Get data from the query api, and convert it into a data object
"""
site = wikipedia.getSite( lang )
@@ -15,7 +16,9 @@
params[k] = unicode(v)
params['format'] = 'json'
- params['noprofile'] = ''
+
+ if not useAPI:
+ params['noprofile'] = ''
for k,v in params.iteritems():
if type(v) == type(u''):
@@ -30,29 +33,45 @@
data = None
titlecount = 0
- path = u"/w/query.php?" + urllib.urlencode( params.items() )
+ if useAPI:
+ path = site.api_address() + urllib.urlencode( params.items() )
+ else:
+ path = site.query_address() + urllib.urlencode( params.items() )
if verbose:
- wikipedia.output( u"Requesting %d titles from %s:%s" % (titlecount, lang, path) )
+ if titlecount > 0:
+ wikipedia.output( u"Requesting %d titles from %s:%s" % (titlecount, lang, path) )
+ else:
+ wikipedia.output( u"Request %s:%s" % (lang, path) )
- url = site.family.querypath(lang)
-
- retryCount = 1
-
+ lastError = None
+ retry_idle_time = 5
while retryCount >= 0:
try:
+ jsontext = "Nothing received"
jsontext = site.getUrl( path, retry=True, data=data )
# This will also work, but all unicode strings will need to be converted from \u notation
# decodedObj = eval( jsontext )
- decodedObj = simplejson.loads( jsontext )
+ return simplejson.loads( jsontext )
break
except ValueError, error:
retryCount -= 1
wikipedia.output( u"Error downloading data: %s" % error )
+ wikipedia.output( u"Request %s:%s" % (lang, path) )
+ wikipedia.debugDump('ApiGetDataParse', site, str(error) + '\n' + path, jsontext)
+ lastError = error
+ if retryCount >= 0:
+ wikipedia.output( u"Retrying in %i seconds..." % retry_idle_time )
+ time.sleep(retry_idle_time)
+ # Next time wait longer, but not longer than five minutes
+ retry_idle_time *= 2
+ if retry_idle_time > 300:
+ retry_idle_time = 300
+
- return decodedObj
+ raise lastError
def GetInterwikies( lang, titles, extraParams = None ):
""" Usage example: data = GetInterwikies('ru','user:yurik')