Revision: 7118
Author: alexsh
Date: 2009-08-06 11:08:52 +0000 (Thu, 06 Aug 2009)
Log Message:
-----------
wikipedia.py: Add API version locks according to the information at [[mw:API:Query]] (for some API queries a lock is not necessary because they have been available since 1.9).
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-08-06 10:45:42 UTC (rev 7117)
+++ trunk/pywikipedia/wikipedia.py 2009-08-06 11:08:52 UTC (rev 7118)
@@ -5133,7 +5133,7 @@
retry_idle_time = 1
while True:
- if config.use_api:
+ if config.use_api and self.versionnumber() >= 12:
params = {
'action':'query',
'meta':'allmessages',
@@ -5718,7 +5718,7 @@
break
def randompage(self):
- if config.use_api:
+ if config.use_api and self.versionnumber() >= 12:
params = {
'action': 'query',
'list': 'random',
@@ -5736,7 +5736,7 @@
return Page(self, m.group('title'))
def randomredirectpage(self):
- if config.use_api:
+ if config.use_api and self.versionnumber() >= 12:
params = {
'action': 'query',
'list': 'random',
@@ -5980,7 +5980,7 @@
urlsToRetrieve = [siteurl]
if not siteurl.startswith('*.'):
urlsToRetrieve.append('*.' + siteurl)
- if config.use_api:
+ if config.use_api and self.versionnumber() >= 11:
output(u'Querying API exturlusage...')
for url in urlsToRetrieve:
params = {
@@ -6593,6 +6593,9 @@
NOTE 2: it returns the image WITHOUT the image namespace.
"""
+ if self.versionnumber() < 12:
+ return None
+
if hash_found is None: # If the hash is none return None and not continue
return None
# Now get all the images with the same hash
Revision: 7117
Author: alexsh
Date: 2009-08-06 10:45:42 +0000 (Thu, 06 Aug 2009)
Log Message:
-----------
*Site()._getUserData(): add comments describing the data collected from the API.
*Page().delete(): Add an API option for deleting a page (tested with "redirect.py broken"; it can reduce the time spent loading the delete page).
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-08-06 07:15:41 UTC (rev 7116)
+++ trunk/pywikipedia/wikipedia.py 2009-08-06 10:45:42 UTC (rev 7117)
@@ -2396,50 +2396,71 @@
answer = 'y'
self.site()._noDeletePrompt = True
if answer == 'y':
- host = self.site().hostname()
- address = self.site().delete_address(self.urlname())
-
- reason = reason.encode(self.site().encoding())
+
token = self.site().getToken(self, sysop = True)
- predata = {
- 'wpDeleteReasonList': 'other',
- 'wpReason': reason,
- 'wpComment': reason,
- 'wpConfirm': '1',
- 'wpConfirmB': '1'
- }
- if token:
- predata['wpEditToken'] = token
- if self.site().hostname() in config.authenticate.keys():
- predata['Content-type'] = 'application/x-www-form-urlencoded'
- predata['User-agent'] = useragent
- data = self.site().urlEncode(predata)
- response = urllib2.urlopen(urllib2.Request(self.site().protocol() + '://' + self.site().hostname() + address, data))
- data = u''
- else:
- response, data = self.site().postForm(address, predata, sysop = True)
- if data:
- self.site().checkBlocks(sysop = True)
- if self.site().mediawiki_message('actioncomplete') in data:
+
+ if config.use_api and self.site().versionnumber() >= 12:
+ params = {
+ 'action': 'delete',
+ 'title': self.title(),
+ 'token': token,
+ 'reason': reason,
+ }
+ datas = query.GetData(params, self.site(), sysop = True)
+ if datas.has_key('delete'):
output(u'Page %s deleted' % self.aslink(forceInterwiki = True))
return True
- elif self.site().mediawiki_message('cannotdelete') in data:
- output(u'Page %s could not be deleted - it doesn\'t exist' % self.aslink(forceInterwiki = True))
- return False
else:
- output(u'Deletion of %s failed for an unknown reason. The response text is:' % self.aslink(forceInterwiki = True))
- try:
- ibegin = data.index('<!-- start content -->') + 22
- iend = data.index('<!-- end content -->')
- except ValueError:
- # if begin/end markers weren't found, show entire HTML file
- output(data)
+ if datas['error']['code'] == 'missingtitle':
+ output(u'Page %s could not be deleted - it doesn\'t exist' % self.aslink(forceInterwiki = True))
+ return False
else:
- # otherwise, remove the irrelevant sections
- data = data[ibegin:iend]
- output(data)
- return False
+ output(u'Deletion of %s failed for an unknown reason. The response text is:' % self.aslink(forceInterwiki = True))
+ output('%s' % datas)
+ else:
+ host = self.site().hostname()
+ address = self.site().delete_address(self.urlname())
+ reason = reason.encode(self.site().encoding())
+ predata = {
+ 'wpDeleteReasonList': 'other',
+ 'wpReason': reason,
+ 'wpComment': reason,
+ 'wpConfirm': '1',
+ 'wpConfirmB': '1'
+ }
+ if token:
+ predata['wpEditToken'] = token
+ if self.site().hostname() in config.authenticate.keys():
+ predata['Content-type'] = 'application/x-www-form-urlencoded'
+ predata['User-agent'] = useragent
+ data = self.site().urlEncode(predata)
+ response = urllib2.urlopen(urllib2.Request(self.site().protocol() + '://' + self.site().hostname() + address, data))
+ data = u''
+ else:
+ response, data = self.site().postForm(address, predata, sysop = True)
+ if data:
+ self.site().checkBlocks(sysop = True)
+ if self.site().mediawiki_message('actioncomplete') in data:
+ output(u'Page %s deleted' % self.aslink(forceInterwiki = True))
+ return True
+ elif self.site().mediawiki_message('cannotdelete') in data:
+ output(u'Page %s could not be deleted - it doesn\'t exist' % self.aslink(forceInterwiki = True))
+ return False
+ else:
+ output(u'Deletion of %s failed for an unknown reason. The response text is:' % self.aslink(forceInterwiki = True))
+ try:
+ ibegin = data.index('<!-- start content -->') + 22
+ iend = data.index('<!-- end content -->')
+ except ValueError:
+ # if begin/end markers weren't found, show entire HTML file
+ output(data)
+ else:
+ # otherwise, remove the irrelevant sections
+ data = data[ibegin:iend]
+ output(data)
+ return False
+
def loadDeletedRevisions(self):
"""Retrieve all deleted revisions for this Page from Special/Undelete.
@@ -4879,8 +4900,7 @@
index = self._userIndex(sysop)
if type(text) == dict: #text is dict, query from API
- # Check for blocks - but only if version is 1.11 (userinfo is available)
- # and the user data was not yet loaded
+ # Check for blocks
if text.has_key('blockedby') and not self._isBlocked[index]:
# Write a warning if not shown earlier
if sysop:
@@ -4890,7 +4910,7 @@
output(u'WARNING: %s on %s is blocked. Editing using this account will stop the run.' % (account, self))
self._isBlocked[index] = text.has_key('blockedby')
- # Check for new messages, show key 'messages' in dict.
+ # Check for new messages, the data must had key 'messages' in dict.
if text.has_key('messages'):
if not self._messages[index]:
# User has *new* messages
@@ -4907,16 +4927,16 @@
return
# Get username.
- # anonymous mode will show key 'anon'
+ # The data in anonymous mode had key 'anon'
+ # if 'anon' exist, username is IP address, not to collect it right now
if not text.has_key('anon'):
self._isLoggedIn[index] = True
self._userName[index] = text['name']
else:
self._isLoggedIn[index] = False
- # No idea what is the user name, and it isn't important
self._userName[index] = None
- # Check user groups and rights
+ # Get user groups and rights
if text.has_key('groups') and text['groups'] != []:
self._rights[index] = text['groups']
self._rights[index].extend(text['rights'])
@@ -4933,7 +4953,7 @@
if sysop and 'sysop' not in self._rights[index]:
output(u'WARNING: Your sysop account on %s does not seem to have sysop rights. You may not be able to perform any sysop-restricted actions using it.' % self)
else:
- # key groups is not exists, setup a default rights
+ # 'groups' is not exists, set default rights
self._rights[index] = []
if self._isLoggedIn[index]:
# Logged in user
@@ -4948,13 +4968,13 @@
#remove Duplicate rights
self._rights[index] = list(set(self._rights[index]))
- # Search for a token
+ # Get token
if text.has_key('preferencestoken') and len(text['preferencestoken']) > 2:
# anonymous token is '+\\', check len('+\\') = 2
# if preferencestoken > 2, it must be loggedin.
self._token[index] = text['preferencestoken']
if self._rights[index] is not None:
- # In this case, token and rights are loaded - user data is now loaded
+ # Token and rights are loaded - user data is now loaded
self._userData[index] = True
else:
output(u'WARNING: Token not found on %s. You will not be able to edit any page.' % self)
@@ -5213,6 +5233,7 @@
config.use_api = False
# Get data
+ # API Userinfo is available from version 1.11
if config.use_api and self.versionnumber() >= 11:
#Query userinfo
params = {
Revision: 7115
Author: purodha
Date: 2009-08-06 06:00:28 +0000 (Thu, 06 Aug 2009)
Log Message:
-----------
Add -query:(pagecount) parameter to replace.py - same as interwiki.py has.
Modified Paths:
--------------
trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/replace.py
===================================================================
--- trunk/pywikipedia/replace.py 2009-08-05 21:40:30 UTC (rev 7114)
+++ trunk/pywikipedia/replace.py 2009-08-06 06:00:28 UTC (rev 7115)
@@ -62,6 +62,9 @@
the bot will check every regex without waiting using all the
resources. This will slow it down between a regex and another
in order not to waste too much CPU.
+
+-query: The maximum number of pages that the bot will load at once.
+ Default value is 60. Ignored when reading an XML file.
-fix:XYZ Perform one of the predefined replacements tasks, which are
given in the dictionary 'fixes' defined inside the file
@@ -495,6 +498,8 @@
allowoverlap = False
# Do not recurse replacement
recursive = False
+ # This is the maximum number of pages to load per query
+ maxquerysize = 60
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
@@ -561,6 +566,8 @@
summary_commandline = True
elif arg.startswith('-allowoverlap'):
allowoverlap = True
+ elif arg.startswith('-query:'):
+ maxquerysize = int(arg[7:])
else:
if not genFactory.handleArg(arg):
commandline_replacements.append(arg)
@@ -696,7 +703,7 @@
preloadingGen = pagegenerators.PreloadingGenerator(gen,
pageNumber=20, lookahead=100)
else:
- preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
+ preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=maxquerysize)
bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, allowoverlap, recursive, add_cat, sleep, editSummary)
bot.run()