[Pywikipedia-svn] SVN: [7337] trunk/pywikipedia/flickrripper.py
alexsh at svn.wikimedia.org
Tue Sep 29 19:20:05 UTC 2009
Revision: 7337
Author: alexsh
Date: 2009-09-29 19:20:05 +0000 (Tue, 29 Sep 2009)
Log Message:
-----------
flickrripper.py
- Code cleanup.
- findDuplicateImages(): drop the inline API query; it duplicated wikipedia.Site().getFilesFromAnHash().
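
For readers not familiar with that helper: the removed hunk below issued a raw list=allimages query keyed on the file's SHA-1 hash, which is what getFilesFromAnHash() wraps. A minimal standalone sketch of the same lookup (Python 2 stdlib; the Commons endpoint, the json format parameter, and the lowercased hex digest are my assumptions for illustration, not part of this commit):

    import base64
    import hashlib
    import json
    import urllib

    def files_with_same_hash(photo, api='https://commons.wikimedia.org/w/api.php'):
        # Hex-encode the SHA-1 of the raw image bytes; aisha1 matches on hex.
        sha1Hash = base64.b16encode(hashlib.sha1(photo.getvalue()).digest()).lower()
        params = urllib.urlencode({
            'action': 'query',
            'list': 'allimages',
            'aisha1': sha1Hash,
            'aiprop': '',
            'format': 'json',
        })
        # Names of all files whose content hashes to the same SHA-1.
        data = json.load(urllib.urlopen(api + '?' + params))
        return [image['name'] for image in data['query']['allimages']]
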
Modified Paths:
--------------
trunk/pywikipedia/flickrripper.py
Modified: trunk/pywikipedia/flickrripper.py
===================================================================
--- trunk/pywikipedia/flickrripper.py 2009-09-29 18:27:04 UTC (rev 7336)
+++ trunk/pywikipedia/flickrripper.py 2009-09-29 19:20:05 UTC (rev 7337)
@@ -84,7 +84,7 @@
else:
return False
-def getPhotoUrl(photoSizes=None):
+def getPhotoUrl(photoSizes = None):
'''
Get the url of the jpg file with the highest resolution
'''
@@ -94,7 +94,7 @@
url = size.attrib['source']
return url
-def downloadPhoto(photoUrl=''):
+def downloadPhoto(photoUrl = ''):
'''
Download the photo and store it in a StringIO.StringIO object.
@@ -103,28 +103,16 @@
imageFile=urllib.urlopen(photoUrl).read()
return StringIO.StringIO(imageFile)
-def findDuplicateImages(photo=None, site=wikipedia.getSite()):
+def findDuplicateImages(photo = None, site = wikipedia.getSite()):
'''
Takes the photo, calculates the SHA1 hash and asks the mediawiki api for a list of duplicates.
TODO: Add exception handling, fix site thing
'''
- result = []
hashObject = hashlib.sha1()
hashObject.update(photo.getvalue())
- sha1Hash = base64.b16encode(hashObject.digest())
+ return site.getFilesFromAnHash(base64.b16encode(hashObject.digest()))
- params = {
- 'action' : 'query',
- 'list' : 'allimages',
- 'aisha1' : sha1Hash,
- 'aiprop' : '',
- }
- data = query.GetData(params, wikipedia.getSite(), encodeTitle = False)
- for image in data['query']['allimages']:
- result.append(image['name'])
- return result
-
def getTags(photoInfo = None):
'''
Get all the tags on a photo
@@ -161,10 +149,10 @@
else:
title = u''
- if (wikipedia.Page(title=u'File:Flickr - %s - %s.jpg' % (username, title), site=wikipedia.getSite()).exists()):
+ if wikipedia.Page(site, u'File:Flickr - %s - %s.jpg' % (username, title) ).exists():
i = 1
while True:
- if (wikipedia.Page(title=u'File:Flickr - %s - %s (%s).jpg' % (username, title, str(i)), site=wikipedia.getSite()).exists()):
+ if (wikipedia.Page(site, u'File:Flickr - %s - %s (%s).jpg' % (username, title, str(i))).exists()):
i = i + 1
else:
return u'Flickr - %s - %s (%s).jpg' % (username, title, str(i))
@@ -203,19 +191,20 @@
'''
description = flinfoDescription
- if(removeCategories):
- description = wikipedia.removeCategoryLinks(text=description, site=wikipedia.getSite(u'commons', u'commons'))
+ if removeCategories:
+ description = wikipedia.removeCategoryLinks(description, wikipedia.getSite('commons', 'commons'))
- if(override):
+ if override:
description = description.replace(u'{{cc-by-sa-2.0}}\n', u'')
description = description.replace(u'{{cc-by-2.0}}\n', u'')
description = description.replace(u'{{flickrreview}}\n', u'')
description = description.replace(u'{{copyvio|Flickr, licensed as "All Rights Reserved" which is not a free license --~~~~}}\n', u'')
description = description.replace(u'=={{int:license}}==', u'=={{int:license}}==\n' + override)
- elif(flickrreview):
- if(reviewer):
+ elif flickrreview:
+ if reviewer:
description = description.replace(u'{{flickrreview}}', u'{{flickrreview|' + reviewer + '|{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-{{subst:CURRENTDAY2}}}}')
- if(addCategory):
+
+ if addCategory:
description = description + u'\n[[Category:' + addCategory + ']]\n'
description = description.replace(u'\r\n', u'\n')
return description
@@ -224,23 +213,23 @@
'''
Process a single Flickr photo
'''
- if(photo_id):
+ if photo_id:
print photo_id
- (photoInfo, photoSizes) = getPhoto(flickr=flickr, photo_id=photo_id)
- if (isAllowedLicense(photoInfo=photoInfo) or override):
+ (photoInfo, photoSizes) = getPhoto(flickr, photo_id)
+ if isAllowedLicense(photoInfo) or override:
#Get the url of the largest photo
- photoUrl = getPhotoUrl(photoSizes=photoSizes)
+ photoUrl = getPhotoUrl(photoSizes)
#Should download the photo only once
- photo = downloadPhoto(photoUrl=photoUrl)
+ photo = downloadPhoto(photoUrl)
#Don't upload duplicate images, should add override option
- duplicates = findDuplicateImages(photo=photo)
+ duplicates = findDuplicateImages(photo)
if duplicates:
wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
else:
- filename = getFilename(photoInfo=photoInfo)
- flinfoDescription = getFlinfoDescription(photo_id=photo_id)
- photoDescription = buildDescription(flinfoDescription=flinfoDescription, flickrreview=flickrreview, reviewer=reviewer, override=override, addCategory=addCategory, removeCategories=removeCategories)
+ filename = getFilename(photoInfo)
+ flinfoDescription = getFlinfoDescription(photo_id)
+ photoDescription = buildDescription(flinfoDescription, flickrreview, reviewer, override, addCategory, removeCategories)
#wikipedia.output(photoDescription)
if not autonomous:
(newPhotoDescription, newFilename, skip)=Tkdialog(photoDescription, photo, filename).run()
@@ -256,7 +245,7 @@
#Would be nice to check before I upload if the file is already at Commons
#Not that important for this program, but maybe for derived programs
if not skip:
- bot = upload.UploadRobot(url=photoUrl, description=newPhotoDescription, useFilename=newFilename, keepFilename=True, verifyDescription=False)
+ bot = upload.UploadRobot(photoUrl, description=newPhotoDescription, useFilename=newFilename, keepFilename=True, verifyDescription=False)
bot.upload_image(debug=False)
return 1
return 0
@@ -364,12 +353,12 @@
# http://www.flickr.com/services/api/flickr.groups.pools.getPhotos.html
# Get the photos in a group
- if(group_id):
+ if group_id:
#First get the total number of photos in the group
photos = flickr.groups_pools_getPhotos(group_id=group_id, user_id=user_id, tags=tags, per_page='100', page='1')
pages = photos.find('photos').attrib['pages']
- for i in range(1, int(pages)+1):
+ for i in range(1, int(pages) + 1):
gotPhotos = False
while not gotPhotos:
try:
@@ -390,7 +379,7 @@
# http://www.flickr.com/services/api/flickr.photosets.getPhotos.html
# Get the photos in a photoset
- elif(photoset_id):
+ elif photoset_id:
photos = flickr.photosets_getPhotos(photoset_id=photoset_id, per_page='100', page='1')
pages = photos.find('photoset').attrib['pages']
@@ -416,7 +405,7 @@
# http://www.flickr.com/services/api/flickr.people.getPublicPhotos.html
# Get the (public) photos uploaded by a user
- elif(user_id):
+ elif user_id:
photos = flickr.people_getPublicPhotos(user_id=user_id, per_page='100', page='1')
pages = photos.find('photos').attrib['pages']
#flickrapi.exceptions.FlickrError
@@ -426,10 +415,10 @@
try:
for photo in flickr.people_getPublicPhotos(user_id=user_id, per_page='100', page=i).find('photos').getchildren():
gotPhotos = True
- if photo.attrib['id']==start_id:
+ if photo.attrib['id'] == start_id:
found_start_id=True
if found_start_id:
- if photo.attrib['id']==end_id:
+ if photo.attrib['id'] == end_id:
wikipedia.output('Found end_id')
return
else:
@@ -461,7 +450,7 @@
#imagerecat.initLists()
#Get the api key
- if(config.flickr['api_key']):
+ if config.flickr['api_key']:
flickr = flickrapi.FlickrAPI(config.flickr['api_key'])
else:
wikipedia.output('Flickr api key not found! Get yourself an api key')
@@ -553,9 +542,9 @@
autonomous = True
if user_id or group_id or photoset_id:
- for photo_id in getPhotos(flickr=flickr, user_id=user_id, group_id=group_id, photoset_id=photoset_id, start_id=start_id, end_id=end_id, tags=tags):
- uploadedPhotos = uploadedPhotos + processPhoto(flickr=flickr, photo_id=photo_id, flickrreview=flickrreview, reviewer=reviewer, override=override, addCategory=addCategory, removeCategories=removeCategories, autonomous=autonomous)
- totalPhotos = totalPhotos + 1
+ for photo_id in getPhotos(flickr, user_id, group_id, photoset_id, start_id, end_id, tags):
+ uploadedPhotos += processPhoto(flickr, photo_id, flickrreview, reviewer, override, addCategory, removeCategories, autonomous)
+ totalPhotos += 1
else:
usage()
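
One more note on the getFilename hunks above: the collision handling probes numbered title variants until a free one turns up. The same pattern pulled out into a generic sketch (page_exists is a hypothetical stand-in for wikipedia.Page(site, ...).exists()):

    def free_filename(username, title, page_exists):
        # Try the plain title first, then "... (1).jpg", "... (2).jpg", ...
        if not page_exists(u'File:Flickr - %s - %s.jpg' % (username, title)):
            return u'Flickr - %s - %s.jpg' % (username, title)
        i = 1
        while page_exists(u'File:Flickr - %s - %s (%s).jpg' % (username, title, i)):
            i += 1
        return u'Flickr - %s - %s (%s).jpg' % (username, title, i)
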