Revision: 7337 Author: alexsh Date: 2009-09-29 19:20:05 +0000 (Tue, 29 Sep 2009)
Log Message: ----------- flickrripper.py - code cleanup. - findDuplicateImages(): remove API query, same as wikipedia.Site().getFilesFromAnHash().
Modified Paths: -------------- trunk/pywikipedia/flickrripper.py
Modified: trunk/pywikipedia/flickrripper.py =================================================================== --- trunk/pywikipedia/flickrripper.py 2009-09-29 18:27:04 UTC (rev 7336) +++ trunk/pywikipedia/flickrripper.py 2009-09-29 19:20:05 UTC (rev 7337) @@ -84,7 +84,7 @@ else: return False
-def getPhotoUrl(photoSizes=None): +def getPhotoUrl(photoSizes = None): ''' Get the url of the jpg file with the highest resolution ''' @@ -94,7 +94,7 @@ url = size.attrib['source'] return url
-def downloadPhoto(photoUrl=''): +def downloadPhoto(photoUrl = ''): ''' Download the photo and store it in a StringIO.StringIO object.
@@ -103,28 +103,16 @@ imageFile=urllib.urlopen(photoUrl).read() return StringIO.StringIO(imageFile)
-def findDuplicateImages(photo=None, site=wikipedia.getSite()): +def findDuplicateImages(photo = None, site = wikipedia.getSite()): ''' Takes the photo, calculates the SHA1 hash and asks the mediawiki api for a list of duplicates.
TODO: Add exception handling, fix site thing ''' - result = [] hashObject = hashlib.sha1() hashObject.update(photo.getvalue()) - sha1Hash = base64.b16encode(hashObject.digest()) + return site.getFilesFromAnHash(base64.b16encode(hashObject.digest()))
- params = { - 'action' : 'query', - 'list' : 'allimages', - 'aisha1' : sha1Hash, - 'aiprop' : '', - } - data = query.GetData(params, wikipedia.getSite(), encodeTitle = False) - for image in data['query']['allimages']: - result.append(image['name']) - return result - def getTags(photoInfo = None): ''' Get all the tags on a photo @@ -161,10 +149,10 @@ else: title = u''
- if (wikipedia.Page(title=u'File:Flickr - %s - %s.jpg' % (username, title), site=wikipedia.getSite()).exists()): + if wikipedia.Page(site, u'File:Flickr - %s - %s.jpg' % (username, title) ).exists(): i = 1 while True: - if (wikipedia.Page(title=u'File:Flickr - %s - %s (%s).jpg' % (username, title, str(i)), site=wikipedia.getSite()).exists()): + if (wikipedia.Page(site, u'File:Flickr - %s - %s (%s).jpg' % (username, title, str(i))).exists()): i = i + 1 else: return u'Flickr - %s - %s (%s).jpg' % (username, title, str(i)) @@ -203,19 +191,20 @@ ''' description = flinfoDescription
- if(removeCategories): - description = wikipedia.removeCategoryLinks(text=description, site=wikipedia.getSite(u'commons', u'commons')) + if removeCategories: + description = wikipedia.removeCategoryLinks(description, wikipedia.getSite('commons', 'commons'))
- if(override): + if override: description = description.replace(u'{{cc-by-sa-2.0}}\n', u'') description = description.replace(u'{{cc-by-2.0}}\n', u'') description = description.replace(u'{{flickrreview}}\n', u'') description = description.replace(u'{{copyvio|Flickr, licensed as "All Rights Reserved" which is not a free license --~~~~}}\n', u'') description = description.replace(u'=={{int:license}}==', u'=={{int:license}}==\n' + override) - elif(flickrreview): - if(reviewer): + elif flickrreview: + if reviewer: description = description.replace(u'{{flickrreview}}', u'{{flickrreview|' + reviewer + '|{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-{{subst:CURRENTDAY2}}}}') - if(addCategory): + + if addCategory: description = description + u'\n[[Category:' + addCategory + ']]\n' description = description.replace(u'\r\n', u'\n') return description @@ -224,23 +213,23 @@ ''' Process a single Flickr photo ''' - if(photo_id): + if photo_id: print photo_id - (photoInfo, photoSizes) = getPhoto(flickr=flickr, photo_id=photo_id) - if (isAllowedLicense(photoInfo=photoInfo) or override): + (photoInfo, photoSizes) = getPhoto(flickr, photo_id) + if isAllowedLicense(photoInfo) or override: #Get the url of the largest photo - photoUrl = getPhotoUrl(photoSizes=photoSizes) + photoUrl = getPhotoUrl(photoSizes) #Should download the photo only once - photo = downloadPhoto(photoUrl=photoUrl) + photo = downloadPhoto(photoUrl)
#Don't upload duplicate images, should add override option - duplicates = findDuplicateImages(photo=photo) + duplicates = findDuplicateImages(photo) if duplicates: wikipedia.output(u'Found duplicate image at %s' % duplicates.pop()) else: - filename = getFilename(photoInfo=photoInfo) - flinfoDescription = getFlinfoDescription(photo_id=photo_id) - photoDescription = buildDescription(flinfoDescription=flinfoDescription, flickrreview=flickrreview, reviewer=reviewer, override=override, addCategory=addCategory, removeCategories=removeCategories) + filename = getFilename(photoInfo) + flinfoDescription = getFlinfoDescription(photo_id) + photoDescription = buildDescription(flinfoDescription, flickrreview, reviewer, override, addCategory, removeCategories) #wikipedia.output(photoDescription) if not autonomous: (newPhotoDescription, newFilename, skip)=Tkdialog(photoDescription, photo, filename).run() @@ -256,7 +245,7 @@ #Would be nice to check before I upload if the file is already at Commons #Not that important for this program, but maybe for derived programs if not skip: - bot = upload.UploadRobot(url=photoUrl, description=newPhotoDescription, useFilename=newFilename, keepFilename=True, verifyDescription=False) + bot = upload.UploadRobot(photoUrl, description=newPhotoDescription, useFilename=newFilename, keepFilename=True, verifyDescription=False) bot.upload_image(debug=False) return 1 return 0 @@ -364,12 +353,12 @@
# http://www.flickr.com/services/api/flickr.groups.pools.getPhotos.html # Get the photos in a group - if(group_id): + if group_id: #First get the total number of photo's in the group photos = flickr.groups_pools_getPhotos(group_id=group_id, user_id=user_id, tags=tags, per_page='100', page='1') pages = photos.find('photos').attrib['pages']
- for i in range(1, int(pages)+1): + for i in range(1, int(pages) + 1): gotPhotos = False while not gotPhotos: try: @@ -390,7 +379,7 @@
# http://www.flickr.com/services/api/flickr.photosets.getPhotos.html # Get the photos in a photoset - elif(photoset_id): + elif photoset_id: photos = flickr.photosets_getPhotos(photoset_id=photoset_id, per_page='100', page='1') pages = photos.find('photoset').attrib['pages']
@@ -416,7 +405,7 @@
# http://www.flickr.com/services/api/flickr.people.getPublicPhotos.html # Get the (public) photos uploaded by a user - elif(user_id): + elif user_id: photos = flickr.people_getPublicPhotos(user_id=user_id, per_page='100', page='1') pages = photos.find('photos').attrib['pages'] #flickrapi.exceptions.FlickrError @@ -426,10 +415,10 @@ try: for photo in flickr.people_getPublicPhotos(user_id=user_id, per_page='100', page=i).find('photos').getchildren(): gotPhotos = True - if photo.attrib['id']==start_id: + if photo.attrib['id'] == start_id: found_start_id=True if found_start_id: - if photo.attrib['id']==end_id: + if photo.attrib['id'] == end_id: wikipedia.output('Found end_id') return else: @@ -461,7 +450,7 @@ #imagerecat.initLists()
#Get the api key - if(config.flickr['api_key']): + if config.flickr['api_key']: flickr = flickrapi.FlickrAPI(config.flickr['api_key']) else: wikipedia.output('Flickr api key not found! Get yourself an api key') @@ -553,9 +542,9 @@ autonomous = True
if user_id or group_id or photoset_id: - for photo_id in getPhotos(flickr=flickr, user_id=user_id, group_id=group_id, photoset_id=photoset_id, start_id=start_id, end_id=end_id, tags=tags): - uploadedPhotos = uploadedPhotos + processPhoto(flickr=flickr, photo_id=photo_id, flickrreview=flickrreview, reviewer=reviewer, override=override, addCategory=addCategory, removeCategories=removeCategories, autonomous=autonomous) - totalPhotos = totalPhotos + 1 + for photo_id in getPhotos(flickr, user_id, group_id, photoset_id, start_id, end_id, tags): + uploadedPhotos += processPhoto(flickr, photo_id, flickrreview, reviewer, override, addCategory, removeCategories, autonomous) + totalPhotos += 1 else: usage()
pywikipedia-svn@lists.wikimedia.org