Revision: 7191
Author: multichill
Date: 2009-08-31 15:40:11 +0000 (Mon, 31 Aug 2009)
Log Message:
-----------
More comments and prevent naming collisions
Modified Paths:
--------------
trunk/pywikipedia/flickrripper.py
Modified: trunk/pywikipedia/flickrripper.py
===================================================================
--- trunk/pywikipedia/flickrripper.py 2009-08-31 15:09:31 UTC (rev 7190)
+++ trunk/pywikipedia/flickrripper.py 2009-08-31 15:40:11 UTC (rev 7191)
@@ -42,6 +42,9 @@
def getPhoto(flickr = None, photo_id = ''):
'''
Get the photo info and the photo sizes so we can use these later on
+
+ TODO: Add exception handling
+
'''
photoInfo = flickr.photos_getInfo(photo_id=photo_id)
#xml.etree.ElementTree.dump(photoInfo)
@@ -52,6 +55,8 @@
def isAllowedLicense(photoInfo = None):
'''
Check if the image contains the right license
+
+ TODO: Maybe add more licenses
'''
license = photoInfo.find('photo').attrib['license']
if license == '4' or license == '5':
@@ -72,10 +77,20 @@
return url
def downloadPhoto(photoUrl=''):
+ '''
+ Download the photo and store it in a StringIO.StringIO object.
+
+ TODO: Add exception handling
+ '''
imageFile=urllib.urlopen(photoUrl).read()
return StringIO.StringIO(imageFile)
def findDuplicateImages(photo=None, site=wikipedia.getSite()):
+ '''
+ Takes the photo, calculates the SHA1 hash and asks the MediaWiki API for a list of
duplicates.
+
+ TODO: Add exception handling, use the site parameter instead of always calling wikipedia.getSite()
+ '''
result = []
hashObject = hashlib.sha1()
hashObject.update(photo.getvalue())
@@ -87,7 +102,7 @@
'aisha1' : sha1Hash,
'aiprop' : '',
}
- data = query.GetData(params, wikipedia.getSite(), useAPI = True, encodeTitle =
False)
+ data = query.GetData(params, site=wikipedia.getSite(), useAPI = True, encodeTitle =
False)
for image in data['query']['allimages']:
result.append(image['name'])
return result
@@ -105,6 +120,8 @@
def getFlinfoDescription(photo_id = 0):
'''
Get the description from
http://wikipedia.ramselehof.de/flinfo.php
+
+ TODO: Add exception handling, try a couple of times
'''
parameters = urllib.urlencode({'id' : photo_id, 'raw' :
'on'})
@@ -113,9 +130,11 @@
#print rawDescription.decode('utf-8')
return rawDescription.decode('utf-8')
-def getFilename(photoInfo=None):
+def getFilename(photoInfo=None, site=wikipedia.getSite()):
'''
- Build a good filename for the upload based on the username and the title
+ Build a good filename for the upload based on the username and the title.
+ Prevents naming collisions.
+
'''
username =
photoInfo.find('photo').find('owner').attrib['username']
title = photoInfo.find('photo').find('title').text
@@ -124,9 +143,20 @@
else:
title = u''
- return u'Flickr - %s - %s.jpg' % (username, title)
+ if (wikipedia.Page(title=u'File:Flickr - %s - %s.jpg' % (username, title),
site=wikipedia.getSite()).exists()):
+ i = 1
+ while True:
+ if (wikipedia.Page(title=u'File:Flickr - %s - %s (%s).jpg' %
(username, title, str(i)), site=wikipedia.getSite()).exists()):
+ i = i + 1
+ else:
+ return u'Flickr - %s - %s (%s).jpg' % (username, title, str(i))
+ else:
+ return u'Flickr - %s - %s.jpg' % (username, title)
def cleanUpTitle(title):
+ '''
+ Clean up the title of a potential MediaWiki page. Otherwise the title of the page
might not be allowed by the software.
+ '''
title = title.strip()
title = re.sub("[<{\\[]", "(", title)
@@ -168,6 +198,9 @@
return description
def processPhoto(flickr=None, photo_id=u'', flickrreview=False,
reviewer=u'', override=u''):
+ '''
+ Process a single Flickr photo
+ '''
if(photo_id):
print photo_id
(photoInfo, photoSizes) = getPhoto(flickr=flickr, photo_id=photo_id)
@@ -202,6 +235,9 @@
return 0
class Tkdialog:
+ '''
+ The user dialog.
+ '''
def __init__(self, photoDescription, photo, filename):
self.root=Tk()
#"%dx%d%+d%+d" % (width, height, xoffset, yoffset)
@@ -257,6 +293,9 @@
self.descriptionScrollbar.grid(row=14, column=5)
def getImage(self, photo, width, height):
+ '''
+ Take the StringIO object and build an ImageTk thumbnail
+ '''
image = Image.open(photo)
image.thumbnail((width, height))
imageTk = ImageTk.PhotoImage(image)
@@ -285,8 +324,12 @@
return (self.photoDescription, self.filename, self.skip)
def getPhotos(flickr=None, user_id=u'', group_id=u'',
photoset_id=u'', tags=u''):
+ '''
+ Loop over a set of Flickr photos.
+ '''
result = []
#
http://www.flickr.com/services/api/flickr.groups.pools.getPhotos.html
+ # Get the photos in a group
if(group_id):
#First get the total number of photos in the group
photos = flickr.groups_pools_getPhotos(group_id=group_id, user_id=user_id,
tags=tags, per_page='100', page='1')
@@ -297,6 +340,7 @@
yield photo.attrib['id']
#
http://www.flickr.com/services/api/flickr.photosets.getPhotos.html
+ # Get the photos in a photoset
elif(photoset_id):
photos = flickr.photosets_getPhotos(photoset_id=photoset_id,
per_page='100', page='1')
pages = photos.find('photos').attrib['pages']
@@ -306,6 +350,7 @@
yield photo.attrib['id']
#
http://www.flickr.com/services/api/flickr.people.getPublicPhotos.html
+ # Get the (public) photos uploaded by a user
elif(user_id):
photos = flickr.people_getPublicPhotos(user_id=user_id, per_page='100',
page='1')
pages = photos.find('photos').attrib['pages']
@@ -316,6 +361,11 @@
return
def usage():
+ '''
+ Print usage information
+
+ TODO : Need more.
+ '''
wikipedia.output(u"Flickrripper is a tool to transfer flickr photos to Wikimedia
Commons")
wikipedia.output(u"-group_id:<group_id>\n")
wikipedia.output(u"-photoset_id:<photoset_id>\n")