Revision: 7191
Author:   multichill
Date:     2009-08-31 15:40:11 +0000 (Mon, 31 Aug 2009)
Log Message:
-----------
More comments and prevent naming collisions
Modified Paths:
--------------
    trunk/pywikipedia/flickrripper.py
Modified: trunk/pywikipedia/flickrripper.py
===================================================================
--- trunk/pywikipedia/flickrripper.py	2009-08-31 15:09:31 UTC (rev 7190)
+++ trunk/pywikipedia/flickrripper.py	2009-08-31 15:40:11 UTC (rev 7191)
@@ -42,6 +42,9 @@
 def getPhoto(flickr = None, photo_id = ''):
     '''
     Get the photo info and the photo sizes so we can use these later on
+
+    TODO: Add exception handling
+
     '''
     photoInfo = flickr.photos_getInfo(photo_id=photo_id)
     #xml.etree.ElementTree.dump(photoInfo)
@@ -52,6 +55,8 @@
 def isAllowedLicense(photoInfo = None):
     '''
     Check if the image contains the right license
+
+    TODO: Maybe add more licenses
     '''
     license = photoInfo.find('photo').attrib['license']
     if license == '4' or license == '5':
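The exception-handling TODO above could look roughly like the sketch below. It assumes the flickrapi package raises flickrapi.FlickrError on failed calls; the wrapper name getPhotoSafe, the retry count and the sleep interval are illustrative and not part of this revision:

    import time

    import flickrapi

    def getPhotoSafe(flickr, photo_id, retries=3):
        # Illustrative wrapper: retry the two Flickr API calls a few times
        # and return None when Flickr keeps failing instead of crashing.
        for attempt in range(retries):
            try:
                photoInfo = flickr.photos_getInfo(photo_id=photo_id)
                photoSizes = flickr.photos_getSizes(photo_id=photo_id)
                return (photoInfo, photoSizes)
            except flickrapi.FlickrError:
                # The photo may be deleted or private, or the API may be
                # flaky; back off briefly and try again.
                time.sleep(5)
        return None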
@@ -72,10 +77,20 @@
     return url
 
 def downloadPhoto(photoUrl=''):
+    '''
+    Download the photo and store it in a StringIO.StringIO object.
+
+    TODO: Add exception handling
+    '''
     imageFile=urllib.urlopen(photoUrl).read()
     return StringIO.StringIO(imageFile)
 
 def findDuplicateImages(photo=None, site=wikipedia.getSite()):
+    '''
+    Takes the photo, calculates the SHA1 hash and asks the mediawiki api for a list of duplicates.
+
+    TODO: Add exception handling, fix site thing
+    '''
     result = []
     hashObject = hashlib.sha1()
     hashObject.update(photo.getvalue())
@@ -87,7 +102,7 @@
         'aisha1' : sha1Hash,
         'aiprop' : '',
         }
-    data = query.GetData(params, wikipedia.getSite(), useAPI = True, encodeTitle = False)
+    data = query.GetData(params, site=wikipedia.getSite(), useAPI = True, encodeTitle = False)
     for image in data['query']['allimages']:
         result.append(image['name'])
     return result
@@ -105,6 +120,8 @@
 def getFlinfoDescription(photo_id = 0):
     '''
     Get the description from http://wikipedia.ramselehof.de/flinfo.php
+
+    TODO: Add exception handling, try a couple of times
     '''
     parameters = urllib.urlencode({'id' : photo_id, 'raw' : 'on'})
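The duplicate check that goes through query.GetData above boils down to a single MediaWiki API request: list=allimages filtered by the SHA-1 hash of the downloaded bytes. A rough standalone equivalent, talking to the Commons API endpoint directly with plain urllib (the helper name and the use of the json module are illustrative, not part of the script):

    import hashlib
    import json
    import urllib

    def findDuplicatesStandalone(imageBytes):
        # Hash the downloaded bytes and ask the Commons API for files
        # that already have exactly this SHA-1 hash.
        sha1Hash = hashlib.sha1(imageBytes).hexdigest()
        params = urllib.urlencode({
            'action' : 'query',
            'list'   : 'allimages',
            'aisha1' : sha1Hash,
            'format' : 'json',
        })
        reply = urllib.urlopen('https://commons.wikimedia.org/w/api.php?' + params)
        data = json.loads(reply.read())
        # Every hit is a file on Commons with the same contents.
        return [image['name'] for image in data['query']['allimages']]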
@@ -113,9 +130,11 @@
     #print rawDescription.decode('utf-8')
     return rawDescription.decode('utf-8')
 
-def getFilename(photoInfo=None):
+def getFilename(photoInfo=None, site=wikipedia.getSite()):
     '''
-    Build a good filename for the upload based on the username and the title
+    Build a good filename for the upload based on the username and the title.
+    Prevents naming collisions.
+
     '''
     username = photoInfo.find('photo').find('owner').attrib['username']
     title = photoInfo.find('photo').find('title').text
@@ -124,9 +143,20 @@
     else:
         title = u''
 
-    return u'Flickr - %s - %s.jpg' % (username, title)
+    if (wikipedia.Page(title=u'File:Flickr - %s - %s.jpg' % (username, title), site=wikipedia.getSite()).exists()):
+        i = 1
+        while True:
+            if (wikipedia.Page(title=u'File:Flickr - %s - %s (%s).jpg' % (username, title, str(i)), site=wikipedia.getSite()).exists()):
+                i = i + 1
+            else:
+                return u'Flickr - %s - %s (%s).jpg' % (username, title, str(i))
+    else:
+        return u'Flickr - %s - %s.jpg' % (username, title)
 
 def cleanUpTitle(title):
+    '''
+    Clean up the title of a potential mediawiki page. Otherwise the title of the page might not be allowed by the software.
+    '''
     title = title.strip()
 
     title = re.sub("[<{\[]", "(", title)
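The collision handling added to getFilename above can be read as the compact sketch below. It reuses the same wikipedia.Page(...).exists() calls as the patch; the helper name freeFilename is illustrative, and unlike the patch it threads the new site parameter through instead of calling wikipedia.getSite() again:

    import wikipedia  # the old pywikipedia framework module this script already imports

    def freeFilename(username, title, site=None):
        # Keep the plain name when it is still free, otherwise append an
        # increasing " (n)" suffix until an unused filename is found.
        if site is None:
            site = wikipedia.getSite()
        base = u'Flickr - %s - %s' % (username, title)
        if not wikipedia.Page(title=u'File:%s.jpg' % base, site=site).exists():
            return u'%s.jpg' % base
        i = 1
        while wikipedia.Page(title=u'File:%s (%s).jpg' % (base, str(i)), site=site).exists():
            i += 1
        return u'%s (%s).jpg' % (base, str(i))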
@@ -168,6 +198,9 @@
     return description
 
 def processPhoto(flickr=None, photo_id=u'', flickrreview=False, reviewer=u'', override=u''):
+    '''
+    Process a single Flickr photo
+    '''
     if(photo_id):
         print photo_id
     (photoInfo, photoSizes) = getPhoto(flickr=flickr, photo_id=photo_id)
@@ -202,6 +235,9 @@
     return 0
 
 class Tkdialog:
+    '''
+    The user dialog.
+    '''
     def __init__(self, photoDescription, photo, filename):
         self.root=Tk()
         #"%dx%d%+d%+d" % (width, height, xoffset, yoffset)
@@ -257,6 +293,9 @@
         self.descriptionScrollbar.grid(row=14, column=5)
 
     def getImage(self, photo, width, height):
+        '''
+        Take the StringIO object and build an imageTK thumbnail
+        '''
         image = Image.open(photo)
         image.thumbnail((width, height))
         imageTk = ImageTk.PhotoImage(image)
@@ -285,8 +324,12 @@
         return (self.photoDescription, self.filename, self.skip)
 
 def getPhotos(flickr=None, user_id=u'', group_id=u'', photoset_id=u'', tags=u''):
+    '''
+    Loop over a set of Flickr photos.
+    '''
     result = []
     # http://www.flickr.com/services/api/flickr.groups.pools.getPhotos.html
+    # Get the photos in a group
     if(group_id):
         #First get the total number of photo's in the group
         photos = flickr.groups_pools_getPhotos(group_id=group_id, user_id=user_id, tags=tags, per_page='100', page='1')
@@ -297,6 +340,7 @@
                 yield photo.attrib['id']
 
     # http://www.flickr.com/services/api/flickr.photosets.getPhotos.html
+    # Get the photos in a photoset
     elif(photoset_id):
         photos = flickr.photosets_getPhotos(photoset_id=photoset_id, per_page='100', page='1')
         pages = photos.find('photos').attrib['pages']
@@ -306,6 +350,7 @@
                 yield photo.attrib['id']
 
     # http://www.flickr.com/services/api/flickr.people.getPublicPhotos.html
+    # Get the (public) photos uploaded by a user
     elif(user_id):
         photos = flickr.people_getPublicPhotos(user_id=user_id, per_page='100', page='1')
         pages = photos.find('photos').attrib['pages']
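All three branches of getPhotos follow the same pagination pattern: ask for the first page with per_page='100', read the pages attribute, then walk every page and yield the photo ids. A rough sketch of that pattern for the group case (the helper name and the findall('photo') iteration over the ElementTree result are assumptions; the script itself may walk the result differently):

    def iterGroupPhotoIds(flickr, group_id, user_id=u'', tags=u''):
        # Illustrative helper: walk every result page of a Flickr group pool
        # and yield the photo ids one by one.
        photos = flickr.groups_pools_getPhotos(group_id=group_id, user_id=user_id,
                                               tags=tags, per_page='100', page='1')
        pages = int(photos.find('photos').attrib['pages'])
        for page in range(1, pages + 1):
            photos = flickr.groups_pools_getPhotos(group_id=group_id, user_id=user_id,
                                                   tags=tags, per_page='100', page=str(page))
            for photo in photos.find('photos').findall('photo'):
                yield photo.attrib['id']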
@@ -316,6 +361,11 @@
     return
 
 def usage():
+    '''
+    Print usage information
+
+    TODO : Need more.
+    '''
     wikipedia.output(u"Flickrripper is a tool to transfer flickr photos to Wikimedia Commons")
     wikipedia.output(u"-group_id:<group_id>\n")
     wikipedia.output(u"-photoset_id:<photoset_id>\n")
pywikipedia-svn@lists.wikimedia.org