Revision: 7194
Author: purodha
Date: 2009-08-31 21:52:10 +0000 (Mon, 31 Aug 2009)
Log Message:
-----------
Applying patch #2809532 by nakor-wikipedia, fixing the interwiki sort order on the fy.wikipedia.
Modified Paths:
--------------
trunk/pywikipedia/families/wikipedia_family.py
Modified: trunk/pywikipedia/families/wikipedia_family.py
===================================================================
--- trunk/pywikipedia/families/wikipedia_family.py 2009-08-31 20:49:50 UTC (rev 7193)
+++ trunk/pywikipedia/families/wikipedia_family.py 2009-08-31 21:52:10 UTC (rev 7194)
@@ -928,6 +928,10 @@
# Order for fy: alphabetical by code, but y counts as i
def fycomp(x,y):
+ if x == 'nb':
+ x = 'no'
+ if y == 'nb':
+ y = 'no'
x = x.replace("y","i")+x.count("y")*"!"
y = y.replace("y","i")+y.count("y")*"!"
return cmp(x,y)
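For reference, the comparator reads as follows once the patch is applied; this is a standalone sketch (Python 2, matching the codebase; the sample code list is hypothetical). 'nb' is compared as if it were 'no', and every 'y' is compared as 'i'; the '!' markers appended per 'y' keep a code containing 'y' sorted just after the same code spelled with 'i'.

def fycomp(x, y):
    if x == 'nb':
        x = 'no'
    if y == 'nb':
        y = 'no'
    x = x.replace("y", "i") + x.count("y") * "!"
    y = y.replace("y", "i") + y.count("y") * "!"
    return cmp(x, y)

codes = ['is', 'yi', 'nb', 'ia']        # hypothetical sample codes
print sorted(codes, cmp=fycomp)         # ['ia', 'yi', 'is', 'nb']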
Revision: 7192
Author: alexsh
Date: 2009-08-31 20:00:38 +0000 (Mon, 31 Aug 2009)
Log Message:
-----------
Page().getVersionHistory():
* Remove duplicated code.
* Use a temporary list to collect revisions, then dump it into self._versionhistory or self._versionhistoryearliest once processing is done.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-08-31 15:40:11 UTC (rev 7191)
+++ trunk/pywikipedia/wikipedia.py 2009-08-31 20:00:38 UTC (rev 7192)
@@ -2283,7 +2283,6 @@
unless getAll is True.
"""
- site = self.site()
# regular expression matching one edit in the version history.
# results will have 4 groups: oldid, edit date/time, user name, and edit
@@ -2297,6 +2296,7 @@
startFromPage = None
thisHistoryDone = False
skip = False # Used in determining whether we need to skip the first page
+ dataQuery = []
RLinkToNextPage = re.compile('&offset=(.*?)&')
@@ -2309,6 +2309,7 @@
# Cause a reload, or at least make the loop run
thisHistoryDone = False
skip = True
+ dataQuery = self._versionhistoryearliest
else:
thisHistoryDone = True
elif not hasattr(self, '_versionhistory') or forceReload:
@@ -2317,11 +2318,12 @@
# Cause a reload, or at least make the loop run
thisHistoryDone = False
skip = True
+ dataQuery = self._versionhistory
else:
thisHistoryDone = True
while not thisHistoryDone:
- path = site.family.version_history_address(self.site().language(), self.urlname(), revCount)
+ path = self.site().family.version_history_address(self.site().language(), self.urlname(), revCount)
if reverseOrder:
path += '&dir=prev'
@@ -2341,108 +2343,60 @@
else:
output(u'Getting version history of %s' % self.aslink(forceInterwiki = True))
- txt = site.getUrl(path)
+ txt = self.site().getUrl(path)
# save a copy of the text
self_txt = txt
- if reverseOrder:
- # If we are getting all of the page history...
- if getAll:
- if len(self._versionhistoryearliest) == 0:
- matchObj = RLinkToNextPage.search(self_txt)
- if matchObj:
- startFromPage = matchObj.group(1)
- else:
- thisHistoryDone = True
-
- edits = editR.findall(self_txt)
- edits.reverse()
- for edit in edits:
- self._versionhistoryearliest.append(edit)
- if len(edits) < revCount:
- thisHistoryDone = True
- else:
- if not skip:
- edits = editR.findall(self_txt)
- edits.reverse()
- for edit in edits:
- self._versionhistoryearliest.append(edit)
- if len(edits) < revCount:
- thisHistoryDone = True
-
- matchObj = RLinkToNextPage.search(self_txt)
- if matchObj:
- startFromPage = matchObj.group(1)
- else:
- thisHistoryDone = True
-
- else:
- # Skip the first page only,
- skip = False
-
- matchObj = RLinkToNextPage.search(self_txt)
- if matchObj:
- startFromPage = matchObj.group(1)
- else:
- thisHistoryDone = True
+ # If we are getting all of the page history...
+ if getAll:
+ #Find the nextPage link; if it does not exist, this is the last history page
+ matchObj = RLinkToNextPage.search(self_txt)
+ if matchObj:
+ startFromPage = matchObj.group(1)
else:
- # If we are not getting all, we stop on the first page.
- for edit in editR.findall(self_txt):
- self._versionhistoryearliest.append(edit)
- self._versionhistoryearliest.reverse()
-
thisHistoryDone = True
- else:
- # If we are getting all of the page history...
- if getAll:
- if len(self._versionhistory) == 0:
- matchObj = RLinkToNextPage.search(self_txt)
- if matchObj:
- startFromPage = matchObj.group(1)
- else:
- thisHistoryDone = True
+ if len(dataQuery) == 0:
+ edits = editR.findall(self_txt)
+ if reverseOrder:
+ edits.reverse()
+ #for edit in edits:
+ dataQuery.extend([edit for edit in edits])
+ if len(edits) < revCount:
+ thisHistoryDone = True
+ else:
+ if not skip:
edits = editR.findall(self_txt)
- for edit in edits:
- self._versionhistory.append(edit)
+ if reverseOrder:
+ edits.reverse()
+ #for edit in edits:
+ dataQuery.extend([edit for edit in edits])
if len(edits) < revCount:
thisHistoryDone = True
else:
- if not skip:
- edits = editR.findall(self_txt)
- for edit in edits:
- self._versionhistory.append(edit)
- if len(edits) < revCount:
- thisHistoryDone = True
+ # Skip the first page only,
+ skip = False
+ else:
+ # If we are not getting all, we stop on the first page.
+ #for edit in editR.findall(self_txt):
+ dataQuery.extend([edit for edit in editR.findall(self_txt)] )
+ if reverseOrder:
+ dataQuery.reverse()
+ thisHistoryDone = True
- matchObj = RLinkToNextPage.findall(self_txt)
- if len(matchObj) >= 2:
- startFromPage = matchObj[1]
- else:
- thisHistoryDone = True
- else:
- # Skip the first page only,
- skip = False
-
- matchObj = RLinkToNextPage.search(self_txt)
- if matchObj:
- startFromPage = matchObj.group(1)
- else:
- thisHistoryDone = True
- else:
- # If we are not getting all, we stop on the first page.
- for edit in editR.findall(self_txt):
- self._versionhistory.append(edit)
-
- thisHistoryDone = True
-
if reverseOrder:
# Return only revCount edits, even if the version history is extensive
+ if dataQuery != []:
+ self._versionhistoryearliest = dataQuery
+ del dataQuery
if len(self._versionhistoryearliest) > revCount and not getAll:
return self._versionhistoryearliest[0:revCount]
return self._versionhistoryearliest
+ if dataQuery != []:
+ self._versionhistory = dataQuery
+ del dataQuery
# Return only revCount edits, even if the version history is extensive
if len(self._versionhistory) > revCount and not getAll:
return self._versionhistory[0:revCount]
Revision: 7191
Author: multichill
Date: 2009-08-31 15:40:11 +0000 (Mon, 31 Aug 2009)
Log Message:
-----------
More comments and prevent naming collisions
Modified Paths:
--------------
trunk/pywikipedia/flickrripper.py
Modified: trunk/pywikipedia/flickrripper.py
===================================================================
--- trunk/pywikipedia/flickrripper.py 2009-08-31 15:09:31 UTC (rev 7190)
+++ trunk/pywikipedia/flickrripper.py 2009-08-31 15:40:11 UTC (rev 7191)
@@ -42,6 +42,9 @@
def getPhoto(flickr = None, photo_id = ''):
'''
Get the photo info and the photo sizes so we can use these later on
+
+ TODO: Add exception handling
+
'''
photoInfo = flickr.photos_getInfo(photo_id=photo_id)
#xml.etree.ElementTree.dump(photoInfo)
@@ -52,6 +55,8 @@
def isAllowedLicense(photoInfo = None):
'''
Check if the image contains the right license
+
+ TODO: Maybe add more licenses
'''
license = photoInfo.find('photo').attrib['license']
if license == '4' or license == '5':
@@ -72,10 +77,20 @@
return url
def downloadPhoto(photoUrl=''):
+ '''
+ Download the photo and store it in a StringIO.StringIO object.
+
+ TODO: Add exception handling
+ '''
imageFile=urllib.urlopen(photoUrl).read()
return StringIO.StringIO(imageFile)
def findDuplicateImages(photo=None, site=wikipedia.getSite()):
+ '''
+ Takes the photo, calculates the SHA1 hash and asks the MediaWiki API for a list of duplicates.
+
+ TODO: Add exception handling, fix site thing
+ '''
result = []
hashObject = hashlib.sha1()
hashObject.update(photo.getvalue())
@@ -87,7 +102,7 @@
'aisha1' : sha1Hash,
'aiprop' : '',
}
- data = query.GetData(params, wikipedia.getSite(), useAPI = True, encodeTitle = False)
+ data = query.GetData(params, site=wikipedia.getSite(), useAPI = True, encodeTitle = False)
for image in data['query']['allimages']:
result.append(image['name'])
return result
@@ -105,6 +120,8 @@
def getFlinfoDescription(photo_id = 0):
'''
Get the description from http://wikipedia.ramselehof.de/flinfo.php
+
+ TODO: Add exception handling, try a couple of times
'''
parameters = urllib.urlencode({'id' : photo_id, 'raw' : 'on'})
@@ -113,9 +130,11 @@
#print rawDescription.decode('utf-8')
return rawDescription.decode('utf-8')
-def getFilename(photoInfo=None):
+def getFilename(photoInfo=None, site=wikipedia.getSite()):
'''
- Build a good filename for the upload based on the username and the title
+ Build a good filename for the upload based on the username and the title.
+ Prevents naming collisions.
+
'''
username = photoInfo.find('photo').find('owner').attrib['username']
title = photoInfo.find('photo').find('title').text
@@ -124,9 +143,20 @@
else:
title = u''
- return u'Flickr - %s - %s.jpg' % (username, title)
+ if (wikipedia.Page(title=u'File:Flickr - %s - %s.jpg' % (username, title), site=wikipedia.getSite()).exists()):
+ i = 1
+ while True:
+ if (wikipedia.Page(title=u'File:Flickr - %s - %s (%s).jpg' % (username, title, str(i)), site=wikipedia.getSite()).exists()):
+ i = i + 1
+ else:
+ return u'Flickr - %s - %s (%s).jpg' % (username, title, str(i))
+ else:
+ return u'Flickr - %s - %s.jpg' % (username, title)
def cleanUpTitle(title):
+ '''
+ Clean up the title of a potential MediaWiki page; otherwise the software might not allow the title.
+ '''
title = title.strip()
title = re.sub("[<{\\[]", "(", title)
@@ -168,6 +198,9 @@
return description
def processPhoto(flickr=None, photo_id=u'', flickrreview=False, reviewer=u'', override=u''):
+ '''
+ Process a single Flickr photo
+ '''
if(photo_id):
print photo_id
(photoInfo, photoSizes) = getPhoto(flickr=flickr, photo_id=photo_id)
@@ -202,6 +235,9 @@
return 0
class Tkdialog:
+ '''
+ The user dialog.
+ '''
def __init__(self, photoDescription, photo, filename):
self.root=Tk()
#"%dx%d%+d%+d" % (width, height, xoffset, yoffset)
@@ -257,6 +293,9 @@
self.descriptionScrollbar.grid(row=14, column=5)
def getImage(self, photo, width, height):
+ '''
+ Take the StringIO object and build an ImageTk thumbnail
+ '''
image = Image.open(photo)
image.thumbnail((width, height))
imageTk = ImageTk.PhotoImage(image)
@@ -285,8 +324,12 @@
return (self.photoDescription, self.filename, self.skip)
def getPhotos(flickr=None, user_id=u'', group_id=u'', photoset_id=u'', tags=u''):
+ '''
+ Loop over a set of Flickr photos.
+ '''
result = []
# http://www.flickr.com/services/api/flickr.groups.pools.getPhotos.html
+ # Get the photos in a group
if(group_id):
#First get the total number of photos in the group
photos = flickr.groups_pools_getPhotos(group_id=group_id, user_id=user_id, tags=tags, per_page='100', page='1')
@@ -297,6 +340,7 @@
yield photo.attrib['id']
# http://www.flickr.com/services/api/flickr.photosets.getPhotos.html
+ # Get the photos in a photoset
elif(photoset_id):
photos = flickr.photosets_getPhotos(photoset_id=photoset_id, per_page='100', page='1')
pages = photos.find('photos').attrib['pages']
@@ -306,6 +350,7 @@
yield photo.attrib['id']
# http://www.flickr.com/services/api/flickr.people.getPublicPhotos.html
+ # Get the (public) photos uploaded by a user
elif(user_id):
photos = flickr.people_getPublicPhotos(user_id=user_id, per_page='100', page='1')
pages = photos.find('photos').attrib['pages']
@@ -316,6 +361,11 @@
return
def usage():
+ '''
+ Print usage information
+
+ TODO : Need more.
+ '''
wikipedia.output(u"Flickrripper is a tool to transfer flickr photos to Wikimedia Commons")
wikipedia.output(u"-group_id:<group_id>\n")
wikipedia.output(u"-photoset_id:<photoset_id>\n")
Revision: 7189
Author: multichill
Date: 2009-08-31 11:00:07 +0000 (Mon, 31 Aug 2009)
Log Message:
-----------
Raised the delay to 2 hours.
Modified Paths:
--------------
trunk/pywikipedia/imageuncat.py
Modified: trunk/pywikipedia/imageuncat.py
===================================================================
--- trunk/pywikipedia/imageuncat.py 2009-08-31 09:30:38 UTC (rev 7188)
+++ trunk/pywikipedia/imageuncat.py 2009-08-31 11:00:07 UTC (rev 7189)
@@ -1254,7 +1254,7 @@
return pagegenerators.PagesFromTitlesGenerator(result, site)
-def recentChanges(site = None, delay=60, block=70):
+def recentChanges(site = None, delay=0, block=70):
'''
Return a pagegenerator containing all the images edited in a certain timespan.
The delay is the number of minutes to wait, and the block is the timespan to return images in.
@@ -1348,7 +1348,7 @@
if arg.startswith('-yesterday'):
generator = uploadedYesterday(site)
elif arg.startswith('-recentchanges'):
- generator = recentChanges(site)
+ generator = recentChanges(site=site, delay=120)
else:
genFactory.handleArg(arg)
Revision: 7185
Author: a_engels
Date: 2009-08-28 20:52:34 +0000 (Fri, 28 Aug 2009)
Log Message:
-----------
Adding command-line option -lack, to work only on pages lacking an interwiki link to a specified language.
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-08-28 17:09:46 UTC (rev 7184)
+++ trunk/pywikipedia/interwiki.py 2009-08-28 20:52:34 UTC (rev 7185)
@@ -81,6 +81,11 @@
-skipauto use to skip all pages that can be translated automatically,
like dates, centuries, months, etc. (note: without ending colon)
+ -lack: used as -lack:xx with xx a language code: only work on pages
+ without links to language xx. You can also add a number nn
+ like -lack:xx:nn, so that the bot only works on pages with
+ at least nn interwiki links (the default value for nn is 1).
+
These arguments are useful to provide hints to the bot:
-hint: used as -hint:de:Anweisung to give the robot a hint
@@ -506,6 +511,8 @@
nobackonly = False
hintsareright = False
contentsondisk = config.interwiki_contents_on_disk
+ lacklanguage = None
+ minlinks = 0
class StoredPage(wikipedia.Page):
"""
@@ -745,6 +752,7 @@
self.untranslated = None
self.hintsAsked = False
self.forcedStop = False
+ self.workonme = True
def getFoundDisambig(self, site):
"""
@@ -1007,6 +1015,9 @@
f.close()
def askForHints(self, counter):
+ if not self.workonme:
+ # Do not ask hints for pages that we don't work on anyway
+ return
if (self.untranslated or globalvar.askhints) and not self.hintsAsked and not self.originPage.isRedirectPage():
# Only once!
self.hintsAsked = True
@@ -1138,6 +1149,13 @@
if globalvar.untranslatedonly:
# Ignore the interwiki links.
iw = ()
+ if globalvar.lacklanguage:
+ if globalvar.lacklanguage in [link.site().language() for link in iw]:
+ iw = ()
+ self.workonme = False
+ if len(iw) < globalvar.minlinks:
+ iw = ()
+ self.workonme = False
elif globalvar.autonomous and duplicate:
@@ -1324,6 +1342,8 @@
be told to make another get request first."""
if not self.isDone():
raise "Bugcheck: finish called before done"
+ if not self.workonme:
+ return
if self.forcedStop:
wikipedia.output(u"======Aborted processing %s======" % self.originPage.aslink(True))
return
@@ -2080,6 +2100,13 @@
globalvar.minsubjects = int(arg[7:])
elif arg.startswith('-query:'):
globalvar.maxquerysize = int(arg[7:])
+ elif arg.startswith('-lack:'):
+ remainder = arg[6:].split(':')
+ globalvar.lacklanguage = remainder[0]
+ if len(remainder) > 1:
+ globalvar.minlinks = int(remainder[1])
+ else:
+ globalvar.minlinks = 1
elif arg == '-back':
globalvar.nobackonly = True
else:
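For clarity, the new '-lack' parsing as a standalone sketch (variable names from the diff; the surrounding option loop is omitted):

def parseLack(arg):
    # '-lack:xx' or '-lack:xx:nn'; nn defaults to 1.
    remainder = arg[6:].split(':')
    lacklanguage = remainder[0]
    if len(remainder) > 1:
        minlinks = int(remainder[1])
    else:
        minlinks = 1
    return lacklanguage, minlinks

print parseLack('-lack:de')      # -> ('de', 1)
print parseLack('-lack:de:3')    # -> ('de', 3)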