http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11718
Revision: 11718
Author: drtrigon
Date: 2013-07-07 09:43:39 +0000 (Sun, 07 Jul 2013)
Log Message:
-----------
bug fix; enable import of 'Pillow' instead of 'PIL', see http://fedoraproject.org/wiki/Features/Pillow
improvement; enable usage of 'XMP' data from exiftool in order to get file history for categorization
Modified Paths:
--------------
trunk/pywikipedia/catimages.py
Modified: trunk/pywikipedia/catimages.py =================================================================== --- trunk/pywikipedia/catimages.py 2013-07-07 09:39:57 UTC (rev 11717) +++ trunk/pywikipedia/catimages.py 2013-07-07 09:43:39 UTC (rev 11718) @@ -50,7 +50,11 @@ import re, urllib2, os, locale, sys, datetime, math, shutil, mimetypes, shelve import StringIO, json # fallback: simplejson from subprocess import Popen, PIPE -import Image, imghdr +try: + import Image # classic 'PIL' +except ImportError: + from PIL import Image # new 'PIL' fork 'Pillow' (fedora 19) +import imghdr #import ImageFilter
scriptdir = os.path.dirname(sys.argv[0]) @@ -58,6 +62,7 @@ scriptdir = os.path.abspath(os.path.join(os.curdir, scriptdir))
# additional python packages (non-default but common) +sys.exc_clear() try: import numpy as np from scipy import ndimage, fftpack#, signal @@ -213,10 +218,6 @@ #'Dimensions': tuple(exif['ImageSize'].split(u'x')) if 'ImageSize' in exif else (None, None),} #'Mode': exif['ColorType'], }
-# TODO: if '_detect_History' is not needed here, moveit back into _JpegFile !!! - #print "self._detect_History()" - #print self._detect_History() - ## https://pypi.python.org/pypi/hachoir-metadata (needs 'core' and 'parser') # #from hachoir_core.error import HachoirError @@ -333,46 +334,7 @@
return self._buffer_EXIF
- def _detect_History(self): - res = self._util_get_DataTags_EXIF()
- #a = [] - #for k in res.keys(): - # if 'history' in k.lower(): - # a.append( k ) - #for item in sorted(a): - # print item - # http://tilloy.net/dev/pyexiv2/api.html#pyexiv2.xmp.XmpTag - #print [getattr(res['Xmp.xmpMM.History'], item) for item in ['key', 'type', 'name', 'title', 'description', 'raw_value', 'value', ]] - result = [] - i = 1 - while (('Xmp.xmpMM.History[%i]' % i) in res): - data = { 'ID': i, - 'Software': u'-', - 'Timestamp': u'-', - 'Action': u'-', - 'Info': u'-', } - if ('Xmp.xmpMM.History[%i]/stEvt:softwareAgent'%i) in res: - data['Software'] = res['Xmp.xmpMM.History[%i]/stEvt:softwareAgent'%i].value - data['Timestamp'] = res['Xmp.xmpMM.History[%i]/stEvt:when'%i].value - data['Action'] = res['Xmp.xmpMM.History[%i]/stEvt:action'%i].value - if ('Xmp.xmpMM.History[%i]/stEvt:changed'%i) in res: - data['Info'] = res['Xmp.xmpMM.History[%i]/stEvt:changed'%i].value - #print res['Xmp.xmpMM.History[%i]/stEvt:instanceID'%i].value - result.append( data ) - elif ('Xmp.xmpMM.History[%i]/stEvt:parameters'%i) in res: - data['Action'] = res['Xmp.xmpMM.History[%i]/stEvt:action'%i].value - data['Info'] = res['Xmp.xmpMM.History[%i]/stEvt:parameters'%i].value - #data['Action'] = data['Info'].split(' ')[0] - result.append( data ) - else: - pass - i += 1 - - self._features['History'] = result - return - - class _JpegFile(_UnknownFile): # for '_detect_Trained' cascade_files = [(u'Legs', 'haarcascade_lowerbody.xml'), @@ -2258,6 +2220,45 @@ self._features['Faces'] += data return
+ def _detect_History(self): + res = self._util_get_DataTags_EXIF() + + #a = [] + #for k in res.keys(): + # if 'history' in k.lower(): + # a.append( k ) + #for item in sorted(a): + # print item + # http://tilloy.net/dev/pyexiv2/api.html#pyexiv2.xmp.XmpTag + #print [getattr(res['Xmp.xmpMM.History'], item) for item in ['key', 'type', 'name', 'title', 'description', 'raw_value', 'value', ]] + result = [] + i = 1 + while (('Xmp.xmpMM.History[%i]' % i) in res): + data = { 'ID': i, + 'Software': u'-', + 'Timestamp': u'-', + 'Action': u'-', + 'Info': u'-', } + if ('Xmp.xmpMM.History[%i]/stEvt:softwareAgent'%i) in res: + data['Software'] = res['Xmp.xmpMM.History[%i]/stEvt:softwareAgent'%i].value + data['Timestamp'] = res['Xmp.xmpMM.History[%i]/stEvt:when'%i].value + data['Action'] = res['Xmp.xmpMM.History[%i]/stEvt:action'%i].value + if ('Xmp.xmpMM.History[%i]/stEvt:changed'%i) in res: + data['Info'] = res['Xmp.xmpMM.History[%i]/stEvt:changed'%i].value + #print res['Xmp.xmpMM.History[%i]/stEvt:instanceID'%i].value + result.append( data ) + elif ('Xmp.xmpMM.History[%i]/stEvt:parameters'%i) in res: + data['Action'] = res['Xmp.xmpMM.History[%i]/stEvt:action'%i].value + data['Info'] = res['Xmp.xmpMM.History[%i]/stEvt:parameters'%i].value + #data['Action'] = data['Info'].split(' ')[0] + result.append( data ) + else: + pass + i += 1 + + self._features['History'] = result + return + def _util_merge_Regions(self, regs, sub=False, overlap=False, close=False): # sub=False, overlap=False, close=False ; level 0 ; similar regions, similar position (default) # sub=True, overlap=False, close=False ; level 1 ; region contained in other, any shape/size @@ -3345,8 +3346,10 @@ # Category:Created_with_OpenOffice.org (pdf) # Category:Created_with_Tux_Paint (pdf) # Category:Created_with_Microsoft_Image_Composite_Editor (jpg) - def _cat_meta_general(self): - result = self._info_filter['Metadata'] + def _cat_meta_and_history_general(self): + results = self._info_filter['Metadata'] +\ + [{'*': 
item['Software']} for item in self._info_filter['History']] + cats = set() for key, magic, cat in [('Desc', u"Generated automatically by: GNU LilyPond", u'MIDI files created with GNU LilyPond'), ('Software', u"www.inkscape.org", u'Bitmap from Inkscape'), ('Misc', u"org.inkscape.output.svg.inkscape", u'Created with Inkscape'), # 'Output_extension' @@ -3376,12 +3379,13 @@ ('Comment', u"LEAD Technologies Inc.", u'Created with PhotoStitch'), ('Producer', u"Scribus PDF Library", u'Created with Scribus'), ('Producer', u"OpenOffice.org", u'Created with OpenOffice.org'),]: - relevance = len(result) and (key in result[0]) and \ - (magic in result[0][key]) - if relevance: - break + for result in results: + relevance = ((key in result) or ('*' in result)) and \ + (magic in result.get(key, result.get('*'))) + if relevance: + cats.add( cat )
- return (cat, bool(relevance)) + return (list(cats), bool(len(cats)))
# Category:Categorized by DrTrigonBot def _addcat_BOT(self): @@ -3654,10 +3658,13 @@
# categorization: use explicit searches for classification (rel = ?) for item in self._funcs['cat']: - (cat, rel) = getattr(self, item)() + (cats, rel) = getattr(self, item)() #print cat, result, len(result) + if not isinstance(cats, list): # because of 'Histroy' and '_cat_meta_and_history_general' + cats = [cats] # which return multiple results... if rel: - self._result_check.append( cat ) + for cat in cats: + self._result_check.append( cat ) self._result_check = list(set(self._result_check))
# categorization: conditional (only if the ones before are present) @@ -4073,6 +4080,11 @@ result = self._info['Streams'] return {'Streams': result}
+ def _filter_History(self): + # use all, (should be reliable) + result = self._info['History'] + return {'History': result} + # def _filter_Audio(self): # # use all, (should be reliable) # result = self._info['Audio']
pywikipedia-svn@lists.wikimedia.org