http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11508
Revision: 11508
Author: drtrigon
Date: 2013-05-05 19:05:10 +0000 (Sun, 05 May 2013)

Log Message:
-----------
improved; fft (svd) spectrum with peak detection
new feature; added face landmark recognition (usable for pose estimation)
new feature; started wavelet algorithms, e.g. for general classification (like BoW) but also for audio, video, ... more to come
Modified Paths:
--------------
    trunk/pywikipedia/catimages.py
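The "fft (svd) spectrum with peak detection" from the log message works as follows: take the FFT of the grayscale image, shift the low spatial frequencies to the center, then count how many singular values of the spectrum exceed 1% of the largest one. A minimal, self-contained sketch of that measure (it mirrors the new code in _util_get_Geometry_CVnSCIPY in the diff below; the random array is only a stand-in for a real grayscale image):

    import numpy as np
    from scipy import fftpack, linalg

    def fft_svd_peaks(gray):
        """Fraction of 'significant' singular values of the shifted FFT spectrum."""
        fft = fftpack.fftn(gray)
        # shift quadrants so that low spatial frequencies are in the center
        fft = fftpack.fftshift(fft)
        U, S, Vh = linalg.svd(np.matrix(fft))
        ma = 0.01*max(S)    # threshold: 1% of the largest singular value
        return float(sum([int(c > ma) for c in S]))/len(S)

    print fft_svd_peaks(np.random.rand(64, 64))    # stand-in input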
Modified: trunk/pywikipedia/catimages.py
===================================================================
--- trunk/pywikipedia/catimages.py	2013-05-05 18:32:52 UTC (rev 11507)
+++ trunk/pywikipedia/catimages.py	2013-05-05 19:05:10 UTC (rev 11508)
@@ -60,7 +60,7 @@
 # additional python packages (more exotic and problematic ones)
 try:
     import numpy as np
-    from scipy import ndimage, fftpack, linalg
+    from scipy import ndimage, fftpack, linalg#, signal
     import cv
     # TS: nonofficial cv2.so backport of the testing-version of
     #     python-opencv because of missing build-host, done by DaB
@@ -393,6 +393,80 @@
         self._info['Faces'] += result
         return
+    # https://pypi.python.org/pypi/xbob.flandmark
+    # http://cmp.felk.cvut.cz/~uricamic/flandmark/
+    def _detect_FaceLandmark_xBOB(self):
+        """Prints the locations of any face landmark(s) found and
+        converts them to the usual face position data"""
+
+        #self._info['Faces'] = []
+        scale = 1.
+        try:
+            #video = bob.io.VideoReader(self.image_path_JPEG.encode('utf-8'))
+            video = [cv2.imread( self.image_path_JPEG, cv.CV_LOAD_IMAGE_COLOR )]
+            #if img == None:
+            #    raise IOError
+
+            # !!! the 'scale' here IS RELEVANT FOR THE DETECTION RATE;
+            # how small and how many features are detected as faces (or eyes)
+            scale = max([1., np.average(np.array(video[0].shape)[0:2]/750.)])
+        except IOError:
+            pywikibot.warning(u'unknown file type [_detect_FaceLandmark_xBOB]')
+            return
+        except AttributeError:
+            pywikibot.warning(u'unknown file type [_detect_FaceLandmark_xBOB]')
+            return
+
+        smallImg = np.empty( (cv.Round(video[0].shape[1]/scale), cv.Round(video[0].shape[0]/scale)), dtype=np.uint8 )
+        video = [ cv2.resize( img, smallImg.shape, interpolation=cv2.INTER_LINEAR ) for img in video ]
+
+        import _bob as bob
+        import xbob_flandmark as xbob
+
+        localize = xbob.flandmark.Localizer()
+
+        result = []
+        for frame in video:    # currently ALWAYS contains ONE (1!) entry
+            frame = np.transpose(frame, (2,0,1))
+            img = np.transpose(frame, (1,2,0))
+
+            for i, flm in enumerate(localize(frame)):
+                #for pi, point in enumerate(flm['landmark']):
+                #    cv2.circle(img, tuple(map(int, point)), 3, (  0,   0, 255))
+                #    cv2.circle(img, tuple(map(int, point)), 5, (  0, 255,   0))
+                #    cv2.circle(img, tuple(map(int, point)), 7, (255,   0,   0))
+                #    cv2.putText(img, str(pi), tuple(map(int, point)), cv2.FONT_HERSHEY_PLAIN, 1.0, (0,255,0))
+                #cv2.rectangle(img, tuple(map(int, flm['bbox'][:2])), tuple(map(int, (flm['bbox'][0]+flm['bbox'][2], flm['bbox'][1]+flm['bbox'][3]))), (0, 255, 0))
+                mat = np.array([flm['landmark'][3], flm['landmark'][4]])
+                mi = np.min(mat, axis=0)
+                mouth = tuple(mi.astype(int)) + tuple((np.max(mat, axis=0)-mi).astype(int))
+                #cv2.rectangle(img, tuple(mi.astype(int)), tuple(np.max(mat, axis=0).astype(int)), (0, 255, 0))
+                mat = np.array([flm['landmark'][5], flm['landmark'][1]])
+                mi = np.min(mat, axis=0)
+                leye = tuple(mi.astype(int)) + tuple((np.max(mat, axis=0)-mi).astype(int))
+                #cv2.rectangle(img, tuple(mi.astype(int)), tuple(np.max(mat, axis=0).astype(int)), (0, 255, 0))
+                mat = np.array([flm['landmark'][2], flm['landmark'][6]])
+                mi = np.min(mat, axis=0)
+                reye = tuple(mi.astype(int)) + tuple((np.max(mat, axis=0)-mi).astype(int))
+                #cv2.rectangle(img, tuple(mi.astype(int)), tuple(np.max(mat, axis=0).astype(int)), (0, 255, 0))
+                data = { 'ID':       (i+1),
+                         'Position': flm['bbox'],
+                         'Type':     u'Landmark',
+                         'Eyes':     [leye, reye],
+                         'Mouth':    mouth,
+                         'Nose':     tuple(np.array(flm['landmark'][7]).astype(int)) + (0, 0),
+                         'Ears':     [],
+                         'Landmark': [tuple(lm) for lm in np.array(flm['landmark']).astype(int)], }
+                data['Coverage'] = float(data['Position'][2]*data['Position'][3])/(self.image_size[0]*self.image_size[1])
+                result.append(data)
+
+            #img = img.astype('uint8')
+            #cv2.imshow("people detector", img)
+            #cv2.waitKey()
+
+        self._info['Faces'] += result
+        return
+
     # .../opencv/samples/cpp/peopledetect.cpp
     # + Haar/Cascade detection
     def _detect_People_CV(self):
@@ -499,8 +573,9 @@
result = self._util_get_Geometry_CVnSCIPY()
-        self._info['Geometry'] = [{'Lines': result['Lines'], 'Circles': result['Circles'], 'Corners': result['Corners'],
-                                   'FFT_Comp': result['FFT_Comp'], 'SVD_Comp': result['SVD_Comp'], 'SVD_Min': result['SVD_Min']}]
+        self._info['Geometry'] = [{'Lines': result['Lines'],
+                                   'Circles': result['Circles'],
+                                   'Corners': result['Corners'],}]
         return
    # https://code.ros.org/trac/opencv/browser/trunk/opencv/samples/python/houghli...
@@ -514,7 +589,7 @@
             return self._buffer_Geometry
         self._buffer_Geometry = {'Lines': '-', 'Circles': '-', 'Edge_Ratio': '-', 'Corners': '-',
-                                 'FFT_Comp': '-', 'FFT_Peaks': '-', 'SVD_Comp': '-', 'SVD_Min': '-'}
+                                 'FFT_Peaks': '-'}
         scale = 1.
         try:
@@ -610,54 +685,48 @@
             #cv2.imshow("people detector", color_dst)
             #c = cv2.waitKey(0) & 255
-            # fft
+            # fft spectral/frequency/momentum analysis with svd peak detection
             gray = cv2.resize( _gray, smallImg.shape, interpolation=cv2.INTER_LINEAR )
-            #s = (self.image_size[1], self.image_size[0])
-            s = gray.shape
+            ##s = (self.image_size[1], self.image_size[0])
+            #s = gray.shape
             fft = fftpack.fftn(gray)
-            peaks = np.where(fft > (fft.max()*0.001))[0].shape[0]
+            #fft = np.fft.fftn(gray)
+            #Image.fromarray(fft.real).show()
             # shift quadrants so that low spatial frequencies are in the center
-            #fft = fftpack.fftshift(fft)
-            #fft = np.fft.fftn(gray)
-            c = (np.array(s)/2.).astype(int)
-            for i in range(0, min(c)-1, max( int(min(c)/50.), 1 )):
-                fft[(c[0]-i):(c[0]+i+1),(c[1]-i):(c[1]+i+1)] = 0.
-                #new = np.zeros(s)
-                #new[(c[0]-i):(c[0]+i+1),(c[1]-i):(c[1]+i+1)] = fft[(c[0]-i):(c[0]+i+1),(c[1]-i):(c[1]+i+1)]
-                #Image.fromarray(fftpack.fftshift(fft).real).show()
-                ##Image.fromarray(fftpack.ifftn(fftpack.ifftshift(new)).real - gray).show()
-                #Image.fromarray(fftpack.ifftn(fft).real - gray).show()
-                if ((fftpack.ifftn(fft).real - gray).max() >= (255/2.)):
-                    break
-            #fft = fftpack.ifftshift(fft)
-            #Image.fromarray(fftpack.ifftn(fft).real).show()
-            #Image.fromarray(np.fft.ifftn(fft).real).show()
-            data['FFT_Comp'] = 1.-float(i*i)/(s[0]*s[1])
-            data['FFT_Peaks'] = peaks
-            #pywikibot.output( u'FFT_Comp: %s %s' % (1.-float(i*i)/(s[0]*s[1]), peaks) )
+            fft = fftpack.fftshift(fft)
+            #Image.fromarray(fft.real).show()
+            ##Image.fromarray(fftpack.ifftn(fft).real).show()
+            ##Image.fromarray(fftpack.ifftn(fftpack.ifftshift(fft)).real).show()
+            ##Image.fromarray(fftpack.ifftn(fftpack.ifftshift(fft.real)).real).show()
+            try:
+                U, S, Vh = linalg.svd(np.matrix(fft))
+                ma = 0.01*max(S)
+                count = sum([int(c > ma) for c in S])
-            # svd
-            try:
-                U, S, Vh = linalg.svd(np.matrix(gray))
-                #U, S, Vh = linalg.svd(np.matrix(fft))    # do combined 'svd of fft'
-                SS = np.zeros(s)
-                ss = min(s)
-                for i in range(0, len(S)-1, max( int(len(S)/100.), 1 )):    # (len(S)==ss) -> else; problem!
-                    #SS = np.zeros(s)
-                    #SS[:(ss-i),:(ss-i)] = np.diag(S[:(ss-i)])
-                    SS[:(i+1),:(i+1)] = np.diag(S[:(i+1)])
-                    #Image.fromarray(np.dot(np.dot(U, SS), Vh) - gray).show()
-                    #if ((np.dot(np.dot(U, SS), Vh) - gray).max() >= (255/4.)):
-                    if ((np.dot(np.dot(U, SS), Vh) - gray).max() < (255/4.)):
-                        break
-                #data['SVD_Comp'] = 1.-float(i)/ss
-                data['SVD_Comp'] = float(i)/ss
-                data['SVD_Min'] = S[:(i+1)].min()
-                #pywikibot.output( u'SVD_Comp: %s' % (1.-float(i)/ss) )
-                #pywikibot.output( u'SVD_Comp: %s %s %s' % (float(i)/ss, S[:(i+1)].min(), S[:(i+1)].max()) )
+                #SS = np.zeros(s)
+                #ss = min(s)
+                #for i in range(0, len(S)-1, max( int(len(S)/100.), 1 )):    # (len(S)==ss) -> else; problem!
+                #    #SS = np.zeros(s)
+                #    #SS[:(ss-i),:(ss-i)] = np.diag(S[:(ss-i)])
+                #    SS[:(i+1),:(i+1)] = np.diag(S[:(i+1)])
+                #    #Image.fromarray((np.dot(np.dot(U, SS), Vh) - fft).real).show()
+                #    #Image.fromarray(fftpack.ifftn(fftpack.ifftshift(np.dot(np.dot(U, SS), Vh))).real - gray).show()
+                #    print i, ((np.dot(np.dot(U, SS), Vh) - fft).real).max()
+                #    print i, (fftpack.ifftn(fftpack.ifftshift(np.dot(np.dot(U, SS), Vh))).real - gray).max()
+                #    #if ((np.dot(np.dot(U, SS), Vh) - fft).max() < (255/4.)):
+                #    #    break
+                #data['SVD_Comp'] = float(i)/ss
+                #data['SVD_Min'] = S[:(i+1)].min()
+
+                data['FFT_Peaks'] = float(count)/len(S)
+                #pywikibot.output( u'FFT_Peaks: %s' % data['FFT_Peaks'] )
             except linalg.LinAlgError:
                 # SVD did not converge; in fact this should NEVER happen...(?!?)
                 pass
+            # use wavelet transformation (FWT) from e.g. pywt, scipy signal or mlpy
+            # (maybe others) in addition to FFT and compare the spectra with FFT...
+            # confer; "A Practical Guide to Wavelet Analysis" (http://journals.ametsoc.org/doi/pdf/10.1175/1520-0477%281998%29079%3C0061%3A...)
+            # on how to convert and adapt FFT and wavelet spectra frequency scales
         if data:
             self._buffer_Geometry.update(data)
@@ -667,6 +736,8 @@
     def _detectclassify_ObjectAll_CV(self):
         """Uses the 'Bag of Words' model for detection and classification"""
+        # CAN ALSO BE USED FOR: TEXT, ...
+
         # http://app-solut.com/blog/2011/07/the-bag-of-words-model-in-opencv-2-2/
         # http://app-solut.com/blog/2011/07/using-the-normal-bayes-classifier-for-imag...
         # http://authors.library.caltech.edu/7694/
@@ -723,6 +794,41 @@
         self._info['Classify'] = [dict([ (trained[i], r) for i, r in enumerate(result) ])]
         return
+    def _detectclassify_ObjectAll_PYWT(self):
+        """Uses 'Fast Wavelet-Based Visual Classification' for detection
+        and classification"""
+        # Fast Wavelet-Based Visual Classification
+        # http://www.cmap.polytechnique.fr/~yu/publications/ICPR08Final.pdf
+
+        # CAN ALSO BE USED FOR: TEXT, AUDIO, (VIDEO), ...
+# TODO: for audio and video (time-based) also...!!!
+
+        import pywt    # python-pywt
+
+# TODO: improve (honestly; truly apply) wavelet in a meaningful and USEFUL (correct) way/manner!
+# TODO: truly apply FFT and SVD (used before)
+        # wavelet transformation
+        # https://github.com/nigma/pywt/tree/master/demo
+        # image_blender, dwt_signal_decomposition.py, wp_scalogram.py, dwt_multidim.py, user_filter_banks.py:
+        #coeffs = pywt.dwtn(gray, 'db1')         # Single-level n-dimensional Discrete Wavelet Transform
+        coeffs = pywt.dwt2(gray, 'db1')          # 2D Discrete Wavelet Transform
+        #coeffs = pywt.wavedec2(gray, 'db1')     # Multilevel 2D Discrete Wavelet Transform
+        result = pywt.idwt2(coeffs, 'db1')       # 2D Inverse Discrete Wavelet Transform
+        #result = pywt.waverec2(coeffs, 'db1')   # Multilevel 2D Inverse Discrete Wavelet Transform
+        result = result[:gray.shape[0],:gray.shape[1]]
+        # consider 'swt' (2D Stationary Wavelet Transform) instead of 'dwt' too
+        pywikibot.output(u'%s' % coeffs)
+        pywikibot.output(u'%s' % np.abs(result - gray).max())
+        #data['Wavelet_Comp'] = coeffs
+
+        # https://github.com/nigma/pywt/blob/master/demo/image_blender.py
+        # http://www.ncbi.nlm.nih.gov/pubmed/18713675
+        # https://github.com/nigma/pywt/blob/master/demo/wp_scalogram.py
+        # https://github.com/nigma/pywt/blob/master/demo/swt2.py
+
+        return
+
     # a lot more papers and possible algos exist; (those with code are...)
     # http://www.lix.polytechnique.fr/~schwander/python-srm/
     # http://library.wolfram.com/infocenter/Demos/5725/#downloads
@@ -940,6 +1046,12 @@
             ma = 0.01*max(h)
             count = sum([int(c > ma) for c in h])
+#            # TODO: peak detection (not supported by my local scipy version yet)
+#            # http://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks_...
+#            peakind = signal.find_peaks_cwt(fft, np.arange(1,10))
+#            print peakind
+#            print len(peakind), peakind
+
             data = { #'histogram': h,
                      'RGB': rgb,
                      'Peaks': float(count)/len(h), }
@@ -2515,7 +2627,7 @@
         result = self._info_filter['ColorAverage']
         relevance = (result and result[0]['Gradient'] < 0.1) and \
                     (0.005 < result[0]['Peaks'] < 0.1)    # black/white texts are below that
-                    #(result[0]['FFT_Peaks'] < 500)    # has to be tested first !!!
+                    #(result[0]['FFT_Peaks'] < 0.2)    # has to be tested first !!!
return (u'Graphics', bool(relevance))
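The commented-out TODO in the hunk above refers to scipy.signal.find_peaks_cwt, which only became available with scipy 0.11 (hence "not supported by my local scipy version yet"). A minimal usage sketch on synthetic 1-D data, assuming such a scipy version; the widths array tells the CWT which peak scales to look for:

    import numpy as np
    from scipy import signal

    # synthetic spectrum with a few broad peaks plus noise (stand-in data)
    xs = np.linspace(0, 2*np.pi, 200)
    spectrum = np.abs(np.sin(3*xs)) + 0.1*np.random.rand(200)

    peakind = signal.find_peaks_cwt(spectrum, np.arange(1, 10))
    print len(peakind), peakind    # indices of the detected peaks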
@@ -3175,6 +3287,8 @@
         self._detect_Faces_EXIF()
         # Faces and eyes (opencv pre-trained haar)
         self._detect_Faces_CV()
+        # Face via Landmark(s)
+#        self._detect_FaceLandmark_xBOB()
         # exclude duplicates (CV and EXIF)
         faces = [item['Position'] for item in self._info['Faces']]
         for i in self._util_merge_Regions(faces)[1]:
@@ -3208,9 +3322,11 @@
         # Chessboard (opencv reference detector)
         self._detect_Chessboard_CV()
-        # general (self-trained) detection WITH classification (BoW)
-        # uses feature detection (SIFT, SURF, ...) AND classification (SVM, ...)
+        # general (self-trained) detection WITH classification
+        # BoW: uses feature detection (SIFT, SURF, ...) AND classification (SVM, ...)
 #        self._detectclassify_ObjectAll_CV()
+        # Wavelet: uses wavelet transformation AND classification (machine learning)
+#        self._detectclassify_ObjectAll_PYWT()
         # general handling of all audio and video formats
         self._detect_Streams_FFMPEG()
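_detectclassify_ObjectAll_PYWT in the diff above is still a stub: 'gray' is never defined there and the reconstruction is only printed. A self-contained round trip of the pywt calls it sketches, with a random array standing in for the grayscale image:

    import numpy as np
    import pywt    # python-pywt

    gray = np.random.rand(64, 64)         # stand-in for a grayscale image
    coeffs = pywt.dwt2(gray, 'db1')       # 2D DWT -> (cA, (cH, cV, cD))
    result = pywt.idwt2(coeffs, 'db1')    # 2D inverse DWT
    result = result[:gray.shape[0], :gray.shape[1]]
    print np.abs(result - gray).max()     # ~0 up to floating point error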