http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11147
Revision: 11147
Author:   drtrigon
Date:     2013-03-01 22:08:18 +0000 (Fri, 01 Mar 2013)
Log Message:
-----------
new features; + FFT / + SVD / + chessboard pose detection (beta)

Modified Paths:
--------------
    trunk/pywikipedia/catimages.py
Modified: trunk/pywikipedia/catimages.py
===================================================================
--- trunk/pywikipedia/catimages.py	2013-03-01 20:24:29 UTC (rev 11146)
+++ trunk/pywikipedia/catimages.py	2013-03-01 22:08:18 UTC (rev 11147)
@@ -49,7 +49,7 @@
 #
 # python default packages
-import re, urllib2, os, locale, sys, datetime, math, shutil, mimetypes
+import re, urllib2, os, locale, sys, datetime, math, shutil, mimetypes, shelve
 import StringIO, json    # fallback: simplejson
 from subprocess import Popen, PIPE
 import Image
@@ -62,7 +62,7 @@
 # additional python packages (more exotic and problematic ones)
 try:
     import numpy as np
-    from scipy import ndimage
+    from scipy import ndimage, fftpack, linalg
     import cv    # TS: nonofficial cv2.so backport of the testing-version of
                  # python-opencv because of missing build-host, done by DaB
@@ -498,22 +498,24 @@
         if (self.image_mime[1] in ['ogg', 'pdf', 'vnd.djvu']):
             return
-        result = self._util_get_Geometry_CV()
+        result = self._util_get_Geometry_CVnSCIPY()
 
-        self._info['Geometry'] = [{'Lines': result['Lines'], 'Circles': result['Circles'], 'Corners': result['Corners']}]
+        self._info['Geometry'] = [{'Lines': result['Lines'], 'Circles': result['Circles'], 'Corners': result['Corners'],
+                                   'FFT_Comp': result['FFT_Comp'], 'SVD_Comp': result['SVD_Comp'], 'SVD_Min': result['SVD_Min']}]
         return
 
     # https://code.ros.org/trac/opencv/browser/trunk/opencv/samples/python/houghli...
-    def _util_get_Geometry_CV(self):
+    def _util_get_Geometry_CVnSCIPY(self):
         # http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#cornerharr...
         # http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#houghcircl...
        # http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#houghlines
         # http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#houghlines...
-
+
         if hasattr(self, '_buffer_Geometry'):
             return self._buffer_Geometry
 
-        self._buffer_Geometry = {'Lines': '-', 'Circles': '-', 'Edge_Ratio': '-', 'Corners': '-'}
+        self._buffer_Geometry = {'Lines': '-', 'Circles': '-', 'Edge_Ratio': '-', 'Corners': '-',
+                                 'FFT_Comp': '-', 'FFT_Peaks': '-', 'SVD_Comp': '-', 'SVD_Min': '-'}
         scale = 1.
         try:
@@ -534,14 +536,14 @@
             # similar to face or people detection
             smallImg = np.empty( (cv.Round(img.shape[1]/scale), cv.Round(img.shape[0]/scale)), dtype=np.uint8 )
-            gray = cv2.cvtColor( img, cv.CV_BGR2GRAY )
+            _gray = cv2.cvtColor( img, cv.CV_BGR2GRAY )
             # smooth it, otherwise a lot of false circles may be detected
-            #gray = cv2.GaussianBlur( gray, (9, 9), 2 )
-            gray = cv2.GaussianBlur( gray, (5, 5), 2 )
+            #gray = cv2.GaussianBlur( _gray, (9, 9), 2 )
+            gray = cv2.GaussianBlur( _gray, (5, 5), 2 )
             smallImg = cv2.resize( gray, smallImg.shape, interpolation=cv2.INTER_LINEAR )
             #smallImg = cv2.equalizeHist( smallImg )
             src = smallImg
-
+
             # https://code.ros.org/trac/opencv/browser/trunk/opencv/samples/python/houghli...
             #dst = cv2.Canny(src, 50, 200)
             dst = cv2.Canny(src, 10, 10)
@@ -609,6 +611,51 @@
             #cv2.imshow("people detector", color_dst)
             #c = cv2.waitKey(0) & 255
+            # fft
+            gray = cv2.resize( _gray, smallImg.shape, interpolation=cv2.INTER_LINEAR )
+            #s = (self.image_size[1], self.image_size[0])
+            s = gray.shape
+            fft = fftpack.fftn(gray)
+            peaks = np.where(fft > (fft.max()*0.001))[0].shape[0]
+            # shift quadrants so that low spatial frequencies are in the center
+            #fft = fftpack.fftshift(fft)
+            #fft = np.fft.fftn(gray)
+            c = (np.array(s)/2.).astype(int)
+            for i in range(0, min(c)-1, int(min(c)/50.)):
+                fft[(c[0]-i):(c[0]+i+1),(c[1]-i):(c[1]+i+1)] = 0.
+                #new = np.zeros(s)
+                #new[(c[0]-i):(c[0]+i+1),(c[1]-i):(c[1]+i+1)] = fft[(c[0]-i):(c[0]+i+1),(c[1]-i):(c[1]+i+1)]
+                #Image.fromarray(fftpack.fftshift(fft).real).show()
+                ##Image.fromarray(fftpack.ifftn(fftpack.ifftshift(new)).real - gray).show()
+                #Image.fromarray(fftpack.ifftn(fft).real - gray).show()
+                if ((fftpack.ifftn(fft).real - gray).max() >= (255/2.)):
+                    break
+            #fft = fftpack.ifftshift(fft)
+            #Image.fromarray(fftpack.ifftn(fft).real).show()
+            #Image.fromarray(np.fft.ifftn(fft).real).show()
+            data['FFT_Comp'] = 1.-float(i*i)/(s[0]*s[1])
+            data['FFT_Peaks'] = peaks
+            #pywikibot.output( u'FFT_Comp: %s %s' % (1.-float(i*i)/(s[0]*s[1]), peaks) )
+
+            # svd
+            U, S, Vh = linalg.svd(np.matrix(gray))
+            #U, S, Vh = linalg.svd(np.matrix(fft))    # do combined 'svd of fft'
+            SS = np.zeros(s)
+            ss = min(s)
+            for i in range(0, len(S)-1, int(len(S)/100.)):    # (len(S)==ss) -> else; problem!
+                #SS = np.zeros(s)
+                #SS[:(ss-i),:(ss-i)] = np.diag(S[:(ss-i)])
+                SS[:(i+1),:(i+1)] = np.diag(S[:(i+1)])
+                #Image.fromarray(np.dot(np.dot(U, SS), Vh) - gray).show()
+                #if ((np.dot(np.dot(U, SS), Vh) - gray).max() >= (255/4.)):
+                if ((np.dot(np.dot(U, SS), Vh) - gray).max() < (255/4.)):
+                    break
+            #data['SVD_Comp'] = 1.-float(i)/ss
+            data['SVD_Comp'] = float(i)/ss
+            data['SVD_Min'] = S[:(i+1)].min()
+            #pywikibot.output( u'SVD_Comp: %s' % (1.-float(i)/ss) )
+            #pywikibot.output( u'SVD_Comp: %s %s %s' % (float(i)/ss, S[:(i+1)].min(), S[:(i+1)].max()) )
+
         if data:
             self._buffer_Geometry.update(data)
         return self._buffer_Geometry
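[Reviewer note on the hunk above] Both new features measure how "compressible" the grayscale image is: coefficients (FFT bins, or singular values) are discarded step by step until the reconstruction deviates from the original by a fixed threshold, and the fraction of coefficients needed becomes the feature. Simple graphics need few coefficients, photographs many. A condensed standalone sketch of the two measures follows; it assumes only NumPy/SciPy, the function names are mine, and the step sizes/thresholds mirror the hunk but are otherwise illustrative (a max(1, ...) guard is added so the step can never be zero on tiny images):

    # Sketch of the FFT/SVD compressibility measures from the hunk above.
    import numpy as np
    from scipy import fftpack, linalg

    def fft_compressibility(gray, tol=255/2.):
        # zero a growing centered block of FFT coefficients until the
        # reconstruction error exceeds 'tol'; return the fraction of
        # coefficients kept (1.0 = incompressible)
        s = gray.shape
        fft = fftpack.fftn(gray.astype(float))
        c = (np.array(s) / 2.).astype(int)
        i = 0
        for i in range(0, min(c) - 1, max(1, int(min(c) / 50.))):
            fft[(c[0]-i):(c[0]+i+1), (c[1]-i):(c[1]+i+1)] = 0.
            if (fftpack.ifftn(fft).real - gray).max() >= tol:
                break
        return 1. - float(i * i) / (s[0] * s[1])

    def svd_compressibility(gray, tol=255/4.):
        # keep a growing number of leading singular values until the
        # rank-k reconstruction is within 'tol' of the original;
        # return k over the full rank (small = very compressible)
        U, S, Vh = linalg.svd(gray.astype(float))
        k = len(S) - 1
        for k in range(0, len(S) - 1, max(1, int(len(S) / 100.))):
            approx = np.dot(U[:, :k+1] * S[:k+1], Vh[:k+1, :])
            if (approx - gray).max() < tol:
                break
        return float(k) / min(gray.shape)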
@@ -756,8 +803,9 @@
             pywikibot.output(u'WARNING: unknown file type [_detect_AverageColor_PILnCV]')
             return
 
-        result = self._util_average_Color_colormath(h)
-        result['Gradient'] = self._util_get_Geometry_CV().get('Edge_Ratio', None) or '-'
+        result = self._util_average_Color_colormath(h)
+        result['Gradient'] = self._util_get_Geometry_CVnSCIPY().get('Edge_Ratio', None) or '-'
+        result['FFT_Peaks'] = self._util_get_Geometry_CVnSCIPY().get('FFT_Peaks', None) or '-'
         self._info['ColorAverage'] = [result]
         return
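[Reviewer note] The new FFT_Peaks value is copied into the ColorAverage result here so the Graphics classifier (last hunk below) can eventually use it. A hedged sketch of that intended test, mirroring the commented-out condition in the final hunk; it assumes numeric feature values (the committed code stores '-' placeholders when detection was skipped), and the 500 threshold is the untested value from the diff, not a validated one:

    # Hypothetical Graphics relevance test including FFT_Peaks.
    def graphics_relevance(result):
        return bool(result
                    and result[0]['Gradient'] < 0.1
                    and 0.005 < result[0]['Peaks'] < 0.1      # black/white texts are below that
                    and result[0]['FFT_Peaks'] < 500)         # untested threshold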
@@ -1443,7 +1491,7 @@
             if im == None:
                 raise IOError
 
-            scale = max([1., np.average(np.array(im.shape)[0:2]/500.)])
+            scale = max([1., np.average(np.array(im.shape)[0:2]/1000.)])
         except IOError:
             pywikibot.output(u'WARNING: unknown file type [_detect_Chessboard_CV]')
             return
@@ -1463,18 +1511,206 @@
             found_all, corners = cv2.findChessboardCorners( im, chessboard_dim )
         except cv2.error, e:
             pywikibot.output(u'%s' % e)
-
-        ##cv.DrawChessboardCorners( im3, chessboard_dim, corners, found_all )
-        #cv.ShowImage("win", im3);
-        #cv.WaitKey()
-        # further detection ?
+        cv2.drawChessboardCorners( im, chessboard_dim, corners, found_all )
+        #cv2.imshow("win", im)
+        #cv2.waitKey()
+        if corners is not None:
+            corners = [ tuple(item[0]) for item in corners ]
+            self._info['Chessboard'] = [{ 'Corners': corners, }]
+
+#        # chess board recognition (more tolerant)
+#        # http://codebazaar.blogspot.ch/2011/08/chess-board-recognition-project-part-1...
+#        # https://code.ros.org/trac/opencv/browser/trunk/opencv/samples/python/houghli...
+#        # http://docs.opencv.org/doc/tutorials/imgproc/imgtrans/canny_detector/canny_d...
+#        dst = im.copy()
+#        color_dst = cv2.cvtColor(dst, cv.CV_GRAY2BGR)
+#        dst = cv2.GaussianBlur(dst, (3, 3), 5)
+#        thr = 150
+#        dst = cv2.Canny(dst, thr, 3*thr)
+#        cv2.imshow("win", dst)
+#        cv2.waitKey()
+#        # lines to find grid
+#        # http://dsp.stackexchange.com/questions/2420/alternatives-to-hough-transform-...
+#        USE_STANDARD = True
+#        if USE_STANDARD:
+#            #lines = cv.HoughLines2(dst, storage, cv.CV_HOUGH_STANDARD, 1, pi / 180, 100, 0, 0)
+#            #lines = cv2.HoughLines(dst, 1, math.pi / 180, 100)
+#            lines = cv2.HoughLines(dst, 1, math.pi / 180, 150)
+#            if (lines is not None) and len(lines):
+#                lines = lines[0]
+#                #data['Lines'] = len(lines)
+#
+#            ls = np.array(lines)
+#            import pylab
+#            (n, bins, patches) = pylab.hist(ls[:,1])
+#            print n, bins, patches
+#            pylab.grid(True)
+#            pylab.show()
+#
+#            for (rho, theta) in lines:
+#                #if theta > 0.3125: continue
+#                a = math.cos(theta)
+#                b = math.sin(theta)
+#                x0 = a * rho
+#                y0 = b * rho
+#                pt1 = (cv.Round(x0 + 1000*(-b)), cv.Round(y0 + 1000*(a)))
+#                pt2 = (cv.Round(x0 - 1000*(-b)), cv.Round(y0 - 1000*(a)))
+#                cv2.line(color_dst, pt1, pt2, cv.RGB(255, 0, 0), 3, 8)
+#        else:
+#            #lines = cv.HoughLines2(dst, storage, cv.CV_HOUGH_PROBABILISTIC, 1, pi / 180, 50, 50, 10)
+#            lines = cv2.HoughLinesP(dst, 1, math.pi / 180, 100)
+#
+#            for line in lines[0]:
+#                print line
+#                cv2.line(color_dst, tuple(line[0:2]), tuple(line[2:4]), cv.CV_RGB(255, 0, 0), 3, 8)
+#        cv2.imshow("win", color_dst)
+#        cv2.waitKey()
+
         if found_all:
-            self._info['Chessboard'] = [{'Corners': corners}]
+            # pose detection
+            # http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconst...
+            # http://stackoverflow.com/questions/10022568/opencv-2-3-camera-calibration
+            d = shelve.open( os.path.join(scriptdir, 'dtbext/opencv/camera_virtual_default') )
+            if ('retval' not in d):
+                # http://commons.wikimedia.org/wiki/File:Mutilated_checkerboard_3.jpg
+                pywikibot.output(u"Doing (virtual) camera calibration onto reference image 'File:Mutilated_checkerboard_3.jpg'")
+                im3 = cv2.imread( 'Mutilated_checkerboard_3.jpg', cv2.CV_LOAD_IMAGE_GRAYSCALE )
+                im3 = cv2.resize( im3, (cv.Round(im3.shape[1]/scale), cv.Round(im3.shape[0]/scale)), interpolation=cv2.INTER_LINEAR )
+                # Compute the three dimensional world-coordinates
+                tmp = []
+                for h in range(chessboard_dim[0]):
+                    for w in range(chessboard_dim[1]):
+                        tmp.append( (float(h), float(w), 0.0) )
+                objectPoints = np.array(tmp)
+                # Compute matrices
+                _found_all, _corners = cv2.findChessboardCorners( im3, chessboard_dim, flags=cv.CV_CALIB_CB_ADAPTIVE_THRESH | cv.CV_CALIB_CB_FILTER_QUADS )
+                #cv2.drawChessboardCorners( im3, chessboard_dim, _corners, _found_all )
+                retval, cameraMatrix, distCoeffs, rvecs, tvecs = cv2.calibrateCamera([objectPoints.astype('float32')], [_corners.astype('float32')], im3.shape, np.eye(3), np.zeros((5, 1)))
+                fovx, fovy, focalLength, principalPoint, aspectRatio = cv2.calibrationMatrixValues(cameraMatrix, im3.shape, 1.0, 1.0)
+                d['objectPoints'] = [objectPoints.astype('float32')]    # shape: (49, 3) in a list of 1 item
+                d['imagePoints'] = [_corners.astype('float32')]         # shape: (49, 1, 2) in a list of 1 item
+                d['cameraMatrix'] = cameraMatrix
+                d['distCoeffs'] = distCoeffs
+                d['rvecs'] = rvecs
+                d['tvecs'] = tvecs
+                d['imageSize'] = im3.shape
+                d['apertureWidth'] = 1.0
+                d['apertureHeight'] = 1.0
+                d['fovx'] = fovx
+                d['fovy'] = fovy
+                d['focalLength'] = focalLength
+                d['principalPoint'] = principalPoint
+                d['aspectRatio'] = aspectRatio
+                d['retval'] = retval
+            else:
+                objectPoints = d['objectPoints'][0]
+                cameraMatrix, distCoeffs = d['cameraMatrix'], d['distCoeffs']
+                # would be nice to use these:
+                #cameraMatrix, distCoeffs = np.eye(3), np.zeros((5,1))
+                # ...since they are simple... else others have to be documented as "used calibration" !!!
+            d.close()
+            # http://answers.opencv.org/question/1073/what-format-does-cv2solvepnp-use-for...
+            rvec, tvec = cv2.solvePnP(objectPoints, corners, cameraMatrix, distCoeffs)
+            #rvec, tvec = cv2.solvePnP(objectPoints, corners, cameraMatrix, None)
+            # http://www.opencv.org.cn/opencvdoc/2.3.2/html/modules/calib3d/doc/camera_cal...
+            # http://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula
+            #print cv2.Rodrigues(rvec)[0], linalg.norm(rvec), rvec
+            #print tvec
+            #cv2.composeRT
+            #(cv2.findFundamentalMat, cv2.findHomography or from 'pose', cv2.estimateAffine3D)
+            im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
+            ## draw the rotated 3D object
+            #imagePoints, jacobian = cv2.projectPoints(objectPoints, rvec, tvec, cameraMatrix, distCoeffs)
+            #for i in range(len(imagePoints)-1):
+            #    cv2.line(im, tuple(imagePoints[i][0].astype(int)), tuple(imagePoints[i+1][0].astype(int)), (125.,125.,125.), 3)
+
+            mat = np.eye(3)
+            color = [(0., 0., 255.), (0., 255., 0.), (255., 0., 0.)]
+            label = ['x', 'y', 'z']
+            # axis-cross
+            matD2raw, matD2norm, matnorm = self._util_getD2coords( mat, cameraMatrix, distCoeffs, sign=-1 )
+            for i in range(3):
+                imagePoints, D2norm, norm = matD2raw[:,:,:,i], 40*matD2norm[:,i], matnorm[:,i]
+                #cv2.line(im, tuple(imagePoints[0][0].astype(int)), tuple(imagePoints[1][0].astype(int)), color[i], 1)
+                #cv2.putText(im, label[i], tuple(imagePoints[1][0].astype(int)), cv2.FONT_HERSHEY_PLAIN, 1.5, color[i])
+                cv2.line(im, (50,50), (50+D2norm[0].astype(int),50+D2norm[1].astype(int)), color[i], 1)
+                cv2.putText(im, label[i], (50+D2norm[0].astype(int),50+D2norm[1].astype(int)), cv2.FONT_HERSHEY_PLAIN, 1., color[i])
+            # rotated axis-cross
+            matD2raw, matD2norm, matnorm = self._util_getD2coords( mat, cameraMatrix, distCoeffs, rvec=rvec, tvec=tvec )
+            for i in range(3):
+                imagePoints, D2norm, norm = matD2raw[:,:,:,i], 40*matD2norm[:,i], matnorm[:,i]
+                cv2.line(im, tuple(imagePoints[0][0].astype(int)), tuple(imagePoints[1][0].astype(int)), color[i], 3)
+                cv2.putText(im, label[i], tuple(imagePoints[1][0].astype(int)), cv2.FONT_HERSHEY_PLAIN, 1.5, color[i])
+                cv2.line(im, (50,100), (50+D2norm[0].astype(int),100+D2norm[1].astype(int)), color[i], 1)
+                cv2.putText(im, label[i], (50+D2norm[0].astype(int),100+D2norm[1].astype(int)), cv2.FONT_HERSHEY_PLAIN, 1., color[i])
+            ortho = imagePoints[1][0]-imagePoints[0][0]    # z-axis is orthogonal to object surface
+            ortho = ortho/linalg.norm(ortho)
+# self-calculated rotated axis-cross
+            rmat = np.zeros((3,4))
+            rmat[:,0:3] = cv2.Rodrigues(rvec)[0]
+            #rmat[:,3] = tvec[:,0]
+            mat = np.dot(rmat, cv2.convertPointsToHomogeneous(np.eye(3).astype('float32')).transpose()[:,0,:])
+            ## rotation between z-axis (direction of view) and translation point tvec
+            #vec = np.array([0.,0.,1.])
+            #angle = np.arccos( np.dot(tvec[:,0], vec)/(linalg.norm(tvec[:,0])*linalg.norm(vec)) )
+            #axis = np.cross(tvec[:,0], vec)
+            #rvec2 = axis/linalg.norm(axis) * -angle
+            #rmat2 = cv2.Rodrigues(rvec2)[0]
+            ##mat = np.dot(rmat2, mat)
+            ##rot = cv2.Rodrigues(np.dot(rmat2, rmat[:,0:3]))[0]
+            rot = rvec
+            perp = mat
+            # what follows SHOULD be invariant of the choice of 'cameraMatrix' and 'distCoeffs' ... ! is it ?
+            #cameraMatrix = np.eye(3)
+            #distCoeffs = np.zeros((5,1))
+            mat = np.dot((cameraMatrix), mat)    # linalg.inv(cameraMatrix)
+            #_cameraMatrix, rotMatrix, transVect, rotMatrixX, rotMatrixY, rotMatrixZ, eulerAngles = cv2.decomposeProjectionMatrix(rmat)
+            #mat = np.dot(rotMatrix, np.eye(3))
+            matD2raw, matD2norm, matnorm = self._util_getD2coords( mat, cameraMatrix, distCoeffs )
+            for i in range(3):
+                imagePoints, D2norm, norm = matD2raw[:,:,:,i], 40*matD2norm[:,i], matnorm[:,i]
+                D2norm = D2norm/linalg.norm(D2norm)*40
+                cv2.line(im, (50,200), (50+D2norm[0].astype(int),200+D2norm[1].astype(int)), color[i], 1)
+                cv2.putText(im, label[i], (50+D2norm[0].astype(int),200+D2norm[1].astype(int)), cv2.FONT_HERSHEY_PLAIN, 1., color[i])
+            #ortho = imagePoints[1][0]-imagePoints[0][0]    # z-axis is orthogonal to object surface
+
+            #cv2.imshow("win", im)
+            #cv2.waitKey()
+            pywikibot.output(u'result for calibrated camera:\n rot=%s\n perp=%s\n perp2D=%s' % (rot.transpose()[0], perp[:,2], ortho))
+            pywikibot.output(u'nice would be to do the same for uncalibrated/default cam settings')
+
+# still beta/experimental thus suppress value output for the moment
+#            self._info['Chessboard'][0]['Rotation'] = tuple(rot.transpose()[0])
+#            self._info['Chessboard'][0]['Perp_Dir'] = tuple(perp[:,2])
+#            self._info['Chessboard'][0]['Perp_Dir_2D'] = tuple(ortho)
+
         return
+    def _util_getD2coords(self, D3coords, cameraMatrix, distCoeffs, rvec=None, tvec=None, sign=1):
+        if rvec is None:
+            rvec = np.zeros((3,1))
+        if tvec is None:
+            tvec = np.zeros((3,1))
+        matD2raw = np.zeros((2,1,2,D3coords.shape[0]))
+        matD2norm = np.zeros((2,D3coords.shape[0]))
+        matnorm = np.zeros((1,D3coords.shape[0]))
+        for i in range(D3coords.shape[0]):
+#            D2raw, jacobian = cv2.projectPoints(np.array([[0.,0.,0.],D3coords[:,i]]), rvec, tvec, cameraMatrix, distCoeffs)
+            D2raw, jacobian = cv2.projectPoints(np.array([[0.,0.,-1.],[D3coords[0,i],D3coords[1,i],D3coords[2,i]-1.]]), rvec, tvec, cameraMatrix, distCoeffs)
+            D2norm = (D2raw[1][0]-D2raw[0][0])
+            norm = linalg.norm(D2norm)
+#            D2norm[1] *= sign    # usual 2D coords <-> pixel/picture coords
+            D2norm[0] *= sign    # usual 2D coords <-> pixel/picture coords
+            D2norm *= sign       # invert all
+            matD2raw[:,:,:,i] = D2raw
+            matD2norm[:,i] = D2norm
+            matnorm[:,i] = norm
+        matD2norm = matD2norm/max(matnorm[0])
+        return (matD2raw, matD2norm, matnorm)
+
     def _util_get_DataTags_EXIF(self):
         # http://tilloy.net/dev/pyexiv2/tutorial.html
         # (is UNFORTUNATELY NOT ABLE to handle all tags, e.g. 'FacesDetected', ...)
@@ -2048,19 +2284,7 @@
         self._info['Audio'] = [data]
         return
-# TODO: TEST for FFT and SVD ; use e.g. for Category:Graphics !!!
-#    def _detect_xxxFeatures_SCIPY(self):
-#        from scipy import fftpack, linalg
-#
-#        #img = cv2.imread( self.image_path_JPEG, 1 )
-#        fft = fftpack.fft(img)
-#        U, S, Vh = linalg.svd(img)
-#        # linalg.svd(fft)
-#        invert fft and svd with loss, then compare to original image (e.g. subtraction)
-#        count how many values are needed to restore up to a fixed threshold... use this
-#        as additional 'simplicy' detection for graphics...
-
     # all classification and categorization methods and definitions - default variation
     # use simplest classification I can think of (self-made) and do categorization
     # mostly based on filtered/reported features
@@ -2277,6 +2501,7 @@
         result = self._info_filter['ColorAverage']
         relevance = (result and result[0]['Gradient'] < 0.1) and \
                     (0.005 < result[0]['Peaks'] < 0.1)    # black/white texts are below that
+                    #(result[0]['FFT_Peaks'] < 500)        # has to be tested first !!!
 
         return (u'Graphics', bool(relevance))
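[Reviewer note on the pose-detection hunk] The chessboard pose step calibrates a "virtual" camera once against a reference image, shelves the resulting matrices, then solves the perspective-n-point problem for each detected board and projects an axis-cross back into the image. A minimal standalone sketch with stock cv2 bindings follows; the file name, board size, and identity intrinsics are assumptions (real code loads the shelved calibration), and note that stock cv2.solvePnP returns a 3-tuple, unlike the nonofficial backport used in the diff:

    # Minimal chessboard pose sketch (stock OpenCV 2.4-era python bindings).
    import numpy as np
    import cv2

    chessboard_dim = (7, 7)                     # inner corners (assumed)
    # flat z=0 grid of board coordinates, one unit per square
    objp = np.array([(float(h), float(w), 0.)
                     for h in range(chessboard_dim[0])
                     for w in range(chessboard_dim[1])], dtype=np.float32)

    gray = cv2.imread('board.jpg', 0)           # hypothetical input image
    found, corners = cv2.findChessboardCorners(gray, chessboard_dim)
    if found:
        cameraMatrix = np.eye(3)                # stand-in for the shelved calibration
        distCoeffs = np.zeros((5, 1))
        retval, rvec, tvec = cv2.solvePnP(objp, corners, cameraMatrix, distCoeffs)
        rmat, _jac = cv2.Rodrigues(rvec)        # 3x1 rotation vector -> 3x3 matrix
        # project the unit axis-cross into the image, as the drawing code does
        axes = np.float32([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]])
        imgpts, _jac = cv2.projectPoints(axes, rvec, tvec, cameraMatrix, distCoeffs)
        origin = tuple(imgpts[0].ravel().astype(int))
        for pt, name in zip(imgpts[1:], ['x', 'y', 'z']):
            tip = tuple(pt.ravel().astype(int))
            cv2.line(gray, origin, tip, 255, 2)
            cv2.putText(gray, name, tip, cv2.FONT_HERSHEY_PLAIN, 1., 255)

The projected z-axis of that cross is orthogonal to the board surface, which is what the hunk computes as the (currently suppressed) Perp_Dir / Perp_Dir_2D values.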