#include #include #include #include #include #include #include #include #include "region_generator.hh" /***** Math helper functions ******/ //! return square of argument template inline X sqr( X x ) { return x*x; } //! Determinant of 3x3 matrix inline double Determinant_3x3( const float *data ) { float detS = data[0]*(data[4]*data[8]-data[7]*data[5]) - data[1]*(data[3]*data[8]-data[5]*data[6]) + data[2]*(data[3]*data[7]-data[4]*data[6]); return detS; } //! Inverse of 3x3 matrix inline void Inverse_3x3( const float *data, float *m ) { float invdet = 1.0f/Determinant_3x3( data ); m[0] = (data[4]*data[8]-data[7]*data[5])*invdet; m[1] = -(data[1]*data[8]-data[2]*data[7])*invdet; m[2] = (data[1]*data[5]-data[2]*data[4])*invdet; m[3] = -(data[3]*data[8]-data[5]*data[6])*invdet; m[4] = (data[0]*data[8]-data[2]*data[6])*invdet; m[5] = -(data[0]*data[5]-data[2]*data[3])*invdet; m[6] = (data[3]*data[7]-data[4]*data[6])*invdet; m[7] = -(data[0]*data[7]-data[1]*data[6])*invdet; m[8] = (data[0]*data[4]-data[1]*data[3])*invdet; } #include //! Inversion of 4x4 matrix // Intel SIMD reference implementation // c.f. http://download.intel.com/design/PentiumIII/sml/24504301.pdf inline void PIII_Inverse_4x4(float* src) { __m128 minor0, minor1, minor2, minor3; __m128 row0, row1, row2, row3; __m128 det, tmp1; tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src)), (__m64*)(src+ 4)); row1 = _mm_loadh_pi(_mm_loadl_pi(row1, (__m64*)(src+8)), (__m64*)(src+12)); row0 = _mm_shuffle_ps(tmp1, row1, 0x88); row1 = _mm_shuffle_ps(row1, tmp1, 0xDD); tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src+ 2)), (__m64*)(src+ 6)); row3 = _mm_loadh_pi(_mm_loadl_pi(row3, (__m64*)(src+10)), (__m64*)(src+14)); row2 = _mm_shuffle_ps(tmp1, row3, 0x88); row3 = _mm_shuffle_ps(row3, tmp1, 0xDD); // ----------------------------------------------- tmp1 = _mm_mul_ps(row2, row3); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); minor0 = _mm_mul_ps(row1, tmp1); minor1 = _mm_mul_ps(row0, tmp1); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0); minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1); minor1 = _mm_shuffle_ps(minor1, minor1, 0x4E); // ----------------------------------------------- tmp1 = _mm_mul_ps(row1, row2); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0); minor3 = _mm_mul_ps(row0, tmp1); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1)); minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3); minor3 = _mm_shuffle_ps(minor3, minor3, 0x4E); // ----------------------------------------------- tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); row2 = _mm_shuffle_ps(row2, row2, 0x4E); minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0); minor2 = _mm_mul_ps(row0, tmp1); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1)); minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2); minor2 = _mm_shuffle_ps(minor2, minor2, 0x4E); // ----------------------------------------------- tmp1 = _mm_mul_ps(row0, row1); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2); minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2); minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1)); // ----------------------------------------------- tmp1 = _mm_mul_ps(row0, row3); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1)); minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1); minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1)); // ----------------------------------------------- tmp1 = _mm_mul_ps(row0, row2); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1); minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1)); tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1)); minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3); // ----------------------------------------------- det = _mm_mul_ps(row0, minor0); det = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det); det = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det); tmp1 = _mm_rcp_ss(det); det = _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1))); det = _mm_shuffle_ps(det, det, 0x00); minor0 = _mm_mul_ps(det, minor0); _mm_storel_pi((__m64*)(src), minor0); _mm_storeh_pi((__m64*)(src+2), minor0); minor1 = _mm_mul_ps(det, minor1); _mm_storel_pi((__m64*)(src+4), minor1); _mm_storeh_pi((__m64*)(src+6), minor1); minor2 = _mm_mul_ps(det, minor2); _mm_storel_pi((__m64*)(src+ 8), minor2); _mm_storeh_pi((__m64*)(src+10), minor2); minor3 = _mm_mul_ps(det, minor3); _mm_storel_pi((__m64*)(src+12), minor3); _mm_storeh_pi((__m64*)(src+14), minor3); } /***** Minimum Volume Bounding Ellipsoid Implementation ******/ /* * Finds the minimum volume enclosing ellipsoid (MVEE) of a set of data * points stored in matrix P. The following optimization problem is solved: * * minimize log(det(A)) * s.t. (P_i - c)'*A*(P_i - c)<= 1 * * in variables A and c, where P_i is the i-th column of the matrix P. * The solver is based on Khachiyan Algorithm, and the final solution is * different from the optimal value by the pre-specified amount of 'tolerance'. * * The ellipsoid equation is given in the canonical form * (x-c)' A (x-c) <= 1 */ class min_ellipsoid { protected: size_t N; float X[16]; float c[3]; float A[9]; float *Q; float *u; float v1[3],v2[3],v3[3],r1,r2,r3; float V[9], mu[3]; bool axes_computed; void compute_axes( void ) { gsl_vector *eval; gsl_matrix *evec; gsl_eigen_symmv_workspace *w; eval = gsl_vector_alloc(3); evec = gsl_matrix_alloc(3, 3); w = gsl_eigen_symmv_alloc(3); // promote to double, GSL wants double double dA[9]; for( int i=0; i<9; ++i ) dA[i] = (double)A[i]; gsl_matrix_view m = gsl_matrix_view_array( dA, 3, 3); gsl_eigen_symmv( &m.matrix, eval, evec, w); gsl_eigen_symmv_sort( eval, evec, GSL_EIGEN_SORT_VAL_ASC); gsl_vector_view evec_i; for( int i=0; i<3; ++i ) { mu[i] = gsl_vector_get(eval, i); evec_i = gsl_matrix_column (evec, i); for( int j=0; j<3; ++j ) V[3*i+j] = gsl_vector_get(&evec_i.vector,j); } r1 = 1.0 / sqrt( gsl_vector_get(eval, 0) ); r2 = 1.0 / sqrt( gsl_vector_get(eval, 1) ); r3 = 1.0 / sqrt( gsl_vector_get(eval, 2) ); evec_i = gsl_matrix_column (evec, 0); v1[0] = gsl_vector_get(&evec_i.vector,0); v1[1] = gsl_vector_get(&evec_i.vector,1); v1[2] = gsl_vector_get(&evec_i.vector,2); evec_i = gsl_matrix_column (evec, 1); v2[0] = gsl_vector_get(&evec_i.vector,0); v2[1] = gsl_vector_get(&evec_i.vector,1); v2[2] = gsl_vector_get(&evec_i.vector,2); evec_i = gsl_matrix_column (evec, 2); v3[0] = gsl_vector_get(&evec_i.vector,0); v3[1] = gsl_vector_get(&evec_i.vector,1); v3[2] = gsl_vector_get(&evec_i.vector,2); gsl_vector_free(eval); gsl_matrix_free(evec); gsl_eigen_symmv_free (w); axes_computed = true; } void compute( double tol = 0.01, int maxit = 300 ) { double err = 10.0 * tol; float *unew = new float[N]; int count = 0; while( err > tol && count < maxit ) { for( int i=0; i<4; ++i ) for( int j=0,i4=4*i; j<4; ++j ) { const int k = i4+j; X[k] = 0.0f; for( size_t l=0; l Mmax ) { imax = i; Mmax = m; } } //std::cerr << "max(M) = " << Mmax << std::endl; float step_size = (Mmax-4.0f)/(4.0f*(Mmax-1.0f)), step_size1 = 1.0f-step_size; for( size_t i=0; i bool check_point( const T *x ) { float q[3] = {x[0]-c[0],x[1]-c[1],x[2]-c[2]}; double r = 0.0; for( int i=0; i<3; ++i ) for( int j=0; j<3; ++j ) r += q[i]*A[3*j+i]*q[j]; if( r<= 1.0 ) std::cerr << "q = (" << q[0] << ", " << q[1] << ", " << q[2] << ") r = " << r << std::endl; return r <= 1.0; } template void get_AABB( T *left, T *right ) { std::cout << "A = \n"; for( int i=0; i<9; ++i ) { if( i%3==0 ) std::cout << std::endl; std::cout << A[i] << " "; } std::cout << std::endl; std::cout << "c = (" << c[0] << ", " << c[1] << ", " << c[2] << ")\n"; for( int i=0; i<3; ++i ) { left[i] = c[i] - 1.0/sqrt(A[3*i+i]); right[i] = c[i] + 1.0/sqrt(A[3*i+i]); } } void expand_ellipsoid( float dr ) { if( !axes_computed ) compute_axes(); float munew[3]; for( int i=0; i<3; ++i ) munew[i] = 1.0/sqr(1.0/sqrt(mu[i])+dr); float Anew[9]; for(int i=0; i<3; ++i ) for( int j=0; j<3; ++j ) { Anew[3*i+j] = 0.0; for( int k=0; k<3; ++k ) Anew[3*i+j] += V[3*k+i] * munew[k] * V[3*k+j]; } for( int i=0; i<9; ++i ) A[i] = Anew[i]; } }; //! Minimum volume enclosing ellipsoid plugin class region_ellipsoid_plugin : public region_generator_plugin{ private: min_ellipsoid *pellip_; void read_points_from_file( std::string fname, std::vector& p ) { std::ifstream ifs(fname.c_str()); if( !ifs ) { LOGERR("region_ellipsoid_plugin::read_points_from_file : Could not open file \'%s\'",fname.c_str()); throw std::runtime_error("region_ellipsoid_plugin::read_points_from_file : cannot open point file."); } while( ifs ) { std::string s; if( !getline(ifs,s) )break; std::stringstream ss(s); while(ss) { if( !getline(ss,s,' ') ) break; p.push_back( strtod(s.c_str(),NULL) ); } } if( p.size()%3 != 0 ) { LOGERR("Region point file \'%s\' does not contain triplets",fname.c_str()); throw std::runtime_error("region_ellipsoid_plugin::read_points_from_file : file does not contain triplets."); } /*for( size_t i=0; i 0.5 ) dx -= 1.0; p[i+j] = x0[j] + dx; } } } public: explicit region_ellipsoid_plugin( config_file& cf ) : region_generator_plugin( cf ) { std::vector pp; std::string point_file = cf.getValue("setup","region_point_file"); read_points_from_file( point_file, pp ); pellip_ = new min_ellipsoid( pp.size()/3, &pp[0] ); } ~region_ellipsoid_plugin() { delete pellip_; } void get_AABB( double *left, double *right) { pellip_->get_AABB( left, right ); } bool query_point( double *x ) { return pellip_->check_point( x ); } }; namespace{ region_generator_plugin_creator_concrete< region_ellipsoid_plugin > creator("ellipsoid"); }