SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Gunnar Raetsch 00008 * Written (W) 1999-2008 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/config.h" 00013 #include "lib/Mathematics.h" 00014 00015 #include <string.h> 00016 #include <stdlib.h> 00017 00018 #ifdef HAVE_LAPACK 00019 #include "lib/lapack.h" 00020 00021 #include "lib/common.h" 00022 #include "preproc/PCACut.h" 00023 #include "preproc/SimplePreProc.h" 00024 #include "features/Features.h" 00025 #include "features/SimpleFeatures.h" 00026 #include "lib/io.h" 00027 00028 using namespace shogun; 00029 00030 CPCACut::CPCACut(int32_t do_whitening_, float64_t thresh_) 00031 : CSimplePreProc<float64_t>("PCACut", "PCAC"), T(NULL), num_dim(0), mean(NULL), 00032 initialized(false), do_whitening(do_whitening_), thresh(thresh_) 00033 { 00034 } 00035 00036 CPCACut::~CPCACut() 00037 { 00038 delete[] T; 00039 delete[] mean; 00040 } 00041 00043 bool CPCACut::init(CFeatures* f) 00044 { 00045 if (!initialized) 00046 { 00047 ASSERT(f->get_feature_class()==C_SIMPLE); 00048 ASSERT(f->get_feature_type()==F_DREAL); 00049 00050 SG_INFO("calling CPCACut::init\n") ; 00051 int32_t num_vectors=((CSimpleFeatures<float64_t>*)f)->get_num_vectors() ; 00052 int32_t num_features=((CSimpleFeatures<float64_t>*)f)->get_num_features() ; 00053 SG_INFO("num_examples: %ld num_features: %ld \n", num_vectors, num_features); 00054 delete[] mean ; 00055 mean=new float64_t[num_features+1] ; 00056 00057 int32_t i,j; 00058 00060 00061 // clear 00062 for (j=0; j<num_features; j++) 00063 mean[j]=0 ; 00064 00065 // sum 00066 for (i=0; i<num_vectors; i++) 00067 { 00068 int32_t len; 00069 bool free; 00070 float64_t* vec=((CSimpleFeatures<float64_t>*) f)->get_feature_vector(i, len, free); 00071 for (j=0; j<num_features; j++) 00072 mean[j]+= vec[j]; 00073 00074 ((CSimpleFeatures<float64_t>*) f)->free_feature_vector(vec, i, free); 00075 } 00076 00077 //divide 00078 for (j=0; j<num_features; j++) 00079 mean[j]/=num_vectors; 00080 00081 SG_DONE(); 00082 SG_DEBUG("Computing covariance matrix... of size %.2f M\n", num_features*num_features/1024.0/1024.0); 00083 float64_t *cov=new float64_t[num_features*num_features]; 00084 00085 for (j=0; j<num_features*num_features; j++) 00086 cov[j]=0.0 ; 00087 00088 for (i=0; i<num_vectors; i++) 00089 { 00090 if (!(i % (num_vectors/10+1))) 00091 SG_PROGRESS(i, 0, num_vectors); 00092 00093 int32_t len; 00094 bool free; 00095 00096 float64_t* vec=((CSimpleFeatures<float64_t>*) f)->get_feature_vector(i, len, free) ; 00097 00098 for (int32_t jj=0; jj<num_features; jj++) 00099 vec[jj]-=mean[jj] ; 00100 00102 int nf = (int) num_features; /* calling external lib */ 00103 double* vec_double = (double*) vec; /* calling external lib */ 00104 cblas_dger(CblasColMajor, nf, nf, 1.0, vec_double, 1, vec_double, 00105 1, (double*) cov, nf); 00106 00107 //for (int32_t k=0; k<num_features; k++) 00108 // for (int32_t l=0; l<num_features; l++) 00109 // cov[k*num_features+l]+=feature[l]*feature[k] ; 00110 00111 ((CSimpleFeatures<float64_t>*) f)->free_feature_vector(vec, i, free) ; 00112 } 00113 00114 SG_DONE(); 00115 00116 for (i=0; i<num_features; i++) 00117 for (j=0; j<num_features; j++) 00118 cov[i*num_features+j]/=num_vectors ; 00119 00120 SG_DONE(); 00121 00122 SG_INFO("Computing Eigenvalues ... ") ; 00123 char V='V'; 00124 char U='U'; 00125 int32_t info; 00126 int32_t ord= num_features; 00127 int32_t lda= num_features; 00128 float64_t* eigenvalues=new float64_t[num_features] ; 00129 00130 for (i=0; i<num_features; i++) 00131 eigenvalues[i]=0; 00132 00133 // lapack sym matrix eigenvalues+vectors 00134 wrap_dsyev(V, U, (int) ord, (double*) cov, (int) lda, 00135 (double*) eigenvalues, (int*) &info); 00136 00137 00138 num_dim=0; 00139 for (i=0; i<num_features; i++) 00140 { 00141 // SG_DEBUG( "EV[%i]=%e\n", i, values[i]) ; 00142 if (eigenvalues[i]>thresh) 00143 num_dim++ ; 00144 } ; 00145 00146 SG_INFO("Done\nReducing from %i to %i features..", num_features, num_dim) ; 00147 00148 delete[] T; 00149 T=new float64_t[num_dim*num_features]; 00150 num_old_dim=num_features; 00151 00152 if (do_whitening) 00153 { 00154 int32_t offs=0 ; 00155 for (i=0; i<num_features; i++) 00156 { 00157 if (eigenvalues[i]>thresh) 00158 { 00159 for (int32_t jj=0; jj<num_features; jj++) 00160 T[offs+jj*num_dim]=cov[num_features*i+jj]/sqrt(eigenvalues[i]) ; 00161 offs++ ; 00162 } ; 00163 } 00164 } ; 00165 00166 delete[] eigenvalues; 00167 delete[] cov; 00168 initialized=true; 00169 SG_INFO("Done\n") ; 00170 return true ; 00171 } 00172 return 00173 false; 00174 } 00175 00177 void CPCACut::cleanup() 00178 { 00179 delete[] T ; 00180 T=NULL ; 00181 } 00182 00186 float64_t* CPCACut::apply_to_feature_matrix(CFeatures* f) 00187 { 00188 int32_t num_vectors=0; 00189 int32_t num_features=0; 00190 00191 float64_t* m=((CSimpleFeatures<float64_t>*) f)->get_feature_matrix(num_features, num_vectors); 00192 SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features) ; 00193 00194 if (m) 00195 { 00196 SG_INFO("Preprocessing feature matrix\n"); 00197 float64_t* res= new float64_t[num_dim]; 00198 float64_t* sub_mean= new float64_t[num_features]; 00199 00200 for (int32_t vec=0; vec<num_vectors; vec++) 00201 { 00202 int32_t i; 00203 00204 for (i=0; i<num_features; i++) 00205 sub_mean[i]=m[num_features*vec+i]-mean[i] ; 00206 00207 int nd = (int) num_dim; /* calling external lib */ 00208 cblas_dgemv(CblasColMajor, CblasNoTrans, nd, (int) num_features, 00209 1.0, T, nd, (double*) sub_mean, 1, 0, (double*) res, 1); 00210 00211 float64_t* m_transformed=&m[num_dim*vec]; 00212 for (i=0; i<num_dim; i++) 00213 m_transformed[i]=m[i]; 00214 } 00215 delete[] res; 00216 delete[] sub_mean; 00217 00218 ((CSimpleFeatures<float64_t>*) f)->set_num_features(num_dim); 00219 ((CSimpleFeatures<float64_t>*) f)->get_feature_matrix(num_features, num_vectors); 00220 SG_INFO("new Feature matrix: %ix%i\n", num_vectors, num_features); 00221 } 00222 00223 return m; 00224 } 00225 00228 float64_t* CPCACut::apply_to_feature_vector(float64_t* f, int32_t &len) 00229 { 00230 float64_t *ret=new float64_t[num_dim]; 00231 float64_t *sub_mean=new float64_t[len]; 00232 for (int32_t i=0; i<len; i++) 00233 sub_mean[i]=f[i]-mean[i]; 00234 00235 int nd = (int) num_dim; /* calling external lib */ 00236 cblas_dgemv(CblasColMajor, CblasNoTrans, nd, (int) len, 1.0, (double*) T, 00237 nd, (double*) sub_mean, 1, 0, (double*) ret, 1); 00238 00239 delete[] sub_mean ; 00240 len=num_dim ; 00241 // SG_DEBUG( "num_dim: %d\n", num_dim); 00242 return ret; 00243 } 00244 #endif