SHOGUN v0.9.0
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 1999-2009 Soeren Sonnenburg
 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include "lib/common.h"

#ifdef HAVE_LAPACK
#include "classifier/Classifier.h"
#include "classifier/LinearClassifier.h"
#include "classifier/LDA.h"
#include "features/Labels.h"
#include "lib/Mathematics.h"
#include "lib/lapack.h"

using namespace shogun;

CLDA::CLDA(float64_t gamma)
: CLinearClassifier(), m_gamma(gamma)
{
}

CLDA::CLDA(float64_t gamma, CSimpleFeatures<float64_t>* traindat, CLabels* trainlab)
: CLinearClassifier(), m_gamma(gamma)
{
    set_features(traindat);
    set_labels(trainlab);
}

CLDA::~CLDA()
{
}

bool CLDA::train(CFeatures* data)
{
    ASSERT(labels);
    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");
        set_features((CDotFeatures*) data);
    }
    ASSERT(features);
    int32_t num_train_labels=0;
    int32_t* train_labels=labels->get_int_labels(num_train_labels);
    ASSERT(train_labels);

    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();
    ASSERT(num_vec==num_train_labels);

    int32_t* classidx_neg=new int32_t[num_vec];
    int32_t* classidx_pos=new int32_t[num_vec];

    // split the training indices by label
    int32_t i=0;
    int32_t j=0;
    int32_t num_neg=0;
    int32_t num_pos=0;
    for (i=0; i<num_train_labels; i++)
    {
        if (train_labels[i]==-1)
            classidx_neg[num_neg++]=i;
        else if (train_labels[i]==+1)
            classidx_pos[num_pos++]=i;
        else
        {
            SG_ERROR("found label != +/-1, bailing...\n");
            return false;
        }
    }

    // training requires examples from both classes
    if (num_neg<=0 || num_pos<=0)
    {
        SG_ERROR("only a single class found\n");
        return false;
    }

    delete[] w;
    w=new float64_t[num_feat];
    w_dim=num_feat;

    float64_t* mean_neg=new float64_t[num_feat];
    memset(mean_neg, 0, num_feat*sizeof(float64_t));

    float64_t* mean_pos=new float64_t[num_feat];
    memset(mean_pos, 0, num_feat*sizeof(float64_t));
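    /* Estimate the two class means and the pooled within-class scatter matrix.
     * Each class's examples are copied into 'buffer', centred on the class
     * mean, and buffer*buffer^T is accumulated into 'scatter' via cblas_dgemm;
     * the second dgemm call scales the summed scatter by 1/(N-1). */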
    /* calling external lib */
    double* scatter=new double[num_feat*num_feat];
    double* buffer=new double[num_feat*CMath::max(num_neg, num_pos)];
    int nf=(int) num_feat;

    CSimpleFeatures<float64_t>* rf=(CSimpleFeatures<float64_t>*) features;

    //mean neg
    for (i=0; i<num_neg; i++)
    {
        int32_t vlen;
        bool vfree;
        float64_t* vec=
            rf->get_feature_vector(classidx_neg[i], vlen, vfree);
        ASSERT(vec);

        for (j=0; j<vlen; j++)
        {
            mean_neg[j]+=vec[j];
            buffer[num_feat*i+j]=vec[j];
        }

        rf->free_feature_vector(vec, classidx_neg[i], vfree);
    }

    for (j=0; j<num_feat; j++)
        mean_neg[j]/=num_neg;

    for (i=0; i<num_neg; i++)
    {
        for (j=0; j<num_feat; j++)
            buffer[num_feat*i+j]-=mean_neg[j];
    }
    cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, nf, nf,
        (int) num_neg, 1.0, buffer, nf, buffer, nf, 0.0, scatter, nf);

    //mean pos
    for (i=0; i<num_pos; i++)
    {
        int32_t vlen;
        bool vfree;
        float64_t* vec=
            rf->get_feature_vector(classidx_pos[i], vlen, vfree);
        ASSERT(vec);

        for (j=0; j<vlen; j++)
        {
            mean_pos[j]+=vec[j];
            buffer[num_feat*i+j]=vec[j];
        }

        rf->free_feature_vector(vec, classidx_pos[i], vfree);
    }

    for (j=0; j<num_feat; j++)
        mean_pos[j]/=num_pos;

    for (i=0; i<num_pos; i++)
    {
        for (j=0; j<num_feat; j++)
            buffer[num_feat*i+j]-=mean_pos[j];
    }
    cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, nf, nf, (int) num_pos,
        1.0/(num_train_labels-1), buffer, nf, buffer, nf,
        1.0/(num_train_labels-1), scatter, nf);

    float64_t trace=CMath::trace((float64_t*) scatter, num_feat, num_feat);
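    /* Shrink the scatter matrix towards a multiple of the identity,
     * S <- (1-gamma)*S + gamma*(trace(S)/num_feat)*I, then obtain the LDA
     * solution w = S^{-1}(mean_pos - mean_neg) and bias
     * b = 0.5*(mean_neg^T S^{-1} mean_neg - mean_pos^T S^{-1} mean_pos)
     * using the pseudo-inverse and two symmetric matrix-vector products. */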
    double s=1.0-m_gamma; /* calling external lib; indirectly */
    for (i=0; i<num_feat*num_feat; i++)
        scatter[i]*=s;

    for (i=0; i<num_feat; i++)
        scatter[i*num_feat+i]+=trace*m_gamma/num_feat;

    double* inv_scatter=(double*) CMath::pinv(
        scatter, num_feat, num_feat, NULL);

    float64_t* w_pos=buffer;
    float64_t* w_neg=&buffer[num_feat];

    cblas_dsymv(CblasColMajor, CblasUpper, nf, 1.0, inv_scatter, nf,
        (double*) mean_pos, 1, 0.0, (double*) w_pos, 1);
    cblas_dsymv(CblasColMajor, CblasUpper, nf, 1.0, inv_scatter, nf,
        (double*) mean_neg, 1, 0.0, (double*) w_neg, 1);

    bias=0.5*(CMath::dot(w_neg, mean_neg, num_feat)-CMath::dot(w_pos, mean_pos, num_feat));
    for (i=0; i<num_feat; i++)
        w[i]=w_pos[i]-w_neg[i];

#ifdef DEBUG_LDA
    SG_PRINT("bias: %f\n", bias);
    CMath::display_vector(w, num_feat, "w");
    CMath::display_vector(w_pos, num_feat, "w_pos");
    CMath::display_vector(w_neg, num_feat, "w_neg");
    CMath::display_vector(mean_pos, num_feat, "mean_pos");
    CMath::display_vector(mean_neg, num_feat, "mean_neg");
#endif

    delete[] train_labels;
    delete[] mean_neg;
    delete[] mean_pos;
    delete[] scatter;
    delete[] inv_scatter;
    delete[] classidx_neg;
    delete[] classidx_pos;
    delete[] buffer;
    return true;
}
#endif
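The train() method above is the textbook regularised two-class LDA: class means, pooled within-class scatter S scaled by 1/(N-1), shrinkage S <- (1-gamma)*S + gamma*(trace(S)/d)*I, then w = S^{-1}(mean_pos - mean_neg) with the bias given above, and a new point x is scored as w.x + bias. The following sketch is not part of SHOGUN; it reruns the same arithmetic on a hard-coded 2-D toy set, where the data, the gamma value, and the direct 2x2 inverse (standing in for CMath::pinv plus the BLAS calls) are all illustrative assumptions.

// Minimal, self-contained sketch of the same regularised LDA computation.
#include <cstdio>

int main()
{
    const int d=2;
    double pos[2][2]={{ 2.0,  2.0}, { 3.0,  1.0}};   // class +1 examples
    double neg[2][2]={{-2.0, -1.0}, {-3.0, -2.0}};   // class -1 examples
    const int n_pos=2, n_neg=2, n=n_pos+n_neg;
    double gamma=0.1;                                // shrinkage parameter (m_gamma)

    // class means
    double mu_pos[d]={0, 0}, mu_neg[d]={0, 0};
    for (int i=0; i<n_pos; i++) for (int j=0; j<d; j++) mu_pos[j]+=pos[i][j]/n_pos;
    for (int i=0; i<n_neg; i++) for (int j=0; j<d; j++) mu_neg[j]+=neg[i][j]/n_neg;

    // pooled within-class scatter, scaled by 1/(N-1) as in train()
    double S[d][d]={{0, 0}, {0, 0}};
    for (int i=0; i<n_pos; i++)
        for (int j=0; j<d; j++)
            for (int k=0; k<d; k++)
                S[j][k]+=(pos[i][j]-mu_pos[j])*(pos[i][k]-mu_pos[k])/(n-1);
    for (int i=0; i<n_neg; i++)
        for (int j=0; j<d; j++)
            for (int k=0; k<d; k++)
                S[j][k]+=(neg[i][j]-mu_neg[j])*(neg[i][k]-mu_neg[k])/(n-1);

    // shrinkage: S <- (1-gamma)*S + gamma*(trace(S)/d)*I
    double trace=S[0][0]+S[1][1];
    for (int j=0; j<d; j++)
        for (int k=0; k<d; k++)
            S[j][k]=(1.0-gamma)*S[j][k]+(j==k ? gamma*trace/d : 0.0);

    // direct 2x2 inverse replaces CMath::pinv and cblas_dsymv
    double det=S[0][0]*S[1][1]-S[0][1]*S[1][0];
    double Si[2][2]={{ S[1][1]/det, -S[0][1]/det},
                     {-S[1][0]/det,  S[0][0]/det}};

    // w = S^{-1}(mu_pos - mu_neg), bias as in train()
    double w_pos[d], w_neg[d], w[d];
    for (int j=0; j<d; j++)
    {
        w_pos[j]=Si[j][0]*mu_pos[0]+Si[j][1]*mu_pos[1];
        w_neg[j]=Si[j][0]*mu_neg[0]+Si[j][1]*mu_neg[1];
        w[j]=w_pos[j]-w_neg[j];
    }
    double bias=0.5*((w_neg[0]*mu_neg[0]+w_neg[1]*mu_neg[1])
                    -(w_pos[0]*mu_pos[0]+w_pos[1]*mu_pos[1]));

    printf("w = [%f, %f]  bias = %f\n", w[0], w[1], bias);
    // a new point x would be classified by the sign of w[0]*x[0]+w[1]*x[1]+bias
    return 0;
}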