SHOGUN v0.9.0
|
00001 #include "features/PolyFeatures.h" 00002 00003 using namespace shogun; 00004 00005 CPolyFeatures::CPolyFeatures(void) :CDotFeatures() 00006 { 00007 SG_UNSTABLE("CPolyFeatures::CPolyFeatures(void)", "\n"); 00008 00009 m_feat = NULL; 00010 m_degree = 0; 00011 m_normalize = false; 00012 m_input_dimensions = 0; 00013 m_multi_index = NULL; 00014 m_multinomial_coefficients = NULL; 00015 m_normalization_values = NULL; 00016 } 00017 00018 CPolyFeatures::CPolyFeatures(CSimpleFeatures<float64_t>* feat, int32_t degree, bool normalize) 00019 : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL), 00020 m_normalization_values(NULL) 00021 { 00022 ASSERT(feat); 00023 00024 m_feat = feat; 00025 SG_REF(m_feat); 00026 m_degree=degree; 00027 m_normalize=normalize; 00028 m_input_dimensions=feat->get_num_features(); 00029 m_output_dimensions=calc_feature_space_dimensions(m_input_dimensions, m_degree); 00030 00031 store_multi_index(); 00032 store_multinomial_coefficients(); 00033 if (m_normalize) 00034 store_normalization_values(); 00035 } 00036 00037 CPolyFeatures::~CPolyFeatures() 00038 { 00039 delete[] m_multi_index; 00040 delete[] m_multinomial_coefficients; 00041 delete[] m_normalization_values; 00042 SG_UNREF(m_feat); 00043 } 00044 00045 float64_t CPolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2) 00046 { 00047 ASSERT(df); 00048 ASSERT(df->get_feature_type() == get_feature_type()); 00049 ASSERT(df->get_feature_class() == get_feature_class()); 00050 00051 CPolyFeatures* pf=(CPolyFeatures*) df; 00052 00053 int32_t len1; 00054 bool do_free1; 00055 float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1); 00056 00057 int32_t len2; 00058 bool do_free2; 00059 float64_t* vec2 = pf->m_feat->get_feature_vector(vec_idx2, len2, do_free2); 00060 00061 float64_t sum=0; 00062 int cnt=0; 00063 for (int j=0; j<m_output_dimensions; j++) 00064 { 00065 float64_t out1=m_multinomial_coefficients[j]; 00066 float64_t out2=m_multinomial_coefficients[j]; 00067 for (int k=0; k<m_degree; k++) 00068 { 00069 out1*=vec1[m_multi_index[cnt]]; 00070 out2*=vec2[m_multi_index[cnt]]; 00071 cnt++; 00072 } 00073 sum+=out1*out2; 00074 } 00075 m_feat->free_feature_vector(vec1, len1, do_free1); 00076 pf->m_feat->free_feature_vector(vec2, len2, do_free2); 00077 00078 return sum; 00079 } 00080 00081 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00082 { 00083 if (vec2_len != m_output_dimensions) 00084 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions); 00085 00086 int32_t len; 00087 bool do_free; 00088 float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free); 00089 00090 00091 int cnt=0; 00092 float64_t sum=0; 00093 for (int j=0; j<vec2_len; j++) 00094 { 00095 float64_t output=m_multinomial_coefficients[j]; 00096 for (int k=0; k<m_degree; k++) 00097 { 00098 output*=vec[m_multi_index[cnt]]; 00099 cnt++; 00100 } 00101 sum+=output*vec2[j]; 00102 } 00103 if (m_normalize) 00104 sum = sum/m_normalization_values[vec_idx1]; 00105 00106 m_feat->free_feature_vector(vec, len, do_free); 00107 return sum; 00108 } 00109 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val) 00110 { 00111 if (vec2_len != m_output_dimensions) 00112 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions); 00113 00114 int32_t len; 00115 bool do_free; 00116 float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free); 00117 00118 00119 int cnt=0; 00120 float32_t norm_val=1; 00121 if (m_normalize) 00122 norm_val = m_normalization_values[vec_idx1]; 00123 alpha/=norm_val; 00124 for (int j=0; j<vec2_len; j++) 00125 { 00126 float64_t output=m_multinomial_coefficients[j]; 00127 for (int k=0; k<m_degree; k++) 00128 { 00129 output*=vec[m_multi_index[cnt]]; 00130 cnt++; 00131 } 00132 if (abs_val) 00133 output=CMath::abs(output); 00134 00135 vec2[j]+=alpha*output; 00136 } 00137 m_feat->free_feature_vector(vec, len, do_free); 00138 } 00139 void CPolyFeatures::store_normalization_values() 00140 { 00141 delete[] m_normalization_values; 00142 00143 int32_t num_vec = this->get_num_vectors(); 00144 00145 m_normalization_values=new float32_t[num_vec]; 00146 for (int i=0; i<num_vec; i++) 00147 { 00148 float64_t tmp = CMath::sqrt(dot(i, this,i)); 00149 if (tmp==0) 00150 // trap division by zero 00151 m_normalization_values[i]=1; 00152 else 00153 m_normalization_values[i]=tmp; 00154 } 00155 00156 } 00157 00158 void CPolyFeatures::store_multi_index() 00159 { 00160 delete[] m_multi_index; 00161 00162 m_multi_index=new uint16_t[m_output_dimensions*m_degree]; 00163 00164 uint16_t* exponents = new uint16_t[m_input_dimensions]; 00165 if (!exponents) 00166 SG_ERROR( "Error allocating mem \n"); 00167 /*copy adress: otherwise it will be overwritten in recursion*/ 00168 uint16_t* index = m_multi_index; 00169 enumerate_multi_index(0, &index, exponents, m_degree); 00170 00171 delete[] exponents; 00172 } 00173 00174 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree) 00175 { 00176 if (feat_idx==m_input_dimensions-1 || degree==0) 00177 { 00178 if (feat_idx==m_input_dimensions-1) 00179 exponents[feat_idx] = degree; 00180 if (degree==0) 00181 exponents[feat_idx] = 0; 00182 int32_t i, j; 00183 for (j=0; j<feat_idx+1; j++) 00184 for (i=0; i<exponents[j]; i++) 00185 { 00186 **index = j; 00187 (*index)++; 00188 } 00189 exponents[feat_idx] = 0; 00190 return; 00191 } 00192 int32_t k; 00193 for (k=0; k<=degree; k++) 00194 { 00195 exponents[feat_idx] = k; 00196 enumerate_multi_index(feat_idx+1, index, exponents, degree-k); 00197 } 00198 return; 00199 00200 } 00201 00202 void CPolyFeatures::store_multinomial_coefficients() 00203 { 00204 delete[] m_multinomial_coefficients; 00205 00206 m_multinomial_coefficients = new float64_t[m_output_dimensions]; 00207 int32_t* exponents = new int32_t[m_input_dimensions]; 00208 if (!exponents) 00209 SG_ERROR( "Error allocating mem \n"); 00210 int32_t j=0; 00211 for (j=0; j<m_input_dimensions; j++) 00212 exponents[j] = 0; 00213 int32_t k, cnt=0; 00214 for (j=0; j<m_output_dimensions; j++) 00215 { 00216 for (k=0; k<m_degree; k++) 00217 { 00218 exponents[m_multi_index[cnt]] ++; 00219 cnt++; 00220 } 00221 m_multinomial_coefficients[j] = sqrt((double) multinomialcoef(exponents, m_input_dimensions)); 00222 for (k=0; k<m_input_dimensions; k++) 00223 { 00224 exponents[k]=0; 00225 } 00226 } 00227 delete[] exponents; 00228 } 00229 00230 int32_t CPolyFeatures::bico2(int32_t n, int32_t k) 00231 { 00232 00233 /* for this problem k is usually small (<=degree), 00234 * thus it is efficient to 00235 * to use recursion and prune end recursions*/ 00236 if (n<k) 00237 return 0; 00238 if (k>n/2) 00239 k = n-k; 00240 if (k<0) 00241 return 0; 00242 if (k==0) 00243 return 1; 00244 if (k==1) 00245 return n; 00246 if (k<4) 00247 return bico2(n-1, k-1)+bico2(n-1, k); 00248 00249 /* call function as implemented in numerical recipies: 00250 * much more efficient for large binomial coefficients*/ 00251 return bico(n, k); 00252 00253 } 00254 00255 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D) 00256 { 00257 if (N==1) 00258 return 1; 00259 if (D==0) 00260 return 1; 00261 int32_t d; 00262 int32_t ret = 0; 00263 for (d=0; d<=D; d++) 00264 ret += calc_feature_space_dimensions(N-1, d); 00265 00266 return ret; 00267 } 00268 00269 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len) 00270 { 00271 int32_t ret = 1, i; 00272 int32_t n = 0; 00273 for (i=0; i<len; i++) 00274 { 00275 n += exps[i]; 00276 ret *= bico2(n, exps[i]); 00277 } 00278 return ret; 00279 } 00280 00281 /* gammln as implemented in the 00282 * second edition of Numerical Recipes in C */ 00283 float64_t CPolyFeatures::gammln(float64_t xx) 00284 { 00285 float64_t x,y,tmp,ser; 00286 static float64_t cof[6]={76.18009172947146, -86.50532032941677, 00287 24.01409824083091, -1.231739572450155, 00288 0.1208650973866179e-2,-0.5395239384953e-5}; 00289 int32_t j; 00290 00291 y=x=xx; 00292 tmp=x+5.5; 00293 tmp -= (x+0.5)*log(tmp); 00294 ser=1.000000000190015; 00295 for (j=0;j<=5;j++) ser += cof[j]/++y; 00296 return -tmp+log(2.5066282746310005*ser/x); 00297 } 00298 00299 float64_t CPolyFeatures::factln(int32_t n) 00300 { 00301 static float64_t a[101]; 00302 00303 if (n < 0) SG_ERROR("Negative factorial in routine factln\n"); 00304 if (n <= 1) return 0.0; 00305 if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0)); 00306 else return gammln(n+1.0); 00307 } 00308 00309 int32_t CPolyFeatures::bico(int32_t n, int32_t k) 00310 { 00311 /* use floor to clean roundoff errors*/ 00312 return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k))); 00313 } 00314 CFeatures* CPolyFeatures::duplicate() const 00315 { 00316 return new CPolyFeatures(*this); 00317 }