// SHOGUN v0.9.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009-2010 Soeren Sonnenburg 00008 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 * Copyright (C) 2010 Berlin Institute of Technology 00010 */ 00011 00012 #include "features/CombinedDotFeatures.h" 00013 #include "lib/io.h" 00014 #include "lib/Mathematics.h" 00015 00016 using namespace shogun; 00017 00018 void 00019 CCombinedDotFeatures::init(void) 00020 { 00021 m_parameters->add(&num_dimensions, "num_dimensions", 00022 "Total number of dimensions."); 00023 m_parameters->add(&num_vectors, "num_vectors", 00024 "Total number of vectors."); 00025 m_parameters->add((CSGObject**) &feature_list, 00026 "feature_list", "Feature list."); 00027 } 00028 00029 CCombinedDotFeatures::CCombinedDotFeatures() : CDotFeatures() 00030 { 00031 init(); 00032 00033 feature_list=new CList(true); 00034 update_dim_feature_space_and_num_vec(); 00035 } 00036 00037 CCombinedDotFeatures::CCombinedDotFeatures(const CCombinedDotFeatures & orig) 00038 : CDotFeatures(orig), num_vectors(orig.num_vectors), 00039 num_dimensions(orig.num_dimensions) 00040 { 00041 init(); 00042 00043 feature_list=new CList(true); 00044 } 00045 00046 CFeatures* CCombinedDotFeatures::duplicate() const 00047 { 00048 return new CCombinedDotFeatures(*this); 00049 } 00050 00051 CCombinedDotFeatures::~CCombinedDotFeatures() 00052 { 00053 delete feature_list; 00054 } 00055 00056 void CCombinedDotFeatures::list_feature_objs() 00057 { 00058 SG_INFO( "BEGIN COMBINED DOTFEATURES LIST (%d, %d) - ", num_vectors, num_dimensions); 00059 this->list_feature_obj(); 00060 00061 CListElement* current = NULL ; 00062 CDotFeatures* f=get_first_feature_obj(current); 00063 00064 while (f) 00065 { 00066 
f->list_feature_obj(); 00067 f=get_next_feature_obj(current); 00068 } 00069 00070 SG_INFO( "END COMBINED DOTFEATURES LIST (%d, %d) - ", num_vectors, num_dimensions); 00071 this->list_feature_obj(); 00072 } 00073 00074 void CCombinedDotFeatures::update_dim_feature_space_and_num_vec() 00075 { 00076 CListElement* current = NULL ; 00077 CDotFeatures* f=get_first_feature_obj(current); 00078 00079 int32_t dim=0; 00080 int32_t vec=-1; 00081 00082 while (f) 00083 { 00084 dim+= f->get_dim_feature_space(); 00085 if (vec==-1) 00086 vec=f->get_num_vectors(); 00087 else if (vec != f->get_num_vectors()) 00088 { 00089 f->list_feature_obj(); 00090 SG_ERROR("Number of vectors (%d) mismatches in above feature obj (%d)\n", vec, f->get_num_vectors()); 00091 } 00092 00093 SG_UNREF(f); 00094 00095 f=get_next_feature_obj(current); 00096 } 00097 00098 num_dimensions=dim; 00099 num_vectors=vec; 00100 SG_DEBUG("vecs=%d, dims=%d\n", num_vectors, num_dimensions); 00101 } 00102 00103 float64_t CCombinedDotFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2) 00104 { 00105 float64_t result=0; 00106 00107 ASSERT(df); 00108 ASSERT(df->get_feature_type() == get_feature_type()); 00109 ASSERT(df->get_feature_class() == get_feature_class()); 00110 CCombinedDotFeatures* cf = (CCombinedDotFeatures*) df; 00111 00112 CListElement* current1 = NULL; 00113 CDotFeatures* f1=get_first_feature_obj(current1); 00114 00115 CListElement* current2 = NULL; 00116 CDotFeatures* f2=cf->get_first_feature_obj(current2); 00117 00118 while (f1 && f2) 00119 { 00120 result += f1->dot(vec_idx1, f2,vec_idx2) * 00121 f1->get_combined_feature_weight() * 00122 f2->get_combined_feature_weight(); 00123 00124 SG_UNREF(f1); 00125 SG_UNREF(f2); 00126 f1=get_next_feature_obj(current1); 00127 f2=cf->get_next_feature_obj(current2); 00128 } 00129 00130 // check that both have same number of feature objects inside 00131 ASSERT(f1 == NULL && f2 == NULL); 00132 00133 return result; 00134 } 00135 00136 float64_t 
CCombinedDotFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00137 { 00138 float64_t result=0; 00139 00140 CListElement* current = NULL ; 00141 CDotFeatures* f=get_first_feature_obj(current); 00142 uint32_t offs=0; 00143 00144 while (f) 00145 { 00146 int32_t dim = f->get_dim_feature_space(); 00147 result += f->dense_dot(vec_idx1, vec2+offs, dim)*f->get_combined_feature_weight(); 00148 offs += dim; 00149 00150 SG_UNREF(f); 00151 f=get_next_feature_obj(current); 00152 } 00153 00154 return result; 00155 } 00156 00157 void CCombinedDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) 00158 { 00159 if (stop<=start) 00160 return; 00161 ASSERT(dim==num_dimensions); 00162 00163 CListElement* current = NULL; 00164 CDotFeatures* f=get_first_feature_obj(current); 00165 uint32_t offs=0; 00166 bool first=true; 00167 int32_t num=stop-start; 00168 float64_t* tmp=new float64_t[num]; 00169 00170 while (f) 00171 { 00172 int32_t f_dim = f->get_dim_feature_space(); 00173 if (first) 00174 { 00175 f->dense_dot_range(output, start, stop, alphas, vec+offs, f_dim, b); 00176 first=false; 00177 } 00178 else 00179 { 00180 f->dense_dot_range(tmp, start, stop, alphas, vec+offs, f_dim, b); 00181 for (int32_t i=0; i<num; i++) 00182 output[i]+=tmp[i]; 00183 } 00184 offs += f_dim; 00185 00186 SG_UNREF(f); 00187 f=get_next_feature_obj(current); 00188 } 00189 delete[] tmp; 00190 } 00191 00192 void CCombinedDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) 00193 { 00194 if (num<=0) 00195 return; 00196 ASSERT(dim==num_dimensions); 00197 00198 CListElement* current = NULL; 00199 CDotFeatures* f=get_first_feature_obj(current); 00200 uint32_t offs=0; 00201 bool first=true; 00202 float64_t* tmp=new float64_t[num]; 00203 00204 while (f) 00205 { 00206 int32_t f_dim = f->get_dim_feature_space(); 
00207 if (first) 00208 { 00209 f->dense_dot_range_subset(sub_index, num, output, alphas, vec+offs, f_dim, b); 00210 first=false; 00211 } 00212 else 00213 { 00214 f->dense_dot_range_subset(sub_index, num, tmp, alphas, vec+offs, f_dim, b); 00215 for (int32_t i=0; i<num; i++) 00216 output[i]+=tmp[i]; 00217 } 00218 offs += f_dim; 00219 00220 SG_UNREF(f); 00221 f=get_next_feature_obj(current); 00222 } 00223 delete[] tmp; 00224 } 00225 00226 void CCombinedDotFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val) 00227 { 00228 CListElement* current = NULL ; 00229 CDotFeatures* f=get_first_feature_obj(current); 00230 uint32_t offs=0; 00231 00232 while (f) 00233 { 00234 int32_t dim = f->get_dim_feature_space(); 00235 f->add_to_dense_vec(alpha*f->get_combined_feature_weight(), vec_idx1, vec2+offs, dim, abs_val); 00236 offs += dim; 00237 00238 SG_UNREF(f); 00239 f=get_next_feature_obj(current); 00240 } 00241 } 00242 00243 00244 int32_t CCombinedDotFeatures::get_nnz_features_for_vector(int32_t num) 00245 { 00246 CListElement* current = NULL ; 00247 CDotFeatures* f=get_first_feature_obj(current); 00248 int32_t result=0; 00249 00250 while (f) 00251 { 00252 result+=f->get_nnz_features_for_vector(num); 00253 00254 SG_UNREF(f); 00255 f=get_next_feature_obj(current); 00256 } 00257 00258 return result; 00259 } 00260 00261 void CCombinedDotFeatures::get_subfeature_weights(float64_t** weights, int32_t* num_weights) 00262 { 00263 *num_weights = get_num_feature_obj(); 00264 ASSERT(*num_weights > 0); 00265 00266 *weights=new float64_t[*num_weights]; 00267 float64_t* w = *weights; 00268 00269 CListElement* current = NULL; 00270 CDotFeatures* f = get_first_feature_obj(current); 00271 00272 while (f) 00273 { 00274 *w++=f->get_combined_feature_weight(); 00275 00276 SG_UNREF(f); 00277 f = get_next_feature_obj(current); 00278 } 00279 } 00280 00281 void CCombinedDotFeatures::set_subfeature_weights( 00282 float64_t* weights, int32_t 
num_weights) 00283 { 00284 int32_t i=0 ; 00285 CListElement* current = NULL ; 00286 CDotFeatures* f = get_first_feature_obj(current); 00287 00288 ASSERT(num_weights==get_num_feature_obj()); 00289 00290 while(f) 00291 { 00292 f->set_combined_feature_weight(weights[i]); 00293 00294 SG_UNREF(f); 00295 f = get_next_feature_obj(current); 00296 i++; 00297 } 00298 }