SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009 Soeren Sonnenburg 00008 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include "features/DotFeatures.h" 00012 #include "lib/io.h" 00013 #include "lib/Signal.h" 00014 #include "base/Parallel.h" 00015 #include "base/Parameter.h" 00016 00017 #ifndef WIN32 00018 #include <pthread.h> 00019 #endif 00020 00021 using namespace shogun; 00022 00023 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00024 struct DF_THREAD_PARAM 00025 { 00026 CDotFeatures* df; 00027 int32_t* sub_index; 00028 float64_t* output; 00029 int32_t start; 00030 int32_t stop; 00031 float64_t* alphas; 00032 float64_t* vec; 00033 int32_t dim; 00034 float64_t bias; 00035 bool progress; 00036 }; 00037 #endif // DOXYGEN_SHOULD_SKIP_THIS 00038 00039 00040 CDotFeatures::CDotFeatures(int32_t size) 00041 :CFeatures(size), combined_weight(1.0) 00042 { 00043 init(); 00044 set_property(FP_DOT); 00045 } 00046 00047 00048 CDotFeatures::CDotFeatures(const CDotFeatures & orig) 00049 :CFeatures(orig), combined_weight(orig.combined_weight) 00050 { 00051 init(); 00052 } 00053 00054 00055 CDotFeatures::CDotFeatures(CFile* loader) 00056 :CFeatures(loader) 00057 { 00058 init(); 00059 } 00060 00061 void 00062 CDotFeatures::init(void) 00063 { 00064 m_parameters->add(&combined_weight, "combined_weight", 00065 "Feature weighting in combined dot features."); 00066 } 00067 00068 void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) 00069 { 00070 ASSERT(output); 00071 // write access is internally between output[start..stop] so the following 00072 // line is necessary to write to output[0...(stop-start-1)] 00073 output-=start; 00074 ASSERT(start>=0); 00075 ASSERT(start<stop); 00076 ASSERT(stop<=get_num_vectors()); 00077 00078 int32_t num_vectors=stop-start; 00079 ASSERT(num_vectors>0); 00080 00081 int32_t num_threads=parallel->get_num_threads(); 00082 ASSERT(num_threads>0); 00083 00084 CSignal::clear_cancel(); 00085 00086 #ifndef WIN32 00087 if (num_threads < 2) 00088 { 00089 #endif 00090 DF_THREAD_PARAM params; 00091 params.df=this; 00092 params.sub_index=NULL; 00093 params.output=output; 00094 params.start=start; 00095 params.stop=stop; 00096 params.alphas=alphas; 00097 params.vec=vec; 00098 params.dim=dim; 00099 params.bias=b; 00100 params.progress=false; //true; 00101 dense_dot_range_helper((void*) ¶ms); 00102 #ifndef WIN32 00103 } 00104 else 00105 { 00106 pthread_t* threads = new pthread_t[num_threads-1]; 00107 DF_THREAD_PARAM* params = new DF_THREAD_PARAM[num_threads]; 00108 int32_t step= num_vectors/num_threads; 00109 00110 int32_t t; 00111 00112 for (t=0; t<num_threads-1; t++) 00113 { 00114 params[t].df = this; 00115 params[t].sub_index=NULL; 00116 params[t].output = output; 00117 params[t].start = start+t*step; 00118 params[t].stop = start+(t+1)*step; 00119 params[t].alphas=alphas; 00120 params[t].vec=vec; 00121 params[t].dim=dim; 00122 params[t].bias=b; 00123 params[t].progress = false; 00124 pthread_create(&threads[t], NULL, 00125 CDotFeatures::dense_dot_range_helper, (void*)¶ms[t]); 00126 } 00127 00128 params[t].df = this; 00129 params[t].output = output; 00130 params[t].sub_index=NULL; 00131 params[t].start = start+t*step; 00132 params[t].stop = stop; 00133 params[t].alphas=alphas; 00134 params[t].vec=vec; 00135 params[t].dim=dim; 00136 params[t].bias=b; 00137 params[t].progress = false; //true; 00138 dense_dot_range_helper((void*) ¶ms[t]); 00139 00140 for (t=0; t<num_threads-1; t++) 00141 pthread_join(threads[t], NULL); 00142 00143 delete[] params; 00144 delete[] threads; 00145 } 00146 #endif 00147 00148 #ifndef WIN32 00149 if ( CSignal::cancel_computations() ) 00150 SG_INFO( "prematurely stopped. \n"); 00151 #endif 00152 } 00153 00154 void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) 00155 { 00156 ASSERT(sub_index); 00157 ASSERT(output); 00158 00159 int32_t num_threads=parallel->get_num_threads(); 00160 ASSERT(num_threads>0); 00161 00162 CSignal::clear_cancel(); 00163 00164 #ifndef WIN32 00165 if (num_threads < 2) 00166 { 00167 #endif 00168 DF_THREAD_PARAM params; 00169 params.df=this; 00170 params.sub_index=sub_index; 00171 params.output=output; 00172 params.start=0; 00173 params.stop=num; 00174 params.alphas=alphas; 00175 params.vec=vec; 00176 params.dim=dim; 00177 params.bias=b; 00178 params.progress=false; //true; 00179 dense_dot_range_helper((void*) ¶ms); 00180 #ifndef WIN32 00181 } 00182 else 00183 { 00184 pthread_t* threads = new pthread_t[num_threads-1]; 00185 DF_THREAD_PARAM* params = new DF_THREAD_PARAM[num_threads]; 00186 int32_t step= num/num_threads; 00187 00188 int32_t t; 00189 00190 for (t=0; t<num_threads-1; t++) 00191 { 00192 params[t].df = this; 00193 params[t].sub_index=sub_index; 00194 params[t].output = output; 00195 params[t].start = t*step; 00196 params[t].stop = (t+1)*step; 00197 params[t].alphas=alphas; 00198 params[t].vec=vec; 00199 params[t].dim=dim; 00200 params[t].bias=b; 00201 params[t].progress = false; 00202 pthread_create(&threads[t], NULL, 00203 CDotFeatures::dense_dot_range_helper, (void*)¶ms[t]); 00204 } 00205 00206 params[t].df = this; 00207 params[t].sub_index=sub_index; 00208 params[t].output = output; 00209 params[t].start = t*step; 00210 params[t].stop = num; 00211 params[t].alphas=alphas; 00212 params[t].vec=vec; 00213 params[t].dim=dim; 00214 params[t].bias=b; 00215 params[t].progress = false; //true; 00216 dense_dot_range_helper((void*) ¶ms[t]); 00217 00218 for (t=0; t<num_threads-1; t++) 00219 pthread_join(threads[t], NULL); 00220 00221 delete[] params; 00222 delete[] threads; 00223 } 00224 #endif 00225 00226 #ifndef WIN32 00227 if ( CSignal::cancel_computations() ) 00228 SG_INFO( "prematurely stopped. \n"); 00229 #endif 00230 } 00231 00232 void* CDotFeatures::dense_dot_range_helper(void* p) 00233 { 00234 DF_THREAD_PARAM* par=(DF_THREAD_PARAM*) p; 00235 CDotFeatures* df=par->df; 00236 int32_t* sub_index=par->sub_index; 00237 float64_t* output=par->output; 00238 int32_t start=par->start; 00239 int32_t stop=par->stop; 00240 float64_t* alphas=par->alphas; 00241 float64_t* vec=par->vec; 00242 int32_t dim=par->dim; 00243 float64_t bias=par->bias; 00244 bool progress=par->progress; 00245 00246 if (sub_index) 00247 { 00248 #ifdef WIN32 00249 for (int32_t i=start; i<stop i++) 00250 #else 00251 for (int32_t i=start; i<stop && 00252 !CSignal::cancel_computations(); i++) 00253 #endif 00254 { 00255 if (alphas) 00256 output[i]=alphas[sub_index[i]]*df->dense_dot(sub_index[i], vec, dim)+bias; 00257 else 00258 output[i]=df->dense_dot(sub_index[i], vec, dim)+bias; 00259 if (progress) 00260 df->display_progress(start, stop, i); 00261 } 00262 00263 } 00264 else 00265 { 00266 #ifdef WIN32 00267 for (int32_t i=start; i<stop i++) 00268 #else 00269 for (int32_t i=start; i<stop && 00270 !CSignal::cancel_computations(); i++) 00271 #endif 00272 { 00273 if (alphas) 00274 output[i]=alphas[i]*df->dense_dot(i, vec, dim)+bias; 00275 else 00276 output[i]=df->dense_dot(i, vec, dim)+bias; 00277 if (progress) 00278 df->display_progress(start, stop, i); 00279 } 00280 } 00281 00282 return NULL; 00283 } 00284 00285 void CDotFeatures::get_feature_matrix(float64_t** dst, int32_t* num_feat, int32_t* num_vec) 00286 { 00287 int64_t offs=0; 00288 int32_t num=get_num_vectors(); 00289 int32_t dim=get_dim_feature_space(); 00290 ASSERT(num>0); 00291 ASSERT(dim>0); 00292 00293 int64_t sz=((uint64_t) num)* dim; 00294 00295 *num_feat=dim; 00296 *num_vec=num; 00297 *dst=new float64_t[sz]; 00298 memset(*dst, 0, sz*sizeof(float64_t)); 00299 00300 for (int32_t i=0; i<num; i++) 00301 { 00302 add_to_dense_vec(1.0, i, &((*dst)[offs]), dim); 00303 offs+=dim; 00304 } 00305 } 00306 00307 void CDotFeatures::get_feature_vector(float64_t** dst, int32_t* len, int32_t num) 00308 { 00309 int32_t dim=get_dim_feature_space(); 00310 ASSERT(num>=0 && num<=num); 00311 ASSERT(dim>0); 00312 00313 *len=dim; 00314 *dst=new float64_t[dim]; 00315 memset(*dst, 0, dim*sizeof(float64_t)); 00316 00317 add_to_dense_vec(1.0, num, *dst, dim); 00318 } 00319 00320 void CDotFeatures::benchmark_add_to_dense_vector(int32_t repeats) 00321 { 00322 int32_t num=get_num_vectors(); 00323 int32_t d=get_dim_feature_space(); 00324 float64_t* w= new float64_t[d]; 00325 CMath::fill_vector(w, d, 0.0); 00326 00327 CTime t; 00328 float64_t start_cpu=t.get_runtime(); 00329 float64_t start_wall=t.get_curtime(); 00330 for (int32_t r=0; r<repeats; r++) 00331 { 00332 for (int32_t i=0; i<num; i++) 00333 add_to_dense_vec(1.172343*(r+1), i, w, d); 00334 } 00335 00336 SG_PRINT("Time to process %d x num=%d add_to_dense_vector ops: cputime %fs walltime %fs\n", 00337 repeats, num, (t.get_runtime()-start_cpu)/repeats, 00338 (t.get_curtime()-start_wall)/repeats); 00339 00340 delete[] w; 00341 } 00342 00343 void CDotFeatures::benchmark_dense_dot_range(int32_t repeats) 00344 { 00345 int32_t num=get_num_vectors(); 00346 int32_t d=get_dim_feature_space(); 00347 float64_t* w= new float64_t[d]; 00348 float64_t* out= new float64_t[num]; 00349 float64_t* alphas= new float64_t[num]; 00350 CMath::range_fill_vector(w, d, 17.0); 00351 CMath::range_fill_vector(alphas, num, 1.2345); 00352 //CMath::fill_vector(w, d, 17.0); 00353 //CMath::fill_vector(alphas, num, 1.2345); 00354 00355 CTime t; 00356 float64_t start_cpu=t.get_runtime(); 00357 float64_t start_wall=t.get_curtime(); 00358 00359 for (int32_t r=0; r<repeats; r++) 00360 dense_dot_range(out, 0, num, alphas, w, d, 23); 00361 00362 #ifdef DEBUG_DOTFEATURES 00363 CMath::display_vector(out, 40, "dense_dot_range"); 00364 float64_t* out2= new float64_t[num]; 00365 00366 for (int32_t r=0; r<repeats; r++) 00367 { 00368 CMath::fill_vector(out2, num, 0.0); 00369 for (int32_t i=0; i<num; i++) 00370 out2[i]+=dense_dot(i, w, d)*alphas[i]+23; 00371 } 00372 CMath::display_vector(out2, 40, "dense_dot"); 00373 for (int32_t i=0; i<num; i++) 00374 out2[i]-=out[i]; 00375 CMath::display_vector(out2, 40, "diff"); 00376 #endif 00377 SG_PRINT("Time to process %d x num=%d dense_dot_range ops: cputime %fs walltime %fs\n", 00378 repeats, num, (t.get_runtime()-start_cpu)/repeats, 00379 (t.get_curtime()-start_wall)/repeats); 00380 00381 delete[] alphas; 00382 delete[] out; 00383 delete[] w; 00384 }