SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DotFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2009 Soeren Sonnenburg
8  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
12 #include <shogun/io/SGIO.h>
13 #include <shogun/lib/Signal.h>
14 #include <shogun/lib/Time.h>
16 #include <shogun/base/Parallel.h>
17 #include <shogun/base/Parameter.h>
18 
19 #ifdef HAVE_PTHREAD
20 #include <pthread.h>
21 #endif
22 
23 using namespace shogun;
24 
25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 struct DF_THREAD_PARAM
27 {
28  CDotFeatures* df;
29  int32_t* sub_index;
30  float64_t* output;
31  int32_t start;
32  int32_t stop;
33  float64_t* alphas;
34  float64_t* vec;
35  int32_t dim;
36  float64_t bias;
37  bool progress;
38 };
39 #endif // DOXYGEN_SHOULD_SKIP_THIS
40 
41 
43  :CFeatures(size), combined_weight(1.0)
44 {
45  init();
46 }
47 
48 
50  :CFeatures(orig), combined_weight(orig.combined_weight)
51 {
52  init();
53 }
54 
55 
57  :CFeatures(loader)
58 {
59  init();
60 }
61 
62 void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
63 {
64  ASSERT(output);
65  // write access is internally between output[start..stop] so the following
66  // line is necessary to write to output[0...(stop-start-1)]
67  output-=start;
68  ASSERT(start>=0);
69  ASSERT(start<stop);
70  ASSERT(stop<=get_num_vectors());
71 
72  int32_t num_vectors=stop-start;
73  ASSERT(num_vectors>0);
74 
75  int32_t num_threads=parallel->get_num_threads();
76  ASSERT(num_threads>0);
77 
79 
80 #ifdef HAVE_PTHREAD
81  if (num_threads < 2)
82  {
83 #endif
84  DF_THREAD_PARAM params;
85  params.df=this;
86  params.sub_index=NULL;
87  params.output=output;
88  params.start=start;
89  params.stop=stop;
90  params.alphas=alphas;
91  params.vec=vec;
92  params.dim=dim;
93  params.bias=b;
94  params.progress=false; //true;
95  dense_dot_range_helper((void*) &params);
96 #ifdef HAVE_PTHREAD
97  }
98  else
99  {
100  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
101  DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads);
102  int32_t step= num_vectors/num_threads;
103 
104  int32_t t;
105 
106  for (t=0; t<num_threads-1; t++)
107  {
108  params[t].df = this;
109  params[t].sub_index=NULL;
110  params[t].output = output;
111  params[t].start = start+t*step;
112  params[t].stop = start+(t+1)*step;
113  params[t].alphas=alphas;
114  params[t].vec=vec;
115  params[t].dim=dim;
116  params[t].bias=b;
117  params[t].progress = false;
118  pthread_create(&threads[t], NULL,
119  CDotFeatures::dense_dot_range_helper, (void*)&params[t]);
120  }
121 
122  params[t].df = this;
123  params[t].output = output;
124  params[t].sub_index=NULL;
125  params[t].start = start+t*step;
126  params[t].stop = stop;
127  params[t].alphas=alphas;
128  params[t].vec=vec;
129  params[t].dim=dim;
130  params[t].bias=b;
131  params[t].progress = false; //true;
132  dense_dot_range_helper((void*) &params[t]);
133 
134  for (t=0; t<num_threads-1; t++)
135  pthread_join(threads[t], NULL);
136 
137  SG_FREE(params);
138  SG_FREE(threads);
139  }
140 #endif
141 
142 #ifndef WIN32
144  SG_INFO( "prematurely stopped. \n");
145 #endif
146 }
147 
148 void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
149 {
150  ASSERT(sub_index);
151  ASSERT(output);
152 
153  int32_t num_threads=parallel->get_num_threads();
154  ASSERT(num_threads>0);
155 
157 
158 #ifdef HAVE_PTHREAD
159  if (num_threads < 2)
160  {
161 #endif
162  DF_THREAD_PARAM params;
163  params.df=this;
164  params.sub_index=sub_index;
165  params.output=output;
166  params.start=0;
167  params.stop=num;
168  params.alphas=alphas;
169  params.vec=vec;
170  params.dim=dim;
171  params.bias=b;
172  params.progress=false; //true;
173  dense_dot_range_helper((void*) &params);
174 #ifdef HAVE_PTHREAD
175  }
176  else
177  {
178  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
179  DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads);
180  int32_t step= num/num_threads;
181 
182  int32_t t;
183 
184  for (t=0; t<num_threads-1; t++)
185  {
186  params[t].df = this;
187  params[t].sub_index=sub_index;
188  params[t].output = output;
189  params[t].start = t*step;
190  params[t].stop = (t+1)*step;
191  params[t].alphas=alphas;
192  params[t].vec=vec;
193  params[t].dim=dim;
194  params[t].bias=b;
195  params[t].progress = false;
196  pthread_create(&threads[t], NULL,
197  CDotFeatures::dense_dot_range_helper, (void*)&params[t]);
198  }
199 
200  params[t].df = this;
201  params[t].sub_index=sub_index;
202  params[t].output = output;
203  params[t].start = t*step;
204  params[t].stop = num;
205  params[t].alphas=alphas;
206  params[t].vec=vec;
207  params[t].dim=dim;
208  params[t].bias=b;
209  params[t].progress = false; //true;
210  dense_dot_range_helper((void*) &params[t]);
211 
212  for (t=0; t<num_threads-1; t++)
213  pthread_join(threads[t], NULL);
214 
215  SG_FREE(params);
216  SG_FREE(threads);
217  }
218 #endif
219 
220 #ifndef WIN32
222  SG_INFO( "prematurely stopped. \n");
223 #endif
224 }
225 
227 {
228  DF_THREAD_PARAM* par=(DF_THREAD_PARAM*) p;
229  CDotFeatures* df=par->df;
230  int32_t* sub_index=par->sub_index;
231  float64_t* output=par->output;
232  int32_t start=par->start;
233  int32_t stop=par->stop;
234  float64_t* alphas=par->alphas;
235  float64_t* vec=par->vec;
236  int32_t dim=par->dim;
237  float64_t bias=par->bias;
238  bool progress=par->progress;
239 
240  if (sub_index)
241  {
242 #ifdef WIN32
243  for (int32_t i=start; i<stop i++)
244 #else
245  for (int32_t i=start; i<stop &&
247 #endif
248  {
249  if (alphas)
250  output[i]=alphas[sub_index[i]]*df->dense_dot(sub_index[i], vec, dim)+bias;
251  else
252  output[i]=df->dense_dot(sub_index[i], vec, dim)+bias;
253  if (progress)
254  df->display_progress(start, stop, i);
255  }
256 
257  }
258  else
259  {
260 #ifdef WIN32
261  for (int32_t i=start; i<stop i++)
262 #else
263  for (int32_t i=start; i<stop &&
265 #endif
266  {
267  if (alphas)
268  output[i]=alphas[i]*df->dense_dot(i, vec, dim)+bias;
269  else
270  output[i]=df->dense_dot(i, vec, dim)+bias;
271  if (progress)
272  df->display_progress(start, stop, i);
273  }
274  }
275 
276  return NULL;
277 }
278 
// Builds a dense matrix holding every feature vector: a zero-filled buffer of
// get_num_vectors()*get_dim_feature_space() values into which vector i is
// added (weight 1.0) at offset i*dim, i.e. the vectors lie back to back.
// NOTE(review): the signature and the declaration of `m` are not part of this
// listing; `m` is presumably an SGMatrix<float64_t> - confirm.
280 {
282 
283  int64_t offs=0;
284  int32_t num=get_num_vectors();
285  int32_t dim=get_dim_feature_space();
286  ASSERT(num>0);
287  ASSERT(dim>0);
288 
// widen before multiplying so that num*dim is not computed in 32 bits
289  int64_t sz=((uint64_t) num)* dim;
290 
// NOTE(review): each vector occupies dim consecutive entries; verify that
// num_cols=dim / num_rows=num agrees with the storage order the matrix type
// expects.
291  m.do_free=true;
292  m.num_cols=dim;
293  m.num_rows=num;
294  m.matrix=SG_MALLOC(float64_t, sz);
295  memset(m.matrix, 0, sz*sizeof(float64_t));
296 
// add vector i onto the zeroed slice starting at offs
297  for (int32_t i=0; i<num; i++)
298  {
299  add_to_dense_vec(1.0, i, &(m.matrix[offs]), dim);
300  offs+=dim;
301  }
302 
303  return m;
304 }
305 
307 {
309 
310  int32_t dim=get_dim_feature_space();
311  ASSERT(num>=0 && num<=get_num_vectors());
312  ASSERT(dim>0);
313 
314  v.do_free=true;
315  v.vlen=dim;
316  v.vector=SG_MALLOC(float64_t, dim);
317  memset(v.vector, 0, dim*sizeof(float64_t));
318 
319  add_to_dense_vec(1.0, num, v.vector, dim);
320  return v;
321 }
322 
324 {
325  int32_t num=get_num_vectors();
326  int32_t d=get_dim_feature_space();
327  float64_t* w= SG_MALLOC(float64_t, d);
328  CMath::fill_vector(w, d, 0.0);
329 
330  CTime t;
331  float64_t start_cpu=t.get_runtime();
332  float64_t start_wall=t.get_curtime();
333  for (int32_t r=0; r<repeats; r++)
334  {
335  for (int32_t i=0; i<num; i++)
336  add_to_dense_vec(1.172343*(r+1), i, w, d);
337  }
338 
339  SG_PRINT("Time to process %d x num=%d add_to_dense_vector ops: cputime %fs walltime %fs\n",
340  repeats, num, (t.get_runtime()-start_cpu)/repeats,
341  (t.get_curtime()-start_wall)/repeats);
342 
343  SG_FREE(w);
344 }
345 
347 {
348  int32_t num=get_num_vectors();
349  int32_t d=get_dim_feature_space();
350  float64_t* w= SG_MALLOC(float64_t, d);
351  float64_t* out= SG_MALLOC(float64_t, num);
352  float64_t* alphas= SG_MALLOC(float64_t, num);
353  CMath::range_fill_vector(w, d, 17.0);
354  CMath::range_fill_vector(alphas, num, 1.2345);
355  //CMath::fill_vector(w, d, 17.0);
356  //CMath::fill_vector(alphas, num, 1.2345);
357 
358  CTime t;
359  float64_t start_cpu=t.get_runtime();
360  float64_t start_wall=t.get_curtime();
361 
362  for (int32_t r=0; r<repeats; r++)
363  dense_dot_range(out, 0, num, alphas, w, d, 23);
364 
365 #ifdef DEBUG_DOTFEATURES
366  CMath::display_vector(out, 40, "dense_dot_range");
367  float64_t* out2= SG_MALLOC(float64_t, num);
368 
369  for (int32_t r=0; r<repeats; r++)
370  {
371  CMath::fill_vector(out2, num, 0.0);
372  for (int32_t i=0; i<num; i++)
373  out2[i]+=dense_dot(i, w, d)*alphas[i]+23;
374  }
375  CMath::display_vector(out2, 40, "dense_dot");
376  for (int32_t i=0; i<num; i++)
377  out2[i]-=out[i];
378  CMath::display_vector(out2, 40, "diff");
379 #endif
380  SG_PRINT("Time to process %d x num=%d dense_dot_range ops: cputime %fs walltime %fs\n",
381  repeats, num, (t.get_runtime()-start_cpu)/repeats,
382  (t.get_curtime()-start_wall)/repeats);
383 
384  SG_FREE(alphas);
385  SG_FREE(out);
386  SG_FREE(w);
387 }
388 
390 {
391  int32_t num=get_num_vectors();
392  int32_t dim=get_dim_feature_space();
393  ASSERT(num>0);
394  ASSERT(dim>0);
395 
396  SGVector<float64_t> mean(dim);
397  memset(mean.vector, 0, sizeof(float64_t)*dim);
398 
399  for (int i = 0; i < num; i++)
400  add_to_dense_vec(1, i, mean.vector, dim);
401  for (int j = 0; j < dim; j++)
402  mean.vector[j] /= num;
403 
404  return mean;
405 }
406 
// Computes a dim x dim matrix from all feature vectors: for each vector the
// mean (get_mean()) is removed, the products v[m]*v[n] are accumulated for
// n<=m, the sums are divided by the number of vectors and finally mirrored
// into the remaining half so that the result is symmetric.
// NOTE(review): the signature line is not part of this listing.
408 {
409  int32_t num=get_num_vectors();
410  int32_t dim=get_dim_feature_space();
411  ASSERT(num>0);
412  ASSERT(dim>0);
413 
414  SGMatrix<float64_t> cov(dim, dim);
415 
416  memset(cov.matrix, 0, sizeof(float64_t)*dim*dim);
417 
418  SGVector<float64_t> mean = get_mean();
419 
420  for (int i = 0; i < num; i++)
421  {
// NOTE(review): the declaration of `v` is missing from this listing;
// presumably it holds the dense form of vector i - confirm.
// presumably computes v = 1*v + (-1)*mean, i.e. centres v - verify against CMath::add
423  CMath::add<float64_t>(v.vector, 1, v.vector, -1, mean.vector, v.vlen);
// accumulate only the entries with n<=m; the other half is filled in below
424  for (int m = 0; m < v.vlen; m++)
425  {
426  for (int n = 0; n <= m ; n++)
427  {
428  (cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n];
429  }
430  }
431  v.free_vector();
432  }
// turn the accumulated sums into averages
433  for (int m = 0; m < dim; m++)
434  {
435  for (int n = 0; n <= m ; n++)
436  {
437  (cov.matrix)[m*dim+n] /= num;
438  }
439  }
// copy entry (n,m) to (m,n) for n>m to complete the symmetric matrix
440  for (int m = 0; m < dim-1; m++)
441  {
442  for (int n = m+1; n < dim; n++)
443  {
444  (cov.matrix)[m*dim+n] = (cov.matrix)[n*dim+m];
445  }
446  }
447  mean.destroy_vector();
448  return cov;
449 }
450 
451 void CDotFeatures::display_progress(int32_t start, int32_t stop, int32_t v)
452 {
453  int32_t num_vectors=stop-start;
454  int32_t i=v-start;
455 
456  if ( (i% (num_vectors/100+1))== 0)
457  SG_PROGRESS(v, 0.0, num_vectors-1);
458 }
459 
// Shared constructor helper: registers combined_weight with m_parameters
// under the name "combined_weight".
// NOTE(review): one line of the body (listing line 462) is missing from this
// listing.
460 void CDotFeatures::init()
461 {
463  m_parameters->add(&combined_weight, "combined_weight",
464  "Feature weighting in combined dot features.");
465 }

SHOGUN Machine Learning Toolbox - Documentation