SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
PruneVarSubMean.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Written (W) 1999-2009 Soeren Sonnenburg
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
16 #include <shogun/io/SGIO.h>
18 
19 using namespace shogun;
20 
// Constructor fragment: the signature line (embedded listing line 21) is not
// part of this extract, so only the member-initializer list and the empty
// body are visible here.
//
// The initializer list default-constructs the CSimplePreprocessor<float64_t>
// base, sets the three buffers (idx, mean, std) to NULL, sets num_idx to 0,
// stores the `divide` argument in divide_by_std and marks the object as not
// yet initialized.
// NOTE(review): `divide` is presumably a bool constructor parameter - confirm
// against the class declaration.
22 : CSimplePreprocessor<float64_t>(), idx(NULL), mean(NULL),
23  std(NULL), num_idx(0), divide_by_std(divide), initialized(false)
24 {
25 }
26 
// Body of a member function whose signature line (embedded listing line 27)
// is missing from this extract.
// NOTE(review): presumably this is the destructor - confirm against the class
// declaration. All it does is delegate to cleanup().
28 {
29  cleanup();
30 }
31 
33 bool CPruneVarSubMean::init(CFeatures* features)
34 {
35  if (!initialized)
36  {
37  ASSERT(features->get_feature_class()==C_SIMPLE);
38  ASSERT(features->get_feature_type()==F_DREAL);
39 
40  CSimpleFeatures<float64_t>* simple_features=(CSimpleFeatures<float64_t>*) features;
41  int32_t num_examples = simple_features->get_num_vectors();
42  int32_t num_features = simple_features->get_num_features();
43 
44  SG_FREE(mean);
45  SG_FREE(idx);
46  SG_FREE(std);
47  mean=NULL;
48  idx=NULL;
49  std=NULL;
50 
51  mean=SG_MALLOC(float64_t, num_features);
52  float64_t* var=SG_MALLOC(float64_t, num_features);
53  int32_t i,j;
54 
55  for (i=0; i<num_features; i++)
56  {
57  mean[i]=0;
58  var[i]=0 ;
59  }
60 
61  SGMatrix<float64_t> feature_matrix = simple_features->get_feature_matrix();
62 
63  // compute mean
64  for (i=0; i<num_examples; i++)
65  {
66  for (j=0; j<num_features; j++)
67  mean[j]+=feature_matrix.matrix[i*num_features+j];
68  }
69 
70  for (j=0; j<num_features; j++)
71  mean[j]/=num_examples;
72 
73  // compute var
74  for (i=0; i<num_examples; i++)
75  {
76  for (j=0; j<num_features; j++)
77  var[j]+=CMath::sq(mean[j]-feature_matrix.matrix[i*num_features+j]);
78  }
79 
80  int32_t num_ok=0;
81  int32_t* idx_ok=SG_MALLOC(int, num_features);
82 
83  for (j=0; j<num_features; j++)
84  {
85  var[j]/=num_examples;
86 
87  if (var[j]>=1e-14)
88  {
89  idx_ok[num_ok]=j;
90  num_ok++ ;
91  }
92  }
93 
94  SG_INFO( "Reducing number of features from %i to %i\n", num_features, num_ok) ;
95 
96  SG_FREE(idx);
97  idx=SG_MALLOC(int, num_ok);
98  float64_t* new_mean=SG_MALLOC(float64_t, num_ok);
99  std=SG_MALLOC(float64_t, num_ok);
100 
101  for (j=0; j<num_ok; j++)
102  {
103  idx[j]=idx_ok[j] ;
104  new_mean[j]=mean[idx_ok[j]];
105  std[j]=sqrt(var[idx_ok[j]]);
106  }
107  num_idx = num_ok ;
108  SG_FREE(idx_ok);
109  SG_FREE(mean);
110  SG_FREE(var);
111  mean = new_mean;
112 
113  initialized = true;
114  return true;
115  }
116  else
117  return false;
118 }
119 
// Body of a member function whose signature line (embedded listing line 121)
// is missing from this extract.
// NOTE(review): presumably this is cleanup(), the function called from the
// headerless body further up in this file - confirm against the class
// declaration.
//
// Frees the idx, mean and std buffers and resets each pointer to NULL so a
// later SG_FREE on the same member is harmless.
// NOTE(review): num_idx and initialized are not reset here, so after this
// runs the object still reports itself as initialized while its buffers are
// gone - verify that callers never use the object afterwards.
122 {
123  SG_FREE(idx);
124  idx=NULL;
125  SG_FREE(mean);
126  mean=NULL;
127  SG_FREE(std);
128  std=NULL;
129 }
130 
// Body of a member function whose signature line (embedded listing line 134)
// is missing from this extract; it takes a `features` argument that is cast
// to CSimpleFeatures<float64_t>* throughout.
// NOTE(review): embedded listing line 136 is also missing - presumably a
// precondition check on `initialized`; confirm against the original file.
//
// Rewrites the feature matrix in place: for every vector, the num_idx kept
// dimensions (selected through idx[]) have mean[] subtracted and, when
// divide_by_std is set, are divided by std[]. The feature count of the object
// is then set to num_idx and its feature matrix is returned.
135 {
137 
138  int32_t num_vectors=0;
139  int32_t num_features=0;
// Fetch the raw buffer; the two out-arguments are printed below as the
// matrix dimensions.
140  float64_t* m=((CSimpleFeatures<float64_t>*) features)->get_feature_matrix(num_features, num_vectors);
141 
142  SG_INFO( "get Feature matrix: %ix%i\n", num_vectors, num_features);
143  SG_INFO( "Preprocessing feature matrix\n");
144  for (int32_t vec=0; vec<num_vectors; vec++)
145  {
// Source and destination live in the same buffer: the vector is read with
// the old stride (num_features) and written with the new one (num_idx).
// NOTE(review): this relies on num_idx <= num_features and idx[feat] >= feat
// so that a write never lands on data still to be read - verify against
// how init() fills idx.
// NOTE(review): both offsets are int32_t products and can overflow for very
// large matrices.
146  float64_t* v_src=&m[num_features*vec];
147  float64_t* v_dst=&m[num_idx*vec];
148 
149  if (divide_by_std)
150  {
// Centre and scale each kept dimension.
151  for (int32_t feat=0; feat<num_idx; feat++)
152  v_dst[feat]=(v_src[idx[feat]]-mean[feat])/std[feat];
153  }
154  else
155  {
// Centre only.
156  for (int32_t feat=0; feat<num_idx; feat++)
157  v_dst[feat]=(v_src[idx[feat]]-mean[feat]);
158  }
159  }
160 
// Record the reduced feature count, then re-read the dimensions purely for
// the log message that follows.
161  ((CSimpleFeatures<float64_t>*) features)->set_num_features(num_idx);
162  ((CSimpleFeatures<float64_t>*) features)->get_feature_matrix(num_features, num_vectors);
163  SG_INFO( "new Feature matrix: %ix%i\n", num_vectors, num_features);
164 
165  return ((CSimpleFeatures<float64_t>*) features)->get_feature_matrix();
166 }
167 
// Body of a member function whose signature line (embedded listing line 170)
// is missing from this extract; it reads an argument named `vector` through
// its .vector and .vlen members and returns an SGVector<float64_t>.
//
// When the object is initialized, produces num_idx values: the entries of
// `vector` selected by idx[], with mean[] subtracted and, if divide_by_std is
// set, divided by std[]. Otherwise produces a plain copy of `vector`.
171 {
172  float64_t* ret=NULL;
173 
174  if (initialized)
175  {
// NOTE(review): embedded listing line 176 is missing from this extract -
// presumably it allocates `ret` with num_idx elements. As shown here, `ret`
// is still NULL when the loops below write through it; confirm against the
// original file.
177 
178  if (divide_by_std)
179  {
// Centre and scale each kept dimension.
180  for (int32_t i=0; i<num_idx; i++)
181  ret[i]=(vector.vector[idx[i]]-mean[i])/std[i];
182  }
183  else
184  {
// Centre only.
185  for (int32_t i=0; i<num_idx; i++)
186  ret[i]=(vector.vector[idx[i]]-mean[i]);
187  }
188  }
189  else
190  {
// Not initialized: pass the input through unchanged as a fresh copy.
191  ret=SG_MALLOC(float64_t, vector.vlen);
192  for (int32_t i=0; i<vector.vlen; i++)
193  ret[i]=vector.vector[i];
194  }
195 
// NOTE(review): the returned length is num_idx on both paths, although the
// pass-through copy above holds vector.vlen elements - the reported length
// looks wrong for the uninitialized case; verify.
196  return SGVector<float64_t>(ret,num_idx);
197 }

SHOGUN Machine Learning Toolbox - Documentation