SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
LDA.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
11 #include <shogun/lib/common.h>
12 
13 #ifdef HAVE_LAPACK
14 #include <shogun/machine/Machine.h>
16 #include <shogun/classifier/LDA.h>
17 #include <shogun/features/Labels.h>
20 
21 using namespace shogun;
22 
24 : CLinearMachine(), m_gamma(gamma)
25 {
26 }
27 
29 : CLinearMachine(), m_gamma(gamma)
30 {
31  set_features(traindat);
32  set_labels(trainlab);
33 }
34 
35 
37 {
38 }
39 
41 {
42  ASSERT(labels);
43  if (data)
44  {
45  if (!data->has_property(FP_DOT))
46  SG_ERROR("Specified features are not of type CDotFeatures\n");
47  set_features((CDotFeatures*) data);
48  }
50  SGVector<int32_t> train_labels=labels->get_int_labels();
51  ASSERT(train_labels.vector);
52 
53  int32_t num_feat=features->get_dim_feature_space();
54  int32_t num_vec=features->get_num_vectors();
55  ASSERT(num_vec==train_labels.vlen);
56 
57  int32_t* classidx_neg=SG_MALLOC(int32_t, num_vec);
58  int32_t* classidx_pos=SG_MALLOC(int32_t, num_vec);
59 
60  int32_t i=0;
61  int32_t j=0;
62  int32_t num_neg=0;
63  int32_t num_pos=0;
64  for (i=0; i<train_labels.vlen; i++)
65  {
66  if (train_labels.vector[i]==-1)
67  classidx_neg[num_neg++]=i;
68  else if (train_labels.vector[i]==+1)
69  classidx_pos[num_pos++]=i;
70  else
71  {
72  SG_ERROR( "found label != +/- 1 bailing...");
73  return false;
74  }
75  }
76 
77  if (num_neg<=0 && num_pos<=0)
78  {
79  SG_ERROR( "whooooo ? only a single class found\n");
80  return false;
81  }
82 
83  SG_FREE(w);
84  w=SG_MALLOC(float64_t, num_feat);
85  w_dim=num_feat;
86 
87  float64_t* mean_neg=SG_MALLOC(float64_t, num_feat);
88  memset(mean_neg,0,num_feat*sizeof(float64_t));
89 
90  float64_t* mean_pos=SG_MALLOC(float64_t, num_feat);
91  memset(mean_pos,0,num_feat*sizeof(float64_t));
92 
93  /* calling external lib */
94  double* scatter=SG_MALLOC(double, num_feat*num_feat);
95  double* buffer=SG_MALLOC(double, num_feat*CMath::max(num_neg, num_pos));
96  int nf = (int) num_feat;
97 
99  //mean neg
100  for (i=0; i<num_neg; i++)
101  {
102  int32_t vlen;
103  bool vfree;
104  float64_t* vec=
105  rf->get_feature_vector(classidx_neg[i], vlen, vfree);
106  ASSERT(vec);
107 
108  for (j=0; j<vlen; j++)
109  {
110  mean_neg[j]+=vec[j];
111  buffer[num_feat*i+j]=vec[j];
112  }
113 
114  rf->free_feature_vector(vec, classidx_neg[i], vfree);
115  }
116 
117  for (j=0; j<num_feat; j++)
118  mean_neg[j]/=num_neg;
119 
120  for (i=0; i<num_neg; i++)
121  {
122  for (j=0; j<num_feat; j++)
123  buffer[num_feat*i+j]-=mean_neg[j];
124  }
125  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, nf, nf,
126  (int) num_neg, 1.0, buffer, nf, buffer, nf, 0, scatter, nf);
127 
128  //mean pos
129  for (i=0; i<num_pos; i++)
130  {
131  int32_t vlen;
132  bool vfree;
133  float64_t* vec=
134  rf->get_feature_vector(classidx_pos[i], vlen, vfree);
135  ASSERT(vec);
136 
137  for (j=0; j<vlen; j++)
138  {
139  mean_pos[j]+=vec[j];
140  buffer[num_feat*i+j]=vec[j];
141  }
142 
143  rf->free_feature_vector(vec, classidx_pos[i], vfree);
144  }
145 
146  for (j=0; j<num_feat; j++)
147  mean_pos[j]/=num_pos;
148 
149  for (i=0; i<num_pos; i++)
150  {
151  for (j=0; j<num_feat; j++)
152  buffer[num_feat*i+j]-=mean_pos[j];
153  }
154  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, nf, nf, (int) num_pos,
155  1.0/(train_labels.vlen-1), buffer, nf, buffer, nf,
156  1.0/(train_labels.vlen-1), scatter, nf);
157 
158  float64_t trace=CMath::trace((float64_t*) scatter, num_feat, num_feat);
159 
160  double s=1.0-m_gamma; /* calling external lib; indirectly */
161  for (i=0; i<num_feat*num_feat; i++)
162  scatter[i]*=s;
163 
164  for (i=0; i<num_feat; i++)
165  scatter[i*num_feat+i]+= trace*m_gamma/num_feat;
166 
167  double* inv_scatter= (double*) CMath::pinv(
168  scatter, num_feat, num_feat, NULL);
169 
170  float64_t* w_pos=buffer;
171  float64_t* w_neg=&buffer[num_feat];
172 
173  cblas_dsymv(CblasColMajor, CblasUpper, nf, 1.0, inv_scatter, nf,
174  (double*) mean_pos, 1, 0., (double*) w_pos, 1);
175  cblas_dsymv(CblasColMajor, CblasUpper, nf, 1.0, inv_scatter, nf,
176  (double*) mean_neg, 1, 0, (double*) w_neg, 1);
177 
178  bias=0.5*(CMath::dot(w_neg, mean_neg, num_feat)-CMath::dot(w_pos, mean_pos, num_feat));
179  for (i=0; i<num_feat; i++)
180  w[i]=w_pos[i]-w_neg[i];
181 
182 #ifdef DEBUG_LDA
183  SG_PRINT("bias: %f\n", bias);
184  CMath::display_vector(w, num_feat, "w");
185  CMath::display_vector(w_pos, num_feat, "w_pos");
186  CMath::display_vector(w_neg, num_feat, "w_neg");
187  CMath::display_vector(mean_pos, num_feat, "mean_pos");
188  CMath::display_vector(mean_neg, num_feat, "mean_neg");
189 #endif
190 
191  train_labels.free_vector();
192  SG_FREE(mean_neg);
193  SG_FREE(mean_pos);
194  SG_FREE(scatter);
195  SG_FREE(inv_scatter);
196  SG_FREE(classidx_neg);
197  SG_FREE(classidx_pos);
198  SG_FREE(buffer);
199  return true;
200 }
201 #endif

SHOGUN Machine Learning Toolbox - Documentation