SGDQN.cpp
/*
   SVM with Quasi-Newton stochastic gradient
   Copyright (C) 2009- Antoine Bordes

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA

   Shogun adjustments (w) 2011 Siddharth Kherada
*/

#include <shogun/classifier/svm/SGDQN.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/loss/HingeLoss.h>

using namespace shogun;

CSGDQN::CSGDQN()
{
    init();
}

CSGDQN::CSGDQN(float64_t C)
{
    init();

    C1=C;
    C2=C;
}

CSGDQN::CSGDQN(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
{
    init();
    C1=C;
    C2=C;

    set_features(traindat);
    set_labels(trainlab);
}

CSGDQN::~CSGDQN()
{
    SG_UNREF(loss);
}

void CSGDQN::set_loss_function(CLossFunction* loss_func)
{
    if (loss)
        SG_UNREF(loss);
    loss=loss_func;
    SG_REF(loss);
}

void CSGDQN::compute_ratio(float64_t* W,float64_t* W_1,float64_t* B,float64_t* dst,int32_t dim,float64_t lambda,float64_t loss_val)
{
    for (int32_t i=0; i < dim;i++)
    {
        float64_t diffw=W_1[i]-W[i];
        if(diffw)
            B[i]+=diffw/ (lambda*diffw+ loss_val*dst[i]);
        else
            B[i]+=1/lambda;
    }
}
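/* Note: compute_ratio() accumulates, per coordinate, a secant-style estimate
   of the inverse curvature of the regularized objective,

       B[i] += (W_1[i]-W[i]) / (lambda*(W_1[i]-W[i]) + loss_val*dst[i]),

   i.e. the weight change divided by the corresponding change of the
   regularized gradient along that coordinate; when the weight did not move,
   the purely regularization-induced value 1/lambda is added instead.  The
   clipped running average Bc (see combine_and_clip() below) is the diagonal
   scaling matrix of SGD-QN (Bordes, Bottou, Gallinari, 2009). */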

void CSGDQN::combine_and_clip(float64_t* Bc,float64_t* B,int32_t dim,float64_t c1,float64_t c2,float64_t v1,float64_t v2)
{
    for (int32_t i=0; i < dim;i++)
    {
        if(B[i])
        {
            Bc[i] = Bc[i] * c1 + B[i] * c2;
            Bc[i]= CMath::min(CMath::max(Bc[i],v1),v2);
        }
    }
}
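/* Note: combine_and_clip() blends the previous scaling Bc with the freshly
   accumulated estimate B using the caller-supplied weights c1 and c2, and
   clips every entry to [v1, v2].  train() below passes
   [1/(100*lambda), 100/lambda], which keeps each per-coordinate step within
   two orders of magnitude of the plain 1/lambda scaling. */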

bool CSGDQN::train(CFeatures* data)
{

    ASSERT(labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");
        set_features((CDotFeatures*) data);
    }

    ASSERT(features);

    int32_t num_train_labels=labels->get_num_labels();
    w_dim=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    ASSERT(num_vec==num_train_labels);
    ASSERT(num_vec>0);

    SG_FREE(w);
    w=SG_MALLOC(float64_t, w_dim);
    memset(w, 0, w_dim*sizeof(float64_t));

    float64_t lambda= 1.0/(C1*num_vec);

    // Shift t in order to have a
    // reasonable initial learning rate.
    // This assumes |x| \approx 1.
    float64_t maxw = 1.0 / sqrt(lambda);
    float64_t typw = sqrt(maxw);
    float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
    t = 1 / (eta0 * lambda);
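    /* Note: the loop below uses eta = 1.0/t and scales every update by Bc,
       which is initialized to 1/lambda, so the first effective step size is
       Bc*eta = 1/(lambda*t) = eta0, and it then decays like 1/(lambda*t),
       the usual schedule for strongly convex stochastic gradient descent. */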

    SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);

    float64_t* Bc=SG_MALLOC(float64_t, w_dim);
    CMath::fill_vector(Bc, w_dim, 1/lambda);

    float64_t* result=SG_MALLOC(float64_t, w_dim);
    float64_t* B=SG_MALLOC(float64_t, w_dim);
    float64_t* w_1=SG_MALLOC(float64_t, w_dim);

    //Calibrate
    calibrate();

    SG_INFO("Training on %d vectors\n", num_vec);
    CSignal::clear_cancel();

    ELossType loss_type = loss->get_loss_type();
    bool is_log_loss = false;
    if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
        is_log_loss = true;

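    /* Note on the main loop: each epoch sweeps once over the data.  For every
       example the margin z = y * <w, x_i> is computed; if the margin is
       violated (z < 1) or a log-type loss is used, w moves along Bc .* x_i
       scaled by eta = 1/t.  Every `skip` examples the accumulated weight
       decay -skip*lambda*eta * (Bc .* w) is applied and updateB is set, so
       that the next example, if it violates the margin, is also used to
       refresh the curvature estimate via compute_ratio()/combine_and_clip(). */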
    for(int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
    {
        count = skip;
        bool updateB=false;
        for (int32_t i=0; i<num_vec; i++)
        {
            SGVector<float64_t> v=features->get_computed_dot_feature_vector(i);
            ASSERT(w_dim==v.vlen);
            float64_t eta = 1.0/t;
            float64_t y = labels->get_label(i);
            float64_t z = y * features->dense_dot(i, w, w_dim);
            if(updateB==true)
            {
                if (z < 1 || is_log_loss)
                {
                    // keep a copy of the current weights so that
                    // compute_ratio() sees the pre-update iterate
                    // (a plain pointer assignment w_1=w would make both
                    // arguments alias the same vector)
                    memcpy(w_1, w, w_dim*sizeof(float64_t));
                    float64_t loss_1=-loss->first_derivative(z,1);
                    CMath::vector_multiply(result,Bc,v.vector,w_dim);
                    CMath::add(w,eta*loss_1*y,result,1.0,w,w_dim);
                    float64_t z2 = y * features->dense_dot(i, w, w_dim);
                    float64_t diffloss = -loss->first_derivative(z2,1) - loss_1;
                    if(diffloss)
                    {
                        compute_ratio(w,w_1,B,v.vector,w_dim,lambda,y*diffloss);
                        if(t>skip)
                            combine_and_clip(Bc,B,w_dim,(t-skip)/(t+skip),2*skip/(t+skip),1/(100*lambda),100/lambda);
                        else
                            combine_and_clip(Bc,B,w_dim,t/(t+skip),skip/(t+skip),1/(100*lambda),100/lambda);
                    }
                }
                updateB=false;
            }
            else
            {
                if(--count<=0)
                {
                    CMath::vector_multiply(result,Bc,w,w_dim);
                    CMath::add(w,-skip*lambda*eta,result,1.0,w,w_dim);
                    count = skip;
                    updateB=true;
                }

                if (z < 1 || is_log_loss)
                {
                    CMath::vector_multiply(result,Bc,v.vector,w_dim);
                    CMath::add(w,eta*-loss->first_derivative(z,1)*y,result,1.0,w,w_dim);
                }
            }
            t++;

            v.free_vector();
        }
    }
    SG_FREE(result);
    SG_FREE(w_1);
    SG_FREE(B);

    return true;
}


void CSGDQN::calibrate()
{
    ASSERT(features);
    int32_t num_vec=features->get_num_vectors();
    int32_t c_dim=features->get_dim_feature_space();

    ASSERT(num_vec>0);
    ASSERT(c_dim>0);

    SG_INFO("Estimating sparsity num_vec=%d num_feat=%d.\n", num_vec, c_dim);

    int32_t n = 0;
    float64_t r = 0;

    for (int32_t j=0; j<num_vec ; j++, n++)
        r+=features->get_nnz_features_for_vector(j);

    // compute weight decay skip
    skip = (int32_t) ((16 * n * c_dim) / r);
}
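/* Note: r/n is the average number of non-zero features per example, so
   skip = 16*n*c_dim/r = 16*c_dim / (avg. non-zeros per vector).  Applying
   the O(c_dim) weight-decay step only every `skip` examples (see train()
   above) amortizes its cost over the cheaper loss-gradient updates, the
   usual heuristic of sparse stochastic gradient SVM solvers. */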

void CSGDQN::init()
{
    t=0;
    C1=1;
    C2=1;
    epochs=5;
    skip=1000;
    count=1000;

    loss=new CHingeLoss();
    SG_REF(loss);

    m_parameters->add(&C1, "C1", "Cost constant 1.");
    m_parameters->add(&C2, "C2", "Cost constant 2.");
    m_parameters->add(&epochs, "epochs", "epochs");
    m_parameters->add(&skip, "skip", "skip");
    m_parameters->add(&count, "count", "count");
}
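/* Usage sketch (illustrative only; assumes a CDotFeatures instance `feats`
   and a matching binary CLabels instance `labs` already exist):

       CSGDQN* svm = new CSGDQN(1.0, feats, labs);  // cost constant C = 1.0
       svm->train();                                // runs the loop above
       SG_UNREF(svm);
*/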
