SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SVM_linear.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2007-2009 The LIBLINEAR Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  *
16  * 3. Neither name of copyright holders nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <shogun/lib/config.h>
34 #ifndef DOXYGEN_SHOULD_SKIP_THIS
35 #ifdef HAVE_LAPACK
36 #include <math.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <stdarg.h>
41 
45 
46 using namespace shogun;
47 
48 l2r_lr_fun::l2r_lr_fun(const problem *p, float64_t Cp, float64_t Cn)
49 {
50  int i;
51  int l=p->l;
52  int *y=p->y;
53 
54  this->prob = p;
55 
56  z = SG_MALLOC(double, l);
57  D = SG_MALLOC(double, l);
58  C = SG_MALLOC(double, l);
59 
60  for (i=0; i<l; i++)
61  {
62  if (y[i] == 1)
63  C[i] = Cp;
64  else
65  C[i] = Cn;
66  }
67 }
68 
69 l2r_lr_fun::~l2r_lr_fun()
70 {
71  SG_FREE(z);
72  SG_FREE(D);
73  SG_FREE(C);
74 }
75 
76 
77 double l2r_lr_fun::fun(double *w)
78 {
79  int i;
80  double f=0;
81  int *y=prob->y;
82  int l=prob->l;
83  int32_t n=prob->n;
84 
85  Xv(w, z);
86  for(i=0;i<l;i++)
87  {
88  double yz = y[i]*z[i];
89  if (yz >= 0)
90  f += C[i]*log(1 + exp(-yz));
91  else
92  f += C[i]*(-yz+log(1 + exp(yz)));
93  }
94  f += 0.5 *CMath::dot(w,w,n);
95 
96  return(f);
97 }
98 
99 void l2r_lr_fun::grad(double *w, double *g)
100 {
101  int i;
102  int *y=prob->y;
103  int l=prob->l;
104  int w_size=get_nr_variable();
105 
106  for(i=0;i<l;i++)
107  {
108  z[i] = 1/(1 + exp(-y[i]*z[i]));
109  D[i] = z[i]*(1-z[i]);
110  z[i] = C[i]*(z[i]-1)*y[i];
111  }
112  XTv(z, g);
113 
114  for(i=0;i<w_size;i++)
115  g[i] = w[i] + g[i];
116 }
117 
118 int l2r_lr_fun::get_nr_variable()
119 {
120  return prob->n;
121 }
122 
123 void l2r_lr_fun::Hv(double *s, double *Hs)
124 {
125  int i;
126  int l=prob->l;
127  int w_size=get_nr_variable();
128  double *wa = SG_MALLOC(double, l);
129 
130  Xv(s, wa);
131  for(i=0;i<l;i++)
132  wa[i] = C[i]*D[i]*wa[i];
133 
134  XTv(wa, Hs);
135  for(i=0;i<w_size;i++)
136  Hs[i] = s[i] + Hs[i];
137  SG_FREE(wa);
138 }
139 
140 void l2r_lr_fun::Xv(double *v, double *res_Xv)
141 {
142  int32_t l=prob->l;
143  int32_t n=prob->n;
144  float64_t bias=0;
145 
146  if (prob->use_bias)
147  {
148  n--;
149  bias=v[n];
150  }
151 
152  prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
153 }
154 
155 void l2r_lr_fun::XTv(double *v, double *res_XTv)
156 {
157  int l=prob->l;
158  int32_t n=prob->n;
159 
160  memset(res_XTv, 0, sizeof(double)*prob->n);
161 
162  if (prob->use_bias)
163  n--;
164 
165  for (int32_t i=0;i<l;i++)
166  {
167  prob->x->add_to_dense_vec(v[i], i, res_XTv, n);
168 
169  if (prob->use_bias)
170  res_XTv[n]+=v[i];
171  }
172 }
173 
174 l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *p, double Cp, double Cn)
175 {
176  int i;
177  int l=p->l;
178  int *y=p->y;
179 
180  this->prob = p;
181 
182  z = SG_MALLOC(double, l);
183  D = SG_MALLOC(double, l);
184  C = SG_MALLOC(double, l);
185  I = SG_MALLOC(int, l);
186 
187  for (i=0; i<l; i++)
188  {
189  if (y[i] == 1)
190  C[i] = Cp;
191  else
192  C[i] = Cn;
193  }
194 }
195 
196 l2r_l2_svc_fun::~l2r_l2_svc_fun()
197 {
198  SG_FREE(z);
199  SG_FREE(D);
200  SG_FREE(C);
201  SG_FREE(I);
202 }
203 
204 double l2r_l2_svc_fun::fun(double *w)
205 {
206  int i;
207  double f=0;
208  int *y=prob->y;
209  int l=prob->l;
210  int w_size=get_nr_variable();
211 
212  Xv(w, z);
213  for(i=0;i<l;i++)
214  {
215  z[i] = y[i]*z[i];
216  double d = 1-z[i];
217  if (d > 0)
218  f += C[i]*d*d;
219  }
220  f += 0.5*CMath::dot(w, w, w_size);
221 
222  return(f);
223 }
224 
225 void l2r_l2_svc_fun::grad(double *w, double *g)
226 {
227  int i;
228  int *y=prob->y;
229  int l=prob->l;
230  int w_size=get_nr_variable();
231 
232  sizeI = 0;
233  for (i=0;i<l;i++)
234  if (z[i] < 1)
235  {
236  z[sizeI] = C[i]*y[i]*(z[i]-1);
237  I[sizeI] = i;
238  sizeI++;
239  }
240  subXTv(z, g);
241 
242  for(i=0;i<w_size;i++)
243  g[i] = w[i] + 2*g[i];
244 }
245 
246 int l2r_l2_svc_fun::get_nr_variable()
247 {
248  return prob->n;
249 }
250 
251 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
252 {
253  int i;
254  int l=prob->l;
255  int w_size=get_nr_variable();
256  double *wa = SG_MALLOC(double, l);
257 
258  subXv(s, wa);
259  for(i=0;i<sizeI;i++)
260  wa[i] = C[I[i]]*wa[i];
261 
262  subXTv(wa, Hs);
263  for(i=0;i<w_size;i++)
264  Hs[i] = s[i] + 2*Hs[i];
265  SG_FREE(wa);
266 }
267 
268 void l2r_l2_svc_fun::Xv(double *v, double *res_Xv)
269 {
270  int32_t l=prob->l;
271  int32_t n=prob->n;
272  float64_t bias=0;
273 
274  if (prob->use_bias)
275  {
276  n--;
277  bias=v[n];
278  }
279 
280  prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
281 }
282 
283 void l2r_l2_svc_fun::subXv(double *v, double *res_Xv)
284 {
285  int32_t n=prob->n;
286  float64_t bias=0;
287 
288  if (prob->use_bias)
289  {
290  n--;
291  bias=v[n];
292  }
293 
294  prob->x->dense_dot_range_subset(I, sizeI, res_Xv, NULL, v, n, bias);
295 
296  /*for (int32_t i=0;i<sizeI;i++)
297  {
298  res_Xv[i]=prob->x->dense_dot(I[i], v, n);
299 
300  if (prob->use_bias)
301  res_Xv[i]+=bias;
302  }*/
303 }
304 
305 void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
306 {
307  int32_t n=prob->n;
308 
309  if (prob->use_bias)
310  n--;
311 
312  memset(XTv, 0, sizeof(float64_t)*prob->n);
313  for (int32_t i=0;i<sizeI;i++)
314  {
315  prob->x->add_to_dense_vec(v[i], I[i], XTv, n);
316 
317  if (prob->use_bias)
318  XTv[n]+=v[i];
319  }
320 }
321 
322 // A coordinate descent algorithm for
323 // multi-class support vector machines by Crammer and Singer
324 //
325 // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
326 // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
327 //
328 // where e^m_i = 0 if y_i = m,
329 // e^m_i = 1 if y_i != m,
330 // C^m_i = C if m = y_i,
331 // C^m_i = 0 if m != y_i,
332 // and w_m(\alpha) = \sum_i \alpha^m_i x_i
333 //
334 // Given:
335 // x, y, C
336 // eps is the stopping tolerance
337 //
338 // solution will be put in w
339 
340 #define GETI(i) (prob->y[i])
341 // To support weights for instances, use GETI(i) (i)
342 
343 Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class, double *weighted_C, double epsilon, int max_it)
344 {
345  this->w_size = prob->n;
346  this->l = prob->l;
347  this->nr_class = n_class;
348  this->eps = epsilon;
349  this->max_iter = max_it;
350  this->prob = p;
351  this->C = weighted_C;
352  this->B = SG_MALLOC(double, nr_class);
353  this->G = SG_MALLOC(double, nr_class);
354 }
355 
356 Solver_MCSVM_CS::~Solver_MCSVM_CS()
357 {
358  SG_FREE(B);
359  SG_FREE(G);
360 }
361 
int compare_double(const void *a, const void *b)
{
	// qsort() comparator ordering doubles in DESCENDING order.
	const double lhs = *(const double *)a;
	const double rhs = *(const double *)b;

	if (lhs > rhs)
		return -1;
	return (lhs < rhs) ? 1 : 0;
}
370 
371 void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
372 {
373  int r;
374  double *D=CMath::clone_vector(B, active_i);
375 
376  if(yi < active_i)
377  D[yi] += A_i*C_yi;
378  qsort(D, active_i, sizeof(double), compare_double);
379 
380  double beta = D[0] - A_i*C_yi;
381  for(r=1;r<active_i && beta<r*D[r];r++)
382  beta += D[r];
383 
384  beta /= r;
385  for(r=0;r<active_i;r++)
386  {
387  if(r == yi)
388  alpha_new[r] = CMath::min(C_yi, (beta-B[r])/A_i);
389  else
390  alpha_new[r] = CMath::min((double)0, (beta - B[r])/A_i);
391  }
392  SG_FREE(D);
393 }
394 
395 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
396 {
397  double bound = 0;
398  if(m == yi)
399  bound = C[GETI(i)];
400  if(alpha_i == bound && G[m] < minG)
401  return true;
402  return false;
403 }
404 
405 void Solver_MCSVM_CS::Solve(double *w)
406 {
407  int i, m, s;
408  int iter = 0;
409  double *alpha = SG_MALLOC(double, l*nr_class);
410  double *alpha_new = SG_MALLOC(double, nr_class);
411  int *index = SG_MALLOC(int, l);
412  double *QD = SG_MALLOC(double, l);
413  int *d_ind = SG_MALLOC(int, nr_class);
414  double *d_val = SG_MALLOC(double, nr_class);
415  int *alpha_index = SG_MALLOC(int, nr_class*l);
416  int *y_index = SG_MALLOC(int, l);
417  int active_size = l;
418  int *active_size_i = SG_MALLOC(int, l);
419  double eps_shrink = CMath::max(10.0*eps, 1.0); // stopping tolerance for shrinking
420  bool start_from_all = true;
421  // initial
422  for(i=0;i<l*nr_class;i++)
423  alpha[i] = 0;
424  for(i=0;i<w_size*nr_class;i++)
425  w[i] = 0;
426  for(i=0;i<l;i++)
427  {
428  for(m=0;m<nr_class;m++)
429  alpha_index[i*nr_class+m] = m;
430 
431  QD[i] = prob->x->dot(i, prob->x,i);
432 
433  active_size_i[i] = nr_class;
434  y_index[i] = prob->y[i];
435  index[i] = i;
436  }
437 
438  while(iter < max_iter)
439  {
440  double stopping = -CMath::INFTY;
441  for(i=0;i<active_size;i++)
442  {
443  int j = i+rand()%(active_size-i);
444  CMath::swap(index[i], index[j]);
445  }
446  for(s=0;s<active_size;s++)
447  {
448  i = index[s];
449  double Ai = QD[i];
450  double *alpha_i = &alpha[i*nr_class];
451  int *alpha_index_i = &alpha_index[i*nr_class];
452 
453  if(Ai > 0)
454  {
455  for(m=0;m<active_size_i[i];m++)
456  G[m] = 1;
457  if(y_index[i] < active_size_i[i])
458  G[y_index[i]] = 0;
459 
461  /* FIXME
462  feature_node *xi = prob->x[i];
463  while(xi->index!= -1)
464  {
465  double *w_i = &w[(xi->index-1)*nr_class];
466  for(m=0;m<active_size_i[i];m++)
467  G[m] += w_i[alpha_index_i[m]]*(xi->value);
468  xi++;
469  }
470  */
471 
472  double minG = CMath::INFTY;
473  double maxG = -CMath::INFTY;
474  for(m=0;m<active_size_i[i];m++)
475  {
476  if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
477  minG = G[m];
478  if(G[m] > maxG)
479  maxG = G[m];
480  }
481  if(y_index[i] < active_size_i[i])
482  if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
483  minG = G[y_index[i]];
484 
485  for(m=0;m<active_size_i[i];m++)
486  {
487  if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
488  {
489  active_size_i[i]--;
490  while(active_size_i[i]>m)
491  {
492  if(!be_shrunk(i, active_size_i[i], y_index[i],
493  alpha_i[alpha_index_i[active_size_i[i]]], minG))
494  {
495  CMath::swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
496  CMath::swap(G[m], G[active_size_i[i]]);
497  if(y_index[i] == active_size_i[i])
498  y_index[i] = m;
499  else if(y_index[i] == m)
500  y_index[i] = active_size_i[i];
501  break;
502  }
503  active_size_i[i]--;
504  }
505  }
506  }
507 
508  if(active_size_i[i] <= 1)
509  {
510  active_size--;
511  CMath::swap(index[s], index[active_size]);
512  s--;
513  continue;
514  }
515 
516  if(maxG-minG <= 1e-12)
517  continue;
518  else
519  stopping = CMath::CMath::max(maxG - minG, stopping);
520 
521  for(m=0;m<active_size_i[i];m++)
522  B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
523 
524  solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
525  int nz_d = 0;
526  for(m=0;m<active_size_i[i];m++)
527  {
528  double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
529  alpha_i[alpha_index_i[m]] = alpha_new[m];
530  if(fabs(d) >= 1e-12)
531  {
532  d_ind[nz_d] = alpha_index_i[m];
533  d_val[nz_d] = d;
534  nz_d++;
535  }
536  }
537 
538  /* FIXME
539  xi = prob->x[i];
540  while(xi->index != -1)
541  {
542  double *w_i = &w[(xi->index-1)*nr_class];
543  for(m=0;m<nz_d;m++)
544  w_i[d_ind[m]] += d_val[m]*xi->value;
545  xi++;
546  }*/
547  }
548  }
549 
550  iter++;
551  if(iter % 10 == 0)
552  {
553  SG_SINFO(".");
554  }
555 
556  if(stopping < eps_shrink)
557  {
558  if(stopping < eps && start_from_all == true)
559  break;
560  else
561  {
562  active_size = l;
563  for(i=0;i<l;i++)
564  active_size_i[i] = nr_class;
565  SG_SINFO("*");
566  eps_shrink = CMath::max(eps_shrink/2, eps);
567  start_from_all = true;
568  }
569  }
570  else
571  start_from_all = false;
572  }
573 
574  SG_SINFO("\noptimization finished, #iter = %d\n",iter);
575  if (iter >= max_iter)
576  SG_SINFO("Warning: reaching max number of iterations\n");
577 
578  // calculate objective value
579  double v = 0;
580  int nSV = 0;
581  for(i=0;i<w_size*nr_class;i++)
582  v += w[i]*w[i];
583  v = 0.5*v;
584  for(i=0;i<l*nr_class;i++)
585  {
586  v += alpha[i];
587  if(fabs(alpha[i]) > 0)
588  nSV++;
589  }
590  for(i=0;i<l;i++)
591  v -= alpha[i*nr_class+prob->y[i]];
592  SG_SINFO("Objective value = %lf\n",v);
593  SG_SINFO("nSV = %d\n",nSV);
594 
595  delete [] alpha;
596  delete [] alpha_new;
597  delete [] index;
598  delete [] QD;
599  delete [] d_ind;
600  delete [] d_val;
601  delete [] alpha_index;
602  delete [] y_index;
603  delete [] active_size_i;
604 }
605 
606 //
607 // Interface functions
608 //
609 
610 void destroy_model(struct model *model_)
611 {
612  SG_FREE(model_->w);
613  SG_FREE(model_->label);
614  SG_FREE(model_);
615 }
616 
617 void destroy_param(parameter* param)
618 {
619  SG_FREE(param->weight_label);
620  SG_FREE(param->weight);
621 }
622 #endif //HAVE_LAPACK
623 #endif // DOXYGEN_SHOULD_SKIP_THIS

SHOGUN Machine Learning Toolbox - Documentation