SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SubGradientLPM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2007-2009 Soeren Sonnenburg
8  * Written (W) 2007-2008 Vojtech Franc
9  * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/config.h>
13 
14 #ifdef USE_CPLEX
15 
17 #include <shogun/lib/Signal.h>
18 #include <shogun/lib/Time.h>
21 #include <shogun/classifier/svm/qpbsvmlib.h>
23 #include <shogun/features/Labels.h>
24 
25 using namespace shogun;
26 
27 #define DEBUG_SUBGRADIENTLPM
28 
/* Default constructor.
 * NOTE(review): the signature line (presumably CSubGradientLPM::CSubGradientLPM())
 * was lost in extraction; only the ctor-initializer list and empty body survive.
 * Defaults: C1=C2=1, epsilon=1e-5, QP working-set size 42 (hard cap 2000), no bias. */
30 : CLinearClassifier(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
31  qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
32 {
33 }
34 
/* Constructor taking the regularization constant C plus training features and
 * labels. Both C1 and C2 are set to C; remaining defaults match the default ctor.
 * NOTE(review): the opening of the signature (class-qualified constructor name)
 * was lost in extraction; the trailing parameter list below is what survives.
 * The feature/label pointers are stored directly on the base-class members. */
36  float64_t C, CDotFeatures* traindat, CLabels* trainlab)
37 : CLinearClassifier(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
38  qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
39 {
40  CLinearClassifier::features=traindat;
41  CClassifier::labels=trainlab;
42 }
43 
44 
/* Destructor: releases every buffer allocated by init() via cleanup().
 * NOTE(review): the signature line (CSubGradientLPM::~CSubGradientLPM())
 * was lost in extraction. */
46 {
47  cleanup();
48 }
49 
/* find_active(): partition the training vectors into "active" examples
 * (inside the margin / on the wrong side, active[i]==1) and "bound" examples
 * (on the margin within a tolerance, active[i]==2), and compute the
 * subgradient sign pattern of the L1 term over the weight entries
 * (w_pos / w_neg / w_zero index sets plus grad_w in {-1,+1}).
 * Outputs num_active/num_bound via reference; returns the number of examples
 * whose active status changed (0 signals convergence to the caller).
 * NOTE(review): the function signature line (int32_t
 * CSubGradientLPM::find_active(...)) was lost in extraction; only the
 * parameter-list tail below survives. */
51  int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
52 {
53  //delta_active=0;
54  //num_active=0;
55  //num_bound=0;
56 
57  //for (int32_t i=0; i<num_vec; i++)
58  //{
59  // active[i]=0;
60 
61  // //within margin/wrong side
62  // if (proj[i] < 1-work_epsilon)
63  // {
64  // idx_active[num_active++]=i;
65  // active[i]=1;
66  // }
67 
68  // //on margin
69  // if (CMath::abs(proj[i]-1) <= work_epsilon)
70  // {
71  // idx_bound[num_bound++]=i;
72  // active[i]=2;
73  // }
74 
75  // if (active[i]!=old_active[i])
76  // delta_active++;
77  //}
78 
// First pass: classify every vector against the auto-selected tolerance.
79  delta_bound=0;
80  delta_active=0;
81  num_active=0;
82  num_bound=0;
83 
84  for (int32_t i=0; i<num_vec; i++)
85  {
86  active[i]=0;
87 
88  //within margin/wrong side
89  if (proj[i] < 1-autoselected_epsilon)
90  {
91  idx_active[num_active++]=i;
92  active[i]=1;
93  }
94 
95  //on margin
// NOTE(review): the condition line (listing line 96) was lost in extraction;
// by symmetry with the second pass further below it should read:
//   if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
97  {
98  idx_bound[num_bound++]=i;
99  active[i]=2;
100  }
101 
102  if (active[i]!=old_active[i])
103  delta_active++;
104 
105  if (active[i]==2 && old_active[i]==2)
106  delta_bound++;
107  }
108 
109 
110  if (delta_active==0 && work_epsilon<=epsilon) //we converged
111  return 0;
112  else if (delta_active==0) //lets decrease work_epsilon
113  {
// NOTE(review): lines 114-115 were lost in extraction — presumably they
// shrink work_epsilon (e.g. towards epsilon) before forcing a re-selection.
// TODO confirm against the original source.
116  num_bound=qpsize;
117  }
118 
// Second pass: re-derive autoselected_epsilon from the distances to the
// margin, then re-classify with it.
119  delta_bound=0;
120  delta_active=0;
121  num_active=0;
122  num_bound=0;
123 
124  for (int32_t i=0; i<num_vec; i++)
125  {
126  tmp_proj[i]=CMath::abs(proj[i]-1);
127  tmp_proj_idx[i]=i;
128  }
129 
// NOTE(review): lines 130-133 were lost in extraction — presumably a sort of
// tmp_proj (with tmp_proj_idx co-sorted) followed by picking
// autoselected_epsilon from the sorted distances. TODO confirm.
131 
133 
// NOTE(review): this checks DEBUG_SUBGRADIENTSVM, but this file defines
// DEBUG_SUBGRADIENTLPM (see top of file) — the guarded (commented-out) print
// can never be enabled here. Harmless as-is, but looks like a copy/paste slip
// from SubGradientSVM.cpp.
134 #ifdef DEBUG_SUBGRADIENTSVM
135  //SG_PRINT("autoseleps: %15.15f\n", autoselected_epsilon);
136 #endif
137 
// NOTE(review): lines 138-144 were lost in extraction — presumably the
// condition(s) guarding the qpsize-limit handling block below. TODO confirm.
140 
142  {
144 
145  int32_t i=0;
146  while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
147  i++;
148 
149  //SG_PRINT("lower bound on epsilon requires %d variables in qp\n", i);
150 
151  if (i>=qpsize_max && autoselected_epsilon>epsilon) //qpsize limit
152  {
153  SG_PRINT("qpsize limit (%d) reached\n", qpsize_max);
154  int32_t num_in_qp=i;
155  while (--i>=0 && num_in_qp>=qpsize_max)
156  {
// NOTE(review): lines 157-160 were lost in extraction — presumably the test
// that drops vectors (and lowers autoselected_epsilon) until the working set
// fits into qpsize_max. TODO confirm.
158  {
160  num_in_qp--;
161  }
162  }
163 
164  //SG_PRINT("new qpsize will be %d, autoeps:%15.15f\n", num_in_qp, autoselected_epsilon);
165  }
166  }
167 
// Final classification with the (possibly adjusted) autoselected_epsilon.
168  for (int32_t i=0; i<num_vec; i++)
169  {
170  active[i]=0;
171 
172  //within margin/wrong side
173  if (proj[i] < 1-autoselected_epsilon)
174  {
175  idx_active[num_active++]=i;
176  active[i]=1;
177  }
178 
179  //on margin
180  if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
181  {
182  idx_bound[num_bound++]=i;
183  active[i]=2;
184  }
185 
186  if (active[i]!=old_active[i])
187  delta_active++;
188 
189  if (active[i]==2 && old_active[i]==2)
190  delta_bound++;
191  }
192 
// Subgradient of the L1 norm of w: +1 for clearly positive weights, -1 for
// clearly negative ones; near-zero weights go into w_zero (their subgradient
// component is chosen later by the QP in compute_min_subgradient()).
193  pos_idx=0;
194  neg_idx=0;
195  zero_idx=0;
196 
197  for (int32_t i=0; i<num_feat; i++)
198  {
199  if (w[i]>work_epsilon)
200  {
201  w_pos[pos_idx++]=i;
202  grad_w[i]=1;
203  }
204  else if (w[i]<-work_epsilon)
205  {
206  w_neg[neg_idx++]=i;
207  grad_w[i]=-1;
208  }
209 
210  if (CMath::abs(w[i])<=work_epsilon)
211  {
212  w_zero[zero_idx++]=i;
213  grad_w[i]=-1;
214  }
215  }
216 
217  return delta_active;
218 }
219 
220 
/* update_active(): incrementally maintain sum_CXy_active (= C1 * sum over
 * active vectors of y_i * x_i) and, when a bias is used, sum_Cy_active
 * (= C1 * sum of y_i over active vectors). Vectors that just became active
 * are added; vectors that just left the active set are subtracted, so the
 * sums never have to be recomputed from scratch. */
221 void CSubGradientLPM::update_active(int32_t num_feat, int32_t num_vec)
222 {
223  for (int32_t i=0; i<num_vec; i++)
224  {
225  if (active[i]==1 && old_active[i]!=1)
226  {
227  features->add_to_dense_vec(C1*get_label(i), i, sum_CXy_active, num_feat);
228  if (use_bias)
229  sum_Cy_active+=C1*get_label(i);
230  }
231  else if (old_active[i]==1 && active[i]!=1)
232  {
233  features->add_to_dense_vec(-C1*get_label(i), i, sum_CXy_active, num_feat);
234  if (use_bias)
235  sum_Cy_active-=C1*get_label(i);
236  }
237  }
238 
// NOTE(review): listing line 239 was lost in extraction — presumably it saves
// the current classification for the next iteration (e.g. copying/swapping
// active into old_active). TODO confirm against the original source.
240 }
241 
/* line_search(): exact line search along the direction -grad_w / -grad_b.
 * The piecewise-linear objective along the step is decomposed into hinge
 * functions max(0, A*t + B) parameterized per weight entry (for the |w|_1
 * term, rows 0..num_feat-1) and per training vector (for the hinge losses,
 * rows num_feat..num_feat+num_vec-1). The directional derivative sgrad is
 * then increased hinge by hinge (in sorted order of breakpoints) until it
 * becomes non-negative; the breakpoint reached is the optimal step size.
 * Side effect: fills grad_proj[i] = y_i*(<grad_w, x_i> + grad_b), consumed
 * later by update_projection().
 * Returns the step size alpha. */
242 float64_t CSubGradientLPM::line_search(int32_t num_feat, int32_t num_vec)
243 {
244  int32_t num_hinge=0;
245  float64_t alpha=0;
246  float64_t sgrad=0;
247 
248  float64_t* A=SG_MALLOC(float64_t, num_feat+num_vec);
249  float64_t* B=SG_MALLOC(float64_t, num_feat+num_vec);
250  float64_t* C=SG_MALLOC(float64_t, num_feat+num_vec);
251  float64_t* D=SG_MALLOC(float64_t, num_feat+num_vec);
252 
253  for (int32_t i=0; i<num_feat+num_vec; i++)
254  {
255  if (i<num_feat)
256  {
// |w_i - t*grad_w_i| contributes two linear pieces per weight entry.
257  A[i]=-grad_w[i];
258  B[i]=w[i];
259  C[i]=+grad_w[i];
260  D[i]=-w[i];
261  }
262  else
263  {
// Hinge-loss rows: p is the change rate of proj[i] along the direction.
264  float64_t p=get_label(i-num_feat)*(features->dense_dot(i-num_feat, grad_w, num_feat)+grad_b);
265  grad_proj[i-num_feat]=p;
266 
267  A[i]=0;
268  B[i]=0;
269  C[i]=C1*p;
270  D[i]=C1*(1-proj[i-num_feat]);
271  }
272 
// Accumulate the directional derivative at t=0 and collect breakpoints.
273  if (A[i]==C[i] && B[i]>D[i])
274  sgrad+=A[i]+C[i];
275  else if (A[i]==C[i] && B[i]==D[i])
276  sgrad+=CMath::max(A[i],C[i]);
277  else if (A[i]!=C[i])
278  {
279  hinge_point[num_hinge]=(D[i]-B[i])/(A[i]-C[i]);
280  hinge_idx[num_hinge]=i; // index into A,B,C,D arrays
281  num_hinge++;
282 
283  if (A[i]>C[i])
284  sgrad+=C[i];
285  if (A[i]<C[i])
286  sgrad+=A[i];
287  }
288  }
289 
290  //SG_PRINT("sgrad:%f\n", sgrad);
291  //CMath::display_vector(A, num_feat+num_vec, "A");
292  //CMath::display_vector(B, num_feat+num_vec, "B");
293  //CMath::display_vector(C, num_feat+num_vec, "C");
294  //CMath::display_vector(D, num_feat+num_vec, "D");
295  //CMath::display_vector(hinge_point, num_feat+num_vec, "hinge_point");
296  //CMath::display_vector(hinge_idx, num_feat+num_vec, "hinge_idx");
297  //ASSERT(0);
298 
// NOTE(review): listing line 299 was lost in extraction — the loop below
// requires hinge_point ascending, so the missing line presumably sorts
// hinge_point with hinge_idx co-sorted (e.g. CMath::qsort_index). TODO confirm.
300  //CMath::display_vector(hinge_point, num_feat+num_vec, "hinge_point_sorted");
301 
302 
303  int32_t i=-1;
304  while (i < num_hinge-1 && sgrad < 0)
305  {
306  i+=1;
307 
308  if (A[hinge_idx[i]] > C[hinge_idx[i]])
309  sgrad += A[hinge_idx[i]] - C[hinge_idx[i]];
310  else
311  sgrad += C[hinge_idx[i]] - A[hinge_idx[i]];
312  }
313 
// NOTE(review): if sgrad >= 0 on entry the loop never runs and i stays -1,
// making hinge_point[i] an out-of-bounds read. Presumably unreachable because
// the caller only line-searches along a descent direction — verify.
314  alpha = hinge_point[i];
315 
316  SG_FREE(D);
317  SG_FREE(C);
318  SG_FREE(B);
319  SG_FREE(A);
320 
321  //SG_PRINT("alpha=%f\n", alpha);
322  return alpha;
323 }
324 
/* compute_min_subgradient(): pick the minimum-norm subgradient of the LPM
 * objective over the current active/bound/zero sets. When bound examples or
 * zero-weight entries exist, the choice is delegated to a small QP solved by
 * CPLEX (solver->setup_subgradientlpm_QP / optimize); otherwise the
 * subgradient is unique and computed directly. On return grad_w (and grad_b
 * when use_bias) hold the chosen descent direction.
 * Returns the directional derivative along that direction.
 * NOTE(review): the signature line (presumably float64_t
 * CSubGradientLPM::compute_min_subgradient(...)) was lost in extraction. */
326  int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
327 {
328  float64_t dir_deriv=0;
329  solver->init(E_QP);
330 
331  if (zero_idx+num_bound > 0)
332  {
333  //SG_PRINT("num_var:%d (zero:%d, bound:%d) num_feat:%d\n", zero_idx+num_bound, zero_idx,num_bound, num_feat);
334  //CMath::display_vector(grad_w, num_feat+1, "grad_w");
// grad_w currently holds the L1 sign pattern from find_active();
// subtract the active-example term: grad_w <- grad_w - sum_CXy_active.
335  CMath::add(grad_w, 1.0, grad_w, -1.0, sum_CXy_active, num_feat);
336  grad_w[num_feat]= -sum_Cy_active;
337 
// NOTE(review): listing line 338 was lost in extraction. TODO confirm what
// it contained (possibly a commented-out debug print).
339 
340  //CMath::display_vector(sum_CXy_active, num_feat, "sum_CXy_active");
341  //SG_PRINT("sum_Cy_active=%10.10f\n", sum_Cy_active);
342 
343  //CMath::display_vector(grad_w, num_feat+1, "grad_w");
344 
345  solver->setup_subgradientlpm_QP(C1, labels, (CSparseFeatures<float64_t>*) features, idx_bound, num_bound,
346  w_zero, zero_idx,
347  grad_w, num_feat+1,
348  use_bias);
349 
350  solver->optimize(beta);
351  //CMath::display_vector(beta, num_feat+1, "v");
352 
353  //compute dir_deriv here, variable grad_w constains still 'v' and beta
354  //contains the future gradient
355  dir_deriv = CMath::dot(beta, grad_w, num_feat);
356  dir_deriv-=beta[num_feat]*sum_Cy_active;
357 
358  for (int32_t i=0; i<num_bound; i++)
359  {
360  float64_t val= C1*get_label(idx_bound[i])*(features->dense_dot(idx_bound[i], beta, num_feat)+ beta[num_feat]);
361  dir_deriv += CMath::max(0.0, val);
362  }
363 
364  for (int32_t i=0; i<num_feat; i++)
365  grad_w[i]=beta[i];
366 
367  if (use_bias)
368  grad_b=beta[num_feat];
369 
370  //for (int32_t i=0; i<zero_idx+num_bound; i++)
371  // beta[i]=beta[i+num_feat+1];
372 
373  //CMath::display_vector(beta, zero_idx+num_bound, "beta");
374  //SG_PRINT("beta[0]=%10.16f\n", beta[0]);
375  //ASSERT(0);
376 
377  //for (int32_t i=0; i<zero_idx+num_bound; i++)
378  //{
379  // if (i<zero_idx)
380  // grad_w[w_zero[i]]+=beta[w_zero[i]];
381  // else
382  // {
383  // features->add_to_dense_vec(-C1*beta[i]*get_label(idx_bound[i-zero_idx]), idx_bound[i-zero_idx], grad_w, num_feat);
384  // if (use_bias)
385  // grad_b -= C1 * get_label(idx_bound[i-zero_idx])*beta[i-zero_idx];
386  // }
387  //}
388 
389  //CMath::display_vector(w_zero, zero_idx, "w_zero");
390  //CMath::display_vector(grad_w, num_feat, "grad_w");
391  //SG_PRINT("grad_b=%f\n", grad_b);
392  //
393  }
394  else
395  {
// NOTE(review): this branch mixes in w (the QP branch above uses grad_w);
// listing line 397 was also lost in extraction — presumably it set grad_b
// (e.g. grad_b = -sum_Cy_active) since dir_deriv below reads grad_b.
// TODO confirm both against the original source.
396  CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
398 
399  dir_deriv = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
400  }
401 
402  solver->cleanup();
403 
404 
405  //SG_PRINT("Gradient : |subgrad_W|^2=%f, |subgrad_b|^2=%f\n",
406  // CMath::dot(grad_w, grad_w, num_feat), grad_b*grad_b);
407 
408  return dir_deriv;
409 }
410 
411 float64_t CSubGradientLPM::compute_objective(int32_t num_feat, int32_t num_vec)
412 {
413  float64_t result= CMath::sum_abs(w, num_feat);
414 
415  for (int32_t i=0; i<num_vec; i++)
416  {
417  if (proj[i]<1.0)
418  result += C1 * (1.0-proj[i]);
419  }
420 
421  return result;
422 }
423 
424 void CSubGradientLPM::compute_projection(int32_t num_feat, int32_t num_vec)
425 {
426  for (int32_t i=0; i<num_vec; i++)
427  proj[i]=get_label(i)*(features->dense_dot(i, w, num_feat) + bias);
428 }
429 
430 void CSubGradientLPM::update_projection(float64_t alpha, int32_t num_vec)
431 {
433 }
434 
435 void CSubGradientLPM::init(int32_t num_vec, int32_t num_feat)
436 {
437  // alloc normal and bias inited with 0
438  SG_FREE(w);
439  w=SG_MALLOC(float64_t, num_feat);
440  w_dim=num_feat;
441  for (int32_t i=0; i<num_feat; i++)
442  w[i]=1.0;
443  //CMath::random_vector(w, num_feat, -1.0, 1.0);
444  bias=0;
446  grad_b=0;
447 
448  w_pos=SG_MALLOC(int32_t, num_feat);
449  memset(w_pos,0,sizeof(int32_t)*num_feat);
450 
451  w_zero=SG_MALLOC(int32_t, num_feat);
452  memset(w_zero,0,sizeof(int32_t)*num_feat);
453 
454  w_neg=SG_MALLOC(int32_t, num_feat);
455  memset(w_neg,0,sizeof(int32_t)*num_feat);
456 
457  grad_w=SG_MALLOC(float64_t, num_feat+1);
458  memset(grad_w,0,sizeof(float64_t)*(num_feat+1));
459 
461  memset(sum_CXy_active,0,sizeof(float64_t)*num_feat);
462 
463  sum_Cy_active=0;
464 
465  proj=SG_MALLOC(float64_t, num_vec);
466  memset(proj,0,sizeof(float64_t)*num_vec);
467 
468  tmp_proj=SG_MALLOC(float64_t, num_vec);
469  memset(proj,0,sizeof(float64_t)*num_vec);
470 
471  tmp_proj_idx=SG_MALLOC(int32_t, num_vec);
472  memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec);
473 
474  grad_proj=SG_MALLOC(float64_t, num_vec);
475  memset(grad_proj,0,sizeof(float64_t)*num_vec);
476 
477  hinge_point=SG_MALLOC(float64_t, num_vec+num_feat);
478  memset(hinge_point,0,sizeof(float64_t)*(num_vec+num_feat));
479 
480  hinge_idx=SG_MALLOC(int32_t, num_vec+num_feat);
481  memset(hinge_idx,0,sizeof(int32_t)*(num_vec+num_feat));
482 
483  active=SG_MALLOC(uint8_t, num_vec);
484  memset(active,0,sizeof(uint8_t)*num_vec);
485 
486  old_active=SG_MALLOC(uint8_t, num_vec);
487  memset(old_active,0,sizeof(uint8_t)*num_vec);
488 
489  idx_bound=SG_MALLOC(int32_t, num_vec);
490  memset(idx_bound,0,sizeof(int32_t)*num_vec);
491 
492  idx_active=SG_MALLOC(int32_t, num_vec);
493  memset(idx_active,0,sizeof(int32_t)*num_vec);
494 
495  beta=SG_MALLOC(float64_t, num_feat+1+num_feat+num_vec);
496  memset(beta,0,sizeof(float64_t)*num_feat+1+num_feat+num_vec);
497 
498  solver=new CCplex();
499 }
500 
502 {
506  SG_FREE(proj);
507  SG_FREE(tmp_proj);
509  SG_FREE(active);
514  SG_FREE(w_pos);
515  SG_FREE(w_zero);
516  SG_FREE(w_neg);
517  SG_FREE(grad_w);
518  SG_FREE(beta);
519 
520  hinge_idx=NULL;
521  hinge_point=NULL;
522  grad_proj=NULL;
523  proj=NULL;
524  tmp_proj=NULL;
525  tmp_proj_idx=NULL;
526  active=NULL;
527  old_active=NULL;
528  idx_bound=NULL;
529  idx_active=NULL;
530  sum_CXy_active=NULL;
531  w_pos=NULL;
532  w_zero=NULL;
533  w_neg=NULL;
534  grad_w=NULL;
535  beta=NULL;
536 
537  delete solver;
538  solver=NULL;
539 }
540 
/* train(): main subgradient-descent training loop for the LPM.
 * Each iteration: recompute active/bound sets (find_active), incrementally
 * update the active-set sums (update_active), pick the minimum-norm
 * subgradient via the CPLEX QP (compute_min_subgradient), take an exact line
 * search step (line_search), apply the step to w/bias, and incrementally
 * update the projections. Stops on convergence (work_epsilon<=epsilon,
 * no active-set change, tiny step), on user cancellation, or when
 * max_train_time is exceeded. Returns true.
 * NOTE(review): the signature line (listing line 541, presumably
 * bool CSubGradientLPM::train(CFeatures* data)) was lost in extraction. */
542 {
543  lpmtim=0;
544  SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
545  ASSERT(labels);
546  if (data)
547  {
548  if (!data->has_property(FP_DOT))
549  SG_ERROR("Specified features are not of type CDotFeatures\n");
550  set_features((CDotFeatures*) data);
551  }
552  ASSERT(features);
553 
554  int32_t num_iterations=0;
555  int32_t num_train_labels=labels->get_num_labels();
556  int32_t num_feat=features->get_dim_feature_space();
557  int32_t num_vec=features->get_num_vectors();
558 
559  ASSERT(num_vec==num_train_labels);
560 
561  init(num_vec, num_feat);
562 
563  int32_t num_active=0;
564  int32_t num_bound=0;
565  float64_t alpha=0;
566  float64_t dir_deriv=0;
567  float64_t obj=0;
568  delta_active=num_vec;
// NOTE(review): listing line 569 was lost in extraction — presumably it
// initializes another loop variable (e.g. last_it_noimprovement). TODO confirm.
570 
571  work_epsilon=0.99;
// NOTE(review): listing lines 572-573 were lost in extraction — presumably
// they initialize autoselected_epsilon (read by find_active below). TODO confirm.
573 
574  compute_projection(num_feat, num_vec);
575 
576  CTime time;
577  float64_t loop_time=0;
578  while (!(CSignal::cancel_computations()))
579  {
580  CTime t;
581  delta_active=find_active(num_feat, num_vec, num_active, num_bound);
582 
583  update_active(num_feat, num_vec);
584 
585 #ifdef DEBUG_SUBGRADIENTLPM
586  SG_PRINT("==================================================\niteration: %d ", num_iterations);
587  obj=compute_objective(num_feat, num_vec);
588  SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
589  obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
590 #else
// NOTE(review): listing line 591 (the non-debug branch, probably a progress
// message) was lost in extraction. TODO confirm.
592 #endif
593  //CMath::display_vector(w, w_dim, "w");
594  //SG_PRINT("bias: %f\n", bias);
595  //CMath::display_vector(proj, num_vec, "proj");
596  //CMath::display_vector(idx_active, num_active, "idx_active");
597  //SG_PRINT("num_active: %d\n", num_active);
598  //CMath::display_vector(idx_bound, num_bound, "idx_bound");
599  //SG_PRINT("num_bound: %d\n", num_bound);
600  //CMath::display_vector(sum_CXy_active, num_feat, "sum_CXy_active");
601  //SG_PRINT("sum_Cy_active: %f\n", sum_Cy_active);
602  //CMath::display_vector(grad_w, num_feat, "grad_w");
603  //SG_PRINT("grad_b:%f\n", grad_b);
604 
605  dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);
606 
607  alpha=line_search(num_feat, num_vec);
608 
// Optimality check: only worth doing when the working set fits the QP or we
// have stalled for 10 iterations.
609  if (num_it_noimprovement==10 || num_bound<qpsize_max)
610  {
611  float64_t norm_grad=CMath::dot(grad_w, grad_w, num_feat) +
612  grad_b*grad_b;
613 
614  SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
615  "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
616  work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
617 
618  if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
619  break;
620  else
// NOTE(review): listing line 621 (the else-statement, likely a work_epsilon
// update) was lost in extraction. TODO confirm.
622  }
623 
624  //if (work_epsilon<=epsilon && delta_active==0 && num_it_noimprovement)
625  if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
626  {
627  if (last_it_noimprovement==num_iterations-1)
628  {
629  SG_PRINT("no improvement...\n");
// NOTE(review): listing line 630 was lost in extraction — presumably it
// increments num_it_noimprovement. TODO confirm.
631  }
632  else
// NOTE(review): listing line 633 was lost in extraction — presumably it
// resets num_it_noimprovement. TODO confirm.
634 
635  last_it_noimprovement=num_iterations;
636  }
637 
// Take the step: w <- w - alpha*grad_w, bias <- bias - alpha*grad_b.
638  CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat);
639  bias-=alpha*grad_b;
640 
641  update_projection(alpha, num_vec);
642 
643  t.stop();
644  loop_time=t.time_diff_sec();
645  num_iterations++;
646 
647  if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
648  break;
649  }
650 
651  SG_INFO("converged after %d iterations\n", num_iterations);
652 
653  obj=compute_objective(num_feat, num_vec);
654  SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n",
655  obj, alpha, dir_deriv, num_bound, num_active);
656 
657 #ifdef DEBUG_SUBGRADIENTLPM
658  CMath::display_vector(w, w_dim, "w");
659  SG_PRINT("bias: %f\n", bias);
660 #endif
661  SG_PRINT("solver time:%f s\n", lpmtim);
662 
663  cleanup();
664 
665  return true;
666 }
667 #endif //USE_CPLEX

SHOGUN Machine Learning Toolbox - Documentation