SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SVM_libsvm.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  *
16  * 3. Neither name of copyright holders nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Shogun specific adjustments (w) 2006-2009 Soeren Sonnenburg
34  */
35 
36 #ifndef DOXYGEN_SHOULD_SKIP_THIS
37 
38 #include <shogun/lib/memory.h>
40 #include <shogun/kernel/Kernel.h>
41 #include <shogun/io/SGIO.h>
42 #include <shogun/lib/Time.h>
43 #include <shogun/lib/Signal.h>
44 #include <shogun/lib/common.h>
46 
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <ctype.h>
50 #include <float.h>
51 #include <string.h>
52 #include <stdarg.h>
53 
54 #ifdef HAVE_PTHREAD
55 #include <pthread.h>
56 #endif
57 
58 namespace shogun
59 {
60 
61 typedef KERNELCACHE_ELEM Qfloat;
62 typedef float64_t schar;
63 
/** Allocate a fresh length-n array of T and bit-copy n elements of src into it.
 *
 * NOTE(review): copies sizeof(T)*n raw bytes with memcpy, so S and T must have
 * identical object representations — call sites in this file use identical or
 * pointer types, where this holds.
 */
template <class S, class T> inline void clone(T*& dst, S* src, int32_t n)
{
	dst = SG_MALLOC(T, n);
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
69 #define INF HUGE_VAL
70 #define TAU 1e-12
71 
72 class QMatrix;
73 class SVC_QMC;
74 
75 //
76 // Kernel Cache
77 //
78 // l is the number of total data items
79 // size is the cache size limit in bytes
80 //
/** LRU cache for columns of the Q matrix, capped at a byte budget. */
class Cache
{
public:
	/// l: number of data items (columns); size: cache size limit in bytes
	Cache(int32_t l, int64_t size);
	~Cache();

	// request data [0,len)
	// return some position p where [p,len) need to be filled
	// (p >= len if nothing needs to be filled)
	int32_t get_data(const int32_t index, Qfloat **data, int32_t len);
	void swap_index(int32_t i, int32_t j);	// future_option

private:
	int32_t l;	// number of columns
	int64_t size;	// after construction: remaining capacity, in Qfloat elements
	struct head_t
	{
		head_t *prev, *next;	// a circular list
		Qfloat *data;	// cached column values (NULL when evicted)
		int32_t len;	// data[0,len) is cached in this entry
	};

	head_t *head;	// one header per column, indexed by column number
	head_t lru_head;	// list sentinel; lru_head.next is the least recently used entry
	void lru_delete(head_t *h);	// unlink h from the LRU ring
	void lru_insert(head_t *h);	// link h at the most-recently-used end
};
108 
109 Cache::Cache(int32_t l_, int64_t size_):l(l_),size(size_)
110 {
111  head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
112  size /= sizeof(Qfloat);
113  size -= l * sizeof(head_t) / sizeof(Qfloat);
114  size = CMath::max(size, (int64_t) 2*l); // cache must be large enough for two columns
115  lru_head.next = lru_head.prev = &lru_head;
116 }
117 
118 Cache::~Cache()
119 {
120  for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
121  SG_FREE(h->data);
122  SG_FREE(head);
123 }
124 
125 void Cache::lru_delete(head_t *h)
126 {
127  // delete from current location
128  h->prev->next = h->next;
129  h->next->prev = h->prev;
130 }
131 
132 void Cache::lru_insert(head_t *h)
133 {
134  // insert to last position
135  h->next = &lru_head;
136  h->prev = lru_head.prev;
137  h->prev->next = h;
138  h->next->prev = h;
139 }
140 
/** Hand out column `index`, growing its cached prefix to `len` entries.
 *
 * On return *data points at the column buffer and the return value is the
 * position p from which the caller must (re)compute values: entries [0,p)
 * are valid from the cache, [p,len) must be filled by the caller.
 */
int32_t Cache::get_data(const int32_t index, Qfloat **data, int32_t len)
{
	head_t *h = &head[index];
	// Temporarily unlink; the entry is re-inserted at the MRU end below.
	if(h->len) lru_delete(h);
	int32_t more = len - h->len;	// additional Qfloat slots needed

	if(more > 0)
	{
		// free old space: evict least-recently-used columns until enough
		// capacity ("size" counts free Qfloat slots) is available
		while(size < more)
		{
			head_t *old = lru_head.next;
			lru_delete(old);
			SG_FREE(old->data);
			size += old->len;
			old->data = 0;
			old->len = 0;
		}

		// allocate new space
		h->data = SG_REALLOC(Qfloat, h->data, len);
		size -= more;
		// After this swap h->len holds the new cached length while "len"
		// holds the OLD one — exactly the fill position p returned below.
		CMath::swap(h->len,len);
	}

	lru_insert(h);
	*data = h->data;
	return len;
}
170 
/** Keep the cache consistent after data items i and j are swapped.
 *
 * Swaps the two column entries themselves, then fixes up row positions i and
 * j inside every other cached column; columns too short to hold both entries
 * are simply evicted ("give up") rather than partially patched.
 */
void Cache::swap_index(int32_t i, int32_t j)
{
	if(i==j) return;

	// Swap the cached columns for i and j (unlink, exchange, relink).
	if(head[i].len) lru_delete(&head[i]);
	if(head[j].len) lru_delete(&head[j]);
	CMath::swap(head[i].data,head[j].data);
	CMath::swap(head[i].len,head[j].len);
	if(head[i].len) lru_insert(&head[i]);
	if(head[j].len) lru_insert(&head[j]);

	// Normalize so i < j; then fix entries i and j inside all other columns.
	if(i>j) CMath::swap(i,j);
	for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
	{
		if(h->len > i)
		{
			if(h->len > j)
				CMath::swap(h->data[i],h->data[j]);
			else
			{
				// give up: column holds entry i but not j — evict it
				lru_delete(h);
				SG_FREE(h->data);
				size += h->len;
				h->data = 0;
				h->len = 0;
			}
		}
	}
}
201 
202 //
203 // Kernel evaluation
204 //
205 // the static method k_function is for doing single kernel evaluation
206 // the constructor of Kernel prepares to calculate the l*l kernel matrix
207 // the member function get_Q is for getting one column from the Q Matrix
208 //
/** Abstract interface to the implicit l x l matrix Q of the dual problem. */
class QMatrix {
public:
	/// Return a pointer to the first `len` entries of column `column`.
	virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0;
	/// Return the diagonal of Q.
	virtual Qfloat *get_QD() const = 0;
	/// Swap data items i and j, keeping any cached columns consistent.
	virtual void swap_index(int32_t i, int32_t j) const = 0;
	virtual ~QMatrix() {}

	// Training time budget in seconds; Solver::Solve stops the optimization
	// loop once exceeded (only checked when > 0).
	float64_t max_train_time;
};
218 
219 class LibSVMKernel;
220 
221 // helper struct for threaded processing
// helper struct for threaded processing
struct Q_THREAD_PARAM
{
	int32_t i;	// column index being computed
	int32_t start;	// first entry (inclusive) this thread fills
	int32_t end;	// one past the last entry this thread fills
	Qfloat* data;	// output buffer for column i
	float64_t* y;	// labels for two-class problems; NULL for one-class / eps-SVR
	const LibSVMKernel* q;	// kernel object used to evaluate the entries
};
231 
232 extern Parallel* sg_parallel;
233 
234 class LibSVMKernel: public QMatrix {
235 public:
236  LibSVMKernel(int32_t l, svm_node * const * x, const svm_parameter& param);
237  virtual ~LibSVMKernel();
238 
239  virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0;
240  virtual Qfloat *get_QD() const = 0;
241  virtual void swap_index(int32_t i, int32_t j) const // no so const...
242  {
243  CMath::swap(x[i],x[j]);
244  if(x_square) CMath::swap(x_square[i],x_square[j]);
245  }
246 
247  static void* compute_Q_parallel_helper(void* p)
248  {
249  Q_THREAD_PARAM* params= (Q_THREAD_PARAM*) p;
250  int32_t i=params->i;
251  int32_t start=params->start;
252  int32_t end=params->end;
253  float64_t* y=params->y;
254  Qfloat* data=params->data;
255  const LibSVMKernel* q=params->q;
256 
257  if (y) // two class
258  {
259  for(int32_t j=start;j<end;j++)
260  data[j] = (Qfloat) y[i]*y[j]*q->kernel_function(i,j);
261  }
262  else // one class, eps svr
263  {
264  for(int32_t j=start;j<end;j++)
265  data[j] = (Qfloat) q->kernel_function(i,j);
266  }
267 
268  return NULL;
269  }
270 
271  void compute_Q_parallel(Qfloat* data, float64_t* lab, int32_t i, int32_t start, int32_t len) const
272  {
273  int32_t num_threads=sg_parallel->get_num_threads();
274  if (num_threads < 2)
275  {
276  Q_THREAD_PARAM params;
277  params.i=i;
278  params.start=start;
279  params.end=len;
280  params.y=lab;
281  params.data=data;
282  params.q=this;
283  compute_Q_parallel_helper((void*) &params);
284  }
285  else
286  {
287  int32_t total_num=(len-start);
288  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
289  Q_THREAD_PARAM* params = SG_MALLOC(Q_THREAD_PARAM, num_threads);
290  int32_t step= total_num/num_threads;
291 
292  int32_t t;
293 
294  num_threads--;
295  for (t=0; t<num_threads; t++)
296  {
297  params[t].i=i;
298  params[t].start=t*step;
299  params[t].end=(t+1)*step;
300  params[t].y=lab;
301  params[t].data=data;
302  params[t].q=this;
303 
304  int code=pthread_create(&threads[t], NULL,
305  compute_Q_parallel_helper, (void*)&params[t]);
306 
307  if (code != 0)
308  {
309  SG_SWARNING("Thread creation failed (thread %d of %d) "
310  "with error:'%s'\n",t, num_threads, strerror(code));
311  num_threads=t;
312  break;
313  }
314  }
315 
316  params[t].i=i;
317  params[t].start=t*step;
318  params[t].end=len;
319  params[t].y=lab;
320  params[t].data=data;
321  params[t].q=this;
322  compute_Q_parallel_helper(&params[t]);
323 
324  for (t=0; t<num_threads; t++)
325  {
326  if (pthread_join(threads[t], NULL) != 0)
327  SG_SWARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
328  }
329 
330  SG_FREE(params);
331  SG_FREE(threads);
332  }
333  }
334 
335  inline float64_t kernel_function(int32_t i, int32_t j) const
336  {
337  return kernel->kernel(x[i]->index,x[j]->index);
338  }
339 
340 private:
341  CKernel* kernel;
342  const svm_node **x;
343  float64_t *x_square;
344 
345  // svm_parameter
346  const int32_t kernel_type;
347  const int32_t degree;
348  const float64_t gamma;
349  const float64_t coef0;
350 };
351 
/** Copy the example pointers (so swap_index can permute them) and pick up the
 * kernel object and time budget from the parameter struct. */
LibSVMKernel::LibSVMKernel(int32_t l, svm_node * const * x_, const svm_parameter& param)
: kernel_type(param.kernel_type), degree(param.degree),
 gamma(param.gamma), coef0(param.coef0)
{
	clone(x,x_,l);
	x_square = 0;	// kept for libsvm compatibility; never allocated in this file
	kernel=param.kernel;
	max_train_time=param.max_train_time;
}
361 
LibSVMKernel::~LibSVMKernel()
{
	SG_FREE(x);
	// x_square is NULL in this file; presumably SG_FREE, like free(), is a
	// no-op on NULL — verify against shogun/lib/memory.h.
	SG_FREE(x_square);
}
367 
368 // Generalized SMO+SVMlight algorithm
369 // Solves:
370 //
371 // min 0.5(\alpha^T Q \alpha) + b^T \alpha
372 //
373 // y^T \alpha = \delta
374 // y_i = +1 or -1
375 // 0 <= alpha_i <= Cp for y_i = 1
376 // 0 <= alpha_i <= Cn for y_i = -1
377 //
378 // Given:
379 //
380 // Q, p, y, Cp, Cn, and an initial feasible point \alpha
381 // l is the size of vectors and matrices
382 // eps is the stopping tolerance
383 //
384 // solution will be put in \alpha, objective value will be put in obj
385 //
386 class Solver {
387 public:
388  Solver() {};
389  virtual ~Solver() {};
390 
391  struct SolutionInfo {
392  float64_t obj;
393  float64_t rho;
394  float64_t upper_bound_p;
395  float64_t upper_bound_n;
396  float64_t r; // for Solver_NU
397  };
398 
399  void Solve(
400  int32_t l, const QMatrix& Q, const float64_t *p_, const schar *y_,
401  float64_t *alpha_, float64_t Cp, float64_t Cn, float64_t eps,
402  SolutionInfo* si, int32_t shrinking, bool use_bias);
403 
404 protected:
405  int32_t active_size;
406  schar *y;
407  float64_t *G; // gradient of objective function
408  enum { LOWER_BOUND, UPPER_BOUND, FREE };
409  char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
410  float64_t *alpha;
411  const QMatrix *Q;
412  const Qfloat *QD;
413  float64_t eps;
414  float64_t Cp,Cn;
415  float64_t *p;
416  int32_t *active_set;
417  float64_t *G_bar; // gradient, if we treat free variables as 0
418  int32_t l;
419  bool unshrink; // XXX
420 
421  float64_t get_C(int32_t i)
422  {
423  return (y[i] > 0)? Cp : Cn;
424  }
425  void update_alpha_status(int32_t i)
426  {
427  if(alpha[i] >= get_C(i))
428  alpha_status[i] = UPPER_BOUND;
429  else if(alpha[i] <= 0)
430  alpha_status[i] = LOWER_BOUND;
431  else alpha_status[i] = FREE;
432  }
433  bool is_upper_bound(int32_t i) { return alpha_status[i] == UPPER_BOUND; }
434  bool is_lower_bound(int32_t i) { return alpha_status[i] == LOWER_BOUND; }
435  bool is_free(int32_t i) { return alpha_status[i] == FREE; }
436  void swap_index(int32_t i, int32_t j);
437  void reconstruct_gradient();
438  virtual int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
439  virtual float64_t calculate_rho();
440  virtual void do_shrinking();
441 private:
442  bool be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2);
443 };
444 
/** Swap examples i and j in every per-example array (and in the kernel /
 * cache via Q), keeping all solver state aligned after shrinking moves. */
void Solver::swap_index(int32_t i, int32_t j)
{
	Q->swap_index(i,j);
	CMath::swap(y[i],y[j]);
	CMath::swap(G[i],G[j]);
	CMath::swap(alpha_status[i],alpha_status[j]);
	CMath::swap(alpha[i],alpha[j]);
	CMath::swap(p[i],p[j]);
	CMath::swap(active_set[i],active_set[j]);
	CMath::swap(G_bar[i],G_bar[j]);
}
456 
void Solver::reconstruct_gradient()
{
	// reconstruct inactive elements of G from G_bar and free variables

	if(active_size == l) return;

	int32_t i,j;
	int32_t nr_free = 0;

	// Start from the bound-variable contribution plus the linear term.
	for(j=active_size;j<l;j++)
		G[j] = G_bar[j] + p[j];

	for(j=0;j<active_size;j++)
		if(is_free(j))
			nr_free++;

	// Two equivalent ways to add the free-variable contribution; pick the one
	// touching fewer kernel entries (column fetches are the dominant cost).
	if (nr_free*l > 2*active_size*(l-active_size))
	{
		// Fetch one short column per inactive variable.
		for(i=active_size;i<l;i++)
		{
			const Qfloat *Q_i = Q->get_Q(i,active_size);
			for(j=0;j<active_size;j++)
				if(is_free(j))
					G[i] += alpha[j] * Q_i[j];
		}
	}
	else
	{
		// Fetch one full column per free variable.
		for(i=0;i<active_size;i++)
			if(is_free(i))
			{
				const Qfloat *Q_i = Q->get_Q(i,l);
				float64_t alpha_i = alpha[i];
				for(j=active_size;j<l;j++)
					G[j] += alpha_i * Q_i[j];
			}
	}
}
495 
/** Run the SMO optimization.
 *
 * @param p_l       problem size
 * @param p_Q       Q matrix (kernel values times label products)
 * @param p_p       linear term of the objective
 * @param p_y       labels (+1/-1)
 * @param p_alpha   in: feasible starting point; out: solution (in original order)
 * @param p_Cp/p_Cn upper bounds for positive/negative examples
 * @param p_eps     stopping tolerance
 * @param p_si      receives objective value, rho and bounds
 * @param shrinking enable the shrinking heuristic when non-zero
 * @param use_bias  when false, drop the equality constraint and solve the
 *                  two-variable subproblem directly (rho forced to 0)
 */
void Solver::Solve(
	int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
	const schar *p_y, float64_t *p_alpha, float64_t p_Cp, float64_t p_Cn,
	float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
{
	// Copy the problem into member state; p/y/alpha are owned copies freed below.
	this->l = p_l;
	this->Q = &p_Q;
	QD=Q->get_QD();
	clone(p, p_p,l);
	clone(y, p_y,l);
	clone(alpha,p_alpha,l);
	this->Cp = p_Cp;
	this->Cn = p_Cn;
	this->eps = p_eps;
	unshrink = false;

	// initialize alpha_status
	{
		alpha_status = SG_MALLOC(char, l);
		for(int32_t i=0;i<l;i++)
			update_alpha_status(i);
	}

	// initialize active set (for shrinking)
	{
		active_set = SG_MALLOC(int32_t, l);
		for(int32_t i=0;i<l;i++)
			active_set[i] = i;
		active_size = l;
	}

	// initialize gradient
	CTime start_time;
	{
		G = SG_MALLOC(float64_t, l);
		G_bar = SG_MALLOC(float64_t, l);
		int32_t i;
		for(i=0;i<l;i++)
		{
			G[i] = p_p[i];
			G_bar[i] = 0;
		}
		SG_SINFO("Computing gradient for initial set of non-zero alphas\n");
		//CMath::display_vector(alpha, l, "alphas");
		// Only non-zero starting alphas contribute; abortable via CSignal.
		for(i=0;i<l && !CSignal::cancel_computations(); i++)
		{
			if(!is_lower_bound(i))
			{
				const Qfloat *Q_i = Q->get_Q(i,l);
				float64_t alpha_i = alpha[i];
				int32_t j;
				for(j=0;j<l;j++)
					G[j] += alpha_i*Q_i[j];
				if(is_upper_bound(i))
					for(j=0;j<l;j++)
						G_bar[j] += get_C(i) * Q_i[j];
			}
			SG_SPROGRESS(i, 0, l);
		}
		SG_SDONE();
	}

	// optimization step

	int32_t iter = 0;
	int32_t counter = CMath::min(l,1000)+1;

	// NOTE(review): the loop header (libsvm has "while(1)" here) was lost in
	// this extracted copy — the braces below enclose the main optimization
	// loop, exited via the "break" statements inside. Restore it from the
	// upstream file.
	{
		// Honour the optional wall-clock training budget.
		if (Q->max_train_time > 0 && start_time.cur_time_diff() > Q->max_train_time)
			break;

		// show progress and do shrinking

		if(--counter == 0)
		{
			counter = CMath::min(l,1000);
			if(shrinking) do_shrinking();
			//SG_SINFO(".");
		}

		int32_t i,j;
		float64_t gap;
		if(select_working_set(i,j, gap)!=0)
		{
			// reconstruct the whole gradient
			reconstruct_gradient();
			// reset active set size and check
			active_size = l;
			//SG_SINFO("*");
			// Optimal on the full problem -> done; otherwise keep going and
			// shrink again next iteration.
			if(select_working_set(i,j, gap)!=0)
				break;
			else
				counter = 1;	// do shrinking next iteration
		}

		SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

		++iter;

		// update alpha[i] and alpha[j], handle bounds carefully

		const Qfloat *Q_i = Q->get_Q(i,active_size);
		const Qfloat *Q_j = Q->get_Q(j,active_size);

		float64_t C_i = get_C(i);
		float64_t C_j = get_C(j);

		float64_t old_alpha_i = alpha[i];
		float64_t old_alpha_j = alpha[j];

		if (!use_bias)
		{
			// No equality constraint: minimize the 2x2 subproblem directly
			// (unconstrained optimum, then clip each variable to its box and
			// re-optimize the other one if clipping occurred).
			double pi=G[i]-Q_i[i]*alpha[i]-Q_i[j]*alpha[j];
			double pj=G[j]-Q_i[j]*alpha[i]-Q_j[j]*alpha[j];
			double det=Q_i[i]*Q_j[j]-Q_i[j]*Q_i[j];
			double alpha_i=-(Q_j[j]*pi-Q_i[j]*pj)/det;
			alpha_i=CMath::min(C_i,CMath::max(0.0,alpha_i));
			double alpha_j=-(-Q_i[j]*pi+Q_i[i]*pj)/det;
			alpha_j=CMath::min(C_j,CMath::max(0.0,alpha_j));

			if (alpha_i==0 || alpha_i == C_i)
				alpha_j=CMath::min(C_j,CMath::max(0.0,-(pj+Q_i[j]*alpha_i)/Q_j[j]));
			if (alpha_j==0 || alpha_j == C_j)
				alpha_i=CMath::min(C_i,CMath::max(0.0,-(pi+Q_i[j]*alpha_j)/Q_i[i]));

			alpha[i]=alpha_i; alpha[j]=alpha_j;
		}
		else
		{
			// Classic SMO update preserving y^T alpha; degenerate quadratic
			// coefficients are replaced by TAU.
			if(y[i]!=y[j])
			{
				float64_t quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
				if (quad_coef <= 0)
					quad_coef = TAU;
				float64_t delta = (-G[i]-G[j])/quad_coef;
				float64_t diff = alpha[i] - alpha[j];
				alpha[i] += delta;
				alpha[j] += delta;

				// Clip back to the feasible region (alpha_i - alpha_j = diff
				// is invariant for opposite labels).
				if(diff > 0)
				{
					if(alpha[j] < 0)
					{
						alpha[j] = 0;
						alpha[i] = diff;
					}
				}
				else
				{
					if(alpha[i] < 0)
					{
						alpha[i] = 0;
						alpha[j] = -diff;
					}
				}
				if(diff > C_i - C_j)
				{
					if(alpha[i] > C_i)
					{
						alpha[i] = C_i;
						alpha[j] = C_i - diff;
					}
				}
				else
				{
					if(alpha[j] > C_j)
					{
						alpha[j] = C_j;
						alpha[i] = C_j + diff;
					}
				}
			}
			else
			{
				float64_t quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
				if (quad_coef <= 0)
					quad_coef = TAU;
				float64_t delta = (G[i]-G[j])/quad_coef;
				float64_t sum = alpha[i] + alpha[j];
				alpha[i] -= delta;
				alpha[j] += delta;

				// Clip back (alpha_i + alpha_j = sum is invariant for equal labels).
				if(sum > C_i)
				{
					if(alpha[i] > C_i)
					{
						alpha[i] = C_i;
						alpha[j] = sum - C_i;
					}
				}
				else
				{
					if(alpha[j] < 0)
					{
						alpha[j] = 0;
						alpha[i] = sum;
					}
				}
				if(sum > C_j)
				{
					if(alpha[j] > C_j)
					{
						alpha[j] = C_j;
						alpha[i] = sum - C_j;
					}
				}
				else
				{
					if(alpha[i] < 0)
					{
						alpha[i] = 0;
						alpha[j] = sum;
					}
				}
			}
		}

		// update G

		float64_t delta_alpha_i = alpha[i] - old_alpha_i;
		float64_t delta_alpha_j = alpha[j] - old_alpha_j;

		for(int32_t k=0;k<active_size;k++)
		{
			G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
		}

		// update alpha_status and G_bar

		{
			// G_bar tracks the gradient contribution of at-bound variables;
			// adjust it whenever i or j crosses the upper bound.
			bool ui = is_upper_bound(i);
			bool uj = is_upper_bound(j);
			update_alpha_status(i);
			update_alpha_status(j);
			int32_t k;
			if(ui != is_upper_bound(i))
			{
				Q_i = Q->get_Q(i,l);
				if(ui)
					for(k=0;k<l;k++)
						G_bar[k] -= C_i * Q_i[k];
				else
					for(k=0;k<l;k++)
						G_bar[k] += C_i * Q_i[k];
			}

			if(uj != is_upper_bound(j))
			{
				Q_j = Q->get_Q(j,l);
				if(uj)
					for(k=0;k<l;k++)
						G_bar[k] -= C_j * Q_j[k];
				else
					for(k=0;k<l;k++)
						G_bar[k] += C_j * Q_j[k];
			}
		}

#ifdef MCSVM_DEBUG
		// calculate objective value
		{
			float64_t v = 0;
			for(i=0;i<l;i++)
				v += alpha[i] * (G[i] + p[i]);

			p_si->obj = v/2;

			float64_t primal=0;
			//float64_t gap=100000;
			SG_SPRINT("dual obj=%f primal obf=%f gap=%f\n", v/2, primal, gap);
		}
#endif
	}

	// calculate rho

	if (!use_bias)
		p_si->rho = 0;
	else
		p_si->rho = calculate_rho();

	// calculate objective value
	{
		float64_t v = 0;
		int32_t i;
		for(i=0;i<l;i++)
			v += alpha[i] * (G[i] + p[i]);

		p_si->obj = v/2;
	}

	// put back the solution, undoing the shrinking permutation
	{
		for(int32_t i=0;i<l;i++)
			p_alpha[active_set[i]] = alpha[i];
	}

	p_si->upper_bound_p = Cp;
	p_si->upper_bound_n = Cn;

	SG_SINFO("\noptimization finished, #iter = %d\n",iter);

	SG_FREE(p);
	SG_FREE(y);
	SG_FREE(alpha);
	SG_FREE(alpha_status);
	SG_FREE(active_set);
	SG_FREE(G);
	SG_FREE(G_bar);
}
808 
809 // return 1 if already optimal, return 0 otherwise
// return 1 if already optimal, return 0 otherwise
int32_t Solver::select_working_set(
	int32_t &out_i, int32_t &out_j, float64_t &gap)
{
	// return i,j such that
	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	// j: minimizes the decrease of obj value
	//    (if quadratic coefficient <= 0, replace it with tau)
	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

	float64_t Gmax = -INF;	// largest violation on the I_up side
	float64_t Gmax2 = -INF;	// largest violation on the I_low side (for the gap)
	int32_t Gmax_idx = -1;
	int32_t Gmin_idx = -1;
	float64_t obj_diff_min = INF;

	// First pass: pick i as the maximal violator among variables that can
	// still move "up" (not at their upper/lower bound, depending on label).
	for(int32_t t=0;t<active_size;t++)
		if(y[t]==+1)
		{
			if(!is_upper_bound(t))
				if(-G[t] >= Gmax)
				{
					Gmax = -G[t];
					Gmax_idx = t;
				}
		}
		else
		{
			if(!is_lower_bound(t))
				if(G[t] >= Gmax)
				{
					Gmax = G[t];
					Gmax_idx = t;
				}
		}

	int32_t i = Gmax_idx;
	const Qfloat *Q_i = NULL;
	if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
		Q_i = Q->get_Q(i,active_size);

	// Second pass: pick j minimizing the (second-order) predicted objective
	// decrease when paired with i.
	for(int32_t j=0;j<active_size;j++)
	{
		if(y[j]==+1)
		{
			if (!is_lower_bound(j))
			{
				float64_t grad_diff=Gmax+G[j];
				if (G[j] >= Gmax2)
					Gmax2 = G[j];
				if (grad_diff > 0)
				{
					float64_t obj_diff;
					float64_t quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
		else
		{
			if (!is_upper_bound(j))
			{
				float64_t grad_diff= Gmax-G[j];
				if (-G[j] >= Gmax2)
					Gmax2 = -G[j];
				if (grad_diff > 0)
				{
					float64_t obj_diff;
					float64_t quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
	}

	// KKT violation gap; below eps means (approximately) optimal.
	gap=Gmax+Gmax2;
	if(gap < eps)
		return 1;

	out_i = Gmax_idx;
	out_j = Gmin_idx;
	return 0;
}
910 
911 bool Solver::be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2)
912 {
913  if(is_upper_bound(i))
914  {
915  if(y[i]==+1)
916  return(-G[i] > Gmax1);
917  else
918  return(-G[i] > Gmax2);
919  }
920  else if(is_lower_bound(i))
921  {
922  if(y[i]==+1)
923  return(G[i] > Gmax2);
924  else
925  return(G[i] > Gmax1);
926  }
927  else
928  return(false);
929 }
930 
931 
/** Shrinking heuristic: move variables that are pinned at a bound (and likely
 * to stay there) out of the active set to speed up later iterations. */
void Solver::do_shrinking()
{
	int32_t i;
	float64_t Gmax1 = -INF;		// max { -y_i * grad(f)_i | i in I_up(\alpha) }
	float64_t Gmax2 = -INF;		// max { y_i * grad(f)_i | i in I_low(\alpha) }

	// find maximal violating pair first
	for(i=0;i<active_size;i++)
	{
		if(y[i]==+1)
		{
			if(!is_upper_bound(i))
			{
				if(-G[i] >= Gmax1)
					Gmax1 = -G[i];
			}
			if(!is_lower_bound(i))
			{
				if(G[i] >= Gmax2)
					Gmax2 = G[i];
			}
		}
		else
		{
			if(!is_upper_bound(i))
			{
				if(-G[i] >= Gmax2)
					Gmax2 = -G[i];
			}
			if(!is_lower_bound(i))
			{
				if(G[i] >= Gmax1)
					Gmax1 = G[i];
			}
		}
	}

	// Close to convergence: bring all shrunk variables back once ("unshrink")
	// and rebuild the full gradient before the final iterations.
	if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
	{
		unshrink = true;
		reconstruct_gradient();
		active_size = l;
	}

	// Compact the active region: swap each shrinkable variable with a
	// non-shrinkable one from the tail, shrinking active_size as we go.
	for(i=0;i<active_size;i++)
		if (be_shrunk(i, Gmax1, Gmax2))
		{
			active_size--;
			while (active_size > i)
			{
				if (!be_shrunk(active_size, Gmax1, Gmax2))
				{
					swap_index(i,active_size);
					break;
				}
				active_size--;
			}
		}
}
991 
992 float64_t Solver::calculate_rho()
993 {
994  float64_t r;
995  int32_t nr_free = 0;
996  float64_t ub = INF, lb = -INF, sum_free = 0;
997  for(int32_t i=0;i<active_size;i++)
998  {
999  float64_t yG = y[i]*G[i];
1000 
1001  if(is_upper_bound(i))
1002  {
1003  if(y[i]==-1)
1004  ub = CMath::min(ub,yG);
1005  else
1006  lb = CMath::max(lb,yG);
1007  }
1008  else if(is_lower_bound(i))
1009  {
1010  if(y[i]==+1)
1011  ub = CMath::min(ub,yG);
1012  else
1013  lb = CMath::max(lb,yG);
1014  }
1015  else
1016  {
1017  ++nr_free;
1018  sum_free += yG;
1019  }
1020  }
1021 
1022  if(nr_free>0)
1023  r = sum_free/nr_free;
1024  else
1025  r = (ub+lb)/2;
1026 
1027  return r;
1028 }
1029 
1030 
1031 //
1032 //Solve with individually weighted examples
1033 //
//
//Solve with individually weighted examples
//
class WeightedSolver : public Solver
{

public:

	/** @param cost_vec per-example upper bounds C_i; not owned, must outlive
	 * the solver */
	WeightedSolver(float64_t* cost_vec)
	{
		this->Cs = cost_vec;
	}

	// NOTE(review): Solver::get_C is declared non-virtual in this file; as
	// long as that is the case, calls inside Solver::Solve bind statically to
	// the base version and this override — and hence the per-example costs —
	// is never reached.  Making the base get_C virtual fixes this.
	virtual float64_t get_C(int32_t i)
	{
		return Cs[i];
	}

protected:

	// per-example cost vector (not owned)
	float64_t* Cs;

};
1057 
1058 
1059 //
1060 // Solver for nu-svm classification and regression
1061 //
1062 // additional constraint: e^T \alpha = constant
1063 //
class Solver_NU : public Solver
{
public:
	Solver_NU() {}
	/** Same contract as Solver::Solve, but keeps the SolutionInfo pointer so
	 * calculate_rho() can additionally report the auxiliary value r. */
	void Solve(
		int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
		const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn,
		float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
	{
		this->si = p_si;
		Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,
			shrinking,use_bias);
	}
private:
	SolutionInfo *si;	// set in Solve(); si->r written by calculate_rho()
	// nu-variant overrides: working-set pairs are restricted to equal labels.
	int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
	float64_t calculate_rho();
	bool be_shrunk(
		int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
		float64_t Gmax4);
	void do_shrinking();
};
1086 
1087 // return 1 if already optimal, return 0 otherwise
// return 1 if already optimal, return 0 otherwise
int32_t Solver_NU::select_working_set(
	int32_t &out_i, int32_t &out_j, float64_t &gap)
{
	// return i,j such that y_i = y_j and
	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	// j: minimizes the decrease of obj value
	//    (if quadratic coefficient <= 0, replace it with tau)
	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

	// Separate maximal violators per class: the nu formulation has one
	// equality constraint per label, so pairs must share a label.
	float64_t Gmaxp = -INF;		// positive class, I_up side
	float64_t Gmaxp2 = -INF;	// positive class, I_low side (for the gap)
	int32_t Gmaxp_idx = -1;

	float64_t Gmaxn = -INF;		// negative class, I_up side
	float64_t Gmaxn2 = -INF;	// negative class, I_low side
	int32_t Gmaxn_idx = -1;

	int32_t Gmin_idx = -1;
	float64_t obj_diff_min = INF;

	for(int32_t t=0;t<active_size;t++)
		if(y[t]==+1)
		{
			if(!is_upper_bound(t))
				if(-G[t] >= Gmaxp)
				{
					Gmaxp = -G[t];
					Gmaxp_idx = t;
				}
		}
		else
		{
			if(!is_lower_bound(t))
				if(G[t] >= Gmaxn)
				{
					Gmaxn = G[t];
					Gmaxn_idx = t;
				}
		}

	int32_t ip = Gmaxp_idx;
	int32_t in = Gmaxn_idx;
	const Qfloat *Q_ip = NULL;
	const Qfloat *Q_in = NULL;
	if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
		Q_ip = Q->get_Q(ip,active_size);
	if(in != -1)
		Q_in = Q->get_Q(in,active_size);

	// Pick j (within its own class) minimizing the predicted objective
	// decrease, second-order style.
	for(int32_t j=0;j<active_size;j++)
	{
		if(y[j]==+1)
		{
			if (!is_lower_bound(j))
			{
				float64_t grad_diff=Gmaxp+G[j];
				if (G[j] >= Gmaxp2)
					Gmaxp2 = G[j];
				if (grad_diff > 0)
				{
					float64_t obj_diff;
					float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
		else
		{
			if (!is_upper_bound(j))
			{
				float64_t grad_diff=Gmaxn-G[j];
				if (-G[j] >= Gmaxn2)
					Gmaxn2 = -G[j];
				if (grad_diff > 0)
				{
					float64_t obj_diff;
					float64_t quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
					if (quad_coef > 0)
						obj_diff = -(grad_diff*grad_diff)/quad_coef;
					else
						obj_diff = -(grad_diff*grad_diff)/TAU;

					if (obj_diff <= obj_diff_min)
					{
						Gmin_idx=j;
						obj_diff_min = obj_diff;
					}
				}
			}
		}
	}

	// Overall gap is the worse of the two per-class violation gaps.
	gap=CMath::max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2);
	if(gap < eps)
		return 1;

	// Pair j with the maximal violator of its own class.
	if (y[Gmin_idx] == +1)
		out_i = Gmaxp_idx;
	else
		out_i = Gmaxn_idx;
	out_j = Gmin_idx;

	return 0;
}
1201 
1202 bool Solver_NU::be_shrunk(
1203  int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
1204  float64_t Gmax4)
1205 {
1206  if(is_upper_bound(i))
1207  {
1208  if(y[i]==+1)
1209  return(-G[i] > Gmax1);
1210  else
1211  return(-G[i] > Gmax4);
1212  }
1213  else if(is_lower_bound(i))
1214  {
1215  if(y[i]==+1)
1216  return(G[i] > Gmax2);
1217  else
1218  return(G[i] > Gmax3);
1219  }
1220  else
1221  return(false);
1222 }
1223 
/** Shrinking for the nu formulation; tracks the four per-class/per-side
 * violation maxima that be_shrunk() compares against. */
void Solver_NU::do_shrinking()
{
	float64_t Gmax1 = -INF;	// max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
	float64_t Gmax2 = -INF;	// max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
	float64_t Gmax3 = -INF;	// max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
	float64_t Gmax4 = -INF;	// max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }

	// find maximal violating pair first
	int32_t i;
	for(i=0;i<active_size;i++)
	{
		if(!is_upper_bound(i))
		{
			if(y[i]==+1)
			{
				if(-G[i] > Gmax1) Gmax1 = -G[i];
			}
			else	if(-G[i] > Gmax4) Gmax4 = -G[i];
		}
		if(!is_lower_bound(i))
		{
			if(y[i]==+1)
			{
				if(G[i] > Gmax2) Gmax2 = G[i];
			}
			else	if(G[i] > Gmax3) Gmax3 = G[i];
		}
	}

	// Close to convergence: restore all shrunk variables once ("unshrink")
	// and rebuild the full gradient.
	if(unshrink == false && CMath::max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
	{
		unshrink = true;
		reconstruct_gradient();
		active_size = l;
	}

	// Compact the active region exactly as in Solver::do_shrinking.
	for(i=0;i<active_size;i++)
		if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
		{
			active_size--;
			while (active_size > i)
			{
				if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
				{
					swap_index(i,active_size);
					break;
				}
				active_size--;
			}
		}
}
1275 
1276 float64_t Solver_NU::calculate_rho()
1277 {
1278  int32_t nr_free1 = 0,nr_free2 = 0;
1279  float64_t ub1 = INF, ub2 = INF;
1280  float64_t lb1 = -INF, lb2 = -INF;
1281  float64_t sum_free1 = 0, sum_free2 = 0;
1282 
1283  for(int32_t i=0; i<active_size; i++)
1284  {
1285  if(y[i]==+1)
1286  {
1287  if(is_upper_bound(i))
1288  lb1 = CMath::max(lb1,G[i]);
1289  else if(is_lower_bound(i))
1290  ub1 = CMath::min(ub1,G[i]);
1291  else
1292  {
1293  ++nr_free1;
1294  sum_free1 += G[i];
1295  }
1296  }
1297  else
1298  {
1299  if(is_upper_bound(i))
1300  lb2 = CMath::max(lb2,G[i]);
1301  else if(is_lower_bound(i))
1302  ub2 = CMath::min(ub2,G[i]);
1303  else
1304  {
1305  ++nr_free2;
1306  sum_free2 += G[i];
1307  }
1308  }
1309  }
1310 
1311  float64_t r1,r2;
1312  if(nr_free1 > 0)
1313  r1 = sum_free1/nr_free1;
1314  else
1315  r1 = (ub1+lb1)/2;
1316 
1317  if(nr_free2 > 0)
1318  r2 = sum_free2/nr_free2;
1319  else
1320  r2 = (ub2+lb2)/2;
1321 
1322  si->r = (r1+r2)/2;
1323  return (r1-r2)/2;
1324 }
1325 
/** Q matrix for the multi-class SVC formulation: entries are kernel values
 * scaled by factor*(nr_class-1) for same-label pairs and by -factor for
 * different-label pairs (see get_Q). */
class SVC_QMC: public LibSVMKernel
{
public:
	SVC_QMC(const svm_problem& prob, const svm_parameter& param, const schar *y_, int32_t n_class, float64_t fac)
	:LibSVMKernel(prob.l, prob.x, param)
	{
		nr_class=n_class;
		factor=fac;
		clone(y,y_,prob.l);
		// param.cache_size is presumably in megabytes (1<<20 converts to
		// bytes) — confirm against svm_parameter's documentation.
		cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
		QD = SG_MALLOC(Qfloat, prob.l);
		for(int32_t i=0;i<prob.l;i++)
		{
			QD[i]= factor*(nr_class-1)*kernel_function(i,i);
		}
	}

	Qfloat *get_Q(int32_t i, int32_t len) const
	{
		Qfloat *data;
		int32_t start;
		// Only entries [start,len) are missing from the cache; compute and
		// scale just those — entries below start were scaled earlier.
		if((start = cache->get_data(i,&data,len)) < len)
		{
			compute_Q_parallel(data, NULL, i, start, len);

			for(int32_t j=start;j<len;j++)
			{
				if (y[i]==y[j])
					data[j] *= (factor*(nr_class-1));
				else
					data[j] *= (-factor);
			}
		}
		return data;
	}

	// Map a scaled Q entry back towards the raw kernel value.
	// NOTE(review): only divides by (nr_class-1) / negates — it does not
	// divide by `factor`; presumably callers account for the factor
	// elsewhere.  Verify at the call sites.
	inline Qfloat get_orig_Qij(Qfloat Q, int32_t i, int32_t j)
	{
		if (y[i]==y[j])
			return Q/(nr_class-1);
		else
			return -Q;
	}

	Qfloat *get_QD() const
	{
		return QD;
	}

	void swap_index(int32_t i, int32_t j) const
	{
		// Keep the cache, the base-class data and our own arrays aligned.
		cache->swap_index(i,j);
		LibSVMKernel::swap_index(i,j);
		CMath::swap(y[i],y[j]);
		CMath::swap(QD[i],QD[j]);
	}

	~SVC_QMC()
	{
		SG_FREE(y);
		delete cache;
		SG_FREE(QD);
	}
private:
	float64_t factor;	// global scaling factor of the multi-class formulation
	float64_t nr_class;	// number of classes (stored as float for the products above)
	schar *y;	// owned copy of the labels
	Cache *cache;	// owned column cache
	Qfloat *QD;	// owned diagonal of Q
};
1396 
1397 //
1398 // Solver for nu-svm classification and regression
1399 //
1400 // additional constraint: e^T \alpha = constant
1401 //
// Solver for the multiclass nu-SVM formulation (Q matrix: SVC_QMC).
// Shrinking and rho computation are disabled (see the stub overrides below).
class Solver_NUMC : public Solver
{
public:
	// n_class: number of classes; svm_nu: nu parameter of the formulation.
	Solver_NUMC(int32_t n_class, float64_t svm_nu)
	{
		nr_class=n_class;
		nu=svm_nu;
	}

	// Forwards to the generic Solver, keeping a pointer to the caller's
	// SolutionInfo so the overridden hooks can refer to it.
	void Solve(
		int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
		const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn,
		float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
	{
		this->si = p_si;
		Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,shrinking, use_bias);
	}
	// Compute the primal objective and fill per-class biases/norms from the
	// dual solution; defined below.
	float64_t compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases,float64_t* normwcw);

private:
	SolutionInfo *si;
	// Working-set selection restricted to pairs drawn from the same class.
	int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
	float64_t calculate_rho();
	bool be_shrunk(
		int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
		float64_t Gmax4);
	void do_shrinking();

private:
	int32_t nr_class; // number of classes
	float64_t nu;     // nu parameter
};
1434 
1435 float64_t Solver_NUMC::compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases, float64_t* normwcw)
1436 {
1437  clone(y, p_y,l);
1438  clone(alpha,p_alpha,l);
1439 
1440  alpha_status = SG_MALLOC(char, l);
1441  for(int32_t i=0;i<l;i++)
1442  update_alpha_status(i);
1443 
1444  float64_t* class_count = SG_MALLOC(float64_t, nr_class);
1445  float64_t* outputs = SG_MALLOC(float64_t, l);
1446 
1447  for (int32_t i=0; i<nr_class; i++)
1448  {
1449  class_count[i]=0;
1450  biases[i+1]=0;
1451  }
1452 
1453  for (int32_t i=0; i<active_size; i++)
1454  {
1455  update_alpha_status(i);
1456  if(!is_upper_bound(i) && !is_lower_bound(i))
1457  class_count[(int32_t) y[i]]++;
1458  }
1459 
1460  //CMath::display_vector(class_count, nr_class, "class_count");
1461 
1462  float64_t mu=((float64_t) nr_class)/(nu*l);
1463  //SG_SPRINT("nr_class=%d, l=%d, active_size=%d, nu=%f, mu=%f\n", nr_class, l, active_size, nu, mu);
1464 
1465  float64_t rho=0;
1466  float64_t quad=0;
1467  float64_t* zero_counts = SG_MALLOC(float64_t, nr_class);
1468  float64_t normwc_const = 0;
1469 
1470  for (int32_t i=0; i<nr_class; i++)
1471  {
1472  zero_counts[i]=-INF;
1473  normwcw[i]=0;
1474  }
1475 
1476  for (int32_t i=0; i<active_size; i++)
1477  {
1478  float64_t sum_free=0;
1479  float64_t sum_atbound=0;
1480  float64_t sum_zero_count=0;
1481 
1482  Qfloat* Q_i = Q->get_Q(i,active_size);
1483  outputs[i]=0;
1484 
1485  for (int j=0; j<active_size; j++)
1486  {
1487  quad+= alpha[i]*alpha[j]*Q_i[j];
1488  float64_t tmp= alpha[j]*Q_i[j]/mu;
1489 
1490  if(!is_upper_bound(i) && !is_lower_bound(i))
1491  sum_free+=tmp;
1492  else
1493  sum_atbound+=tmp;
1494 
1495  if (class_count[(int32_t) y[i]] == 0 && y[j]==y[i])
1496  sum_zero_count+= tmp;
1497 
1498  SVC_QMC* QMC=(SVC_QMC*) Q;
1499  float64_t norm_tmp=alpha[i]*alpha[j]*QMC->get_orig_Qij(Q_i[j], i, j);
1500  if (y[i]==y[j])
1501  normwcw[(int32_t) y[i]]+=norm_tmp;
1502 
1503  normwcw[(int32_t) y[i]]-=2.0/nr_class*norm_tmp;
1504  normwc_const+=norm_tmp;
1505  }
1506 
1507  if (class_count[(int32_t) y[i]] == 0)
1508  {
1509  if (zero_counts[(int32_t) y[i]]<sum_zero_count)
1510  zero_counts[(int32_t) y[i]]=sum_zero_count;
1511  }
1512 
1513  biases[(int32_t) y[i]+1]-=sum_free;
1514  if (class_count[(int32_t) y[i]] != 0.0)
1515  rho+=sum_free/class_count[(int32_t) y[i]];
1516  outputs[i]+=sum_free+sum_atbound;
1517  }
1518 
1519  for (int32_t i=0; i<nr_class; i++)
1520  {
1521  if (class_count[i] == 0.0)
1522  rho+=zero_counts[i];
1523 
1524  normwcw[i]+=normwc_const/CMath::sq(nr_class);
1525  normwcw[i]=CMath::sqrt(normwcw[i]);
1526  }
1527 
1528  CMath::display_vector(normwcw, nr_class, "normwcw");
1529 
1530  rho/=nr_class;
1531 
1532  SG_SPRINT("rho=%f\n", rho);
1533 
1534  float64_t sumb=0;
1535  for (int32_t i=0; i<nr_class; i++)
1536  {
1537  if (class_count[i] != 0.0)
1538  biases[i+1]=biases[i+1]/class_count[i]+rho;
1539  else
1540  biases[i+1]+=rho-zero_counts[i];
1541 
1542  SG_SPRINT("biases=%f\n", biases[i+1]);
1543 
1544  sumb+=biases[i+1];
1545  }
1546  SG_SPRINT("sumb=%f\n", sumb);
1547 
1548  SG_FREE(zero_counts);
1549 
1550  for (int32_t i=0; i<l; i++)
1551  outputs[i]+=biases[(int32_t) y[i]+1];
1552 
1553  biases[0]=rho;
1554 
1555  //CMath::display_vector(outputs, l, "outputs");
1556 
1557 
1558  float64_t xi=0;
1559  for (int32_t i=0; i<active_size; i++)
1560  {
1561  if (is_lower_bound(i))
1562  continue;
1563  xi+=rho-outputs[i];
1564  }
1565 
1566  //SG_SPRINT("xi=%f\n", xi);
1567 
1568  //SG_SPRINT("quad=%f Cp=%f xi*mu=%f\n", quad, nr_class*rho, xi*mu);
1569 
1570  float64_t primal=0.5*quad- nr_class*rho+xi*mu;
1571 
1572  //SG_SPRINT("primal=%10.10f\n", primal);
1573 
1574  SG_FREE(y);
1575  SG_FREE(alpha);
1576  SG_FREE(alpha_status);
1577 
1578  return primal;
1579 }
1580 
1581 
1582 // return 1 if already optimal, return 0 otherwise
1583 int32_t Solver_NUMC::select_working_set(
1584  int32_t &out_i, int32_t &out_j, float64_t &gap)
1585 {
1586  // return i,j such that y_i = y_j and
1587  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1588  // j: minimizes the decrease of obj value
1589  // (if quadratic coefficient <= 0, replace it with tau)
1590  // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1591 
1592  int32_t retval=0;
1593  float64_t best_gap=0;
1594  int32_t best_out_i=-1;
1595  int32_t best_out_j=-1;
1596 
1597  float64_t* Gmaxp = SG_MALLOC(float64_t, nr_class);
1598  float64_t* Gmaxp2 = SG_MALLOC(float64_t, nr_class);
1599  int32_t* Gmaxp_idx = SG_MALLOC(int32_t, nr_class);
1600 
1601  int32_t* Gmin_idx = SG_MALLOC(int32_t, nr_class);
1602  float64_t* obj_diff_min = SG_MALLOC(float64_t, nr_class);
1603 
1604  for (int32_t i=0; i<nr_class; i++)
1605  {
1606  Gmaxp[i]=-INF;
1607  Gmaxp2[i]=-INF;
1608  Gmaxp_idx[i]=-1;
1609  Gmin_idx[i]=-1;
1610  obj_diff_min[i]=INF;
1611  }
1612 
1613  for(int32_t t=0;t<active_size;t++)
1614  {
1615  int32_t cidx=y[t];
1616  if(!is_upper_bound(t))
1617  {
1618  if(-G[t] >= Gmaxp[cidx])
1619  {
1620  Gmaxp[cidx] = -G[t];
1621  Gmaxp_idx[cidx] = t;
1622  }
1623  }
1624  }
1625 
1626  for(int32_t j=0;j<active_size;j++)
1627  {
1628  int32_t cidx=y[j];
1629  int32_t ip = Gmaxp_idx[cidx];
1630  const Qfloat *Q_ip = NULL;
1631  if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1632  Q_ip = Q->get_Q(ip,active_size);
1633 
1634  if (!is_lower_bound(j))
1635  {
1636  float64_t grad_diff=Gmaxp[cidx]+G[j];
1637  if (G[j] >= Gmaxp2[cidx])
1638  Gmaxp2[cidx] = G[j];
1639  if (grad_diff > 0)
1640  {
1641  float64_t obj_diff;
1642  float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
1643  if (quad_coef > 0)
1644  obj_diff = -(grad_diff*grad_diff)/quad_coef;
1645  else
1646  obj_diff = -(grad_diff*grad_diff)/TAU;
1647 
1648  if (obj_diff <= obj_diff_min[cidx])
1649  {
1650  Gmin_idx[cidx]=j;
1651  obj_diff_min[cidx] = obj_diff;
1652  }
1653  }
1654  }
1655 
1656  gap=Gmaxp[cidx]+Gmaxp2[cidx];
1657  if (gap>=best_gap && Gmin_idx[cidx]>=0 &&
1658  Gmaxp_idx[cidx]>=0 && Gmin_idx[cidx]<active_size)
1659  {
1660  out_i = Gmaxp_idx[cidx];
1661  out_j = Gmin_idx[cidx];
1662 
1663  best_gap=gap;
1664  best_out_i=out_i;
1665  best_out_j=out_j;
1666  }
1667  }
1668 
1669  gap=best_gap;
1670  out_i=best_out_i;
1671  out_j=best_out_j;
1672 
1673  SG_SDEBUG("i=%d j=%d best_gap=%f y_i=%f y_j=%f\n", out_i, out_j, gap, y[out_i], y[out_j]);
1674 
1675 
1676  if(gap < eps)
1677  retval=1;
1678 
1679  SG_FREE(Gmaxp);
1680  SG_FREE(Gmaxp2);
1681  SG_FREE(Gmaxp_idx);
1682  SG_FREE(Gmin_idx);
1683  SG_FREE(obj_diff_min);
1684 
1685  return retval;
1686 }
1687 
// Shrinking is disabled for the multiclass solver: no variable is ever
// reported as shrinkable (see the empty do_shrinking() below).
bool Solver_NUMC::be_shrunk(
	int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
	float64_t Gmax4)
{
	return false;
}
1694 
// Intentionally empty: the multiclass solver does not shrink the active set.
void Solver_NUMC::do_shrinking()
{
}
1698 
// The multiclass formulation computes rho/biases in compute_primal()
// instead; the generic hook just returns 0.
float64_t Solver_NUMC::calculate_rho()
{
	return 0;
}
1703 
1704 
1705 //
1706 // Q matrices for various formulations
1707 //
1708 class SVC_Q: public LibSVMKernel
1709 {
1710 public:
1711  SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1712  :LibSVMKernel(prob.l, prob.x, param)
1713  {
1714  clone(y,y_,prob.l);
1715  cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
1716  QD = SG_MALLOC(Qfloat, prob.l);
1717  for(int32_t i=0;i<prob.l;i++)
1718  QD[i]= (Qfloat)kernel_function(i,i);
1719  }
1720 
1721  Qfloat *get_Q(int32_t i, int32_t len) const
1722  {
1723  Qfloat *data;
1724  int32_t start;
1725  if((start = cache->get_data(i,&data,len)) < len)
1726  compute_Q_parallel(data, y, i, start, len);
1727 
1728  return data;
1729  }
1730 
1731  Qfloat *get_QD() const
1732  {
1733  return QD;
1734  }
1735 
1736  void swap_index(int32_t i, int32_t j) const
1737  {
1738  cache->swap_index(i,j);
1739  LibSVMKernel::swap_index(i,j);
1740  CMath::swap(y[i],y[j]);
1741  CMath::swap(QD[i],QD[j]);
1742  }
1743 
1744  ~SVC_Q()
1745  {
1746  SG_FREE(y);
1747  delete cache;
1748  SG_FREE(QD);
1749  }
1750 private:
1751  schar *y;
1752  Cache *cache;
1753  Qfloat *QD;
1754 };
1755 
1756 
1757 class ONE_CLASS_Q: public LibSVMKernel
1758 {
1759 public:
1760  ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1761  :LibSVMKernel(prob.l, prob.x, param)
1762  {
1763  cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
1764  QD = SG_MALLOC(Qfloat, prob.l);
1765  for(int32_t i=0;i<prob.l;i++)
1766  QD[i]= (Qfloat)kernel_function(i,i);
1767  }
1768 
1769  Qfloat *get_Q(int32_t i, int32_t len) const
1770  {
1771  Qfloat *data;
1772  int32_t start;
1773  if((start = cache->get_data(i,&data,len)) < len)
1774  compute_Q_parallel(data, NULL, i, start, len);
1775 
1776  return data;
1777  }
1778 
1779  Qfloat *get_QD() const
1780  {
1781  return QD;
1782  }
1783 
1784  void swap_index(int32_t i, int32_t j) const
1785  {
1786  cache->swap_index(i,j);
1787  LibSVMKernel::swap_index(i,j);
1788  CMath::swap(QD[i],QD[j]);
1789  }
1790 
1791  ~ONE_CLASS_Q()
1792  {
1793  delete cache;
1794  SG_FREE(QD);
1795  }
1796 private:
1797  Cache *cache;
1798  Qfloat *QD;
1799 };
1800 
// Q matrix for epsilon/nu-SVR. The regression dual doubles the problem:
// variables [0,l) and [l,2l) both map to the original l examples
// (index[]), with signs +1 and -1 (sign[]). Kernel columns are cached per
// original example; get_Q reorders/signs them into one of two rotating
// output buffers.
class SVR_Q: public LibSVMKernel
{
public:
	SVR_Q(const svm_problem& prob, const svm_parameter& param)
	:LibSVMKernel(prob.l, prob.x, param)
	{
		l = prob.l;
		// Cache is sized for the l original examples, not the 2l variables.
		cache = new Cache(l,(int64_t)(param.cache_size*(1l<<20)));
		QD = SG_MALLOC(Qfloat, 2*l);
		sign = SG_MALLOC(schar, 2*l);
		index = SG_MALLOC(int32_t, 2*l);
		// Variable k and its mirror k+l share the same example k; the
		// diagonal is duplicated accordingly.
		for(int32_t k=0;k<l;k++)
		{
			sign[k] = 1;
			sign[k+l] = -1;
			index[k] = k;
			index[k+l] = k;
			QD[k]= (Qfloat)kernel_function(k,k);
			QD[k+l]=QD[k];
		}
		// Two output buffers so the two columns of the current working set
		// can be held simultaneously.
		buffer[0] = SG_MALLOC(Qfloat, 2*l);
		buffer[1] = SG_MALLOC(Qfloat, 2*l);
		next_buffer = 0;
	}

	// Permute only the variable-level bookkeeping; the cache stays keyed by
	// original example index (index[] provides the indirection).
	void swap_index(int32_t i, int32_t j) const
	{
		CMath::swap(sign[i],sign[j]);
		CMath::swap(index[i],index[j]);
		CMath::swap(QD[i],QD[j]);
	}

	Qfloat *get_Q(int32_t i, int32_t len) const
	{
		Qfloat *data;
		int32_t real_i = index[i];
		// Always fetch/compute the full column of the underlying example.
		if(cache->get_data(real_i,&data,l) < l)
			compute_Q_parallel(data, NULL, real_i, 0, l);

		// reorder and copy: apply the sign of both variables and map each
		// doubled-variable j back to its original example.
		Qfloat *buf = buffer[next_buffer];
		next_buffer = 1 - next_buffer;
		schar si = sign[i];
		for(int32_t j=0;j<len;j++)
			buf[j] = si * sign[j] * data[index[j]];
		return buf;
	}

	Qfloat *get_QD() const
	{
		return QD;
	}

	~SVR_Q()
	{
		delete cache;
		SG_FREE(sign);
		SG_FREE(index);
		SG_FREE(buffer[0]);
		SG_FREE(buffer[1]);
		SG_FREE(QD);
	}

private:
	int32_t l;                   // number of original examples (2l variables)
	Cache *cache;                // column cache keyed by original example
	schar *sign;                 // +1 for variables [0,l), -1 for [l,2l)
	int32_t *index;              // doubled-variable -> original example
	mutable int32_t next_buffer; // which of the two buffers to hand out next
	Qfloat *buffer[2];           // rotating output buffers for get_Q
	Qfloat *QD;                  // diagonal of the doubled Q
};
1873 
1874 //
1875 // construct and solve various formulations
1876 //
1877 static void solve_c_svc(
1878  const svm_problem *prob, const svm_parameter* param,
1879  float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
1880 {
1881  int32_t l = prob->l;
1882  schar *y = SG_MALLOC(schar, l);
1883 
1884  int32_t i;
1885 
1886  for(i=0;i<l;i++)
1887  {
1888  alpha[i] = 0;
1889  if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
1890  }
1891 
1892  Solver s;
1893  s.Solve(l, SVC_Q(*prob,*param,y), prob->pv, y,
1894  alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias);
1895 
1896  float64_t sum_alpha=0;
1897  for(i=0;i<l;i++)
1898  sum_alpha += alpha[i];
1899 
1900  if (Cp==Cn)
1901  SG_SINFO("nu = %f\n", sum_alpha/(param->C*prob->l));
1902 
1903  for(i=0;i<l;i++)
1904  alpha[i] *= y[i];
1905 
1906  SG_FREE(y);
1907 }
1908 
1909 
1910 //two weighted datasets
1911 void solve_c_svc_weighted(
1912  const svm_problem *prob, const svm_parameter* param,
1913  float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
1914 {
1915  int l = prob->l;
1916  float64_t *minus_ones = SG_MALLOC(float64_t, l);
1917  schar *y = SG_MALLOC(schar, l);
1918 
1919  int i;
1920 
1921  for(i=0;i<l;i++)
1922  {
1923  alpha[i] = 0;
1924  minus_ones[i] = -1;
1925  if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
1926  }
1927 
1928  WeightedSolver s = WeightedSolver(prob->C);
1929  s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1930  alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias);
1931 
1932  float64_t sum_alpha=0;
1933  for(i=0;i<l;i++)
1934  sum_alpha += alpha[i];
1935 
1936  //if (Cp==Cn)
1937  // SG_SINFO("nu = %f\n", sum_alpha/(prob->C*prob->l));
1938 
1939  for(i=0;i<l;i++)
1940  alpha[i] *= y[i];
1941 
1942  SG_FREE(minus_ones);
1943  SG_FREE(y);
1944 }
1945 
1946 static void solve_nu_svc(
1947  const svm_problem *prob, const svm_parameter *param,
1948  float64_t *alpha, Solver::SolutionInfo* si)
1949 {
1950  int32_t i;
1951  int32_t l = prob->l;
1952  float64_t nu = param->nu;
1953 
1954  schar *y = SG_MALLOC(schar, l);
1955 
1956  for(i=0;i<l;i++)
1957  if(prob->y[i]>0)
1958  y[i] = +1;
1959  else
1960  y[i] = -1;
1961 
1962  float64_t sum_pos = nu*l/2;
1963  float64_t sum_neg = nu*l/2;
1964 
1965  for(i=0;i<l;i++)
1966  if(y[i] == +1)
1967  {
1968  alpha[i] = CMath::min(1.0,sum_pos);
1969  sum_pos -= alpha[i];
1970  }
1971  else
1972  {
1973  alpha[i] = CMath::min(1.0,sum_neg);
1974  sum_neg -= alpha[i];
1975  }
1976 
1977  float64_t *zeros = SG_MALLOC(float64_t, l);
1978 
1979  for(i=0;i<l;i++)
1980  zeros[i] = 0;
1981 
1982  Solver_NU s;
1983  s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1984  alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias);
1985  float64_t r = si->r;
1986 
1987  SG_SINFO("C = %f\n",1/r);
1988 
1989  for(i=0;i<l;i++)
1990  alpha[i] *= y[i]/r;
1991 
1992  si->rho /= r;
1993  si->obj /= (r*r);
1994  si->upper_bound_p = 1/r;
1995  si->upper_bound_n = 1/r;
1996 
1997  SG_FREE(y);
1998  SG_FREE(zeros);
1999 }
2000 
2001 static void solve_nu_multiclass_svc(const svm_problem *prob,
2002  const svm_parameter *param, Solver::SolutionInfo* si, svm_model* model)
2003 {
2004  int32_t l = prob->l;
2005  float64_t nu = param->nu;
2006 
2007  float64_t *alpha = SG_MALLOC(float64_t, prob->l);
2008  schar *y = SG_MALLOC(schar, l);
2009 
2010  for(int32_t i=0;i<l;i++)
2011  {
2012  alpha[i] = 0;
2013  y[i]=prob->y[i];
2014  }
2015 
2016  int32_t nr_class=param->nr_class;
2017  float64_t* sum_class = SG_MALLOC(float64_t, nr_class);
2018 
2019  for (int32_t j=0; j<nr_class; j++)
2020  sum_class[j] = nu*l/nr_class;
2021 
2022  for(int32_t i=0;i<l;i++)
2023  {
2024  alpha[i] = CMath::min(1.0,sum_class[int32_t(y[i])]);
2025  sum_class[int32_t(y[i])] -= alpha[i];
2026  }
2027  SG_FREE(sum_class);
2028 
2029 
2030  float64_t *zeros = SG_MALLOC(float64_t, l);
2031 
2032  for (int32_t i=0;i<l;i++)
2033  zeros[i] = 0;
2034 
2035  Solver_NUMC s(nr_class, nu);
2036  SVC_QMC Q(*prob,*param,y, nr_class, ((float64_t) nr_class)/CMath::sq(nu*l));
2037 
2038  s.Solve(l, Q, zeros, y,
2039  alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias);
2040 
2041 
2042  int32_t* class_sv_count=SG_MALLOC(int32_t, nr_class);
2043 
2044  for (int32_t i=0; i<nr_class; i++)
2045  class_sv_count[i]=0;
2046 
2047  for (int32_t i=0; i<l; i++)
2048  {
2049  if (CMath::abs(alpha[i]) > 0)
2050  class_sv_count[(int32_t) y[i]]++;
2051  }
2052 
2053  model->l=l;
2054  // rho[0]= rho in mcsvm paper, rho[1]...rho[nr_class] is bias in mcsvm paper
2055  model->rho = SG_MALLOC(float64_t, nr_class+1);
2056  model->nr_class = nr_class;
2057  model->label = NULL;
2058  model->SV = SG_MALLOC(svm_node*,nr_class);
2059  model->nSV = SG_MALLOC(int32_t, nr_class);
2060  model->sv_coef = SG_MALLOC(float64_t *,nr_class);
2061  model->normwcw = SG_MALLOC(float64_t,nr_class);
2062 
2063  for (int32_t i=0; i<nr_class+1; i++)
2064  model->rho[i]=0;
2065 
2066  model->objective = si->obj;
2067 
2068  if (param->use_bias)
2069  {
2070  SG_SDEBUG("Computing biases and primal objective\n");
2071  float64_t primal = s.compute_primal(y, alpha, model->rho, model->normwcw);
2072  SG_SINFO("Primal = %10.10f\n", primal);
2073  }
2074 
2075  for (int32_t i=0; i<nr_class; i++)
2076  {
2077  model->nSV[i]=class_sv_count[i];
2078  model->SV[i] = SG_MALLOC(svm_node,class_sv_count[i]);
2079  model->sv_coef[i] = SG_MALLOC(float64_t,class_sv_count[i]);
2080  class_sv_count[i]=0;
2081  }
2082 
2083  for (int32_t i=0;i<prob->l;i++)
2084  {
2085  if(fabs(alpha[i]) > 0)
2086  {
2087  model->SV[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]].index = prob->x[i]->index;
2088  model->sv_coef[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]] = alpha[i];
2089  class_sv_count[(int32_t) y[i]]++;
2090  }
2091  }
2092 
2093  SG_FREE(y);
2094  SG_FREE(zeros);
2095  SG_FREE(alpha);
2096 }
2097 
2098 static void solve_one_class(
2099  const svm_problem *prob, const svm_parameter *param,
2100  float64_t *alpha, Solver::SolutionInfo* si)
2101 {
2102  int32_t l = prob->l;
2103  float64_t *zeros = SG_MALLOC(float64_t, l);
2104  schar *ones = SG_MALLOC(schar, l);
2105  int32_t i;
2106 
2107  int32_t n = (int32_t)(param->nu*prob->l); // # of alpha's at upper bound
2108 
2109  for(i=0;i<n;i++)
2110  alpha[i] = 1;
2111  if(n<prob->l)
2112  alpha[n] = param->nu * prob->l - n;
2113  for(i=n+1;i<l;i++)
2114  alpha[i] = 0;
2115 
2116  for(i=0;i<l;i++)
2117  {
2118  zeros[i] = 0;
2119  ones[i] = 1;
2120  }
2121 
2122  Solver s;
2123  s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
2124  alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias);
2125 
2126  SG_FREE(zeros);
2127  SG_FREE(ones);
2128 }
2129 
2130 static void solve_epsilon_svr(
2131  const svm_problem *prob, const svm_parameter *param,
2132  float64_t *alpha, Solver::SolutionInfo* si)
2133 {
2134  int32_t l = prob->l;
2135  float64_t *alpha2 = SG_MALLOC(float64_t, 2*l);
2136  float64_t *linear_term = SG_MALLOC(float64_t, 2*l);
2137  schar *y = SG_MALLOC(schar, 2*l);
2138  int32_t i;
2139 
2140  for(i=0;i<l;i++)
2141  {
2142  alpha2[i] = 0;
2143  linear_term[i] = param->p - prob->y[i];
2144  y[i] = 1;
2145 
2146  alpha2[i+l] = 0;
2147  linear_term[i+l] = param->p + prob->y[i];
2148  y[i+l] = -1;
2149  }
2150 
2151  Solver s;
2152  s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
2153  alpha2, param->C, param->C, param->eps, si, param->shrinking, param->use_bias);
2154 
2155  float64_t sum_alpha = 0;
2156  for(i=0;i<l;i++)
2157  {
2158  alpha[i] = alpha2[i] - alpha2[i+l];
2159  sum_alpha += fabs(alpha[i]);
2160  }
2161  SG_SINFO("nu = %f\n",sum_alpha/(param->C*l));
2162 
2163  SG_FREE(alpha2);
2164  SG_FREE(linear_term);
2165  SG_FREE(y);
2166 }
2167 
2168 static void solve_nu_svr(
2169  const svm_problem *prob, const svm_parameter *param,
2170  float64_t *alpha, Solver::SolutionInfo* si)
2171 {
2172  int32_t l = prob->l;
2173  float64_t C = param->C;
2174  float64_t *alpha2 = SG_MALLOC(float64_t, 2*l);
2175  float64_t *linear_term = SG_MALLOC(float64_t, 2*l);
2176  schar *y = SG_MALLOC(schar, 2*l);
2177  int32_t i;
2178 
2179  float64_t sum = C * param->nu * l / 2;
2180  for(i=0;i<l;i++)
2181  {
2182  alpha2[i] = alpha2[i+l] = CMath::min(sum,C);
2183  sum -= alpha2[i];
2184 
2185  linear_term[i] = - prob->y[i];
2186  y[i] = 1;
2187 
2188  linear_term[i+l] = prob->y[i];
2189  y[i+l] = -1;
2190  }
2191 
2192  Solver_NU s;
2193  s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
2194  alpha2, C, C, param->eps, si, param->shrinking, param->use_bias);
2195 
2196  SG_SINFO("epsilon = %f\n",-si->r);
2197 
2198  for(i=0;i<l;i++)
2199  alpha[i] = alpha2[i] - alpha2[i+l];
2200 
2201  SG_FREE(alpha2);
2202  SG_FREE(linear_term);
2203  SG_FREE(y);
2204 }
2205 
2206 //
2207 // decision_function
2208 //
struct decision_function
{
	float64_t *alpha;    // dual coefficients of the trained machine (freed by the caller)
	float64_t rho;       // bias term of the decision function
	float64_t objective; // dual objective value at the solution
};
2215 
2216 decision_function svm_train_one(
2217  const svm_problem *prob, const svm_parameter *param,
2218  float64_t Cp, float64_t Cn)
2219 {
2220  float64_t *alpha = SG_MALLOC(float64_t, prob->l);
2221  Solver::SolutionInfo si;
2222  switch(param->svm_type)
2223  {
2224  case C_SVC:
2225  solve_c_svc(prob,param,alpha,&si,Cp,Cn);
2226  break;
2227  case NU_SVC:
2228  solve_nu_svc(prob,param,alpha,&si);
2229  break;
2230  case ONE_CLASS:
2231  solve_one_class(prob,param,alpha,&si);
2232  break;
2233  case EPSILON_SVR:
2234  solve_epsilon_svr(prob,param,alpha,&si);
2235  break;
2236  case NU_SVR:
2237  solve_nu_svr(prob,param,alpha,&si);
2238  break;
2239  }
2240 
2241  SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);
2242 
2243  // output SVs
2244  if (param->svm_type != ONE_CLASS)
2245  {
2246  int32_t nSV = 0;
2247  int32_t nBSV = 0;
2248  for(int32_t i=0;i<prob->l;i++)
2249  {
2250  if(fabs(alpha[i]) > 0)
2251  {
2252  ++nSV;
2253  if(prob->y[i] > 0)
2254  {
2255  if(fabs(alpha[i]) >= si.upper_bound_p)
2256  ++nBSV;
2257  }
2258  else
2259  {
2260  if(fabs(alpha[i]) >= si.upper_bound_n)
2261  ++nBSV;
2262  }
2263  }
2264  }
2265  SG_SINFO("nSV = %d, nBSV = %d\n",nSV,nBSV);
2266  }
2267 
2268  decision_function f;
2269  f.alpha = alpha;
2270  f.rho = si.rho;
2271  f.objective=si.obj;
2272  return f;
2273 }
2274 
2275 //
2276 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2277 // perm, length l, must be allocated before calling this subroutine
2278 void svm_group_classes(
2279  const svm_problem *prob, int32_t *nr_class_ret, int32_t **label_ret,
2280  int32_t **start_ret, int32_t **count_ret, int32_t *perm)
2281 {
2282  int32_t l = prob->l;
2283  int32_t max_nr_class = 16;
2284  int32_t nr_class = 0;
2285  int32_t *label = SG_MALLOC(int32_t, max_nr_class);
2286  int32_t *count = SG_MALLOC(int32_t, max_nr_class);
2287  int32_t *data_label = SG_MALLOC(int32_t, l);
2288  int32_t i;
2289 
2290  for(i=0;i<l;i++)
2291  {
2292  int32_t this_label=(int32_t) prob->y[i];
2293  int32_t j;
2294  for(j=0;j<nr_class;j++)
2295  {
2296  if(this_label == label[j])
2297  {
2298  ++count[j];
2299  break;
2300  }
2301  }
2302  data_label[i] = j;
2303  if(j == nr_class)
2304  {
2305  if(nr_class == max_nr_class)
2306  {
2307  max_nr_class *= 2;
2308  label=SG_REALLOC(int32_t, label,max_nr_class);
2309  count=SG_REALLOC(int32_t, count,max_nr_class);
2310  }
2311  label[nr_class] = this_label;
2312  count[nr_class] = 1;
2313  ++nr_class;
2314  }
2315  }
2316 
2317  int32_t *start = SG_MALLOC(int32_t, nr_class);
2318  start[0] = 0;
2319  for(i=1;i<nr_class;i++)
2320  start[i] = start[i-1]+count[i-1];
2321  for(i=0;i<l;i++)
2322  {
2323  perm[start[data_label[i]]] = i;
2324  ++start[data_label[i]];
2325  }
2326  start[0] = 0;
2327  for(i=1;i<nr_class;i++)
2328  start[i] = start[i-1]+count[i-1];
2329 
2330  *nr_class_ret = nr_class;
2331  *label_ret = label;
2332  *start_ret = start;
2333  *count_ret = count;
2334  SG_FREE(data_label);
2335 }
2336 
2337 //
2338 // Interface functions
2339 //
// Train an svm_model for any of the supported formulations.
// One-class/regression train a single machine; NU_MULTICLASS_SVC solves
// one joint problem; all other classification types train the usual
// one-vs-one k*(k-1)/2 binary machines.
svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
{
	svm_model *model = SG_MALLOC(svm_model,1);
	model->param = *param;
	model->free_sv = 0; // XXX: model does not own the SV nodes here (they point into prob->x)

	if(param->svm_type == ONE_CLASS ||
	   param->svm_type == EPSILON_SVR ||
	   param->svm_type == NU_SVR)
	{
		SG_SINFO("training one class svm or doing epsilon sv regression\n");

		// regression or one-class-svm: a single machine, stored as the
		// degenerate two-"class" case.
		model->nr_class = 2;
		model->label = NULL;
		model->nSV = NULL;
		model->sv_coef = SG_MALLOC(float64_t *,1);
		decision_function f = svm_train_one(prob,param,0,0);
		model->rho = SG_MALLOC(float64_t, 1);
		model->rho[0] = f.rho;
		model->objective = f.objective;

		// Keep only the examples with non-zero coefficient as SVs.
		int32_t nSV = 0;
		int32_t i;
		for(i=0;i<prob->l;i++)
			if(fabs(f.alpha[i]) > 0) ++nSV;
		model->l = nSV;
		model->SV = SG_MALLOC(svm_node *,nSV);
		model->sv_coef[0] = SG_MALLOC(float64_t, nSV);
		int32_t j = 0;
		for(i=0;i<prob->l;i++)
			if(fabs(f.alpha[i]) > 0)
			{
				model->SV[j] = prob->x[i];
				model->sv_coef[0][j] = f.alpha[i];
				++j;
			}

		SG_FREE(f.alpha);
	}
	else if(param->svm_type == NU_MULTICLASS_SVC)
	{
		// Joint multiclass problem; the solver fills the model itself.
		Solver::SolutionInfo si;
		solve_nu_multiclass_svc(prob,param,&si,model);
		SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);
	}
	else
	{
		// classification: one-vs-one over nr_class classes.
		int32_t l = prob->l;
		int32_t nr_class;
		int32_t *label = NULL;
		int32_t *start = NULL;
		int32_t *count = NULL;
		int32_t *perm = SG_MALLOC(int32_t, l);

		// group training data of the same class
		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
		svm_node **x = SG_MALLOC(svm_node *,l);
		float64_t *C = SG_MALLOC(float64_t,l);
		float64_t *pv = SG_MALLOC(float64_t,l);


		// Reorder x/C/pv so each class is contiguous (per perm).
		int32_t i;
		for(i=0;i<l;i++) {
			x[i] = prob->x[perm[i]];
			C[i] = prob->C[perm[i]];

			if (prob->pv)
			{
				pv[i] = prob->pv[perm[i]];
			}
			else
			{
				//no custom linear term is set
				pv[i] = -1.0;
			}

		}


		// calculate weighted C: apply the user's per-class weights on top
		// of the global C.
		float64_t *weighted_C = SG_MALLOC(float64_t, nr_class);
		for(i=0;i<nr_class;i++)
			weighted_C[i] = param->C;
		for(i=0;i<param->nr_weight;i++)
		{
			int32_t j;
			for(j=0;j<nr_class;j++)
				if(param->weight_label[i] == label[j])
					break;
			if(j == nr_class)
				SG_SWARNING("warning: class label %d specified in weight is not found\n", param->weight_label[i]);
			else
				weighted_C[j] *= param->weight[i];
		}

		// train k*(k-1)/2 models

		bool *nonzero = SG_MALLOC(bool,l);
		for(i=0;i<l;i++)
			nonzero[i] = false;
		decision_function *f = SG_MALLOC(decision_function,nr_class*(nr_class-1)/2);

		int32_t p = 0;
		for(i=0;i<nr_class;i++)
			for(int32_t j=i+1;j<nr_class;j++)
			{
				// Build the (i,j) sub-problem: class i as +1, class j as -1.
				svm_problem sub_prob;
				int32_t si = start[i], sj = start[j];
				int32_t ci = count[i], cj = count[j];
				sub_prob.l = ci+cj;
				sub_prob.x = SG_MALLOC(svm_node *,sub_prob.l);
				sub_prob.y = SG_MALLOC(float64_t,sub_prob.l+1); //dirty hack to surpress valgrind err
				sub_prob.C = SG_MALLOC(float64_t,sub_prob.l+1);
				sub_prob.pv = SG_MALLOC(float64_t,sub_prob.l+1);

				int32_t k;
				for(k=0;k<ci;k++)
				{
					sub_prob.x[k] = x[si+k];
					sub_prob.y[k] = +1;
					sub_prob.C[k] = C[si+k];
					sub_prob.pv[k] = pv[si+k];

				}
				for(k=0;k<cj;k++)
				{
					sub_prob.x[ci+k] = x[sj+k];
					sub_prob.y[ci+k] = -1;
					sub_prob.C[ci+k] = C[sj+k];
					sub_prob.pv[ci+k] = pv[sj+k];
				}
				sub_prob.y[sub_prob.l]=-1; //dirty hack to surpress valgrind err
				sub_prob.C[sub_prob.l]=-1;
				sub_prob.pv[sub_prob.l]=-1;

				f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
				// Mark examples that became SVs in any pairwise machine.
				for(k=0;k<ci;k++)
					if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
						nonzero[si+k] = true;
				for(k=0;k<cj;k++)
					if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
						nonzero[sj+k] = true;
				SG_FREE(sub_prob.x);
				SG_FREE(sub_prob.y);
				SG_FREE(sub_prob.C);
				SG_FREE(sub_prob.pv);
				++p;
			}

		// build output

		model->objective = f[0].objective;
		model->nr_class = nr_class;

		model->label = SG_MALLOC(int32_t, nr_class);
		for(i=0;i<nr_class;i++)
			model->label[i] = label[i];

		model->rho = SG_MALLOC(float64_t, nr_class*(nr_class-1)/2);
		for(i=0;i<nr_class*(nr_class-1)/2;i++)
			model->rho[i] = f[i].rho;

		// Count SVs per class (an example is an SV if any machine used it).
		int32_t total_sv = 0;
		int32_t *nz_count = SG_MALLOC(int32_t, nr_class);
		model->nSV = SG_MALLOC(int32_t, nr_class);
		for(i=0;i<nr_class;i++)
		{
			int32_t nSV = 0;
			for(int32_t j=0;j<count[i];j++)
				if(nonzero[start[i]+j])
				{
					++nSV;
					++total_sv;
				}
			model->nSV[i] = nSV;
			nz_count[i] = nSV;
		}

		SG_SINFO("Total nSV = %d\n",total_sv);

		model->l = total_sv;
		model->SV = SG_MALLOC(svm_node *,total_sv);
		p = 0;
		for(i=0;i<l;i++)
			if(nonzero[i]) model->SV[p++] = x[i];

		// Offsets of each class's SVs inside the packed SV array.
		int32_t *nz_start = SG_MALLOC(int32_t, nr_class);
		nz_start[0] = 0;
		for(i=1;i<nr_class;i++)
			nz_start[i] = nz_start[i-1]+nz_count[i-1];

		model->sv_coef = SG_MALLOC(float64_t *,nr_class-1);
		for(i=0;i<nr_class-1;i++)
			model->sv_coef[i] = SG_MALLOC(float64_t, total_sv);

		p = 0;
		for(i=0;i<nr_class;i++)
			for(int32_t j=i+1;j<nr_class;j++)
			{
				// classifier (i,j): coefficients with
				// i are in sv_coef[j-1][nz_start[i]...],
				// j are in sv_coef[i][nz_start[j]...]

				int32_t si = start[i];
				int32_t sj = start[j];
				int32_t ci = count[i];
				int32_t cj = count[j];

				int32_t q = nz_start[i];
				int32_t k;
				for(k=0;k<ci;k++)
					if(nonzero[si+k])
						model->sv_coef[j-1][q++] = f[p].alpha[k];
				q = nz_start[j];
				for(k=0;k<cj;k++)
					if(nonzero[sj+k])
						model->sv_coef[i][q++] = f[p].alpha[ci+k];
				++p;
			}

		SG_FREE(label);
		SG_FREE(count);
		SG_FREE(perm);
		SG_FREE(start);
		SG_FREE(x);
		SG_FREE(C);
		SG_FREE(pv);
		SG_FREE(weighted_C);
		SG_FREE(nonzero);
		for(i=0;i<nr_class*(nr_class-1)/2;i++)
			SG_FREE(f[i].alpha);
		SG_FREE(f);
		SG_FREE(nz_count);
		SG_FREE(nz_start);
	}
	return model;
}
2579 
// Free an svm_model and the arrays it owns.
void svm_destroy_model(svm_model* model)
{
	// When the model owns its SVs, SV[0] is assumed to head the single
	// backing allocation — TODO confirm against the loader that sets free_sv.
	if(model->free_sv && model->l > 0)
		SG_FREE((void *)(model->SV[0]));
	// NOTE(review): only nr_class-1 coefficient rows are freed here, but the
	// NU_MULTICLASS_SVC path allocates nr_class rows plus model->normwcw and
	// per-class SV arrays; those appear to leak. normwcw cannot simply be
	// freed here because other paths leave it uninitialized — verify.
	for(int32_t i=0;i<model->nr_class-1;i++)
		SG_FREE(model->sv_coef[i]);
	SG_FREE(model->SV);
	SG_FREE(model->sv_coef);
	SG_FREE(model->rho);
	SG_FREE(model->label);
	SG_FREE(model->nSV);
	SG_FREE(model);
}
2593 
// Release the per-class weight arrays owned by the parameter struct.
void svm_destroy_param(svm_parameter* param)
{
	SG_FREE(param->weight_label);
	SG_FREE(param->weight);
}
2599 
2600 const char *svm_check_parameter(
2601  const svm_problem *prob, const svm_parameter *param)
2602 {
2603  // svm_type
2604 
2605  int32_t svm_type = param->svm_type;
2606  if(svm_type != C_SVC &&
2607  svm_type != NU_SVC &&
2608  svm_type != ONE_CLASS &&
2609  svm_type != EPSILON_SVR &&
2610  svm_type != NU_SVR &&
2611  svm_type != NU_MULTICLASS_SVC)
2612  return "unknown svm type";
2613 
2614  // kernel_type, degree
2615 
2616  int32_t kernel_type = param->kernel_type;
2617  if(kernel_type != LINEAR &&
2618  kernel_type != POLY &&
2619  kernel_type != RBF &&
2620  kernel_type != SIGMOID &&
2621  kernel_type != PRECOMPUTED)
2622  return "unknown kernel type";
2623 
2624  if(param->degree < 0)
2625  return "degree of polynomial kernel < 0";
2626 
2627  // cache_size,eps,C,nu,p,shrinking
2628 
2629  if(param->cache_size <= 0)
2630  return "cache_size <= 0";
2631 
2632  if(param->eps <= 0)
2633  return "eps <= 0";
2634 
2635  if(svm_type == C_SVC ||
2636  svm_type == EPSILON_SVR ||
2637  svm_type == NU_SVR)
2638  if(param->C <= 0)
2639  return "C <= 0";
2640 
2641  if(svm_type == NU_SVC ||
2642  svm_type == ONE_CLASS ||
2643  svm_type == NU_SVR)
2644  if(param->nu <= 0 || param->nu > 1)
2645  return "nu <= 0 or nu > 1";
2646 
2647  if(svm_type == EPSILON_SVR)
2648  if(param->p < 0)
2649  return "p < 0";
2650 
2651  if(param->shrinking != 0 &&
2652  param->shrinking != 1)
2653  return "shrinking != 0 and shrinking != 1";
2654 
2655 
2656  // check whether nu-svc is feasible
2657 
2658  if(svm_type == NU_SVC)
2659  {
2660  int32_t l = prob->l;
2661  int32_t max_nr_class = 16;
2662  int32_t nr_class = 0;
2663  int32_t *label = SG_MALLOC(int32_t, max_nr_class);
2664  int32_t *count = SG_MALLOC(int32_t, max_nr_class);
2665 
2666  int32_t i;
2667  for(i=0;i<l;i++)
2668  {
2669  int32_t this_label = (int32_t) prob->y[i];
2670  int32_t j;
2671  for(j=0;j<nr_class;j++)
2672  if(this_label == label[j])
2673  {
2674  ++count[j];
2675  break;
2676  }
2677  if(j == nr_class)
2678  {
2679  if(nr_class == max_nr_class)
2680  {
2681  max_nr_class *= 2;
2682  label=SG_REALLOC(int32_t, label, max_nr_class);
2683  count=SG_REALLOC(int32_t, count, max_nr_class);
2684  }
2685  label[nr_class] = this_label;
2686  count[nr_class] = 1;
2687  ++nr_class;
2688  }
2689  }
2690 
2691  for(i=0;i<nr_class;i++)
2692  {
2693  int32_t n1 = count[i];
2694  for(int32_t j=i+1;j<nr_class;j++)
2695  {
2696  int32_t n2 = count[j];
2697  if(param->nu*(n1+n2)/2 > CMath::min(n1,n2))
2698  {
2699  SG_FREE(label);
2700  SG_FREE(count);
2701  return "specified nu is infeasible";
2702  }
2703  }
2704  }
2705  SG_FREE(label);
2706  SG_FREE(count);
2707  }
2708 
2709  return NULL;
2710 }
2711 }
2712 #endif // DOXYGEN_SHOULD_SKIP_THIS

SHOGUN Machine Learning Toolbox - Documentation