// SHOGUN v0.9.0 -- generated source listing header (rendering artifact, not code)
00001 /* 00002 * Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * 1. Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * 00012 * 2. Redistributions in binary form must reproduce the above copyright 00013 * notice, this list of conditions and the following disclaimer in the 00014 * documentation and/or other materials provided with the distribution. 00015 * 00016 * 3. Neither name of copyright holders nor the names of its contributors 00017 * may be used to endorse or promote products derived from this software 00018 * without specific prior written permission. 00019 * 00020 * 00021 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00022 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00023 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00024 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 00025 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00026 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00027 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00032 * 00033 * Shogun specific adjustments (w) 2006-2009 Soeren Sonnenburg 00034 */ 00035 00036 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00037 00038 #include "lib/memory.h" 00039 #include "classifier/svm/SVM_libsvm.h" 00040 #include "kernel/Kernel.h" 00041 #include "lib/io.h" 00042 #include "lib/Time.h" 00043 #include "lib/Signal.h" 00044 #include "lib/common.h" 00045 #include "lib/Mathematics.h" 00046 00047 #include <stdio.h> 00048 #include <stdlib.h> 00049 #include <ctype.h> 00050 #include <float.h> 00051 #include <string.h> 00052 #include <stdarg.h> 00053 00054 namespace shogun 00055 { 00056 00057 typedef KERNELCACHE_ELEM Qfloat; 00058 typedef float64_t schar; 00059 00060 template <class S, class T> inline void clone(T*& dst, S* src, int32_t n) 00061 { 00062 dst = new T[n]; 00063 memcpy((void *)dst,(void *)src,sizeof(T)*n); 00064 } 00065 #define INF HUGE_VAL 00066 #define TAU 1e-12 00067 #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 00068 00069 class QMatrix; 00070 class SVC_QMC; 00071 00072 // 00073 // Kernel Cache 00074 // 00075 // l is the number of total data items 00076 // size is the cache size limit in bytes 00077 // 00078 class Cache 00079 { 00080 public: 00081 Cache(int32_t l, int64_t size); 00082 ~Cache(); 00083 00084 // request data [0,len) 00085 // return some position p where [p,len) need to be filled 00086 // (p >= len if nothing needs to be filled) 00087 int32_t get_data(const int32_t index, Qfloat **data, int32_t len); 00088 void swap_index(int32_t i, int32_t j); // future_option 00089 00090 private: 00091 int32_t l; 00092 int64_t size; 00093 struct head_t 00094 { 00095 head_t *prev, *next; // a circular list 00096 Qfloat *data; 00097 int32_t len; // data[0,len) is cached in this entry 00098 }; 00099 00100 head_t *head; 00101 head_t lru_head; 00102 void lru_delete(head_t *h); 00103 void lru_insert(head_t *h); 00104 }; 00105 00106 Cache::Cache(int32_t l_, int64_t size_):l(l_),size(size_) 00107 { 00108 head = (head_t *)calloc(l,sizeof(head_t)); 
// initialized to 0 00109 size /= sizeof(Qfloat); 00110 size -= l * sizeof(head_t) / sizeof(Qfloat); 00111 size = CMath::max(size, (int64_t) 2*l); // cache must be large enough for two columns 00112 lru_head.next = lru_head.prev = &lru_head; 00113 } 00114 00115 Cache::~Cache() 00116 { 00117 for(head_t *h = lru_head.next; h != &lru_head; h=h->next) 00118 free(h->data); 00119 free(head); 00120 } 00121 00122 void Cache::lru_delete(head_t *h) 00123 { 00124 // delete from current location 00125 h->prev->next = h->next; 00126 h->next->prev = h->prev; 00127 } 00128 00129 void Cache::lru_insert(head_t *h) 00130 { 00131 // insert to last position 00132 h->next = &lru_head; 00133 h->prev = lru_head.prev; 00134 h->prev->next = h; 00135 h->next->prev = h; 00136 } 00137 00138 int32_t Cache::get_data(const int32_t index, Qfloat **data, int32_t len) 00139 { 00140 head_t *h = &head[index]; 00141 if(h->len) lru_delete(h); 00142 int32_t more = len - h->len; 00143 00144 if(more > 0) 00145 { 00146 // free old space 00147 while(size < more) 00148 { 00149 head_t *old = lru_head.next; 00150 lru_delete(old); 00151 free(old->data); 00152 size += old->len; 00153 old->data = 0; 00154 old->len = 0; 00155 } 00156 00157 // allocate new space 00158 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len); 00159 size -= more; 00160 CMath::swap(h->len,len); 00161 } 00162 00163 lru_insert(h); 00164 *data = h->data; 00165 return len; 00166 } 00167 00168 void Cache::swap_index(int32_t i, int32_t j) 00169 { 00170 if(i==j) return; 00171 00172 if(head[i].len) lru_delete(&head[i]); 00173 if(head[j].len) lru_delete(&head[j]); 00174 CMath::swap(head[i].data,head[j].data); 00175 CMath::swap(head[i].len,head[j].len); 00176 if(head[i].len) lru_insert(&head[i]); 00177 if(head[j].len) lru_insert(&head[j]); 00178 00179 if(i>j) CMath::swap(i,j); 00180 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next) 00181 { 00182 if(h->len > i) 00183 { 00184 if(h->len > j) 00185 CMath::swap(h->data[i],h->data[j]); 00186 
else 00187 { 00188 // give up 00189 lru_delete(h); 00190 free(h->data); 00191 size += h->len; 00192 h->data = 0; 00193 h->len = 0; 00194 } 00195 } 00196 } 00197 } 00198 00199 // 00200 // Kernel evaluation 00201 // 00202 // the static method k_function is for doing single kernel evaluation 00203 // the constructor of Kernel prepares to calculate the l*l kernel matrix 00204 // the member function get_Q is for getting one column from the Q Matrix 00205 // 00206 class QMatrix { 00207 public: 00208 virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0; 00209 virtual Qfloat *get_QD() const = 0; 00210 virtual void swap_index(int32_t i, int32_t j) const = 0; 00211 virtual ~QMatrix() {} 00212 00213 float64_t max_train_time; 00214 }; 00215 00216 class LibSVMKernel: public QMatrix { 00217 public: 00218 LibSVMKernel(int32_t l, svm_node * const * x, const svm_parameter& param); 00219 virtual ~LibSVMKernel(); 00220 00221 virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0; 00222 virtual Qfloat *get_QD() const = 0; 00223 virtual void swap_index(int32_t i, int32_t j) const // no so const... 
00224 { 00225 CMath::swap(x[i],x[j]); 00226 if(x_square) CMath::swap(x_square[i],x_square[j]); 00227 } 00228 00229 inline float64_t kernel_function(int32_t i, int32_t j) const 00230 { 00231 return kernel->kernel(x[i]->index,x[j]->index); 00232 } 00233 00234 private: 00235 CKernel* kernel; 00236 const svm_node **x; 00237 float64_t *x_square; 00238 00239 // svm_parameter 00240 const int32_t kernel_type; 00241 const int32_t degree; 00242 const float64_t gamma; 00243 const float64_t coef0; 00244 }; 00245 00246 LibSVMKernel::LibSVMKernel(int32_t l, svm_node * const * x_, const svm_parameter& param) 00247 : kernel_type(param.kernel_type), degree(param.degree), 00248 gamma(param.gamma), coef0(param.coef0) 00249 { 00250 clone(x,x_,l); 00251 x_square = 0; 00252 kernel=param.kernel; 00253 max_train_time=param.max_train_time; 00254 } 00255 00256 LibSVMKernel::~LibSVMKernel() 00257 { 00258 delete[] x; 00259 delete[] x_square; 00260 } 00261 00262 // Generalized SMO+SVMlight algorithm 00263 // Solves: 00264 // 00265 // min 0.5(\alpha^T Q \alpha) + b^T \alpha 00266 // 00267 // y^T \alpha = \delta 00268 // y_i = +1 or -1 00269 // 0 <= alpha_i <= Cp for y_i = 1 00270 // 0 <= alpha_i <= Cn for y_i = -1 00271 // 00272 // Given: 00273 // 00274 // Q, p, y, Cp, Cn, and an initial feasible point \alpha 00275 // l is the size of vectors and matrices 00276 // eps is the stopping tolerance 00277 // 00278 // solution will be put in \alpha, objective value will be put in obj 00279 // 00280 class Solver { 00281 public: 00282 Solver() {}; 00283 virtual ~Solver() {}; 00284 00285 struct SolutionInfo { 00286 float64_t obj; 00287 float64_t rho; 00288 float64_t upper_bound_p; 00289 float64_t upper_bound_n; 00290 float64_t r; // for Solver_NU 00291 }; 00292 00293 void Solve( 00294 int32_t l, const QMatrix& Q, const float64_t *p_, const schar *y_, 00295 float64_t *alpha_, float64_t Cp, float64_t Cn, float64_t eps, 00296 SolutionInfo* si, int32_t shrinking, bool use_bias); 00297 00298 protected: 00299 
int32_t active_size; 00300 schar *y; 00301 float64_t *G; // gradient of objective function 00302 enum { LOWER_BOUND, UPPER_BOUND, FREE }; 00303 char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE 00304 float64_t *alpha; 00305 const QMatrix *Q; 00306 const Qfloat *QD; 00307 float64_t eps; 00308 float64_t Cp,Cn; 00309 float64_t *p; 00310 int32_t *active_set; 00311 float64_t *G_bar; // gradient, if we treat free variables as 0 00312 int32_t l; 00313 bool unshrink; // XXX 00314 00315 float64_t get_C(int32_t i) 00316 { 00317 return (y[i] > 0)? Cp : Cn; 00318 } 00319 void update_alpha_status(int32_t i) 00320 { 00321 if(alpha[i] >= get_C(i)) 00322 alpha_status[i] = UPPER_BOUND; 00323 else if(alpha[i] <= 0) 00324 alpha_status[i] = LOWER_BOUND; 00325 else alpha_status[i] = FREE; 00326 } 00327 bool is_upper_bound(int32_t i) { return alpha_status[i] == UPPER_BOUND; } 00328 bool is_lower_bound(int32_t i) { return alpha_status[i] == LOWER_BOUND; } 00329 bool is_free(int32_t i) { return alpha_status[i] == FREE; } 00330 void swap_index(int32_t i, int32_t j); 00331 void reconstruct_gradient(); 00332 virtual int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap); 00333 virtual float64_t calculate_rho(); 00334 virtual void do_shrinking(); 00335 private: 00336 bool be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2); 00337 }; 00338 00339 void Solver::swap_index(int32_t i, int32_t j) 00340 { 00341 Q->swap_index(i,j); 00342 CMath::swap(y[i],y[j]); 00343 CMath::swap(G[i],G[j]); 00344 CMath::swap(alpha_status[i],alpha_status[j]); 00345 CMath::swap(alpha[i],alpha[j]); 00346 CMath::swap(p[i],p[j]); 00347 CMath::swap(active_set[i],active_set[j]); 00348 CMath::swap(G_bar[i],G_bar[j]); 00349 } 00350 00351 void Solver::reconstruct_gradient() 00352 { 00353 // reconstruct inactive elements of G from G_bar and free variables 00354 00355 if(active_size == l) return; 00356 00357 int32_t i,j; 00358 int32_t nr_free = 0; 00359 00360 for(j=active_size;j<l;j++) 00361 G[j] = G_bar[j] + 
p[j]; 00362 00363 for(j=0;j<active_size;j++) 00364 if(is_free(j)) 00365 nr_free++; 00366 00367 if (nr_free*l > 2*active_size*(l-active_size)) 00368 { 00369 for(i=active_size;i<l;i++) 00370 { 00371 const Qfloat *Q_i = Q->get_Q(i,active_size); 00372 for(j=0;j<active_size;j++) 00373 if(is_free(j)) 00374 G[i] += alpha[j] * Q_i[j]; 00375 } 00376 } 00377 else 00378 { 00379 for(i=0;i<active_size;i++) 00380 if(is_free(i)) 00381 { 00382 const Qfloat *Q_i = Q->get_Q(i,l); 00383 float64_t alpha_i = alpha[i]; 00384 for(j=active_size;j<l;j++) 00385 G[j] += alpha_i * Q_i[j]; 00386 } 00387 } 00388 } 00389 00390 void Solver::Solve( 00391 int32_t p_l, const QMatrix& p_Q, const float64_t *p_p, 00392 const schar *p_y, float64_t *p_alpha, float64_t p_Cp, float64_t p_Cn, 00393 float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias) 00394 { 00395 this->l = p_l; 00396 this->Q = &p_Q; 00397 QD=Q->get_QD(); 00398 clone(p, p_p,l); 00399 clone(y, p_y,l); 00400 clone(alpha,p_alpha,l); 00401 this->Cp = p_Cp; 00402 this->Cn = p_Cn; 00403 this->eps = p_eps; 00404 unshrink = false; 00405 00406 // initialize alpha_status 00407 { 00408 alpha_status = new char[l]; 00409 for(int32_t i=0;i<l;i++) 00410 update_alpha_status(i); 00411 } 00412 00413 // initialize active set (for shrinking) 00414 { 00415 active_set = new int32_t[l]; 00416 for(int32_t i=0;i<l;i++) 00417 active_set[i] = i; 00418 active_size = l; 00419 } 00420 00421 // initialize gradient 00422 CSignal::clear_cancel(); 00423 CTime start_time; 00424 { 00425 G = new float64_t[l]; 00426 G_bar = new float64_t[l]; 00427 int32_t i; 00428 for(i=0;i<l;i++) 00429 { 00430 G[i] = p_p[i]; 00431 G_bar[i] = 0; 00432 } 00433 SG_SINFO("Computing gradient for initial set of non-zero alphas\n"); 00434 //CMath::display_vector(alpha, l, "alphas"); 00435 for(i=0;i<l && !CSignal::cancel_computations(); i++) 00436 { 00437 if(!is_lower_bound(i)) 00438 { 00439 const Qfloat *Q_i = Q->get_Q(i,l); 00440 float64_t alpha_i = alpha[i]; 00441 int32_t j; 00442 
for(j=0;j<l;j++) 00443 G[j] += alpha_i*Q_i[j]; 00444 if(is_upper_bound(i)) 00445 for(j=0;j<l;j++) 00446 G_bar[j] += get_C(i) * Q_i[j]; 00447 } 00448 SG_SPROGRESS(i, 0, l); 00449 } 00450 SG_SDONE(); 00451 } 00452 00453 // optimization step 00454 00455 int32_t iter = 0; 00456 int32_t counter = CMath::min(l,1000)+1; 00457 00458 while (!CSignal::cancel_computations()) 00459 { 00460 if (Q->max_train_time > 0 && start_time.cur_time_diff() > Q->max_train_time) 00461 break; 00462 00463 // show progress and do shrinking 00464 00465 if(--counter == 0) 00466 { 00467 counter = CMath::min(l,1000); 00468 if(shrinking) do_shrinking(); 00469 //SG_SINFO("."); 00470 } 00471 00472 int32_t i,j; 00473 float64_t gap; 00474 if(select_working_set(i,j, gap)!=0) 00475 { 00476 // reconstruct the whole gradient 00477 reconstruct_gradient(); 00478 // reset active set size and check 00479 active_size = l; 00480 //SG_SINFO("*"); 00481 if(select_working_set(i,j, gap)!=0) 00482 break; 00483 else 00484 counter = 1; // do shrinking next iteration 00485 } 00486 00487 SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6); 00488 00489 ++iter; 00490 00491 // update alpha[i] and alpha[j], handle bounds carefully 00492 00493 const Qfloat *Q_i = Q->get_Q(i,active_size); 00494 const Qfloat *Q_j = Q->get_Q(j,active_size); 00495 00496 float64_t C_i = get_C(i); 00497 float64_t C_j = get_C(j); 00498 00499 float64_t old_alpha_i = alpha[i]; 00500 float64_t old_alpha_j = alpha[j]; 00501 00502 if (!use_bias) 00503 { 00504 double pi=G[i]-Q_i[i]*alpha[i]-Q_i[j]*alpha[j]; 00505 double pj=G[j]-Q_i[j]*alpha[i]-Q_j[j]*alpha[j]; 00506 double det=Q_i[i]*Q_j[j]-Q_i[j]*Q_i[j]; 00507 double alpha_i=-(Q_j[j]*pi-Q_i[j]*pj)/det; 00508 alpha_i=CMath::min(C_i,CMath::max(0.0,alpha_i)); 00509 double alpha_j=-(-Q_i[j]*pi+Q_i[i]*pj)/det; 00510 alpha_j=CMath::min(C_j,CMath::max(0.0,alpha_j)); 00511 00512 if (alpha_i==0 || alpha_i == C_i) 00513 
alpha_j=CMath::min(C_j,CMath::max(0.0,-(pj+Q_i[j]*alpha_i)/Q_j[j])); 00514 if (alpha_j==0 || alpha_j == C_j) 00515 alpha_i=CMath::min(C_i,CMath::max(0.0,-(pi+Q_i[j]*alpha_j)/Q_i[i])); 00516 00517 alpha[i]=alpha_i; alpha[j]=alpha_j; 00518 } 00519 else 00520 { 00521 if(y[i]!=y[j]) 00522 { 00523 float64_t quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j]; 00524 if (quad_coef <= 0) 00525 quad_coef = TAU; 00526 float64_t delta = (-G[i]-G[j])/quad_coef; 00527 float64_t diff = alpha[i] - alpha[j]; 00528 alpha[i] += delta; 00529 alpha[j] += delta; 00530 00531 if(diff > 0) 00532 { 00533 if(alpha[j] < 0) 00534 { 00535 alpha[j] = 0; 00536 alpha[i] = diff; 00537 } 00538 } 00539 else 00540 { 00541 if(alpha[i] < 0) 00542 { 00543 alpha[i] = 0; 00544 alpha[j] = -diff; 00545 } 00546 } 00547 if(diff > C_i - C_j) 00548 { 00549 if(alpha[i] > C_i) 00550 { 00551 alpha[i] = C_i; 00552 alpha[j] = C_i - diff; 00553 } 00554 } 00555 else 00556 { 00557 if(alpha[j] > C_j) 00558 { 00559 alpha[j] = C_j; 00560 alpha[i] = C_j + diff; 00561 } 00562 } 00563 } 00564 else 00565 { 00566 float64_t quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j]; 00567 if (quad_coef <= 0) 00568 quad_coef = TAU; 00569 float64_t delta = (G[i]-G[j])/quad_coef; 00570 float64_t sum = alpha[i] + alpha[j]; 00571 alpha[i] -= delta; 00572 alpha[j] += delta; 00573 00574 if(sum > C_i) 00575 { 00576 if(alpha[i] > C_i) 00577 { 00578 alpha[i] = C_i; 00579 alpha[j] = sum - C_i; 00580 } 00581 } 00582 else 00583 { 00584 if(alpha[j] < 0) 00585 { 00586 alpha[j] = 0; 00587 alpha[i] = sum; 00588 } 00589 } 00590 if(sum > C_j) 00591 { 00592 if(alpha[j] > C_j) 00593 { 00594 alpha[j] = C_j; 00595 alpha[i] = sum - C_j; 00596 } 00597 } 00598 else 00599 { 00600 if(alpha[i] < 0) 00601 { 00602 alpha[i] = 0; 00603 alpha[j] = sum; 00604 } 00605 } 00606 } 00607 } 00608 00609 // update G 00610 00611 float64_t delta_alpha_i = alpha[i] - old_alpha_i; 00612 float64_t delta_alpha_j = alpha[j] - old_alpha_j; 00613 00614 for(int32_t k=0;k<active_size;k++) 00615 { 00616 G[k] += 
Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j; 00617 } 00618 00619 // update alpha_status and G_bar 00620 00621 { 00622 bool ui = is_upper_bound(i); 00623 bool uj = is_upper_bound(j); 00624 update_alpha_status(i); 00625 update_alpha_status(j); 00626 int32_t k; 00627 if(ui != is_upper_bound(i)) 00628 { 00629 Q_i = Q->get_Q(i,l); 00630 if(ui) 00631 for(k=0;k<l;k++) 00632 G_bar[k] -= C_i * Q_i[k]; 00633 else 00634 for(k=0;k<l;k++) 00635 G_bar[k] += C_i * Q_i[k]; 00636 } 00637 00638 if(uj != is_upper_bound(j)) 00639 { 00640 Q_j = Q->get_Q(j,l); 00641 if(uj) 00642 for(k=0;k<l;k++) 00643 G_bar[k] -= C_j * Q_j[k]; 00644 else 00645 for(k=0;k<l;k++) 00646 G_bar[k] += C_j * Q_j[k]; 00647 } 00648 } 00649 00650 #ifdef MCSVM_DEBUG 00651 // calculate objective value 00652 { 00653 float64_t v = 0; 00654 for(i=0;i<l;i++) 00655 v += alpha[i] * (G[i] + p[i]); 00656 00657 p_si->obj = v/2; 00658 00659 float64_t primal=0; 00660 //float64_t gap=100000; 00661 SG_SPRINT("dual obj=%f primal obf=%f gap=%f\n", v/2, primal, gap); 00662 } 00663 #endif 00664 } 00665 00666 // calculate rho 00667 00668 if (!use_bias) 00669 p_si->rho = 0; 00670 else 00671 p_si->rho = calculate_rho(); 00672 00673 // calculate objective value 00674 { 00675 float64_t v = 0; 00676 int32_t i; 00677 for(i=0;i<l;i++) 00678 v += alpha[i] * (G[i] + p[i]); 00679 00680 p_si->obj = v/2; 00681 } 00682 00683 // put back the solution 00684 { 00685 for(int32_t i=0;i<l;i++) 00686 p_alpha[active_set[i]] = alpha[i]; 00687 } 00688 00689 p_si->upper_bound_p = Cp; 00690 p_si->upper_bound_n = Cn; 00691 00692 SG_SINFO("\noptimization finished, #iter = %d\n",iter); 00693 00694 delete[] p; 00695 delete[] y; 00696 delete[] alpha; 00697 delete[] alpha_status; 00698 delete[] active_set; 00699 delete[] G; 00700 delete[] G_bar; 00701 } 00702 00703 // return 1 if already optimal, return 0 otherwise 00704 int32_t Solver::select_working_set( 00705 int32_t &out_i, int32_t &out_j, float64_t &gap) 00706 { 00707 // return i,j such that 00708 // i: 
maximizes -y_i * grad(f)_i, i in I_up(\alpha) 00709 // j: minimizes the decrease of obj value 00710 // (if quadratic coefficient <= 0, replace it with tau) 00711 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) 00712 00713 float64_t Gmax = -INF; 00714 float64_t Gmax2 = -INF; 00715 int32_t Gmax_idx = -1; 00716 int32_t Gmin_idx = -1; 00717 float64_t obj_diff_min = INF; 00718 00719 for(int32_t t=0;t<active_size;t++) 00720 if(y[t]==+1) 00721 { 00722 if(!is_upper_bound(t)) 00723 if(-G[t] >= Gmax) 00724 { 00725 Gmax = -G[t]; 00726 Gmax_idx = t; 00727 } 00728 } 00729 else 00730 { 00731 if(!is_lower_bound(t)) 00732 if(G[t] >= Gmax) 00733 { 00734 Gmax = G[t]; 00735 Gmax_idx = t; 00736 } 00737 } 00738 00739 int32_t i = Gmax_idx; 00740 const Qfloat *Q_i = NULL; 00741 if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1 00742 Q_i = Q->get_Q(i,active_size); 00743 00744 for(int32_t j=0;j<active_size;j++) 00745 { 00746 if(y[j]==+1) 00747 { 00748 if (!is_lower_bound(j)) 00749 { 00750 float64_t grad_diff=Gmax+G[j]; 00751 if (G[j] >= Gmax2) 00752 Gmax2 = G[j]; 00753 if (grad_diff > 0) 00754 { 00755 float64_t obj_diff; 00756 float64_t quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j]; 00757 if (quad_coef > 0) 00758 obj_diff = -(grad_diff*grad_diff)/quad_coef; 00759 else 00760 obj_diff = -(grad_diff*grad_diff)/TAU; 00761 00762 if (obj_diff <= obj_diff_min) 00763 { 00764 Gmin_idx=j; 00765 obj_diff_min = obj_diff; 00766 } 00767 } 00768 } 00769 } 00770 else 00771 { 00772 if (!is_upper_bound(j)) 00773 { 00774 float64_t grad_diff= Gmax-G[j]; 00775 if (-G[j] >= Gmax2) 00776 Gmax2 = -G[j]; 00777 if (grad_diff > 0) 00778 { 00779 float64_t obj_diff; 00780 float64_t quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j]; 00781 if (quad_coef > 0) 00782 obj_diff = -(grad_diff*grad_diff)/quad_coef; 00783 else 00784 obj_diff = -(grad_diff*grad_diff)/TAU; 00785 00786 if (obj_diff <= obj_diff_min) 00787 { 00788 Gmin_idx=j; 00789 obj_diff_min = obj_diff; 00790 } 00791 } 00792 } 00793 } 00794 } 00795 00796 
gap=Gmax+Gmax2; 00797 if(gap < eps) 00798 return 1; 00799 00800 out_i = Gmax_idx; 00801 out_j = Gmin_idx; 00802 return 0; 00803 } 00804 00805 bool Solver::be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2) 00806 { 00807 if(is_upper_bound(i)) 00808 { 00809 if(y[i]==+1) 00810 return(-G[i] > Gmax1); 00811 else 00812 return(-G[i] > Gmax2); 00813 } 00814 else if(is_lower_bound(i)) 00815 { 00816 if(y[i]==+1) 00817 return(G[i] > Gmax2); 00818 else 00819 return(G[i] > Gmax1); 00820 } 00821 else 00822 return(false); 00823 } 00824 00825 00826 void Solver::do_shrinking() 00827 { 00828 int32_t i; 00829 float64_t Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) } 00830 float64_t Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) } 00831 00832 // find maximal violating pair first 00833 for(i=0;i<active_size;i++) 00834 { 00835 if(y[i]==+1) 00836 { 00837 if(!is_upper_bound(i)) 00838 { 00839 if(-G[i] >= Gmax1) 00840 Gmax1 = -G[i]; 00841 } 00842 if(!is_lower_bound(i)) 00843 { 00844 if(G[i] >= Gmax2) 00845 Gmax2 = G[i]; 00846 } 00847 } 00848 else 00849 { 00850 if(!is_upper_bound(i)) 00851 { 00852 if(-G[i] >= Gmax2) 00853 Gmax2 = -G[i]; 00854 } 00855 if(!is_lower_bound(i)) 00856 { 00857 if(G[i] >= Gmax1) 00858 Gmax1 = G[i]; 00859 } 00860 } 00861 } 00862 00863 if(unshrink == false && Gmax1 + Gmax2 <= eps*10) 00864 { 00865 unshrink = true; 00866 reconstruct_gradient(); 00867 active_size = l; 00868 } 00869 00870 for(i=0;i<active_size;i++) 00871 if (be_shrunk(i, Gmax1, Gmax2)) 00872 { 00873 active_size--; 00874 while (active_size > i) 00875 { 00876 if (!be_shrunk(active_size, Gmax1, Gmax2)) 00877 { 00878 swap_index(i,active_size); 00879 break; 00880 } 00881 active_size--; 00882 } 00883 } 00884 } 00885 00886 float64_t Solver::calculate_rho() 00887 { 00888 float64_t r; 00889 int32_t nr_free = 0; 00890 float64_t ub = INF, lb = -INF, sum_free = 0; 00891 for(int32_t i=0;i<active_size;i++) 00892 { 00893 float64_t yG = y[i]*G[i]; 00894 00895 if(is_upper_bound(i)) 00896 { 
00897 if(y[i]==-1) 00898 ub = CMath::min(ub,yG); 00899 else 00900 lb = CMath::max(lb,yG); 00901 } 00902 else if(is_lower_bound(i)) 00903 { 00904 if(y[i]==+1) 00905 ub = CMath::min(ub,yG); 00906 else 00907 lb = CMath::max(lb,yG); 00908 } 00909 else 00910 { 00911 ++nr_free; 00912 sum_free += yG; 00913 } 00914 } 00915 00916 if(nr_free>0) 00917 r = sum_free/nr_free; 00918 else 00919 r = (ub+lb)/2; 00920 00921 return r; 00922 } 00923 00924 00925 // 00926 //Solve with individually weighted examples 00927 // 00928 class WeightedSolver : public Solver 00929 { 00930 00931 public: 00932 00933 WeightedSolver(float64_t* cost_vec) 00934 { 00935 00936 this->Cs = cost_vec; 00937 00938 } 00939 00940 virtual float64_t get_C(int32_t i) 00941 { 00942 00943 return Cs[i]; 00944 } 00945 00946 protected: 00947 00948 float64_t* Cs; 00949 00950 }; 00951 00952 00953 // 00954 // Solver for nu-svm classification and regression 00955 // 00956 // additional constraint: e^T \alpha = constant 00957 // 00958 class Solver_NU : public Solver 00959 { 00960 public: 00961 Solver_NU() {} 00962 void Solve( 00963 int32_t p_l, const QMatrix& p_Q, const float64_t *p_p, 00964 const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn, 00965 float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias) 00966 { 00967 this->si = p_si; 00968 Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si, 00969 shrinking,use_bias); 00970 } 00971 private: 00972 SolutionInfo *si; 00973 int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap); 00974 float64_t calculate_rho(); 00975 bool be_shrunk( 00976 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 00977 float64_t Gmax4); 00978 void do_shrinking(); 00979 }; 00980 00981 // return 1 if already optimal, return 0 otherwise 00982 int32_t Solver_NU::select_working_set( 00983 int32_t &out_i, int32_t &out_j, float64_t &gap) 00984 { 00985 // return i,j such that y_i = y_j and 00986 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) 
00987 // j: minimizes the decrease of obj value 00988 // (if quadratic coefficient <= 0, replace it with tau) 00989 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) 00990 00991 float64_t Gmaxp = -INF; 00992 float64_t Gmaxp2 = -INF; 00993 int32_t Gmaxp_idx = -1; 00994 00995 float64_t Gmaxn = -INF; 00996 float64_t Gmaxn2 = -INF; 00997 int32_t Gmaxn_idx = -1; 00998 00999 int32_t Gmin_idx = -1; 01000 float64_t obj_diff_min = INF; 01001 01002 for(int32_t t=0;t<active_size;t++) 01003 if(y[t]==+1) 01004 { 01005 if(!is_upper_bound(t)) 01006 if(-G[t] >= Gmaxp) 01007 { 01008 Gmaxp = -G[t]; 01009 Gmaxp_idx = t; 01010 } 01011 } 01012 else 01013 { 01014 if(!is_lower_bound(t)) 01015 if(G[t] >= Gmaxn) 01016 { 01017 Gmaxn = G[t]; 01018 Gmaxn_idx = t; 01019 } 01020 } 01021 01022 int32_t ip = Gmaxp_idx; 01023 int32_t in = Gmaxn_idx; 01024 const Qfloat *Q_ip = NULL; 01025 const Qfloat *Q_in = NULL; 01026 if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1 01027 Q_ip = Q->get_Q(ip,active_size); 01028 if(in != -1) 01029 Q_in = Q->get_Q(in,active_size); 01030 01031 for(int32_t j=0;j<active_size;j++) 01032 { 01033 if(y[j]==+1) 01034 { 01035 if (!is_lower_bound(j)) 01036 { 01037 float64_t grad_diff=Gmaxp+G[j]; 01038 if (G[j] >= Gmaxp2) 01039 Gmaxp2 = G[j]; 01040 if (grad_diff > 0) 01041 { 01042 float64_t obj_diff; 01043 float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j]; 01044 if (quad_coef > 0) 01045 obj_diff = -(grad_diff*grad_diff)/quad_coef; 01046 else 01047 obj_diff = -(grad_diff*grad_diff)/TAU; 01048 01049 if (obj_diff <= obj_diff_min) 01050 { 01051 Gmin_idx=j; 01052 obj_diff_min = obj_diff; 01053 } 01054 } 01055 } 01056 } 01057 else 01058 { 01059 if (!is_upper_bound(j)) 01060 { 01061 float64_t grad_diff=Gmaxn-G[j]; 01062 if (-G[j] >= Gmaxn2) 01063 Gmaxn2 = -G[j]; 01064 if (grad_diff > 0) 01065 { 01066 float64_t obj_diff; 01067 float64_t quad_coef = Q_in[in]+QD[j]-2*Q_in[j]; 01068 if (quad_coef > 0) 01069 obj_diff = -(grad_diff*grad_diff)/quad_coef; 01070 else 01071 
obj_diff = -(grad_diff*grad_diff)/TAU; 01072 01073 if (obj_diff <= obj_diff_min) 01074 { 01075 Gmin_idx=j; 01076 obj_diff_min = obj_diff; 01077 } 01078 } 01079 } 01080 } 01081 } 01082 01083 gap=CMath::max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2); 01084 if(gap < eps) 01085 return 1; 01086 01087 if (y[Gmin_idx] == +1) 01088 out_i = Gmaxp_idx; 01089 else 01090 out_i = Gmaxn_idx; 01091 out_j = Gmin_idx; 01092 01093 return 0; 01094 } 01095 01096 bool Solver_NU::be_shrunk( 01097 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 01098 float64_t Gmax4) 01099 { 01100 if(is_upper_bound(i)) 01101 { 01102 if(y[i]==+1) 01103 return(-G[i] > Gmax1); 01104 else 01105 return(-G[i] > Gmax4); 01106 } 01107 else if(is_lower_bound(i)) 01108 { 01109 if(y[i]==+1) 01110 return(G[i] > Gmax2); 01111 else 01112 return(G[i] > Gmax3); 01113 } 01114 else 01115 return(false); 01116 } 01117 01118 void Solver_NU::do_shrinking() 01119 { 01120 float64_t Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) } 01121 float64_t Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) } 01122 float64_t Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) } 01123 float64_t Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) } 01124 01125 // find maximal violating pair first 01126 int32_t i; 01127 for(i=0;i<active_size;i++) 01128 { 01129 if(!is_upper_bound(i)) 01130 { 01131 if(y[i]==+1) 01132 { 01133 if(-G[i] > Gmax1) Gmax1 = -G[i]; 01134 } 01135 else if(-G[i] > Gmax4) Gmax4 = -G[i]; 01136 } 01137 if(!is_lower_bound(i)) 01138 { 01139 if(y[i]==+1) 01140 { 01141 if(G[i] > Gmax2) Gmax2 = G[i]; 01142 } 01143 else if(G[i] > Gmax3) Gmax3 = G[i]; 01144 } 01145 } 01146 01147 if(unshrink == false && CMath::max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) 01148 { 01149 unshrink = true; 01150 reconstruct_gradient(); 01151 active_size = l; 01152 } 01153 01154 for(i=0;i<active_size;i++) 01155 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4)) 01156 { 01157 active_size--; 
01158 while (active_size > i) 01159 { 01160 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4)) 01161 { 01162 swap_index(i,active_size); 01163 break; 01164 } 01165 active_size--; 01166 } 01167 } 01168 } 01169 01170 float64_t Solver_NU::calculate_rho() 01171 { 01172 int32_t nr_free1 = 0,nr_free2 = 0; 01173 float64_t ub1 = INF, ub2 = INF; 01174 float64_t lb1 = -INF, lb2 = -INF; 01175 float64_t sum_free1 = 0, sum_free2 = 0; 01176 01177 for(int32_t i=0; i<active_size; i++) 01178 { 01179 if(y[i]==+1) 01180 { 01181 if(is_upper_bound(i)) 01182 lb1 = CMath::max(lb1,G[i]); 01183 else if(is_lower_bound(i)) 01184 ub1 = CMath::min(ub1,G[i]); 01185 else 01186 { 01187 ++nr_free1; 01188 sum_free1 += G[i]; 01189 } 01190 } 01191 else 01192 { 01193 if(is_upper_bound(i)) 01194 lb2 = CMath::max(lb2,G[i]); 01195 else if(is_lower_bound(i)) 01196 ub2 = CMath::min(ub2,G[i]); 01197 else 01198 { 01199 ++nr_free2; 01200 sum_free2 += G[i]; 01201 } 01202 } 01203 } 01204 01205 float64_t r1,r2; 01206 if(nr_free1 > 0) 01207 r1 = sum_free1/nr_free1; 01208 else 01209 r1 = (ub1+lb1)/2; 01210 01211 if(nr_free2 > 0) 01212 r2 = sum_free2/nr_free2; 01213 else 01214 r2 = (ub2+lb2)/2; 01215 01216 si->r = (r1+r2)/2; 01217 return (r1-r2)/2; 01218 } 01219 01220 class SVC_QMC: public LibSVMKernel 01221 { 01222 public: 01223 SVC_QMC(const svm_problem& prob, const svm_parameter& param, const schar *y_, int32_t n_class, float64_t fac) 01224 :LibSVMKernel(prob.l, prob.x, param) 01225 { 01226 nr_class=n_class; 01227 factor=fac; 01228 clone(y,y_,prob.l); 01229 cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20))); 01230 QD = new Qfloat[prob.l]; 01231 for(int32_t i=0;i<prob.l;i++) 01232 { 01233 QD[i]= factor*(nr_class-1)*kernel_function(i,i); 01234 } 01235 } 01236 01237 Qfloat *get_Q(int32_t i, int32_t len) const 01238 { 01239 Qfloat *data; 01240 int32_t start; 01241 if((start = cache->get_data(i,&data,len)) < len) 01242 { 01243 for(int32_t j=start;j<len;j++) 01244 { 01245 if (y[i]==y[j]) 01246 data[j] 
= factor*(nr_class-1)*kernel_function(i,j); 01247 else 01248 data[j] = -factor*kernel_function(i,j); 01249 } 01250 } 01251 return data; 01252 } 01253 01254 inline Qfloat get_orig_Qij(Qfloat Q, int32_t i, int32_t j) 01255 { 01256 if (y[i]==y[j]) 01257 return Q/(nr_class-1); 01258 else 01259 return -Q; 01260 } 01261 01262 Qfloat *get_QD() const 01263 { 01264 return QD; 01265 } 01266 01267 void swap_index(int32_t i, int32_t j) const 01268 { 01269 cache->swap_index(i,j); 01270 LibSVMKernel::swap_index(i,j); 01271 CMath::swap(y[i],y[j]); 01272 CMath::swap(QD[i],QD[j]); 01273 } 01274 01275 ~SVC_QMC() 01276 { 01277 delete[] y; 01278 delete cache; 01279 delete[] QD; 01280 } 01281 private: 01282 float64_t factor; 01283 float64_t nr_class; 01284 schar *y; 01285 Cache *cache; 01286 Qfloat *QD; 01287 }; 01288 01289 // 01290 // Solver for nu-svm classification and regression 01291 // 01292 // additional constraint: e^T \alpha = constant 01293 // 01294 class Solver_NUMC : public Solver 01295 { 01296 public: 01297 Solver_NUMC(int32_t n_class, float64_t svm_nu) 01298 { 01299 nr_class=n_class; 01300 nu=svm_nu; 01301 } 01302 01303 void Solve( 01304 int32_t p_l, const QMatrix& p_Q, const float64_t *p_p, 01305 const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn, 01306 float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias) 01307 { 01308 this->si = p_si; 01309 Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,shrinking, use_bias); 01310 } 01311 float64_t compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases,float64_t* normwcw); 01312 01313 private: 01314 SolutionInfo *si; 01315 int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap); 01316 float64_t calculate_rho(); 01317 bool be_shrunk( 01318 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 01319 float64_t Gmax4); 01320 void do_shrinking(); 01321 01322 private: 01323 int32_t nr_class; 01324 float64_t nu; 01325 }; 01326 01327 float64_t 
Solver_NUMC::compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases, float64_t* normwcw) 01328 { 01329 clone(y, p_y,l); 01330 clone(alpha,p_alpha,l); 01331 01332 alpha_status = new char[l]; 01333 for(int32_t i=0;i<l;i++) 01334 update_alpha_status(i); 01335 01336 float64_t* class_count = new float64_t[nr_class]; 01337 float64_t* outputs = new float64_t[l]; 01338 01339 for (int32_t i=0; i<nr_class; i++) 01340 { 01341 class_count[i]=0; 01342 biases[i+1]=0; 01343 } 01344 01345 for (int32_t i=0; i<active_size; i++) 01346 { 01347 update_alpha_status(i); 01348 if(!is_upper_bound(i) && !is_lower_bound(i)) 01349 class_count[(int32_t) y[i]]++; 01350 } 01351 01352 //CMath::display_vector(class_count, nr_class, "class_count"); 01353 01354 float64_t mu=((float64_t) nr_class)/(nu*l); 01355 //SG_SPRINT("nr_class=%d, l=%d, active_size=%d, nu=%f, mu=%f\n", nr_class, l, active_size, nu, mu); 01356 01357 float64_t rho=0; 01358 float64_t quad=0; 01359 float64_t* zero_counts = new float64_t[nr_class]; 01360 float64_t normwc_const = 0; 01361 01362 for (int32_t i=0; i<nr_class; i++) 01363 { 01364 zero_counts[i]=-INF; 01365 normwcw[i]=0; 01366 } 01367 01368 for (int32_t i=0; i<active_size; i++) 01369 { 01370 float64_t sum_free=0; 01371 float64_t sum_atbound=0; 01372 float64_t sum_zero_count=0; 01373 01374 Qfloat* Q_i = Q->get_Q(i,active_size); 01375 outputs[i]=0; 01376 01377 for (int j=0; j<active_size; j++) 01378 { 01379 quad+= alpha[i]*alpha[j]*Q_i[j]; 01380 float64_t tmp= alpha[j]*Q_i[j]/mu; 01381 01382 if(!is_upper_bound(i) && !is_lower_bound(i)) 01383 sum_free+=tmp; 01384 else 01385 sum_atbound+=tmp; 01386 01387 if (class_count[(int32_t) y[i]] == 0 && y[j]==y[i]) 01388 sum_zero_count+= tmp; 01389 01390 SVC_QMC* QMC=(SVC_QMC*) Q; 01391 float64_t norm_tmp=alpha[i]*alpha[j]*QMC->get_orig_Qij(Q_i[j], i, j); 01392 if (y[i]==y[j]) 01393 normwcw[(int32_t) y[i]]+=norm_tmp; 01394 01395 normwcw[(int32_t) y[i]]-=2.0/nr_class*norm_tmp; 01396 normwc_const+=norm_tmp; 01397 } 01398 
01399 if (class_count[(int32_t) y[i]] == 0) 01400 { 01401 if (zero_counts[(int32_t) y[i]]<sum_zero_count) 01402 zero_counts[(int32_t) y[i]]=sum_zero_count; 01403 } 01404 01405 biases[(int32_t) y[i]+1]-=sum_free; 01406 if (class_count[(int32_t) y[i]] != 0.0) 01407 rho+=sum_free/class_count[(int32_t) y[i]]; 01408 outputs[i]+=sum_free+sum_atbound; 01409 } 01410 01411 for (int32_t i=0; i<nr_class; i++) 01412 { 01413 if (class_count[i] == 0.0) 01414 rho+=zero_counts[i]; 01415 01416 normwcw[i]+=normwc_const/CMath::sq(nr_class); 01417 normwcw[i]=CMath::sqrt(normwcw[i]); 01418 } 01419 01420 CMath::display_vector(normwcw, nr_class, "normwcw"); 01421 01422 rho/=nr_class; 01423 01424 SG_SPRINT("rho=%f\n", rho); 01425 01426 float64_t sumb=0; 01427 for (int32_t i=0; i<nr_class; i++) 01428 { 01429 if (class_count[i] != 0.0) 01430 biases[i+1]=biases[i+1]/class_count[i]+rho; 01431 else 01432 biases[i+1]+=rho-zero_counts[i]; 01433 01434 SG_SPRINT("biases=%f\n", biases[i+1]); 01435 01436 sumb+=biases[i+1]; 01437 } 01438 SG_SPRINT("sumb=%f\n", sumb); 01439 01440 delete[] zero_counts; 01441 01442 for (int32_t i=0; i<l; i++) 01443 outputs[i]+=biases[(int32_t) y[i]+1]; 01444 01445 biases[0]=rho; 01446 01447 //CMath::display_vector(outputs, l, "outputs"); 01448 01449 01450 float64_t xi=0; 01451 for (int32_t i=0; i<active_size; i++) 01452 { 01453 if (is_lower_bound(i)) 01454 continue; 01455 xi+=rho-outputs[i]; 01456 } 01457 01458 //SG_SPRINT("xi=%f\n", xi); 01459 01460 //SG_SPRINT("quad=%f Cp=%f xi*mu=%f\n", quad, nr_class*rho, xi*mu); 01461 01462 float64_t primal=0.5*quad- nr_class*rho+xi*mu; 01463 01464 //SG_SPRINT("primal=%10.10f\n", primal); 01465 01466 delete[] y; 01467 delete[] alpha; 01468 delete[] alpha_status; 01469 01470 return primal; 01471 } 01472 01473 01474 // return 1 if already optimal, return 0 otherwise 01475 int32_t Solver_NUMC::select_working_set( 01476 int32_t &out_i, int32_t &out_j, float64_t &gap) 01477 { 01478 // return i,j such that y_i = y_j and 01479 // i: 
maximizes -y_i * grad(f)_i, i in I_up(\alpha) 01480 // j: minimizes the decrease of obj value 01481 // (if quadratic coefficient <= 0, replace it with tau) 01482 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) 01483 01484 int32_t retval=0; 01485 float64_t best_gap=0; 01486 int32_t best_out_i=-1; 01487 int32_t best_out_j=-1; 01488 01489 float64_t* Gmaxp = new float64_t[nr_class]; 01490 float64_t* Gmaxp2 = new float64_t[nr_class]; 01491 int32_t* Gmaxp_idx = new int32_t[nr_class]; 01492 01493 int32_t* Gmin_idx = new int32_t[nr_class]; 01494 float64_t* obj_diff_min = new float64_t[nr_class]; 01495 01496 for (int32_t i=0; i<nr_class; i++) 01497 { 01498 Gmaxp[i]=-INF; 01499 Gmaxp2[i]=-INF; 01500 Gmaxp_idx[i]=-1; 01501 Gmin_idx[i]=-1; 01502 obj_diff_min[i]=INF; 01503 } 01504 01505 for(int32_t t=0;t<active_size;t++) 01506 { 01507 int32_t cidx=y[t]; 01508 if(!is_upper_bound(t)) 01509 { 01510 if(-G[t] >= Gmaxp[cidx]) 01511 { 01512 Gmaxp[cidx] = -G[t]; 01513 Gmaxp_idx[cidx] = t; 01514 } 01515 } 01516 } 01517 01518 for(int32_t j=0;j<active_size;j++) 01519 { 01520 int32_t cidx=y[j]; 01521 int32_t ip = Gmaxp_idx[cidx]; 01522 const Qfloat *Q_ip = NULL; 01523 if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1 01524 Q_ip = Q->get_Q(ip,active_size); 01525 01526 if (!is_lower_bound(j)) 01527 { 01528 float64_t grad_diff=Gmaxp[cidx]+G[j]; 01529 if (G[j] >= Gmaxp2[cidx]) 01530 Gmaxp2[cidx] = G[j]; 01531 if (grad_diff > 0) 01532 { 01533 float64_t obj_diff; 01534 float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j]; 01535 if (quad_coef > 0) 01536 obj_diff = -(grad_diff*grad_diff)/quad_coef; 01537 else 01538 obj_diff = -(grad_diff*grad_diff)/TAU; 01539 01540 if (obj_diff <= obj_diff_min[cidx]) 01541 { 01542 Gmin_idx[cidx]=j; 01543 obj_diff_min[cidx] = obj_diff; 01544 } 01545 } 01546 } 01547 01548 gap=Gmaxp[cidx]+Gmaxp2[cidx]; 01549 if (gap>=best_gap && Gmin_idx[cidx]>=0 && 01550 Gmaxp_idx[cidx]>=0 && Gmin_idx[cidx]<active_size) 01551 { 01552 out_i = Gmaxp_idx[cidx]; 01553 out_j = 
Gmin_idx[cidx]; 01554 01555 best_gap=gap; 01556 best_out_i=out_i; 01557 best_out_j=out_j; 01558 } 01559 } 01560 01561 gap=best_gap; 01562 out_i=best_out_i; 01563 out_j=best_out_j; 01564 01565 SG_SDEBUG("i=%d j=%d best_gap=%f y_i=%f y_j=%f\n", out_i, out_j, gap, y[out_i], y[out_j]); 01566 01567 01568 if(gap < eps) 01569 retval=1; 01570 01571 delete[] Gmaxp; 01572 delete[] Gmaxp2; 01573 delete[] Gmaxp_idx; 01574 delete[] Gmin_idx; 01575 delete[] obj_diff_min; 01576 01577 return retval; 01578 } 01579 01580 bool Solver_NUMC::be_shrunk( 01581 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 01582 float64_t Gmax4) 01583 { 01584 return false; 01585 } 01586 01587 void Solver_NUMC::do_shrinking() 01588 { 01589 } 01590 01591 float64_t Solver_NUMC::calculate_rho() 01592 { 01593 return 0; 01594 } 01595 01596 01597 // 01598 // Q matrices for various formulations 01599 // 01600 class SVC_Q: public LibSVMKernel 01601 { 01602 public: 01603 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_) 01604 :LibSVMKernel(prob.l, prob.x, param) 01605 { 01606 clone(y,y_,prob.l); 01607 cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20))); 01608 QD = new Qfloat[prob.l]; 01609 for(int32_t i=0;i<prob.l;i++) 01610 QD[i]= (Qfloat)kernel_function(i,i); 01611 } 01612 01613 Qfloat *get_Q(int32_t i, int32_t len) const 01614 { 01615 Qfloat *data; 01616 int32_t start; 01617 if((start = cache->get_data(i,&data,len)) < len) 01618 { 01619 for(int32_t j=start;j<len;j++) 01620 data[j] = (Qfloat) y[i]*y[j]*kernel_function(i,j); 01621 } 01622 return data; 01623 } 01624 01625 Qfloat *get_QD() const 01626 { 01627 return QD; 01628 } 01629 01630 void swap_index(int32_t i, int32_t j) const 01631 { 01632 cache->swap_index(i,j); 01633 LibSVMKernel::swap_index(i,j); 01634 CMath::swap(y[i],y[j]); 01635 CMath::swap(QD[i],QD[j]); 01636 } 01637 01638 ~SVC_Q() 01639 { 01640 delete[] y; 01641 delete cache; 01642 delete[] QD; 01643 } 01644 private: 01645 schar *y; 01646 Cache 
*cache;
	Qfloat *QD;
};


// One-class SVM matrix: Q_ij = k(i,j) (no labels involved), rows cached.
class ONE_CLASS_Q: public LibSVMKernel
{
public:
	ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
	:LibSVMKernel(prob.l, prob.x, param)
	{
		// cache_size is in MB; 1l<<20 converts to bytes
		cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
		QD = new Qfloat[prob.l];
		for(int32_t i=0;i<prob.l;i++)
			QD[i]= (Qfloat)kernel_function(i,i);
	}

	// Return row i for columns [0,len); uncached entries are recomputed.
	Qfloat *get_Q(int32_t i, int32_t len) const
	{
		Qfloat *data;
		int32_t start;
		if((start = cache->get_data(i,&data,len)) < len)
		{
			for(int32_t j=start;j<len;j++)
				data[j] = (Qfloat) kernel_function(i,j);
		}
		return data;
	}

	Qfloat *get_QD() const
	{
		return QD;
	}

	void swap_index(int32_t i, int32_t j) const
	{
		cache->swap_index(i,j);
		LibSVMKernel::swap_index(i,j);
		CMath::swap(QD[i],QD[j]);
	}

	~ONE_CLASS_Q()
	{
		delete cache;
		delete[] QD;
	}
private:
	Cache *cache;
	Qfloat *QD;
};

// Regression matrix: the 2l variables are the stacked (alpha, alpha*)
// pair; index[] maps each of them back to the original example and
// sign[] carries +1/-1 so Q_ij = sign_i*sign_j*k(index_i,index_j).
class SVR_Q: public LibSVMKernel
{
public:
	SVR_Q(const svm_problem& prob, const svm_parameter& param)
	:LibSVMKernel(prob.l, prob.x, param)
	{
		l = prob.l;
		cache = new Cache(l,(int64_t)(param.cache_size*(1l<<20)));
		QD = new Qfloat[2*l];
		sign = new schar[2*l];
		index = new int32_t[2*l];
		for(int32_t k=0;k<l;k++)
		{
			sign[k] = 1;
			sign[k+l] = -1;
			index[k] = k;
			index[k+l] = k;
			QD[k]= (Qfloat)kernel_function(k,k);
			QD[k+l]=QD[k];
		}
		// double-buffered output: get_Q results stay valid for one more call
		buffer[0] = new Qfloat[2*l];
		buffer[1] = new Qfloat[2*l];
		next_buffer = 0;
	}

	// only the permutation arrays are swapped; the cache stays keyed by
	// the original example indices
	void swap_index(int32_t i, int32_t j) const
	{
		CMath::swap(sign[i],sign[j]);
		CMath::swap(index[i],index[j]);
		CMath::swap(QD[i],QD[j]);
	}

	Qfloat *get_Q(int32_t i, int32_t len) const
	{
		Qfloat *data;
		int32_t real_i = index[i];
		// cache a full kernel row for the underlying example ...
		if(cache->get_data(real_i,&data,l) < l)
		{
			for(int32_t j=0;j<l;j++)
				data[j] = (Qfloat)kernel_function(real_i,j);
		}

		// reorder and copy
		Qfloat *buf = buffer[next_buffer];
		next_buffer = 1 - next_buffer;
		schar si = sign[i];
		for(int32_t j=0;j<len;j++)
			buf[j] = si * sign[j] * data[index[j]];
		return buf;
	}

	Qfloat *get_QD() const
	{
		return QD;
	}

	~SVR_Q()
	{
		delete cache;
		delete[] sign;
		delete[] index;
		delete[] buffer[0];
		delete[] buffer[1];
		delete[] QD;
	}

private:
	int32_t l;
	Cache *cache;
	schar *sign;
	int32_t *index;
	mutable int32_t next_buffer;
	Qfloat *buffer[2];
	Qfloat *QD;
};

//
// construct and solve various formulations
//
// C-SVC: binarize the labels, solve the standard dual with linear term
// prob->pv, then scale alpha by the labels so callers get signed coefficients.
static void solve_c_svc(
	const svm_problem *prob, const svm_parameter* param,
	float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
{
	int32_t l = prob->l;
	schar *y = new schar[l];

	int32_t i;

	for(i=0;i<l;i++)
	{
		alpha[i] = 0;
		if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
	}

	Solver s;
	s.Solve(l, SVC_Q(*prob,*param,y), prob->pv, y,
		alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias);

	float64_t sum_alpha=0;
	for(i=0;i<l;i++)
		sum_alpha += alpha[i];

	if (Cp==Cn)
		SG_SINFO("nu = %f\n", sum_alpha/(param->C*prob->l));

	// fold the labels into alpha: output is y_i*alpha_i
	for(i=0;i<l;i++)
		alpha[i] *= y[i];

	delete[] y;
}


//two weighted datasets
// Same as solve_c_svc but with a per-example weight vector prob->C,
// handled by WeightedSolver; the linear term is fixed to -1.
void solve_c_svc_weighted(
	const svm_problem *prob, const svm_parameter* param,
	float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
{
	int l = prob->l;
	float64_t *minus_ones = new float64_t[l];
schar *y = new schar[l]; 01817 01818 int i; 01819 01820 for(i=0;i<l;i++) 01821 { 01822 alpha[i] = 0; 01823 minus_ones[i] = -1; 01824 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1; 01825 } 01826 01827 WeightedSolver s = WeightedSolver(prob->C); 01828 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y, 01829 alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias); 01830 01831 float64_t sum_alpha=0; 01832 for(i=0;i<l;i++) 01833 sum_alpha += alpha[i]; 01834 01835 //if (Cp==Cn) 01836 // SG_SINFO("nu = %f\n", sum_alpha/(prob->C*prob->l)); 01837 01838 for(i=0;i<l;i++) 01839 alpha[i] *= y[i]; 01840 01841 delete[] minus_ones; 01842 delete[] y; 01843 } 01844 01845 static void solve_nu_svc( 01846 const svm_problem *prob, const svm_parameter *param, 01847 float64_t *alpha, Solver::SolutionInfo* si) 01848 { 01849 int32_t i; 01850 int32_t l = prob->l; 01851 float64_t nu = param->nu; 01852 01853 schar *y = new schar[l]; 01854 01855 for(i=0;i<l;i++) 01856 if(prob->y[i]>0) 01857 y[i] = +1; 01858 else 01859 y[i] = -1; 01860 01861 float64_t sum_pos = nu*l/2; 01862 float64_t sum_neg = nu*l/2; 01863 01864 for(i=0;i<l;i++) 01865 if(y[i] == +1) 01866 { 01867 alpha[i] = CMath::min(1.0,sum_pos); 01868 sum_pos -= alpha[i]; 01869 } 01870 else 01871 { 01872 alpha[i] = CMath::min(1.0,sum_neg); 01873 sum_neg -= alpha[i]; 01874 } 01875 01876 float64_t *zeros = new float64_t[l]; 01877 01878 for(i=0;i<l;i++) 01879 zeros[i] = 0; 01880 01881 Solver_NU s; 01882 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y, 01883 alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias); 01884 float64_t r = si->r; 01885 01886 SG_SINFO("C = %f\n",1/r); 01887 01888 for(i=0;i<l;i++) 01889 alpha[i] *= y[i]/r; 01890 01891 si->rho /= r; 01892 si->obj /= (r*r); 01893 si->upper_bound_p = 1/r; 01894 si->upper_bound_n = 1/r; 01895 01896 delete[] y; 01897 delete[] zeros; 01898 } 01899 01900 static void solve_nu_multiclass_svc(const svm_problem *prob, 01901 const svm_parameter *param, Solver::SolutionInfo* si, 
svm_model* model) 01902 { 01903 int32_t l = prob->l; 01904 float64_t nu = param->nu; 01905 01906 float64_t *alpha = Malloc(float64_t, prob->l); 01907 schar *y = new schar[l]; 01908 01909 for(int32_t i=0;i<l;i++) 01910 { 01911 alpha[i] = 0; 01912 y[i]=prob->y[i]; 01913 } 01914 01915 int32_t nr_class=param->nr_class; 01916 float64_t* sum_class = new float64_t[nr_class]; 01917 01918 for (int32_t j=0; j<nr_class; j++) 01919 sum_class[j] = nu*l/nr_class; 01920 01921 for(int32_t i=0;i<l;i++) 01922 { 01923 alpha[i] = CMath::min(1.0,sum_class[int32_t(y[i])]); 01924 sum_class[int32_t(y[i])] -= alpha[i]; 01925 } 01926 delete[] sum_class; 01927 01928 01929 float64_t *zeros = new float64_t[l]; 01930 01931 for (int32_t i=0;i<l;i++) 01932 zeros[i] = 0; 01933 01934 Solver_NUMC s(nr_class, nu); 01935 SVC_QMC Q(*prob,*param,y, nr_class, ((float64_t) nr_class)/CMath::sq(nu*l)); 01936 01937 s.Solve(l, Q, zeros, y, 01938 alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias); 01939 01940 01941 int32_t* class_sv_count=new int32_t[nr_class]; 01942 01943 for (int32_t i=0; i<nr_class; i++) 01944 class_sv_count[i]=0; 01945 01946 for (int32_t i=0; i<l; i++) 01947 { 01948 if (CMath::abs(alpha[i]) > 0) 01949 class_sv_count[(int32_t) y[i]]++; 01950 } 01951 01952 model->l=l; 01953 // rho[0]= rho in mcsvm paper, rho[1]...rho[nr_class] is bias in mcsvm paper 01954 model->rho = Malloc(float64_t, nr_class+1); 01955 model->nr_class = nr_class; 01956 model->label = NULL; 01957 model->SV = Malloc(svm_node*,nr_class); 01958 model->nSV = Malloc(int32_t, nr_class); 01959 model->sv_coef = Malloc(float64_t *,nr_class); 01960 model->normwcw = Malloc(float64_t,nr_class); 01961 01962 for (int32_t i=0; i<nr_class+1; i++) 01963 model->rho[i]=0; 01964 01965 model->objective = si->obj; 01966 01967 if (param->use_bias) 01968 { 01969 SG_SDEBUG("Computing biases and primal objective\n"); 01970 float64_t primal = s.compute_primal(y, alpha, model->rho, model->normwcw); 01971 SG_SINFO("Primal = %10.10f\n", 
primal); 01972 } 01973 01974 for (int32_t i=0; i<nr_class; i++) 01975 { 01976 model->nSV[i]=class_sv_count[i]; 01977 model->SV[i] = Malloc(svm_node,class_sv_count[i]); 01978 model->sv_coef[i] = Malloc(float64_t,class_sv_count[i]); 01979 class_sv_count[i]=0; 01980 } 01981 01982 for (int32_t i=0;i<prob->l;i++) 01983 { 01984 if(fabs(alpha[i]) > 0) 01985 { 01986 model->SV[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]].index = prob->x[i]->index; 01987 model->sv_coef[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]] = alpha[i]; 01988 class_sv_count[(int32_t) y[i]]++; 01989 } 01990 } 01991 01992 delete[] y; 01993 delete[] zeros; 01994 free(alpha); 01995 } 01996 01997 static void solve_one_class( 01998 const svm_problem *prob, const svm_parameter *param, 01999 float64_t *alpha, Solver::SolutionInfo* si) 02000 { 02001 int32_t l = prob->l; 02002 float64_t *zeros = new float64_t[l]; 02003 schar *ones = new schar[l]; 02004 int32_t i; 02005 02006 int32_t n = (int32_t)(param->nu*prob->l); // # of alpha's at upper bound 02007 02008 for(i=0;i<n;i++) 02009 alpha[i] = 1; 02010 if(n<prob->l) 02011 alpha[n] = param->nu * prob->l - n; 02012 for(i=n+1;i<l;i++) 02013 alpha[i] = 0; 02014 02015 for(i=0;i<l;i++) 02016 { 02017 zeros[i] = 0; 02018 ones[i] = 1; 02019 } 02020 02021 Solver s; 02022 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones, 02023 alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias); 02024 02025 delete[] zeros; 02026 delete[] ones; 02027 } 02028 02029 static void solve_epsilon_svr( 02030 const svm_problem *prob, const svm_parameter *param, 02031 float64_t *alpha, Solver::SolutionInfo* si) 02032 { 02033 int32_t l = prob->l; 02034 float64_t *alpha2 = new float64_t[2*l]; 02035 float64_t *linear_term = new float64_t[2*l]; 02036 schar *y = new schar[2*l]; 02037 int32_t i; 02038 02039 for(i=0;i<l;i++) 02040 { 02041 alpha2[i] = 0; 02042 linear_term[i] = param->p - prob->y[i]; 02043 y[i] = 1; 02044 02045 alpha2[i+l] = 0; 02046 linear_term[i+l] = param->p + 
prob->y[i];
		y[i+l] = -1;
	}

	Solver s;
	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
		alpha2, param->C, param->C, param->eps, si, param->shrinking, param->use_bias);

	// collapse the (alpha, alpha*) pair into a single signed coefficient
	float64_t sum_alpha = 0;
	for(i=0;i<l;i++)
	{
		alpha[i] = alpha2[i] - alpha2[i+l];
		sum_alpha += fabs(alpha[i]);
	}
	SG_SINFO("nu = %f\n",sum_alpha/(param->C*l));

	delete[] alpha2;
	delete[] linear_term;
	delete[] y;
}

// nu-SVR: like epsilon-SVR but the tube width is found automatically;
// the initial point spreads the mass C*nu*l/2 over the pairs, and the
// resulting epsilon is -si->r.
static void solve_nu_svr(
	const svm_problem *prob, const svm_parameter *param,
	float64_t *alpha, Solver::SolutionInfo* si)
{
	int32_t l = prob->l;
	float64_t C = param->C;
	float64_t *alpha2 = new float64_t[2*l];
	float64_t *linear_term = new float64_t[2*l];
	schar *y = new schar[2*l];
	int32_t i;

	float64_t sum = C * param->nu * l / 2;
	for(i=0;i<l;i++)
	{
		alpha2[i] = alpha2[i+l] = CMath::min(sum,C);
		sum -= alpha2[i];

		linear_term[i] = - prob->y[i];
		y[i] = 1;

		linear_term[i+l] = prob->y[i];
		y[i+l] = -1;
	}

	Solver_NU s;
	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
		alpha2, C, C, param->eps, si, param->shrinking, param->use_bias);

	SG_SINFO("epsilon = %f\n",-si->r);

	for(i=0;i<l;i++)
		alpha[i] = alpha2[i] - alpha2[i+l];

	delete[] alpha2;
	delete[] linear_term;
	delete[] y;
}

//
// decision_function
//
struct decision_function
{
	float64_t *alpha;     // signed SV coefficients, owned by the caller (malloc'd)
	float64_t rho;        // bias term
	float64_t objective;  // dual objective value
};

// Train a single two-class (or one-class/regression) machine by
// dispatching on param->svm_type.  Caller takes ownership of f.alpha
// (allocated with Malloc, release with free()).
decision_function svm_train_one(
	const svm_problem *prob, const svm_parameter *param,
	float64_t Cp, float64_t Cn)
{
	float64_t *alpha = Malloc(float64_t, prob->l);
	Solver::SolutionInfo si;
	switch(param->svm_type)
	{
		case C_SVC:
			solve_c_svc(prob,param,alpha,&si,Cp,Cn);
			break;
		case NU_SVC:
			solve_nu_svc(prob,param,alpha,&si);
			break;
		case ONE_CLASS:
			solve_one_class(prob,param,alpha,&si);
			break;
		case EPSILON_SVR:
			solve_epsilon_svr(prob,param,alpha,&si);
			break;
		case NU_SVR:
			solve_nu_svr(prob,param,alpha,&si);
			break;
	}

	SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);

	// output SVs
	if (param->svm_type != ONE_CLASS)
	{
		// nSV: support vectors, nBSV: SVs at the upper bound
		int32_t nSV = 0;
		int32_t nBSV = 0;
		for(int32_t i=0;i<prob->l;i++)
		{
			if(fabs(alpha[i]) > 0)
			{
				++nSV;
				if(prob->y[i] > 0)
				{
					if(fabs(alpha[i]) >= si.upper_bound_p)
						++nBSV;
				}
				else
				{
					if(fabs(alpha[i]) >= si.upper_bound_n)
						++nBSV;
				}
			}
		}
		SG_SINFO("nSV = %d, nBSV = %d\n",nSV,nBSV);
	}

	decision_function f;
	f.alpha = alpha;
	f.rho = si.rho;
	f.objective=si.obj;
	return f;
}

//
// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
// On return *label_ret/*start_ret/*count_ret are malloc'd arrays of
// length *nr_class_ret owned by the caller (release with free()).
void svm_group_classes(
	const svm_problem *prob, int32_t *nr_class_ret, int32_t **label_ret,
	int32_t **start_ret, int32_t **count_ret, int32_t *perm)
{
	int32_t l = prob->l;
	int32_t max_nr_class = 16;
	int32_t nr_class = 0;
	int32_t *label = Malloc(int32_t, max_nr_class);
	int32_t *count = Malloc(int32_t, max_nr_class);
	int32_t *data_label = Malloc(int32_t, l);
	int32_t i;

	for(i=0;i<l;i++)
	{
		int32_t this_label=(int32_t) prob->y[i];
		int32_t j;
		// linear scan over the labels seen so far
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		if(j == nr_class)
		{
			// new label: grow the tables geometrically if needed
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
label=(int32_t *) realloc(label,max_nr_class*sizeof(int32_t)); 02208 count=(int32_t *) realloc(count,max_nr_class*sizeof(int32_t)); 02209 } 02210 label[nr_class] = this_label; 02211 count[nr_class] = 1; 02212 ++nr_class; 02213 } 02214 } 02215 02216 int32_t *start = Malloc(int32_t, nr_class); 02217 start[0] = 0; 02218 for(i=1;i<nr_class;i++) 02219 start[i] = start[i-1]+count[i-1]; 02220 for(i=0;i<l;i++) 02221 { 02222 perm[start[data_label[i]]] = i; 02223 ++start[data_label[i]]; 02224 } 02225 start[0] = 0; 02226 for(i=1;i<nr_class;i++) 02227 start[i] = start[i-1]+count[i-1]; 02228 02229 *nr_class_ret = nr_class; 02230 *label_ret = label; 02231 *start_ret = start; 02232 *count_ret = count; 02233 free(data_label); 02234 } 02235 02236 // 02237 // Interface functions 02238 // 02239 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param) 02240 { 02241 svm_model *model = Malloc(svm_model,1); 02242 model->param = *param; 02243 model->free_sv = 0; // XXX 02244 02245 if(param->svm_type == ONE_CLASS || 02246 param->svm_type == EPSILON_SVR || 02247 param->svm_type == NU_SVR) 02248 { 02249 SG_SINFO("training one class svm or doing epsilon sv regression\n"); 02250 02251 // regression or one-class-svm 02252 model->nr_class = 2; 02253 model->label = NULL; 02254 model->nSV = NULL; 02255 model->sv_coef = Malloc(float64_t *,1); 02256 decision_function f = svm_train_one(prob,param,0,0); 02257 model->rho = Malloc(float64_t, 1); 02258 model->rho[0] = f.rho; 02259 model->objective = f.objective; 02260 02261 int32_t nSV = 0; 02262 int32_t i; 02263 for(i=0;i<prob->l;i++) 02264 if(fabs(f.alpha[i]) > 0) ++nSV; 02265 model->l = nSV; 02266 model->SV = Malloc(svm_node *,nSV); 02267 model->sv_coef[0] = Malloc(float64_t, nSV); 02268 int32_t j = 0; 02269 for(i=0;i<prob->l;i++) 02270 if(fabs(f.alpha[i]) > 0) 02271 { 02272 model->SV[j] = prob->x[i]; 02273 model->sv_coef[0][j] = f.alpha[i]; 02274 ++j; 02275 } 02276 02277 free(f.alpha); 02278 } 02279 else if(param->svm_type == 
NU_MULTICLASS_SVC) 02280 { 02281 Solver::SolutionInfo si; 02282 solve_nu_multiclass_svc(prob,param,&si,model); 02283 SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho); 02284 } 02285 else 02286 { 02287 // classification 02288 int32_t l = prob->l; 02289 int32_t nr_class; 02290 int32_t *label = NULL; 02291 int32_t *start = NULL; 02292 int32_t *count = NULL; 02293 int32_t *perm = Malloc(int32_t, l); 02294 02295 // group training data of the same class 02296 svm_group_classes(prob,&nr_class,&label,&start,&count,perm); 02297 svm_node **x = Malloc(svm_node *,l); 02298 float64_t *C = Malloc(float64_t,l); 02299 float64_t *pv = Malloc(float64_t,l); 02300 02301 02302 int32_t i; 02303 for(i=0;i<l;i++) { 02304 x[i] = prob->x[perm[i]]; 02305 C[i] = prob->C[perm[i]]; 02306 02307 if (prob->pv) 02308 { 02309 pv[i] = prob->pv[perm[i]]; 02310 } 02311 else 02312 { 02313 //no custom linear term is set 02314 pv[i] = -1.0; 02315 } 02316 02317 } 02318 02319 02320 // calculate weighted C 02321 float64_t *weighted_C = Malloc(float64_t, nr_class); 02322 for(i=0;i<nr_class;i++) 02323 weighted_C[i] = param->C; 02324 for(i=0;i<param->nr_weight;i++) 02325 { 02326 int32_t j; 02327 for(j=0;j<nr_class;j++) 02328 if(param->weight_label[i] == label[j]) 02329 break; 02330 if(j == nr_class) 02331 SG_SWARNING("warning: class label %d specified in weight is not found\n", param->weight_label[i]); 02332 else 02333 weighted_C[j] *= param->weight[i]; 02334 } 02335 02336 // train k*(k-1)/2 models 02337 02338 bool *nonzero = Malloc(bool,l); 02339 for(i=0;i<l;i++) 02340 nonzero[i] = false; 02341 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2); 02342 02343 int32_t p = 0; 02344 for(i=0;i<nr_class;i++) 02345 for(int32_t j=i+1;j<nr_class;j++) 02346 { 02347 svm_problem sub_prob; 02348 int32_t si = start[i], sj = start[j]; 02349 int32_t ci = count[i], cj = count[j]; 02350 sub_prob.l = ci+cj; 02351 sub_prob.x = Malloc(svm_node *,sub_prob.l); 02352 sub_prob.y = 
Malloc(float64_t,sub_prob.l+1); //dirty hack to surpress valgrind err 02353 sub_prob.C = Malloc(float64_t,sub_prob.l+1); 02354 sub_prob.pv = Malloc(float64_t,sub_prob.l+1); 02355 02356 int32_t k; 02357 for(k=0;k<ci;k++) 02358 { 02359 sub_prob.x[k] = x[si+k]; 02360 sub_prob.y[k] = +1; 02361 sub_prob.C[k] = C[si+k]; 02362 sub_prob.pv[k] = pv[si+k]; 02363 02364 } 02365 for(k=0;k<cj;k++) 02366 { 02367 sub_prob.x[ci+k] = x[sj+k]; 02368 sub_prob.y[ci+k] = -1; 02369 sub_prob.C[ci+k] = C[sj+k]; 02370 sub_prob.pv[ci+k] = pv[sj+k]; 02371 } 02372 sub_prob.y[sub_prob.l]=-1; //dirty hack to surpress valgrind err 02373 sub_prob.C[sub_prob.l]=-1; 02374 sub_prob.pv[sub_prob.l]=-1; 02375 02376 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]); 02377 for(k=0;k<ci;k++) 02378 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0) 02379 nonzero[si+k] = true; 02380 for(k=0;k<cj;k++) 02381 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0) 02382 nonzero[sj+k] = true; 02383 free(sub_prob.x); 02384 free(sub_prob.y); 02385 free(sub_prob.C); 02386 free(sub_prob.pv); 02387 ++p; 02388 } 02389 02390 // build output 02391 02392 model->objective = f[0].objective; 02393 model->nr_class = nr_class; 02394 02395 model->label = Malloc(int32_t, nr_class); 02396 for(i=0;i<nr_class;i++) 02397 model->label[i] = label[i]; 02398 02399 model->rho = Malloc(float64_t, nr_class*(nr_class-1)/2); 02400 for(i=0;i<nr_class*(nr_class-1)/2;i++) 02401 model->rho[i] = f[i].rho; 02402 02403 int32_t total_sv = 0; 02404 int32_t *nz_count = Malloc(int32_t, nr_class); 02405 model->nSV = Malloc(int32_t, nr_class); 02406 for(i=0;i<nr_class;i++) 02407 { 02408 int32_t nSV = 0; 02409 for(int32_t j=0;j<count[i];j++) 02410 if(nonzero[start[i]+j]) 02411 { 02412 ++nSV; 02413 ++total_sv; 02414 } 02415 model->nSV[i] = nSV; 02416 nz_count[i] = nSV; 02417 } 02418 02419 SG_SINFO("Total nSV = %d\n",total_sv); 02420 02421 model->l = total_sv; 02422 model->SV = Malloc(svm_node *,total_sv); 02423 p = 0; 02424 for(i=0;i<l;i++) 02425 
if(nonzero[i]) model->SV[p++] = x[i]; 02426 02427 int32_t *nz_start = Malloc(int32_t, nr_class); 02428 nz_start[0] = 0; 02429 for(i=1;i<nr_class;i++) 02430 nz_start[i] = nz_start[i-1]+nz_count[i-1]; 02431 02432 model->sv_coef = Malloc(float64_t *,nr_class-1); 02433 for(i=0;i<nr_class-1;i++) 02434 model->sv_coef[i] = Malloc(float64_t, total_sv); 02435 02436 p = 0; 02437 for(i=0;i<nr_class;i++) 02438 for(int32_t j=i+1;j<nr_class;j++) 02439 { 02440 // classifier (i,j): coefficients with 02441 // i are in sv_coef[j-1][nz_start[i]...], 02442 // j are in sv_coef[i][nz_start[j]...] 02443 02444 int32_t si = start[i]; 02445 int32_t sj = start[j]; 02446 int32_t ci = count[i]; 02447 int32_t cj = count[j]; 02448 02449 int32_t q = nz_start[i]; 02450 int32_t k; 02451 for(k=0;k<ci;k++) 02452 if(nonzero[si+k]) 02453 model->sv_coef[j-1][q++] = f[p].alpha[k]; 02454 q = nz_start[j]; 02455 for(k=0;k<cj;k++) 02456 if(nonzero[sj+k]) 02457 model->sv_coef[i][q++] = f[p].alpha[ci+k]; 02458 ++p; 02459 } 02460 02461 free(label); 02462 free(count); 02463 free(perm); 02464 free(start); 02465 free(x); 02466 free(C); 02467 free(pv); 02468 free(weighted_C); 02469 free(nonzero); 02470 for(i=0;i<nr_class*(nr_class-1)/2;i++) 02471 free(f[i].alpha); 02472 free(f); 02473 free(nz_count); 02474 free(nz_start); 02475 } 02476 return model; 02477 } 02478 02479 void svm_destroy_model(svm_model* model) 02480 { 02481 if(model->free_sv && model->l > 0) 02482 free((void *)(model->SV[0])); 02483 for(int32_t i=0;i<model->nr_class-1;i++) 02484 free(model->sv_coef[i]); 02485 free(model->SV); 02486 free(model->sv_coef); 02487 free(model->rho); 02488 free(model->label); 02489 free(model->nSV); 02490 free(model); 02491 } 02492 02493 void svm_destroy_param(svm_parameter* param) 02494 { 02495 free(param->weight_label); 02496 free(param->weight); 02497 } 02498 02499 const char *svm_check_parameter( 02500 const svm_problem *prob, const svm_parameter *param) 02501 { 02502 // svm_type 02503 02504 int32_t svm_type = 
param->svm_type; 02505 if(svm_type != C_SVC && 02506 svm_type != NU_SVC && 02507 svm_type != ONE_CLASS && 02508 svm_type != EPSILON_SVR && 02509 svm_type != NU_SVR && 02510 svm_type != NU_MULTICLASS_SVC) 02511 return "unknown svm type"; 02512 02513 // kernel_type, degree 02514 02515 int32_t kernel_type = param->kernel_type; 02516 if(kernel_type != LINEAR && 02517 kernel_type != POLY && 02518 kernel_type != RBF && 02519 kernel_type != SIGMOID && 02520 kernel_type != PRECOMPUTED) 02521 return "unknown kernel type"; 02522 02523 if(param->degree < 0) 02524 return "degree of polynomial kernel < 0"; 02525 02526 // cache_size,eps,C,nu,p,shrinking 02527 02528 if(param->cache_size <= 0) 02529 return "cache_size <= 0"; 02530 02531 if(param->eps <= 0) 02532 return "eps <= 0"; 02533 02534 if(svm_type == C_SVC || 02535 svm_type == EPSILON_SVR || 02536 svm_type == NU_SVR) 02537 if(param->C <= 0) 02538 return "C <= 0"; 02539 02540 if(svm_type == NU_SVC || 02541 svm_type == ONE_CLASS || 02542 svm_type == NU_SVR) 02543 if(param->nu <= 0 || param->nu > 1) 02544 return "nu <= 0 or nu > 1"; 02545 02546 if(svm_type == EPSILON_SVR) 02547 if(param->p < 0) 02548 return "p < 0"; 02549 02550 if(param->shrinking != 0 && 02551 param->shrinking != 1) 02552 return "shrinking != 0 and shrinking != 1"; 02553 02554 02555 // check whether nu-svc is feasible 02556 02557 if(svm_type == NU_SVC) 02558 { 02559 int32_t l = prob->l; 02560 int32_t max_nr_class = 16; 02561 int32_t nr_class = 0; 02562 int32_t *label = Malloc(int32_t, max_nr_class); 02563 int32_t *count = Malloc(int32_t, max_nr_class); 02564 02565 int32_t i; 02566 for(i=0;i<l;i++) 02567 { 02568 int32_t this_label = (int32_t) prob->y[i]; 02569 int32_t j; 02570 for(j=0;j<nr_class;j++) 02571 if(this_label == label[j]) 02572 { 02573 ++count[j]; 02574 break; 02575 } 02576 if(j == nr_class) 02577 { 02578 if(nr_class == max_nr_class) 02579 { 02580 max_nr_class *= 2; 02581 label=(int32_t *) realloc(label, 02582 max_nr_class*sizeof(int32_t)); 02583 
count=(int32_t *) realloc(count, 02584 max_nr_class*sizeof(int32_t)); 02585 } 02586 label[nr_class] = this_label; 02587 count[nr_class] = 1; 02588 ++nr_class; 02589 } 02590 } 02591 02592 for(i=0;i<nr_class;i++) 02593 { 02594 int32_t n1 = count[i]; 02595 for(int32_t j=i+1;j<nr_class;j++) 02596 { 02597 int32_t n2 = count[j]; 02598 if(param->nu*(n1+n2)/2 > CMath::min(n1,n2)) 02599 { 02600 free(label); 02601 free(count); 02602 return "specified nu is infeasible"; 02603 } 02604 } 02605 } 02606 free(label); 02607 free(count); 02608 } 02609 02610 return NULL; 02611 } 02612 } 02613 #endif // DOXYGEN_SHOULD_SKIP_THIS