/*
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. Neither name of copyright holders nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "lib/config.h"
#ifndef DOXYGEN_SHOULD_SKIP_THIS
#ifdef HAVE_LAPACK
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>

#include "lib/Mathematics.h"
#include "classifier/svm/SVM_linear.h"
#include "classifier/svm/Tron.h"

using namespace shogun;

l2r_lr_fun::l2r_lr_fun(const problem *p, float64_t Cp, float64_t Cn)
{
    int i;
    int l=p->l;
    int *y=p->y;

    this->prob = p;

    z = new double[l];
    D = new double[l];
    C = new double[l];

    for (i=0; i<l; i++)
    {
        if (y[i] == 1)
            C[i] = Cp;
        else
            C[i] = Cn;
    }
}

l2r_lr_fun::~l2r_lr_fun()
{
    delete[] z;
    delete[] D;
    delete[] C;
}

double l2r_lr_fun::fun(double *w)
{
    int i;
    double f=0;
    int *y=prob->y;
    int l=prob->l;
    int32_t n=prob->n;

    Xv(w, z);
    for(i=0;i<l;i++)
    {
        double yz = y[i]*z[i];
        if (yz >= 0)
            f += C[i]*log(1 + exp(-yz));
        else
            f += C[i]*(-yz+log(1 + exp(yz)));
    }
    f += 0.5*CMath::dot(w,w,n);

    return(f);
}

void l2r_lr_fun::grad(double *w, double *g)
{
    int i;
    int *y=prob->y;
    int l=prob->l;
    int w_size=get_nr_variable();

    for(i=0;i<l;i++)
    {
        z[i] = 1/(1 + exp(-y[i]*z[i]));
        D[i] = z[i]*(1-z[i]);
        z[i] = C[i]*(z[i]-1)*y[i];
    }
    XTv(z, g);

    for(i=0;i<w_size;i++)
        g[i] = w[i] + g[i];
}

int l2r_lr_fun::get_nr_variable(void)
{
    return prob->n;
}
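/* For reference: fun(), grad() and Hv() supply exactly the pieces a
 * trust-region Newton solver (TRON) needs for L2-regularized logistic
 * regression, with z = X*w and sigma(t) = 1/(1+exp(-t)):
 *
 *   f(w)      = 0.5*w'w + sum_i C[i]*log(1 + exp(-y[i]*z[i]))
 *   grad f(w) = w + X' * (C .* (sigma(y .* z) - 1) .* y)
 *   Hv(s)     = s + X' * (C .* D .* (X*s)),  D[i] = sigma_i*(1 - sigma_i)
 *
 * The sign split on yz in fun() is the standard overflow-safe evaluation of
 * log(1 + exp(-yz)). A minimal standalone sketch of the same trick (helper
 * name is illustrative, not part of this file):
 *
 *   double log1p_exp(double t) // log(1 + exp(t)) without overflowing exp()
 *   {
 *       return t > 0 ? t + log(1 + exp(-t)) : log(1 + exp(t));
 *   }
 */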
void l2r_lr_fun::Hv(double *s, double *Hs)
{
    int i;
    int l=prob->l;
    int w_size=get_nr_variable();
    double *wa = new double[l];

    Xv(s, wa);
    for(i=0;i<l;i++)
        wa[i] = C[i]*D[i]*wa[i];

    XTv(wa, Hs);
    for(i=0;i<w_size;i++)
        Hs[i] = s[i] + Hs[i];
    delete[] wa;
}

void l2r_lr_fun::Xv(double *v, double *res_Xv)
{
    int32_t l=prob->l;
    int32_t n=prob->n;
    float64_t bias=0;

    if (prob->use_bias)
    {
        n--;
        bias=v[n];
    }

    prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
}

void l2r_lr_fun::XTv(double *v, double *res_XTv)
{
    int l=prob->l;
    int32_t n=prob->n;

    memset(res_XTv, 0, sizeof(double)*prob->n);

    if (prob->use_bias)
        n--;

    for (int32_t i=0;i<l;i++)
    {
        prob->x->add_to_dense_vec(v[i], i, res_XTv, n);

        if (prob->use_bias)
            res_XTv[n]+=v[i];
    }
}

l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *p, double Cp, double Cn)
{
    int i;
    int l=p->l;
    int *y=p->y;

    this->prob = p;

    z = new double[l];
    D = new double[l];
    C = new double[l];
    I = new int[l];

    for (i=0; i<l; i++)
    {
        if (y[i] == 1)
            C[i] = Cp;
        else
            C[i] = Cn;
    }
}

l2r_l2_svc_fun::~l2r_l2_svc_fun()
{
    delete[] z;
    delete[] D;
    delete[] C;
    delete[] I;
}

double l2r_l2_svc_fun::fun(double *w)
{
    int i;
    double f=0;
    int *y=prob->y;
    int l=prob->l;
    int w_size=get_nr_variable();

    Xv(w, z);
    for(i=0;i<l;i++)
    {
        z[i] = y[i]*z[i];
        double d = 1-z[i];
        if (d > 0)
            f += C[i]*d*d;
    }
    f += 0.5*CMath::dot(w, w, w_size);

    return(f);
}

void l2r_l2_svc_fun::grad(double *w, double *g)
{
    int i;
    int *y=prob->y;
    int l=prob->l;
    int w_size=get_nr_variable();

    sizeI = 0;
    for (i=0;i<l;i++)
        if (z[i] < 1)
        {
            z[sizeI] = C[i]*y[i]*(z[i]-1);
            I[sizeI] = i;
            sizeI++;
        }
    subXTv(z, g);

    for(i=0;i<w_size;i++)
        g[i] = w[i] + 2*g[i];
}

int l2r_l2_svc_fun::get_nr_variable(void)
{
    return prob->n;
}

void l2r_l2_svc_fun::Hv(double *s, double *Hs)
{
    int i;
    int l=prob->l;
    int w_size=get_nr_variable();
    double *wa = new double[l];

    subXv(s, wa);
    for(i=0;i<sizeI;i++)
        wa[i] = C[I[i]]*wa[i];

    subXTv(wa, Hs);
    for(i=0;i<w_size;i++)
        Hs[i] = s[i] + 2*Hs[i];
    delete[] wa;
}

void l2r_l2_svc_fun::Xv(double *v, double *res_Xv)
{
    int32_t l=prob->l;
    int32_t n=prob->n;
    float64_t bias=0;

    if (prob->use_bias)
    {
        n--;
        bias=v[n];
    }

    prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
}

void l2r_l2_svc_fun::subXv(double *v, double *res_Xv)
{
    int32_t n=prob->n;
    float64_t bias=0;

    if (prob->use_bias)
    {
        n--;
        bias=v[n];
    }

    prob->x->dense_dot_range_subset(I, sizeI, res_Xv, NULL, v, n, bias);

    /*for (int32_t i=0;i<sizeI;i++)
    {
        res_Xv[i]=prob->x->dense_dot(I[i], v, n);

        if (prob->use_bias)
            res_Xv[i]+=bias;
    }*/
}
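/* The l2r_l2_svc_fun members above play the analogous role for L2-regularized
 * L2-loss (squared hinge) SVC:
 *
 *   f(w)      = 0.5*w'w + sum_i C[i]*max(0, 1 - y[i]*w'x_i)^2
 *   grad f(w) = w + 2*X_I' * (C .* (z - 1) .* y),  I = { i : y[i]*w'x_i < 1 }
 *   Hv(s)     = s + 2*X_I' * (C .* (X_I*s))
 *
 * Only margin violators (the index set I built in grad()) contribute, which is
 * why subXv() above and subXTv() below restrict the matrix-vector products to
 * those rows. A minimal dense sketch of the same gradient, without the subset
 * optimization (X here is a hypothetical row-major dense matrix, not the
 * feature interface the real code uses):
 *
 *   void l2svc_grad_dense(const double* X, const int* y, const double* C,
 *                         const double* w, double* g, int l, int n)
 *   {
 *       for (int j=0; j<n; j++)
 *           g[j] = w[j];                  // regularization term
 *       for (int i=0; i<l; i++)
 *       {
 *           double z = 0;
 *           for (int j=0; j<n; j++)
 *               z += X[i*n + j]*w[j];     // z = w'x_i
 *           if (y[i]*z < 1)               // i is a margin violator, i.e. in I
 *               for (int j=0; j<n; j++)
 *                   g[j] += 2*C[i]*y[i]*(y[i]*z - 1)*X[i*n + j];
 *       }
 *   }
 */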
void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
{
    int32_t n=prob->n;

    if (prob->use_bias)
        n--;

    memset(XTv, 0, sizeof(float64_t)*prob->n);
    for (int32_t i=0;i<sizeI;i++)
    {
        prob->x->add_to_dense_vec(v[i], I[i], XTv, n);

        if (prob->use_bias)
            XTv[n]+=v[i];
    }
}

// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
//   min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i \alpha^m_i
//   s.t.          \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i = 0 \forall i
//
// where e^m_i = 0 if y_i  = m,
//       e^m_i = 1 if y_i != m,
//       C^m_i = C if m  = y_i,
//       C^m_i = 0 if m != y_i,
// and w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// Given:
// x, y, C
// eps is the stopping tolerance
//
// solution will be put in w

#define GETI(i) (prob->y[i])
// To support weights for instances, use GETI(i) (i)

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class, double *weighted_C, double epsilon, int max_it)
{
    // assign prob first: the original code read prob->n and prob->l through
    // the still-uninitialized member before setting it
    this->prob = p;
    this->w_size = p->n;
    this->l = p->l;
    this->nr_class = n_class;
    this->eps = epsilon;
    this->max_iter = max_it;
    this->C = weighted_C;
    this->B = new double[nr_class];
    this->G = new double[nr_class];
}

Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
    delete[] B;
    delete[] G;
}

int compare_double(const void *a, const void *b)
{
    if(*(double *)a > *(double *)b)
        return -1;
    if(*(double *)a < *(double *)b)
        return 1;
    return 0;
}

void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
    int r;
    double *D=CMath::clone_vector(B, active_i);

    if(yi < active_i)
        D[yi] += A_i*C_yi;
    qsort(D, active_i, sizeof(double), compare_double);

    double beta = D[0] - A_i*C_yi;
    for(r=1;r<active_i && beta<r*D[r];r++)
        beta += D[r];

    beta /= r;
    for(r=0;r<active_i;r++)
    {
        if(r == yi)
            alpha_new[r] = CMath::min(C_yi, (beta-B[r])/A_i);
        else
            alpha_new[r] = CMath::min((double)0, (beta - B[r])/A_i);
    }
    delete[] D;
}

bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
{
    double bound = 0;
    if(m == yi)
        bound = C[GETI(i)];
    if(alpha_i == bound && G[m] < minG)
        return true;
    return false;
}
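/* solve_sub_problem() above solves the per-example block of the Crammer-Singer
 * dual restricted to the active classes of example i:
 *
 *   min_{alpha}  0.5*A_i*||alpha||^2 + B'alpha
 *   s.t.         alpha^m <= C^m_i for all m,  sum_m alpha^m = 0
 *
 * whose KKT solution is alpha^m = min(C^m_i, (beta - B[m])/A_i) for the
 * multiplier beta that makes the components sum to zero. Sorting D in
 * decreasing order lets beta be found in one pass; a minimal sketch of that
 * search (assuming D[] is already sorted descending, as after the qsort()
 * above; helper name is illustrative only):
 *
 *   double find_beta(const double* D, int active_i, double AiCyi)
 *   {
 *       double beta = D[0] - AiCyi;
 *       int r = 1;
 *       while (r < active_i && beta < r*D[r])
 *           beta += D[r++];
 *       return beta/r;
 *   }
 */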
void Solver_MCSVM_CS::Solve(double *w)
{
    int i, m, s;
    int iter = 0;
    double *alpha = new double[l*nr_class];
    double *alpha_new = new double[nr_class];
    int *index = new int[l];
    double *QD = new double[l];
    int *d_ind = new int[nr_class];
    double *d_val = new double[nr_class];
    int *alpha_index = new int[nr_class*l];
    int *y_index = new int[l];
    int active_size = l;
    int *active_size_i = new int[l];
    double eps_shrink = CMath::max(10.0*eps, 1.0); // stopping tolerance for shrinking
    bool start_from_all = true;

    // initial
    for(i=0;i<l*nr_class;i++)
        alpha[i] = 0;
    for(i=0;i<w_size*nr_class;i++)
        w[i] = 0;
    for(i=0;i<l;i++)
    {
        for(m=0;m<nr_class;m++)
            alpha_index[i*nr_class+m] = m;

        QD[i] = prob->x->dot(i, prob->x, i);

        active_size_i[i] = nr_class;
        y_index[i] = prob->y[i];
        index[i] = i;
    }

    while(iter < max_iter)
    {
        double stopping = -CMath::INFTY;
        for(i=0;i<active_size;i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }
        for(s=0;s<active_size;s++)
        {
            i = index[s];
            double Ai = QD[i];
            double *alpha_i = &alpha[i*nr_class];
            int *alpha_index_i = &alpha_index[i*nr_class];

            if(Ai > 0)
            {
                for(m=0;m<active_size_i[i];m++)
                    G[m] = 1;
                if(y_index[i] < active_size_i[i])
                    G[y_index[i]] = 0;

                SG_SNOTIMPLEMENTED;
                /* FIXME
                feature_node *xi = prob->x[i];
                while(xi->index!= -1)
                {
                    double *w_i = &w[(xi->index-1)*nr_class];
                    for(m=0;m<active_size_i[i];m++)
                        G[m] += w_i[alpha_index_i[m]]*(xi->value);
                    xi++;
                }
                */

                double minG = CMath::INFTY;
                double maxG = -CMath::INFTY;
                for(m=0;m<active_size_i[i];m++)
                {
                    if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
                        minG = G[m];
                    if(G[m] > maxG)
                        maxG = G[m];
                }
                if(y_index[i] < active_size_i[i])
                    if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
                        minG = G[y_index[i]];

                for(m=0;m<active_size_i[i];m++)
                {
                    if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
                    {
                        active_size_i[i]--;
                        while(active_size_i[i]>m)
                        {
                            if(!be_shrunk(i, active_size_i[i], y_index[i],
                                    alpha_i[alpha_index_i[active_size_i[i]]], minG))
                            {
                                CMath::swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
                                CMath::swap(G[m], G[active_size_i[i]]);
                                if(y_index[i] == active_size_i[i])
                                    y_index[i] = m;
                                else if(y_index[i] == m)
                                    y_index[i] = active_size_i[i];
                                break;
                            }
                            active_size_i[i]--;
                        }
                    }
                }

                if(active_size_i[i] <= 1)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }

                if(maxG-minG <= 1e-12)
                    continue;
                else
                    stopping = CMath::max(maxG - minG, stopping);

                for(m=0;m<active_size_i[i];m++)
                    B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]];

                solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
                int nz_d = 0;
                for(m=0;m<active_size_i[i];m++)
                {
                    double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
                    alpha_i[alpha_index_i[m]] = alpha_new[m];
                    if(fabs(d) >= 1e-12)
                    {
                        d_ind[nz_d] = alpha_index_i[m];
                        d_val[nz_d] = d;
                        nz_d++;
                    }
                }

                /* FIXME
                xi = prob->x[i];
                while(xi->index != -1)
                {
                    double *w_i = &w[(xi->index-1)*nr_class];
                    for(m=0;m<nz_d;m++)
                        w_i[d_ind[m]] += d_val[m]*xi->value;
                    xi++;
                }*/
            }
        }

        iter++;
        if(iter % 10 == 0)
        {
            SG_SINFO(".");
        }

        if(stopping < eps_shrink)
        {
            if(stopping < eps && start_from_all == true)
                break;
            else
            {
                active_size = l;
                for(i=0;i<l;i++)
                    active_size_i[i] = nr_class;
                SG_SINFO("*");
                eps_shrink = CMath::max(eps_shrink/2, eps);
                start_from_all = true;
            }
        }
        else
            start_from_all = false;
    }

    SG_SINFO("\noptimization finished, #iter = %d\n", iter);
    if (iter >= max_iter)
        SG_SINFO("Warning: reaching max number of iterations\n");
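    /* The value reported below is the dual objective from the header comment,
     * v = 0.5*sum_m ||w_m||^2 + sum_i sum_{m != y_i} alpha^m_i: all alpha are
     * summed and the y_i components subtracted back out, while nSV counts the
     * nonzero dual coordinates. */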
    // calculate objective value
    double v = 0;
    int nSV = 0;
    for(i=0;i<w_size*nr_class;i++)
        v += w[i]*w[i];
    v = 0.5*v;
    for(i=0;i<l*nr_class;i++)
    {
        v += alpha[i];
        if(fabs(alpha[i]) > 0)
            nSV++;
    }
    for(i=0;i<l;i++)
        v -= alpha[i*nr_class+prob->y[i]];
    SG_SINFO("Objective value = %lf\n", v);
    SG_SINFO("nSV = %d\n", nSV);

    delete [] alpha;
    delete [] alpha_new;
    delete [] index;
    delete [] QD;
    delete [] d_ind;
    delete [] d_val;
    delete [] alpha_index;
    delete [] y_index;
    delete [] active_size_i;
}

//
// Interface functions
//

void destroy_model(struct model *model_)
{
    if(model_->w != NULL)
        free(model_->w);
    if(model_->label != NULL)
        free(model_->label);
    free(model_);
}

void destroy_param(parameter* param)
{
    if(param->weight_label != NULL)
        free(param->weight_label);
    if(param->weight != NULL)
        free(param->weight);
}
#endif //HAVE_LAPACK
#endif // DOXYGEN_SHOULD_SKIP_THIS
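/* A minimal sketch of how the function objects above are consumed elsewhere in
 * Shogun (hypothetical driver code; CLibLinear is the real entry point, and
 * the exact CTron constructor/tron() signatures may differ in this version):
 *
 *   problem prob;               // filled by the caller: x, y, l, n, use_bias
 *   double Cp = 1.0, Cn = 1.0;  // per-class regularization constants
 *   double eps = 0.01;          // stopping tolerance
 *
 *   l2r_lr_fun fun_obj(&prob, Cp, Cn);
 *   CTron tron_obj(&fun_obj, eps);
 *   double* w = new double[prob.n];
 *   tron_obj.tron(w);           // trust-region Newton iterations
 *   delete[] w;
 */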