SHOGUN v0.9.0
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include "lib/config.h"

#ifdef HAVE_LAPACK
#include "lib/io.h"
#include "lib/Signal.h"
#include "lib/Time.h"
#include "base/Parameter.h"
#include "classifier/svm/LibLinear.h"
#include "classifier/svm/SVM_linear.h"
#include "classifier/svm/Tron.h"
#include "features/DotFeatures.h"

using namespace shogun;

CLibLinear::CLibLinear(void)
: CLinearClassifier()
{
    init();
}

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearClassifier()
{
    init();
    liblinear_solver_type=l;
}

CLibLinear::CLibLinear(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearClassifier()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;
    epsilon=1e-5;

    set_features(traindat);
    set_labels(trainlab);
    init_linear_term();
}

void CLibLinear::init()
{
    liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    m_linear_term=NULL;
    m_linear_term_len=0;

    m_parameters->add(&C1, "C1", "C Cost constant 1.");
    m_parameters->add(&C2, "C2", "C Cost constant 2.");
    m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
    m_parameters->add(&epsilon, "epsilon", "Convergence precision.");
    m_parameters->add(&max_iterations, "max_iterations", "Max number of iterations.");
    m_parameters->add_vector(&m_linear_term, &m_linear_term_len, "linear_term", "Linear Term");
    m_parameters->add((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type", "Type of LibLinear solver.");
}

CLibLinear::~CLibLinear()
{
    delete[] m_linear_term;
}

bool CLibLinear::train(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        set_features((CDotFeatures*) data);
    }
    ASSERT(features);
    ASSERT(labels->is_two_class_labeling());

    int32_t num_train_labels=labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (liblinear_solver_type == L1R_L2LOSS_SVC ||
            (liblinear_solver_type == L1R_LR) )
    {
        if (num_feat!=num_train_labels)
        {
            SG_ERROR("L1 methods require the data to be transposed: "
                    "number of features %d does not match number of "
                    "training labels %d\n",
                    num_feat, num_train_labels);
        }
        CMath::swap(num_feat, num_vec);
    }
    else
    {
        if (num_vec!=num_train_labels)
        {
            SG_ERROR("number of vectors %d does not match "
                    "number of training labels %d\n",
                    num_vec, num_train_labels);
        }
    }
    delete[] w;
    if (use_bias)
        w=new float64_t[num_feat+1];
    else
        w=new float64_t[num_feat+0];
    w_dim=num_feat;

    problem prob;
    if (use_bias)
    {
        prob.n=w_dim+1;
        memset(w, 0, sizeof(float64_t)*(w_dim+1));
    }
    else
    {
        prob.n=w_dim;
        memset(w, 0, sizeof(float64_t)*(w_dim+0));
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=new int[prob.l];
    prob.use_bias=use_bias;

    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=labels->get_int_label(i);

    int pos = 0;
    int neg = 0;
    for(int i=0;i<prob.l;i++)
    {
        if(prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n);

    function *fun_obj=NULL;
    double Cp=C1;
    double Cn=C2;
    switch (liblinear_solver_type)
    {
        case L2R_LR:
        {
            fun_obj=new l2r_lr_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            SG_DEBUG("starting L2R_LR training via tron\n");
            tron_obj.tron(w, max_train_time);
            SG_DEBUG("done with tron\n");
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC:
        {
            fun_obj=new l2r_l2_svc_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            tron_obj.tron(w, max_train_time);
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
        {
            //ASSUME FEATURES ARE TRANSPOSED ALREADY
            solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L1R_LR:
        {
            //ASSUME FEATURES ARE TRANSPOSED ALREADY
            solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case MCSVM_CS:
        {
            SG_NOTIMPLEMENTED;
            /* TODO...
            model_->w=Malloc(double, n*nr_class);
            for(i=0;i<nr_class;i++)
                for(j=start[i];j<start[i]+count[i];j++)
                    sub_prob.y[j] = i;
            Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
            Solver.Solve(model_->w);
            */
            break;
        }
        default:
            SG_ERROR("Error: unknown solver_type\n");
            break;
    }

    if (use_bias)
        set_bias(w[w_dim]);
    else
        set_bias(0);

    delete[] prob.y;

    return true;
}
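
/*
 * A minimal usage sketch for train() above, kept to members that appear in
 * this file (the third constructor, set_features/set_labels, train). The
 * feature and label objects and the final classify() call are assumptions
 * about the surrounding Shogun API; check the headers for exact signatures.
 *
 *     CDotFeatures* feats=...;   // training examples
 *     CLabels* labels=...;       // +1/-1 labels, one per example
 *
 *     // C1=C2=1.0, bias enabled, epsilon=1e-5 (see the constructor above)
 *     CLibLinear* svm=new CLibLinear(1.0, feats, labels);
 *     svm->train();              // runs the solver selected by liblinear_solver_type
 *
 *     // the learned w and bias now live on the CLinearClassifier base class;
 *     // predictions are typically obtained via classify() (assumed here).
 *
 * Note that for the L1R_L2LOSS_SVC and L1R_LR solvers the features must be
 * passed in transposed form, as checked at the top of train().
 */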

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//      upper_bound_i = Cp if y_i = 1
//      upper_bound_i = Cn if y_i = -1
//      D_ii = 0
// In L2-SVM case:
//      upper_bound_i = INF
//      D_ii = 1/(2*Cp) if y_i = 1
//      D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

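// Note on the GETI macro and the three-element tables used below: labels are
// stored as y[i] in {-1,+1}, so GETI(i)=y[i]+1 maps a negative example to
// index 0 and a positive example to index 2; index 1 is unused padding. The
// diag[], upper_bound[] and C[] arrays in the solvers are therefore just
// per-class constants (Cn for negatives, Cp for positives) looked up per
// instance. Replacing GETI(i) with (i) and filling an l-element array would
// give per-instance weights, as the comment above suggests.
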
void CLibLinear::solve_l2r_l1l2_svc(
            const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = new double[l];
    int *index = new int[l];
    double *alpha = new double[l];
    int32_t *y = new int32_t[l];
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if(st == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    for(i=0; i<w_size; i++)
        w[i] = 0;

    for(i=0; i<l; i++)
    {
        alpha[i] = 0;
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
        QD[i] = diag[GETI(i)];

        QD[i] += prob->x->dot(i, prob->x, i);
        index[i] = i;
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }

        for (s=0;s<active_size;s++)
        {
            i = index[s];
            int32_t yi = y[i];

            G = prob->x->dense_dot(i, w, n);
            if (prob->use_bias)
                G+=w[n];

            if (m_linear_term)
                G = G*yi + m_linear_term[i];
            else
                G = G*yi-1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if(fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;

                prob->x->add_to_dense_vec(d, i, w, n);

                if (prob->use_bias)
                    w[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        if(gap <= eps)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n",iter);
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
                "(also see liblinear FAQ)\n\n");
    }

    // calculate objective value

    double v = 0;
    int nSV = 0;
    for(i=0; i<w_size; i++)
        v += w[i]*w[i];
    for(i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if(alpha[i] > 0)
            ++nSV;
    }
    SG_INFO("Objective value = %lf\n",v/2);
    SG_INFO("nSV = %d\n",nSV);

    delete [] QD;
    delete [] alpha;
    delete [] y;
    delete [] index;
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
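
// Note: as flagged in train() above, the two L1-regularized solvers below
// iterate over features (coordinates of w) rather than over examples, using
// x->get_feature_iterator(j) to walk the j-th coordinate across all training
// vectors. The caller is therefore expected to pass the data "transposed",
// i.e. prob_col->x must yield one "vector" per dimension of the original
// feature space, which is why train() checks that the number of features
// equals the number of training labels before swapping the two counts.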

void CLibLinear::solve_l1r_l2_svc(
    problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double d_old, d_diff;
    double loss_old=0, loss_new;
    double appxcond, cond;

    int *index = new int[w_size];
    int32_t *y = new int32_t[l];
    double *b = new double[l]; // b = 1-ywTx
    double *xj_sq = new double[w_size];

    CDotFeatures* x = (CDotFeatures*) prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        b[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    for(j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);

                while (x->get_next_feature(ind, val, iterator))
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w[j])
                d = -Gp/H;
            else if(Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if(fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if(appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];

                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if(num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }

                cond = cond + loss_new - loss_old;
                if(cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute b[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    b[i] = 1;

                for(int i=0; i<n; i++)
                {
                    if(w[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w[n]*y[ind];
                }
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
    {
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    }
    for(j=0; j<l; j++)
        if(b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    delete [] index;
    delete [] y;
    delete [] b;
    delete [] xj_sq;
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

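// Note on the solver below: rather than recomputing w^T x_i from scratch for
// every candidate step, it keeps exp_wTx[i] = exp(w^T x_i) for all training
// examples and updates these values multiplicatively
// (exp_wTx[ind] *= exp(d*val)) whenever coordinate w[j] changes by d. The
// logistic loss and its first and second derivatives along coordinate j are
// then accumulated from these cached values in a single pass over the
// nonzeros of feature j.
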
void CLibLinear::solve_l1r_lr(
    const problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = new int[w_size];
    int32_t *y = new int32_t[l];
    double *exp_wTx = new double[l];
    double *exp_wTx_new = new double[l];
    double *xj_max = new double[w_size];
    double *C_sum = new double[w_size];
    double *xjneg_sum = new double[w_size];
    double *xjpos_sum = new double[w_size];

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int ind;
    double val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
    for(j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w[j])
                d = -Gp/H;
            else if(Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if(fabs(d) < 1.0e-12)
                continue;

            d = CMath::min(CMath::max(d,-10.0),10.0);

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                if(x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if(CMath::min(appxcond1,appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp(d);
                        }
                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    for (ind=0; ind<l; ind++)
                    {
                        double exp_dx = exp(d);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {
                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if(cond <= 0)
                {
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute exp_wTx[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for(int i=0; i<w_size; i++)
                {
                    if(w[i]==0) continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for(int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    for(j=0; j<l; j++)
        if(y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    delete [] index;
    delete [] y;
    delete [] exp_wTx;
    delete [] exp_wTx_new;
    delete [] xj_max;
    delete [] C_sum;
    delete [] xjneg_sum;
    delete [] xjpos_sum;
}
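
// Note on the line search in solve_l1r_lr above: when no feature value is
// negative (x_min >= 0), appxcond1/appxcond2 are cheap bounds on the
// sufficient-decrease condition built from xj_max, C_sum and the cached sums;
// if either bound is already non-positive the step d is accepted and exp_wTx
// is updated without evaluating the exact change in the logistic loss.
// Otherwise the exact condition is evaluated and d is halved until it holds
// or max_num_linesearch steps are exhausted.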

void CLibLinear::get_linear_term(float64_t** linear_term, int32_t* len)
{
    if (!m_linear_term_len || !m_linear_term)
        SG_ERROR("Please assign linear term first!\n");

    *linear_term=(float64_t*) malloc(sizeof(float64_t)*m_linear_term_len);
    *len=m_linear_term_len;

    for (int32_t i=0; i<m_linear_term_len; i++)
        (*linear_term)[i]=m_linear_term[i];
}

void CLibLinear::init_linear_term()
{
    if (!labels)
        SG_ERROR("Please assign labels first!\n");

    delete[] m_linear_term;

    m_linear_term_len=labels->get_num_labels();
    m_linear_term = new float64_t[m_linear_term_len];
    CMath::fill_vector(m_linear_term, m_linear_term_len, -1.0);
}

#endif //HAVE_LAPACK
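
/*
 * Note on the linear term: init_linear_term() fills m_linear_term with -1.0,
 * which reproduces the standard SVM dual objective (the "- e^T alpha" part),
 * since solve_l2r_l1l2_svc() computes the gradient for example i as
 *
 *     G = G*yi + m_linear_term[i]   // if a linear term is set
 *     G = G*yi - 1                  // otherwise
 *
 * Supplying a custom vector p therefore changes the dual objective to
 * 0.5 \alpha^T (Q + D) \alpha + p^T \alpha. A minimal sketch, assuming a
 * corresponding setter is declared in LibLinear.h (the name set_linear_term
 * used here is an assumption; verify against the header):
 *
 *     float64_t* p=new float64_t[num_train_labels];
 *     CMath::fill_vector(p, num_train_labels, -1.0); // default behaviour
 *     svm->set_linear_term(p, num_train_labels);     // hypothetical setter
 *     svm->train();
 *     delete[] p;
 */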