00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013 #include "lib/Mathematics.h"
00014 #include "lib/Signal.h"
00015 #include "lib/Time.h"
00016 #include "classifier/LinearClassifier.h"
00017 #include "classifier/svm/SubGradientSVM.h"
00018 #include "classifier/svm/qpbsvmlib.h"
00019 #include "features/DotFeatures.h"
00020 #include "features/Labels.h"
00021
00022 #undef DEBUG_SUBGRADIENTSVM
00023
00024 using namespace shogun;
00025
// Default constructor: C1=C2=1, epsilon=1e-5, qpsize=42, no bias term.
// Feature/label pointers must be set before train() is called.
CSubGradientSVM::CSubGradientSVM()
: CLinearClassifier(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
{
}
00031
// Convenience constructor: sets regularization constant C (used for both
// C1 and C2) and attaches training features and labels.
CSubGradientSVM::CSubGradientSVM(
float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearClassifier(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
{
	set_features(traindat);
	set_labels(trainlab);
}
00040
00041
// Destructor: training buffers are released by cleanup() at the end of
// train(), so nothing is owned here.
CSubGradientSVM::~CSubGradientSVM()
{
}
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
// Partition the training vectors by their margin proj[i] = y_i*(w.x_i + b):
//   active[i]=1 : margin violator (proj < 1 - eps), enters the gradient sum
//   active[i]=2 : "at bound" (|proj - 1| <= eps), handled by the small QP
//   active[i]=0 : satisfied with slack, ignored
// Also auto-tunes autoselected_epsilon so that roughly qpsize vectors land
// in the bound set. Returns the number of vectors whose state changed
// (0 signals convergence at the current precision).
int32_t CSubGradientSVM::find_active(
	int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
{
	delta_bound=0;
	delta_active=0;
	num_active=0;
	num_bound=0;

	// first pass with the current autoselected_epsilon
	for (int32_t i=0; i<num_vec; i++)
	{
		active[i]=0;

		// margin violator
		if (proj[i] < 1-autoselected_epsilon)
		{
			idx_active[num_active++]=i;
			active[i]=1;
		}

		// numerically on the margin; overrides state 1 when both tests hold
		if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
		{
			idx_bound[num_bound++]=i;
			active[i]=2;
		}

		if (active[i]!=old_active[i])
			delta_active++;

		// NOTE(review): this counts vectors that REMAINED at bound, not ones
		// that changed bound state -- presumably intentional, confirm upstream
		if (active[i]==2 && old_active[i]==2)
			delta_bound++;
	}

	// nothing changed and the target precision is reached -> converged
	if (delta_active==0 && work_epsilon<=epsilon)
		return 0;
	else if (delta_active==0)
	{
		// nothing changed but precision not yet reached: tighten the working
		// epsilon (clamped to [epsilon, autoselected_epsilon]) and redo below
		work_epsilon=CMath::min(work_epsilon/2, autoselected_epsilon);
		work_epsilon=CMath::max(work_epsilon, epsilon);
		num_bound=qpsize;
	}

	delta_bound=0;
	delta_active=0;
	num_active=0;
	num_bound=0;

	// sort vectors by distance to the margin to pick an epsilon that puts
	// about qpsize vectors into the bound set
	for (int32_t i=0; i<num_vec; i++)
	{
		tmp_proj[i]=CMath::abs(proj[i]-1);
		tmp_proj_idx[i]=i;
	}

	CMath::qsort_index(tmp_proj, tmp_proj_idx, num_vec);

	// distance of the (qpsize+1)-closest vector becomes the new tube width
	autoselected_epsilon=tmp_proj[CMath::min(qpsize,num_vec-1)];

#ifdef DEBUG_SUBGRADIENTSVM

#endif

	// never loosen beyond the current working precision
	if (autoselected_epsilon>work_epsilon)
		autoselected_epsilon=work_epsilon;

	if (autoselected_epsilon<epsilon)
	{
		autoselected_epsilon=epsilon;

		// count vectors inside the final epsilon tube
		int32_t i=0;
		while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
			i++;

		// NOTE(review): autoselected_epsilon was just set to epsilon above, so
		// the second conjunct appears always false and this branch unreachable
		// -- confirm against the upstream implementation
		if (i>=qpsize_max && autoselected_epsilon>epsilon)
		{
			SG_INFO("qpsize limit (%d) reached\n", qpsize_max);
			int32_t num_in_qp=i;
			// shrink epsilon until the bound set fits into qpsize_max
			while (--i>=0 && num_in_qp>=qpsize_max)
			{
				if (tmp_proj[i] < autoselected_epsilon)
				{
					autoselected_epsilon=tmp_proj[i];
					num_in_qp--;
				}
			}
		}
	}

	// second pass: re-partition with the adjusted epsilon (same logic as above)
	for (int32_t i=0; i<num_vec; i++)
	{
		active[i]=0;

		// margin violator
		if (proj[i] < 1-autoselected_epsilon)
		{
			idx_active[num_active++]=i;
			active[i]=1;
		}

		// on the margin
		if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
		{
			idx_bound[num_bound++]=i;
			active[i]=2;
		}

		if (active[i]!=old_active[i])
			delta_active++;

		if (active[i]==2 && old_active[i]==2)
			delta_bound++;
	}

	return delta_active;
}
00199
00200
00201 void CSubGradientSVM::update_active(int32_t num_feat, int32_t num_vec)
00202 {
00203 for (int32_t i=0; i<num_vec; i++)
00204 {
00205 if (active[i]==1 && old_active[i]!=1)
00206 {
00207 features->add_to_dense_vec(C1*get_label(i), i, sum_CXy_active, num_feat);
00208 if (use_bias)
00209 sum_Cy_active+=C1*get_label(i);
00210 }
00211 else if (old_active[i]==1 && active[i]!=1)
00212 {
00213 features->add_to_dense_vec(-C1*get_label(i), i, sum_CXy_active, num_feat);
00214 if (use_bias)
00215 sum_Cy_active-=C1*get_label(i);
00216 }
00217 }
00218
00219 CMath::swap(active,old_active);
00220 }
00221
// Exact line search along -grad_w: the objective restricted to the search
// direction is piecewise quadratic (one hinge point per training vector).
// Walks the sorted hinge points until the directional derivative turns
// positive and interpolates the zero crossing. Returns the step size alpha.
float64_t CSubGradientSVM::line_search(int32_t num_feat, int32_t num_vec)
{
	float64_t sum_B = 0;
	// quadratic term 0.5*||g||^2 and linear term -w.g of the regularizer
	float64_t A_zero = 0.5*CMath::dot(grad_w, grad_w, num_feat);
	float64_t B_zero = -CMath::dot(w, grad_w, num_feat);

	int32_t num_hinge=0;

	for (int32_t i=0; i<num_vec; i++)
	{
		// p = rate of change of proj[i] along the search direction
		float64_t p=get_label(i)*(features->dense_dot(i, grad_w, num_feat)+grad_b);
		grad_proj[i]=p;
		if (p!=0)
		{
			// step at which vector i crosses the margin
			hinge_point[num_hinge]=(proj[i]-1)/p;
			hinge_idx[num_hinge]=i;
			num_hinge++;

			if (p<0)
				sum_B+=p;
		}
	}
	sum_B*=C1;

	CMath::qsort_index(hinge_point, hinge_idx, num_hinge);

	// NOTE(review): if num_hinge==0 (all p==0) this reads an element that was
	// not written this call -- presumably cannot happen in practice; confirm
	float64_t alpha = hinge_point[0];
	float64_t grad_val = 2*A_zero*alpha + B_zero + sum_B;

	float64_t old_grad_val = grad_val;
	float64_t old_alpha = alpha;

	// advance through hinge points while the derivative stays negative
	for (int32_t i=1; i < num_hinge && grad_val < 0; i++)
	{
		alpha = hinge_point[i];
		grad_val = 2*A_zero*alpha + B_zero + sum_B;

		if (grad_val > 0)
		{
			// sign change between consecutive hinge points: linear
			// interpolation gives the minimizer of the quadratic piece
			ASSERT(old_grad_val-grad_val != 0);
			float64_t gamma = -grad_val/(old_grad_val-grad_val);
			alpha = old_alpha*gamma + (1-gamma)*alpha;
		}
		else
		{
			old_grad_val = grad_val;
			old_alpha = alpha;

			// crossing this hinge point changes the slope of the loss term
			sum_B = sum_B + CMath::abs(C1*grad_proj[hinge_idx[i]]);
			grad_val = 2*A_zero*alpha + B_zero + sum_B;
		}
	}

	return alpha;
}
00287
// Compute the minimum-norm subgradient of the objective over the current
// active/bound sets. When vectors sit exactly on the margin (num_bound>0)
// the subgradient is not unique; the convex combination coefficients beta
// are found by solving a small box-constrained QP over the bound set.
// Writes the result into grad_w/grad_b and returns the directional
// derivative along it.
float64_t CSubGradientSVM::compute_min_subgradient(
	int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
{
	float64_t dir_deriv=0;

	if (num_bound > 0)
	{
		CTime t2;
		// v = w - sum_CXy_active (gradient contribution of the active set)
		CMath::add(v, 1.0, w, -1.0, sum_CXy_active, num_feat);

		if (num_bound>=qpsize_max && num_it_noimprovement!=10)
		{
			// QP too large: fall back to random convex coefficients
			for (int32_t i=0; i<num_bound; i++)
				beta[i]=CMath::random(0.0,1.0);
		}
		else
		{
			memset(beta, 0, sizeof(float64_t)*num_bound);

			// bias contributes a constant 1 to each dot product when enabled
			float64_t bias_const=0;

			if (use_bias)
				bias_const=1;

			// build the symmetric QP matrix Z and linear term Zv over the
			// bound set: Z_ij = 2*C1^2*y_i*y_j*(x_i.x_j + bias_const)
			for (int32_t i=0; i<num_bound; i++)
			{
				for (int32_t j=i; j<num_bound; j++)
				{
					Z[i*num_bound+j]= 2.0*C1*C1*get_label(idx_bound[i])*get_label(idx_bound[j])*
						(features->dot(idx_bound[i], idx_bound[j]) + bias_const);

					Z[j*num_bound+i]=Z[i*num_bound+j];

				}

				Zv[i]=-2.0*C1*get_label(idx_bound[i])*
					(features->dense_dot(idx_bound[i], v, num_feat)-sum_Cy_active);
			}

			t2.stop();
			t2.time_diff_sec(true);

			// solve min_beta 0.5*beta'Z*beta + Zv'beta s.t. 0<=beta<=1
			CTime t;
			CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0);

#ifdef USE_CPLEX
			solver.set_solver(QPB_SOLVER_CPLEX);
#else
			solver.set_solver(QPB_SOLVER_SCAS);
#endif

			solver.solve_qp(beta, num_bound);

			t.stop();
			tim+=t.time_diff_sec(true); // accumulate pure solver time
		}

		// assemble the subgradient: active-set part plus beta-weighted
		// contribution of the bound vectors
		CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
		grad_b = -sum_Cy_active;

		for (int32_t i=0; i<num_bound; i++)
		{
			features->add_to_dense_vec(-C1*beta[i]*get_label(idx_bound[i]), idx_bound[i], grad_w, num_feat);
			if (use_bias)
				grad_b -= C1 * get_label(idx_bound[i])*beta[i];
		}

		dir_deriv = CMath::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active;
		for (int32_t i=0; i<num_bound; i++)
		{
			// bound vectors add only the positive part of their derivative
			float64_t val= C1*get_label(idx_bound[i])*(features->dense_dot(idx_bound[i], grad_w, num_feat)+grad_b);
			dir_deriv += CMath::max(0.0, val);
		}
	}
	else
	{
		// no vectors on the margin: the (sub)gradient is unique
		CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
		grad_b = -sum_Cy_active;

		dir_deriv = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
	}

	return dir_deriv;
}
00385
00386 float64_t CSubGradientSVM::compute_objective(int32_t num_feat, int32_t num_vec)
00387 {
00388 float64_t result= 0.5 * CMath::dot(w,w, num_feat);
00389
00390 for (int32_t i=0; i<num_vec; i++)
00391 {
00392 if (proj[i]<1.0)
00393 result += C1 * (1.0-proj[i]);
00394 }
00395
00396 return result;
00397 }
00398
00399 void CSubGradientSVM::compute_projection(int32_t num_feat, int32_t num_vec)
00400 {
00401 for (int32_t i=0; i<num_vec; i++)
00402 proj[i]=get_label(i)*(features->dense_dot(i, w, num_feat)+bias);
00403 }
00404
// Cheap incremental margin update after a step of size alpha along -grad_w:
// proj[i] -= alpha * grad_proj[i] (grad_proj was filled by line_search()).
void CSubGradientSVM::update_projection(float64_t alpha, int32_t num_vec)
{
	CMath::vec1_plus_scalar_times_vec2(proj,-alpha, grad_proj, num_vec);
}
00409
00410 void CSubGradientSVM::init(int32_t num_vec, int32_t num_feat)
00411 {
00412
00413 delete[] w;
00414 w=new float64_t[num_feat];
00415 w_dim=num_feat;
00416 memset(w,0,sizeof(float64_t)*num_feat);
00417
00418 bias=0;
00419 num_it_noimprovement=0;
00420 grad_b=0;
00421 qpsize_limit=5000;
00422
00423 grad_w=new float64_t[num_feat];
00424 memset(grad_w,0,sizeof(float64_t)*num_feat);
00425
00426 sum_CXy_active=new float64_t[num_feat];
00427 memset(sum_CXy_active,0,sizeof(float64_t)*num_feat);
00428
00429 v=new float64_t[num_feat];
00430 memset(v,0,sizeof(float64_t)*num_feat);
00431
00432 old_v=new float64_t[num_feat];
00433 memset(old_v,0,sizeof(float64_t)*num_feat);
00434
00435 sum_Cy_active=0;
00436
00437 proj= new float64_t[num_vec];
00438 memset(proj,0,sizeof(float64_t)*num_vec);
00439
00440 tmp_proj=new float64_t[num_vec];
00441 memset(proj,0,sizeof(float64_t)*num_vec);
00442
00443 tmp_proj_idx= new int32_t[num_vec];
00444 memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec);
00445
00446 grad_proj= new float64_t[num_vec];
00447 memset(grad_proj,0,sizeof(float64_t)*num_vec);
00448
00449 hinge_point= new float64_t[num_vec];
00450 memset(hinge_point,0,sizeof(float64_t)*num_vec);
00451
00452 hinge_idx= new int32_t[num_vec];
00453 memset(hinge_idx,0,sizeof(int32_t)*num_vec);
00454
00455 active=new uint8_t[num_vec];
00456 memset(active,0,sizeof(uint8_t)*num_vec);
00457
00458 old_active=new uint8_t[num_vec];
00459 memset(old_active,0,sizeof(uint8_t)*num_vec);
00460
00461 idx_bound=new int32_t[num_vec];
00462 memset(idx_bound,0,sizeof(int32_t)*num_vec);
00463
00464 idx_active=new int32_t[num_vec];
00465 memset(idx_active,0,sizeof(int32_t)*num_vec);
00466
00467 Z=new float64_t[qpsize_limit*qpsize_limit];
00468 memset(Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
00469
00470 Zv=new float64_t[qpsize_limit];
00471 memset(Zv,0,sizeof(float64_t)*qpsize_limit);
00472
00473 beta=new float64_t[qpsize_limit];
00474 memset(beta,0,sizeof(float64_t)*qpsize_limit);
00475
00476 old_Z=new float64_t[qpsize_limit*qpsize_limit];
00477 memset(old_Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
00478
00479 old_Zv=new float64_t[qpsize_limit];
00480 memset(old_Zv,0,sizeof(float64_t)*qpsize_limit);
00481
00482 old_beta=new float64_t[qpsize_limit];
00483 memset(old_beta,0,sizeof(float64_t)*qpsize_limit);
00484
00485 }
00486
00487 void CSubGradientSVM::cleanup()
00488 {
00489 delete[] hinge_idx;
00490 delete[] hinge_point;
00491 delete[] grad_proj;
00492 delete[] proj;
00493 delete[] tmp_proj;
00494 delete[] tmp_proj_idx;
00495 delete[] active;
00496 delete[] old_active;
00497 delete[] idx_bound;
00498 delete[] idx_active;
00499 delete[] sum_CXy_active;
00500 delete[] grad_w;
00501 delete[] v;
00502 delete[] Z;
00503 delete[] Zv;
00504 delete[] beta;
00505 delete[] old_v;
00506 delete[] old_Z;
00507 delete[] old_Zv;
00508 delete[] old_beta;
00509
00510 hinge_idx=NULL;
00511 proj=NULL;
00512 active=NULL;
00513 old_active=NULL;
00514 idx_bound=NULL;
00515 idx_active=NULL;
00516 sum_CXy_active=NULL;
00517 grad_w=NULL;
00518 v=NULL;
00519 Z=NULL;
00520 Zv=NULL;
00521 beta=NULL;
00522 }
00523
// Train the linear SVM by projected subgradient descent:
// repeatedly (1) find the active/bound sets, (2) compute the minimum-norm
// subgradient (small QP over the bound set), (3) line-search the step size,
// (4) update w, bias and the cached margins, until the working precision
// reaches epsilon with a stable active set, or max training time elapses.
bool CSubGradientSVM::train(CFeatures* data)
{
	tim=0;
	SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
	ASSERT(labels);

	// optionally switch to the supplied feature set
	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n");
		set_features((CDotFeatures*) data);
	}
	ASSERT(get_features());

	int32_t num_iterations=0;
	int32_t num_train_labels=labels->get_num_labels();
	int32_t num_feat=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	ASSERT(num_vec==num_train_labels);

	// allocate and zero all working buffers
	init(num_vec, num_feat);

	int32_t num_active=0;
	int32_t num_bound=0;
	float64_t alpha=0;
	float64_t dir_deriv=0;
	float64_t obj=0;
	delta_active=num_vec;
	last_it_noimprovement=-1;

	// start with a loose precision and tighten it as the active set settles
	work_epsilon=0.99;
	autoselected_epsilon=work_epsilon;

	// initial margins for w=0, bias=0
	compute_projection(num_feat, num_vec);

	CTime time;
	float64_t loop_time=0;
	while (!(CSignal::cancel_computations()))
	{
		CTime t;
		delta_active=find_active(num_feat, num_vec, num_active, num_bound);

		// keep sum_CXy_active/sum_Cy_active in sync with the new active set
		update_active(num_feat, num_vec);

#ifdef DEBUG_SUBGRADIENTSVM
		SG_PRINT("==================================================\niteration: %d ", num_iterations);
		obj=compute_objective(num_feat, num_vec);
		SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
				obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
#else
		SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
#endif

		// descent direction (negative subgradient) into grad_w/grad_b
		dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);

		// exact line search along -grad_w
		alpha=line_search(num_feat, num_vec);

		// optimality check (skipped while the QP fallback is in use)
		if (num_it_noimprovement==10 || num_bound<qpsize_max)
		{
			float64_t norm_grad=CMath::dot(grad_w, grad_w, num_feat) +
				grad_b*grad_b;

#ifdef DEBUG_SUBGRADIENTSVM
			SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
					"work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
					work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
#else
			SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
#endif

			// converged: target precision, stable active set, negligible step
			if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
				break;
			else
				num_it_noimprovement=0;
		}

		// track consecutive iterations without progress
		if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
		{
			if (last_it_noimprovement==num_iterations-1)
			{
				SG_PRINT("no improvement...\n");
				num_it_noimprovement++;
			}
			else
				num_it_noimprovement=0;

			last_it_noimprovement=num_iterations;
		}

		// take the step: w -= alpha*grad_w, bias -= alpha*grad_b
		CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat);
		bias-=alpha*grad_b;

		// incremental margin update (avoids a full recomputation)
		update_projection(alpha, num_vec);

		t.stop();
		loop_time=t.time_diff_sec();
		num_iterations++;

		// honor the user-supplied wall-clock training budget
		if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
			break;
	}

	SG_INFO("converged after %d iterations\n", num_iterations);

	obj=compute_objective(num_feat, num_vec);
	SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n",
			obj, alpha, dir_deriv, num_bound, num_active);

#ifdef DEBUG_SUBGRADIENTSVM
	CMath::display_vector(w, w_dim, "w");
	SG_PRINT("bias: %f\n", bias);
#endif
	SG_INFO("solver time:%f s\n", tim);

	// release all working buffers (w itself is kept by the classifier)
	cleanup();

	return true;
}