SHOGUN v0.9.0
|
00001 /*----------------------------------------------------------------------- 00002 * 00003 * This program is free software; you can redistribute it and/or modify 00004 * it under the terms of the GNU General Public License as published by 00005 * the Free Software Foundation; either version 3 of the License, or 00006 * (at your option) any later version. 00007 * 00008 * Library of solvers for Generalized Nearest Point Problem (GNPP). 00009 * 00010 * Written (W) 1999-2008 Vojtech Franc, xfrancv@cmp.felk.cvut.cz 00011 * Copyright (C) 1999-2008 Center for Machine Perception, CTU FEL Prague 00012 * 00013 * 00014 gmnplib.c: Library of solvers for Generalized Minimal Norm Problem (GMNP). 00015 00016 Generalized Minimal Norm Problem to solve is 00017 00018 min 0.5*alpha'*H*alpha + c'*alpha 00019 00020 subject to sum(alpha) = 1, alpha(i) >= 0 00021 00022 H [dim x dim] is symmetric positive definite matrix. 00023 c [dim x 1] is an arbitrary vector. 00024 00025 The precision of the found solution is given by 00026 the parameters tmax, tolabs and tolrel which 00027 define the stopping conditions: 00028 00029 UB-LB <= tolabs -> exit_flag = 1 Abs. tolerance. 00030 UB-LB <= UB*tolrel -> exit_flag = 2 Relative tolerance. 00031 LB > th -> exit_flag = 3 Threshold on lower bound. 00032 t >= tmax -> exit_flag = 0 Number of iterations. 00033 00034 UB ... Upper bound on the optimal solution. 00035 LB ... Lower bound on the optimal solution. 00036 t ... Number of iterations. 00037 History ... Value of LB and UB wrt. number of iterations. 00038 00039 00040 The following algorithms are implemented: 00041 .............................................. 00042 00043 - GMNP solver based on improved MDM algorithm 1 (u fixed v optimized) 00044 exitflag = gmnp_imdm( &get_col, diag_H, vector_c, dim, 00045 tmax, tolabs, tolrel, th, &alpha, &t, &History, verb ); 00046 00047 For more info refer to V.Franc: Optimization Algorithms for Kernel 00048 Methods. Research report. CTU-CMP-2005-22. CTU FEL Prague. 2005. 00049 ftp://cmp.felk.cvut.cz/pub/cmp/articles/franc/Franc-PhD.pdf . 00050 00051 Modifications: 00052 09-sep-2005, VF 00053 24-jan-2005, VF 00054 26-nov-2004, VF 00055 25-nov-2004, VF 00056 21-nov-2004, VF 00057 20-nov-2004, VF 00058 31-may-2004, VF 00059 23-Jan-2004, VF 00060 00061 -------------------------------------------------------------------- */ 00062 00063 #include "classifier/svm/GMNPLib.h" 00064 #include "lib/Mathematics.h" 00065 00066 #include <string.h> 00067 #include <limits.h> 00068 00069 using namespace shogun; 00070 00071 #define HISTORY_BUF 1000000 00072 00073 #define MINUS_INF INT_MIN 00074 #define PLUS_INF INT_MAX 00075 00076 #define INDEX(ROW,COL,DIM) ((COL*DIM)+ROW) 00077 #define KDELTA(A,B) (A==B) 00078 #define KDELTA4(A1,A2,A3,A4) ((A1==A2)||(A1==A3)||(A1==A4)||(A2==A3)||(A2==A4)||(A3==A4)) 00079 00080 CGMNPLib::CGMNPLib(void) 00081 { 00082 SG_UNSTABLE("CGMNPLib::CGMNPLib(void)", "\n"); 00083 00084 diag_H = NULL; 00085 kernel_columns = NULL; 00086 cache_index = NULL; 00087 first_kernel_inx = 0; 00088 Cache_Size = 0; 00089 m_num_data = 0; 00090 m_reg_const = 0; 00091 m_vector_y = 0; 00092 m_kernel = NULL; 00093 00094 first_virt_inx = 0; 00095 memset(&virt_columns, 0, sizeof (virt_columns)); 00096 m_num_virt_data = 0; 00097 m_num_classes = 0; 00098 } 00099 00100 CGMNPLib::CGMNPLib( 00101 float64_t* vector_y, CKernel* kernel, int32_t num_data, 00102 int32_t num_virt_data, int32_t num_classes, float64_t reg_const) 00103 : CSGObject() 00104 { 00105 m_num_classes=num_classes; 00106 m_num_virt_data=num_virt_data; 00107 m_reg_const = reg_const; 00108 m_num_data = num_data; 00109 m_vector_y = vector_y; 00110 m_kernel = kernel; 00111 00112 Cache_Size = ((int64_t) kernel->get_cache_size())*1024*1024/(sizeof(float64_t)*num_data); 00113 Cache_Size = CMath::min(Cache_Size, (int64_t) num_data); 00114 00115 SG_INFO("using %d kernel cache lines\n", Cache_Size); 00116 ASSERT(Cache_Size>=2); 00117 00118 /* allocates memory for kernel cache */ 00119 kernel_columns = new float64_t*[Cache_Size]; 00120 cache_index = new float64_t[Cache_Size]; 00121 00122 for(int32_t i = 0; i < Cache_Size; i++ ) 00123 { 00124 kernel_columns[i] = new float64_t[num_data]; 00125 cache_index[i] = -2; 00126 } 00127 first_kernel_inx = 0; 00128 00129 00130 00131 for(int32_t i = 0; i < 3; i++ ) 00132 { 00133 virt_columns[i] = new float64_t[num_virt_data]; 00134 } 00135 first_virt_inx = 0; 00136 00137 diag_H = new float64_t[num_virt_data]; 00138 00139 for(int32_t i = 0; i < num_virt_data; i++ ) 00140 diag_H[i] = kernel_fce(i,i); 00141 } 00142 00143 CGMNPLib::~CGMNPLib() 00144 { 00145 for(int32_t i = 0; i < Cache_Size; i++ ) 00146 delete[] kernel_columns[i]; 00147 00148 for(int32_t i = 0; i < 3; i++ ) 00149 delete[] virt_columns[i]; 00150 00151 delete[] cache_index; 00152 delete[] kernel_columns; 00153 00154 delete[] diag_H; 00155 } 00156 00157 /* ------------------------------------------------------------ 00158 Returns pointer at a-th column of the kernel matrix. 00159 This function maintains FIFO cache of kernel columns. 00160 ------------------------------------------------------------ */ 00161 float64_t* CGMNPLib::get_kernel_col( int32_t a ) 00162 { 00163 float64_t *col_ptr; 00164 int32_t i; 00165 int32_t inx; 00166 00167 inx = -1; 00168 for( i=0; i < Cache_Size; i++ ) { 00169 if( cache_index[i] == a ) { inx = i; break; } 00170 } 00171 00172 if( inx != -1 ) { 00173 col_ptr = kernel_columns[inx]; 00174 return( col_ptr ); 00175 } 00176 00177 col_ptr = kernel_columns[first_kernel_inx]; 00178 cache_index[first_kernel_inx] = a; 00179 00180 first_kernel_inx++; 00181 if( first_kernel_inx >= Cache_Size ) first_kernel_inx = 0; 00182 00183 for( i=0; i < m_num_data; i++ ) { 00184 col_ptr[i] = m_kernel->kernel(i,a); 00185 } 00186 00187 return( col_ptr ); 00188 } 00189 00190 /* ------------------------------------------------------------ 00191 Computes index of input example and its class label from 00192 index of virtual "single-class" example. 00193 ------------------------------------------------------------ */ 00194 void CGMNPLib::get_indices2( int32_t *index, int32_t *c, int32_t i ) 00195 { 00196 *index = i / (m_num_classes-1); 00197 00198 *c= (i % (m_num_classes-1))+1; 00199 if( *c>= m_vector_y[ *index ]) (*c)++; 00200 00201 return; 00202 } 00203 00204 00205 /* ------------------------------------------------------------ 00206 Returns pointer at the a-th column of the virtual K matrix. 00207 00208 (note: the b-th column must be preserved in the cache during 00209 updating but b is from (a(t-2), a(t-1)) where a=a(t) and 00210 thus FIFO with three columns does not have to take care od b.) 00211 ------------------------------------------------------------ */ 00212 float64_t* CGMNPLib::get_col( int32_t a, int32_t b ) 00213 { 00214 int32_t i; 00215 float64_t *col_ptr; 00216 float64_t *ker_ptr; 00217 float64_t value; 00218 int32_t i1,c1,i2,c2; 00219 00220 col_ptr = virt_columns[first_virt_inx++]; 00221 if( first_virt_inx >= 3 ) first_virt_inx = 0; 00222 00223 get_indices2( &i1, &c1, a ); 00224 ker_ptr = (float64_t*) get_kernel_col( i1 ); 00225 00226 for( i=0; i < m_num_virt_data; i++ ) { 00227 get_indices2( &i2, &c2, i ); 00228 00229 if( KDELTA4(m_vector_y[i1],m_vector_y[i2],c1,c2) ) { 00230 value = (+KDELTA(m_vector_y[i1],m_vector_y[i2]) 00231 -KDELTA(m_vector_y[i1],c2) 00232 -KDELTA(m_vector_y[i2],c1) 00233 +KDELTA(c1,c2) 00234 )*(ker_ptr[i2]+1); 00235 } 00236 else 00237 { 00238 value = 0; 00239 } 00240 00241 if(a==i) value += m_reg_const; 00242 00243 col_ptr[i] = value; 00244 } 00245 00246 return( col_ptr ); 00247 } 00248 00249 00250 /* -------------------------------------------------------------- 00251 GMNP solver based on improved MDM algorithm 1. 00252 00253 Search strategy: u determined by common rule and v is 00254 optimized. 00255 00256 Usage: exitflag = gmnp_imdm( &get_col, diag_H, vector_c, dim, 00257 tmax, tolabs, tolrel, th, &alpha, &t, &History ); 00258 -------------------------------------------------------------- */ 00259 00260 int8_t CGMNPLib::gmnp_imdm(float64_t *vector_c, 00261 int32_t dim, 00262 int32_t tmax, 00263 float64_t tolabs, 00264 float64_t tolrel, 00265 float64_t th, 00266 float64_t *alpha, 00267 int32_t *ptr_t, 00268 float64_t **ptr_History, 00269 int32_t verb) 00270 { 00271 float64_t LB; 00272 float64_t UB; 00273 float64_t aHa, ac; 00274 float64_t tmp, tmp1; 00275 float64_t Huu, Huv, Hvv; 00276 float64_t min_beta, beta; 00277 float64_t max_improv, improv; 00278 float64_t lambda; 00279 float64_t *History; 00280 float64_t *Ha; 00281 float64_t *tmp_ptr; 00282 float64_t *col_u, *col_v; 00283 int32_t u=0; 00284 int32_t v=0; 00285 int32_t new_u=0; 00286 int32_t i; 00287 int32_t t; 00288 int32_t History_size; 00289 int8_t exitflag; 00290 00291 /* ------------------------------------------------------------ */ 00292 /* Initialization */ 00293 /* ------------------------------------------------------------ */ 00294 00295 Ha = new float64_t[dim]; 00296 if( Ha == NULL ) SG_ERROR("Not enough memory."); 00297 00298 History_size = (tmax < HISTORY_BUF ) ? tmax+1 : HISTORY_BUF; 00299 History = new float64_t[History_size*2]; 00300 if( History == NULL ) SG_ERROR("Not enough memory."); 00301 00302 /* inx = argmin(0.5*diag_H + vector_c ); */ 00303 for( tmp1 = PLUS_INF, i = 0; i < dim; i++ ) { 00304 tmp = 0.5*diag_H[i] + vector_c[i]; 00305 if( tmp1 > tmp) { 00306 tmp1 = tmp; 00307 v = i; 00308 } 00309 } 00310 00311 col_v = (float64_t*)get_col(v,-1); 00312 00313 for( min_beta = PLUS_INF, i = 0; i < dim; i++ ) 00314 { 00315 alpha[i] = 0; 00316 Ha[i] = col_v[i]; 00317 00318 beta = Ha[i] + vector_c[i]; 00319 if( beta < min_beta ) { 00320 min_beta = beta; 00321 u = i; 00322 } 00323 } 00324 00325 alpha[v] = 1; 00326 aHa = diag_H[v]; 00327 ac = vector_c[v]; 00328 00329 UB = 0.5*aHa + ac; 00330 LB = min_beta - 0.5*aHa; 00331 00332 t = 0; 00333 History[INDEX(0,0,2)] = LB; 00334 History[INDEX(1,0,2)] = UB; 00335 00336 if( verb ) { 00337 SG_PRINT("Init: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n", 00338 UB, LB, UB-LB,(UB-LB)/UB); 00339 } 00340 00341 /* Stopping conditions */ 00342 if( UB-LB <= tolabs ) exitflag = 1; 00343 else if(UB-LB <= CMath::abs(UB)*tolrel ) exitflag = 2; 00344 else if(LB > th ) exitflag = 3; 00345 else exitflag = -1; 00346 00347 /* ------------------------------------------------------------ */ 00348 /* Main optimization loop */ 00349 /* ------------------------------------------------------------ */ 00350 00351 col_u = (float64_t*)get_col(u,-1); 00352 while( exitflag == -1 ) 00353 { 00354 t++; 00355 00356 col_v = (float64_t*)get_col(v,u); 00357 00358 /* Adaptation rule and update */ 00359 Huu = diag_H[u]; 00360 Hvv = diag_H[v]; 00361 Huv = col_u[v]; 00362 00363 lambda = (Ha[v]-Ha[u]+vector_c[v]-vector_c[u])/(alpha[v]*(Huu-2*Huv+Hvv)); 00364 if( lambda < 0 ) lambda = 0; else if (lambda > 1) lambda = 1; 00365 00366 aHa = aHa + 2*alpha[v]*lambda*(Ha[u]-Ha[v])+ 00367 lambda*lambda*alpha[v]*alpha[v]*(Huu-2*Huv+Hvv); 00368 00369 ac = ac + lambda*alpha[v]*(vector_c[u]-vector_c[v]); 00370 00371 tmp = alpha[v]; 00372 alpha[u]=alpha[u]+lambda*alpha[v]; 00373 alpha[v]=alpha[v]-lambda*alpha[v]; 00374 00375 UB = 0.5*aHa + ac; 00376 00377 /* max_beta = MINUS_INF;*/ 00378 for( min_beta = PLUS_INF, i = 0; i < dim; i++ ) 00379 { 00380 Ha[i] = Ha[i] + lambda*tmp*(col_u[i] - col_v[i]); 00381 00382 beta = Ha[i]+ vector_c[i]; 00383 00384 if( beta < min_beta ) 00385 { 00386 new_u = i; 00387 min_beta = beta; 00388 } 00389 } 00390 00391 LB = min_beta - 0.5*aHa; 00392 u = new_u; 00393 col_u = (float64_t*)get_col(u,-1); 00394 00395 /* search for optimal v while u is fixed */ 00396 for( max_improv = MINUS_INF, i = 0; i < dim; i++ ) { 00397 00398 if( alpha[i] != 0 ) { 00399 beta = Ha[i] + vector_c[i]; 00400 00401 if( beta >= min_beta ) { 00402 00403 tmp = diag_H[u] - 2*col_u[i] + diag_H[i]; 00404 if( tmp != 0 ) { 00405 improv = (0.5*(beta-min_beta)*(beta-min_beta))/tmp; 00406 00407 if( improv > max_improv ) { 00408 max_improv = improv; 00409 v = i; 00410 } 00411 } 00412 } 00413 } 00414 } 00415 00416 /* Stopping conditions */ 00417 if( UB-LB <= tolabs ) exitflag = 1; 00418 else if( UB-LB <= CMath::abs(UB)*tolrel) exitflag = 2; 00419 else if(LB > th ) exitflag = 3; 00420 else if(t >= tmax) exitflag = 0; 00421 00422 /* print info */ 00423 SG_ABS_PROGRESS(CMath::abs((UB-LB)/UB), 00424 -CMath::log10(CMath::abs(UB-LB)), 00425 -CMath::log10(1.0), 00426 -CMath::log10(tolrel), 6); 00427 if(verb && (t % verb) == 0 ) { 00428 SG_PRINT("%d: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n", 00429 t, UB, LB, UB-LB,(UB-LB)/UB); 00430 } 00431 00432 /* Store selected values */ 00433 if( t < History_size ) { 00434 History[INDEX(0,t,2)] = LB; 00435 History[INDEX(1,t,2)] = UB; 00436 } 00437 else { 00438 tmp_ptr = new float64_t[(History_size+HISTORY_BUF)*2]; 00439 if( tmp_ptr == NULL ) SG_ERROR("Not enough memory."); 00440 for( i = 0; i < History_size; i++ ) { 00441 tmp_ptr[INDEX(0,i,2)] = History[INDEX(0,i,2)]; 00442 tmp_ptr[INDEX(1,i,2)] = History[INDEX(1,i,2)]; 00443 } 00444 tmp_ptr[INDEX(0,t,2)] = LB; 00445 tmp_ptr[INDEX(1,t,2)] = UB; 00446 00447 History_size += HISTORY_BUF; 00448 delete[] History; 00449 History = tmp_ptr; 00450 } 00451 } 00452 00453 /* print info about last iteration*/ 00454 SG_DONE(); 00455 if(verb && (t % verb) ) { 00456 SG_PRINT("exit: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n", 00457 UB, LB, UB-LB,(UB-LB)/UB); 00458 } 00459 00460 00461 /*------------------------------------------------------- */ 00462 /* Set outputs */ 00463 /*------------------------------------------------------- */ 00464 (*ptr_t) = t; 00465 (*ptr_History) = History; 00466 00467 /* Free memory */ 00468 delete[] Ha; 00469 00470 return( exitflag ); 00471 } 00472 00473 /* ------------------------------------------------------------ 00474 Retures (a,b)-th element of the virtual kernel matrix 00475 of size [num_virt_data x num_virt_data]. 00476 ------------------------------------------------------------ */ 00477 float64_t CGMNPLib::kernel_fce( int32_t a, int32_t b ) 00478 { 00479 float64_t value; 00480 int32_t i1,c1,i2,c2; 00481 00482 get_indices2( &i1, &c1, a ); 00483 get_indices2( &i2, &c2, b ); 00484 00485 if( KDELTA4(m_vector_y[i1],m_vector_y[i2],c1,c2) ) { 00486 value = (+KDELTA(m_vector_y[i1],m_vector_y[i2]) 00487 -KDELTA(m_vector_y[i1],c2) 00488 -KDELTA(m_vector_y[i2],c1) 00489 +KDELTA(c1,c2) 00490 )*(m_kernel->kernel( i1, i2 )+1); 00491 } 00492 else 00493 { 00494 value = 0; 00495 } 00496 00497 if(a==b) value += m_reg_const; 00498 00499 return( value ); 00500 }