gmnplib.cpp

Go to the documentation of this file.
00001 /*-----------------------------------------------------------------------
00002  *
00003  * This program is free software; you can redistribute it and/or modify
00004  * it under the terms of the GNU General Public License as published by
00005  * the Free Software Foundation; either version 3 of the License, or
00006  * (at your option) any later version.
00007  *
00008  * Library of solvers for Generalized Nearest Point Problem (GNPP).
00009  *
00010  * Written (W) 1999-2008 Vojtech Franc, xfrancv@cmp.felk.cvut.cz
00011  * Copyright (C) 1999-2008 Center for Machine Perception, CTU FEL Prague 
00012  *
00013  *
00014 gmnplib.c: Library of solvers for Generalized Minimal Norm Problem (GMNP).
00015  
00016  Generalized Minimal Norm Problem to solve is
00017   
00018   min 0.5*alpha'*H*alpha + c'*alpha
00019 
00020   subject to  sum(alpha) = 1,  alpha(i) >= 0
00021   
00022  H [dim x dim] is symmetric positive definite matrix.
00023  c [dim x 1] is an arbitrary vector.
00024 
00025  The precision of the found solution is given by
00026  the parameters tmax, tolabs and tolrel which
00027  define the stopping conditions:
00028  
00029  UB-LB <= tolabs      ->  exit_flag = 1   Abs. tolerance.
00030  UB-LB <= UB*tolrel   ->  exit_flag = 2   Relative tolerance.
00031  LB > th              ->  exit_flag = 3   Threshold on lower bound.
00032  t >= tmax            ->  exit_flag = 0   Number of iterations.
00033 
00034  UB ... Upper bound on the optimal solution.
00035  LB ... Lower bound on the optimal solution.
00036  t  ... Number of iterations.
00037  History ... Value of LB and UB wrt. number of iterations.
00038 
00039 
00040  The following algorithms are implemented:
00041  ..............................................
00042 
00043  - GMNP solver based on improved MDM algorithm 1 (u fixed v optimized)
00044     exitflag = gmnp_imdm( &get_col, diag_H, vector_c, dim,  
00045                  tmax, tolabs, tolrel, th, &alpha, &t, &History, verb  );
00046 
00047   For more info refer to V.Franc: Optimization Algorithms for Kernel 
00048   Methods. Research report. CTU-CMP-2005-22. CTU FEL Prague. 2005.
00049   ftp://cmp.felk.cvut.cz/pub/cmp/articles/franc/Franc-PhD.pdf .
00050 
00051  Modifications:
00052  09-sep-2005, VF
00053  24-jan-2005, VF
00054  26-nov-2004, VF
00055  25-nov-2004, VF
00056  21-nov-2004, VF
00057  20-nov-2004, VF
00058  31-may-2004, VF
00059  23-Jan-2004, VF
00060 
00061 -------------------------------------------------------------------- */
00062 
00063 #include "classifier/svm/gmnplib.h"
00064 #include "lib/Mathematics.h"
00065 
00066 #include <string.h>
00067 #include <limits.h>
00068 
00069 using namespace shogun;
00070 
00071 #define HISTORY_BUF 1000000
00072 
00073 #define MINUS_INF INT_MIN
00074 #define PLUS_INF  INT_MAX
00075 
00076 #define INDEX(ROW,COL,DIM) ((COL*DIM)+ROW)
00077 #define KDELTA(A,B) (A==B)
00078 #define KDELTA4(A1,A2,A3,A4) ((A1==A2)||(A1==A3)||(A1==A4)||(A2==A3)||(A2==A4)||(A3==A4))
00079 
00080 CGMNPLib::CGMNPLib(
00081     float64_t* vector_y, CKernel* kernel, int32_t num_data,
00082     int32_t num_virt_data, int32_t num_classes, float64_t reg_const)
00083 : CSGObject()
00084 {
00085   m_num_classes=num_classes;
00086   m_num_virt_data=num_virt_data;
00087   m_reg_const = reg_const;
00088   m_num_data = num_data;
00089   m_vector_y = vector_y;
00090   m_kernel = kernel;
00091 
00092   Cache_Size = ((int64_t) kernel->get_cache_size())*1024*1024/(sizeof(float64_t)*num_data);
00093   Cache_Size = CMath::min(Cache_Size, (int64_t) num_data);
00094 
00095   SG_INFO("using %d kernel cache lines\n", Cache_Size);
00096   ASSERT(Cache_Size>=2);
00097 
00098   /* allocates memory for kernel cache */
00099   kernel_columns = new float64_t*[Cache_Size];
00100   cache_index = new float64_t[Cache_Size];
00101 
00102   for(int32_t i = 0; i < Cache_Size; i++ )
00103   {
00104     kernel_columns[i] = new float64_t[num_data];
00105     cache_index[i] = -2;
00106   }
00107   first_kernel_inx = 0;
00108 
00109 
00110 
00111   for(int32_t i = 0; i < 3; i++ )
00112   {
00113     virt_columns[i] = new float64_t[num_virt_data];
00114   }
00115   first_virt_inx = 0;
00116 
00117   diag_H = new float64_t[num_virt_data];
00118 
00119   for(int32_t i = 0; i < num_virt_data; i++ )
00120       diag_H[i] = kernel_fce(i,i);
00121 }
00122 
00123 CGMNPLib::~CGMNPLib()
00124 {
00125     for(int32_t i = 0; i < Cache_Size; i++ ) 
00126         delete[] kernel_columns[i];
00127 
00128     for(int32_t i = 0; i < 3; i++ ) 
00129         delete[] virt_columns[i];
00130 
00131     delete[] cache_index;
00132     delete[] kernel_columns;
00133 
00134     delete[] diag_H;
00135 }
00136 
00137 /* ------------------------------------------------------------
00138   Returns pointer at a-th column of the kernel matrix.
00139   This function maintains FIFO cache of kernel columns.
00140 ------------------------------------------------------------ */
00141 float64_t* CGMNPLib::get_kernel_col( int32_t a )
00142 {
00143   float64_t *col_ptr;
00144   int32_t i;
00145   int32_t inx;
00146 
00147   inx = -1;
00148   for( i=0; i < Cache_Size; i++ ) {
00149     if( cache_index[i] == a ) { inx = i; break; }
00150   }
00151     
00152   if( inx != -1 ) {
00153     col_ptr = kernel_columns[inx];
00154     return( col_ptr );
00155   }
00156    
00157   col_ptr = kernel_columns[first_kernel_inx];
00158   cache_index[first_kernel_inx] = a;
00159 
00160   first_kernel_inx++;
00161   if( first_kernel_inx >= Cache_Size ) first_kernel_inx = 0;
00162 
00163   for( i=0; i < m_num_data; i++ ) {
00164     col_ptr[i] = m_kernel->kernel(i,a);
00165   }
00166 
00167   return( col_ptr );
00168 }
00169 
00170 /* ------------------------------------------------------------
00171   Computes index of input example and its class label from 
00172   index of virtual "single-class" example.
00173 ------------------------------------------------------------ */
00174 void CGMNPLib::get_indices2( int32_t *index, int32_t *c, int32_t i )
00175 {
00176    *index = i / (m_num_classes-1);
00177  
00178    *c= (i % (m_num_classes-1))+1;
00179    if( *c>= m_vector_y[ *index ]) (*c)++;
00180 
00181    return;
00182 }
00183 
00184 
00185 /* ------------------------------------------------------------
00186   Returns pointer at the a-th column of the virtual K matrix.
00187 
00188   (note: the b-th column must be preserved in the cache during 
00189    updating but b is from (a(t-2), a(t-1)) where a=a(t) and
00190    thus FIFO with three columns does not have to take care od b.)
00191 ------------------------------------------------------------ */
00192 float64_t* CGMNPLib::get_col( int32_t a, int32_t b )
00193 {
00194   int32_t i;
00195   float64_t *col_ptr;
00196   float64_t *ker_ptr;
00197   float64_t value;
00198   int32_t i1,c1,i2,c2;
00199 
00200   col_ptr = virt_columns[first_virt_inx++];
00201   if( first_virt_inx >= 3 ) first_virt_inx = 0;
00202 
00203   get_indices2( &i1, &c1, a );
00204   ker_ptr = (float64_t*) get_kernel_col( i1 );
00205 
00206   for( i=0; i < m_num_virt_data; i++ ) {
00207     get_indices2( &i2, &c2, i );
00208 
00209     if( KDELTA4(m_vector_y[i1],m_vector_y[i2],c1,c2) ) {
00210       value = (+KDELTA(m_vector_y[i1],m_vector_y[i2]) 
00211                -KDELTA(m_vector_y[i1],c2)
00212                -KDELTA(m_vector_y[i2],c1)
00213                +KDELTA(c1,c2)
00214               )*(ker_ptr[i2]+1);
00215     }
00216     else
00217     {
00218       value = 0;
00219     }
00220 
00221     if(a==i) value += m_reg_const; 
00222 
00223     col_ptr[i] = value;
00224   }
00225   
00226   return( col_ptr );
00227 }
00228 
00229 
00230 /* --------------------------------------------------------------
00231  GMNP solver based on improved MDM algorithm 1.
00232 
00233  Search strategy: u determined by common rule and v is 
00234  optimized.
00235 
00236  Usage: exitflag = gmnp_imdm( &get_col, diag_H, vector_c, dim,  
00237                   tmax, tolabs, tolrel, th, &alpha, &t, &History );
00238 -------------------------------------------------------------- */
00239 
00240 int8_t CGMNPLib::gmnp_imdm(float64_t *vector_c,
00241             int32_t dim, 
00242             int32_t tmax,
00243             float64_t tolabs,
00244             float64_t tolrel,
00245             float64_t th,
00246             float64_t *alpha,
00247             int32_t  *ptr_t,
00248             float64_t **ptr_History,
00249             int32_t verb)
00250 {
00251   float64_t LB;
00252   float64_t UB;
00253   float64_t aHa, ac;
00254   float64_t tmp, tmp1;
00255   float64_t Huu, Huv, Hvv;
00256   float64_t min_beta, beta;
00257   float64_t max_improv, improv;
00258   float64_t lambda;
00259   float64_t *History;
00260   float64_t *Ha;
00261   float64_t *tmp_ptr;
00262   float64_t *col_u, *col_v;
00263   int32_t u=0;
00264   int32_t v=0;
00265   int32_t new_u=0;
00266   int32_t i;
00267   int32_t t;
00268   int32_t History_size;
00269   int8_t exitflag;
00270 
00271   /* ------------------------------------------------------------ */
00272   /* Initialization                                               */
00273   /* ------------------------------------------------------------ */
00274 
00275   Ha = new float64_t[dim];
00276   if( Ha == NULL ) SG_ERROR("Not enough memory.");
00277 
00278   History_size = (tmax < HISTORY_BUF ) ? tmax+1 : HISTORY_BUF;
00279   History = new float64_t[History_size*2];
00280   if( History == NULL ) SG_ERROR("Not enough memory.");
00281 
00282   /* inx = argmin(0.5*diag_H + vector_c ); */
00283   for( tmp1 =  PLUS_INF, i = 0; i < dim; i++ ) {
00284     tmp = 0.5*diag_H[i] + vector_c[i];
00285     if( tmp1 > tmp) {
00286       tmp1 = tmp;
00287       v = i;
00288     }
00289   }
00290 
00291   col_v = (float64_t*)get_col(v,-1);
00292 
00293   for( min_beta = PLUS_INF, i = 0; i < dim; i++ ) 
00294   {
00295     alpha[i] = 0;
00296     Ha[i] = col_v[i];
00297 
00298     beta = Ha[i] + vector_c[i];
00299     if( beta < min_beta ) {
00300       min_beta = beta;
00301       u = i;
00302     }
00303   }
00304 
00305   alpha[v] = 1;
00306   aHa = diag_H[v];
00307   ac = vector_c[v];
00308 
00309   UB = 0.5*aHa + ac;
00310   LB = min_beta - 0.5*aHa;
00311 
00312   t = 0;
00313   History[INDEX(0,0,2)] = LB;
00314   History[INDEX(1,0,2)] = UB;
00315 
00316   if( verb ) {
00317     SG_PRINT("Init: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n",
00318       UB, LB, UB-LB,(UB-LB)/UB);
00319   }  
00320 
00321   /* Stopping conditions */
00322   if( UB-LB <= tolabs ) exitflag = 1;
00323   else if(UB-LB <= CMath::abs(UB)*tolrel ) exitflag = 2;
00324   else if(LB > th ) exitflag = 3;
00325   else exitflag = -1;
00326 
00327   /* ------------------------------------------------------------ */
00328   /* Main optimization loop                                       */
00329   /* ------------------------------------------------------------ */
00330 
00331   col_u = (float64_t*)get_col(u,-1);
00332   while( exitflag == -1 ) 
00333   {
00334     t++;     
00335 
00336     col_v = (float64_t*)get_col(v,u);
00337 
00338     /* Adaptation rule and update */
00339     Huu = diag_H[u];
00340     Hvv = diag_H[v];
00341     Huv = col_u[v];
00342 
00343     lambda = (Ha[v]-Ha[u]+vector_c[v]-vector_c[u])/(alpha[v]*(Huu-2*Huv+Hvv));
00344     if( lambda < 0 ) lambda = 0; else if (lambda > 1) lambda = 1;
00345 
00346     aHa = aHa + 2*alpha[v]*lambda*(Ha[u]-Ha[v])+
00347                 lambda*lambda*alpha[v]*alpha[v]*(Huu-2*Huv+Hvv);
00348 
00349     ac = ac + lambda*alpha[v]*(vector_c[u]-vector_c[v]);
00350 
00351     tmp = alpha[v];
00352     alpha[u]=alpha[u]+lambda*alpha[v];
00353     alpha[v]=alpha[v]-lambda*alpha[v];
00354 
00355     UB = 0.5*aHa + ac;
00356     
00357 /*    max_beta = MINUS_INF;*/
00358     for( min_beta = PLUS_INF, i = 0; i < dim; i++ ) 
00359     {
00360        Ha[i] = Ha[i] + lambda*tmp*(col_u[i] - col_v[i]);
00361 
00362        beta = Ha[i]+ vector_c[i];
00363 
00364        if( beta < min_beta )
00365        { 
00366          new_u = i;
00367          min_beta = beta;
00368        }
00369     }    
00370 
00371     LB = min_beta - 0.5*aHa; 
00372     u = new_u;    
00373     col_u = (float64_t*)get_col(u,-1);
00374 
00375     /* search for optimal v while u is fixed */
00376     for( max_improv =  MINUS_INF, i = 0; i < dim; i++ ) {
00377 
00378       if( alpha[i] != 0 ) {
00379         beta = Ha[i] + vector_c[i];
00380 
00381         if( beta >= min_beta ) {
00382 
00383           tmp = diag_H[u] - 2*col_u[i] + diag_H[i];
00384           if( tmp != 0 ) {
00385             improv = (0.5*(beta-min_beta)*(beta-min_beta))/tmp;
00386 
00387             if( improv > max_improv ) {
00388               max_improv = improv;
00389               v = i;
00390             }
00391           }
00392         }
00393       }
00394     }
00395 
00396     /* Stopping conditions */
00397     if( UB-LB <= tolabs ) exitflag = 1; 
00398     else if( UB-LB <= CMath::abs(UB)*tolrel ) exitflag = 2;
00399     else if(LB > th ) exitflag = 3;
00400     else if(t >= tmax) exitflag = 0; 
00401 
00402     /* print info */
00403     if(verb && (t % verb) == 0 ) {
00404       SG_PRINT("%d: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n",
00405         t, UB, LB, UB-LB,(UB-LB)/UB);
00406     }  
00407 
00408     /* Store selected values */
00409     if( t < History_size ) {
00410       History[INDEX(0,t,2)] = LB;
00411       History[INDEX(1,t,2)] = UB;
00412     }
00413     else {
00414       tmp_ptr = new float64_t[(History_size+HISTORY_BUF)*2];
00415       if( tmp_ptr == NULL ) SG_ERROR("Not enough memory.");
00416       for( i = 0; i < History_size; i++ ) {
00417         tmp_ptr[INDEX(0,i,2)] = History[INDEX(0,i,2)];
00418         tmp_ptr[INDEX(1,i,2)] = History[INDEX(1,i,2)];
00419       }
00420       tmp_ptr[INDEX(0,t,2)] = LB;
00421       tmp_ptr[INDEX(1,t,2)] = UB;
00422       
00423       History_size += HISTORY_BUF;
00424       delete[] History;
00425       History = tmp_ptr;
00426     }
00427   }
00428 
00429   /* print info about last iteration*/
00430   if(verb && (t % verb) ) {
00431     SG_PRINT("exit: UB=%f, LB=%f, UB-LB=%f, (UB-LB)/|UB|=%f \n",
00432       UB, LB, UB-LB,(UB-LB)/UB);
00433   }  
00434 
00435 
00436   /*------------------------------------------------------- */
00437   /* Set outputs                                            */
00438   /*------------------------------------------------------- */
00439   (*ptr_t) = t;
00440   (*ptr_History) = History;
00441 
00442   /* Free memory */
00443   delete[] Ha;
00444   
00445   return( exitflag ); 
00446 }
00447 
00448 /* ------------------------------------------------------------
00449   Retures (a,b)-th element of the virtual kernel matrix 
00450   of size [num_virt_data x num_virt_data]. 
00451 ------------------------------------------------------------ */
00452 float64_t CGMNPLib::kernel_fce( int32_t a, int32_t b )
00453 {
00454   float64_t value;
00455   int32_t i1,c1,i2,c2;
00456 
00457   get_indices2( &i1, &c1, a );
00458   get_indices2( &i2, &c2, b );
00459 
00460   if( KDELTA4(m_vector_y[i1],m_vector_y[i2],c1,c2) ) {
00461     value = (+KDELTA(m_vector_y[i1],m_vector_y[i2]) 
00462              -KDELTA(m_vector_y[i1],c2)
00463              -KDELTA(m_vector_y[i2],c1)
00464              +KDELTA(c1,c2)
00465             )*(m_kernel->kernel( i1, i2 )+1);
00466   }
00467   else
00468   {
00469     value = 0;
00470   }
00471 
00472   if(a==b) value += m_reg_const; 
00473 
00474   return( value );
00475 }

SHOGUN Machine Learning Toolbox - Documentation