// SHOGUN v0.9.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009 Alexander Binder 00008 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include "MKLMultiClassGradient.h" 00012 00013 using namespace shogun; 00014 00015 MKLMultiClassGradient::MKLMultiClassGradient() 00016 { 00017 numkernels = 0; 00018 pnorm=2; 00019 00020 } 00021 MKLMultiClassGradient::~MKLMultiClassGradient() 00022 { 00023 00024 } 00025 00026 MKLMultiClassGradient MKLMultiClassGradient::operator=(MKLMultiClassGradient & gl) 00027 { 00028 numkernels=gl.numkernels; 00029 pnorm=gl.pnorm; 00030 return (*this); 00031 00032 } 00033 MKLMultiClassGradient::MKLMultiClassGradient(MKLMultiClassGradient & gl) 00034 { 00035 numkernels=gl.numkernels; 00036 pnorm=gl.pnorm; 00037 00038 } 00039 00040 void MKLMultiClassGradient::setup(const int32_t numkernels2) 00041 { 00042 numkernels=numkernels2; 00043 if (numkernels<=1) 00044 { 00045 SG_ERROR("void glpkwrapper::setup(const int32_tnumkernels): input " 00046 "numkernels out of bounds: %d\n",numkernels); 00047 } 00048 00049 00050 } 00051 00052 void MKLMultiClassGradient::set_mkl_norm(float64_t norm) 00053 { 00054 pnorm=norm; 00055 if(pnorm<1 ) 00056 SG_ERROR("MKLMultiClassGradient::set_mkl_norm(float64_t norm) : parameter pnorm<1"); 00057 } 00058 00059 00060 void MKLMultiClassGradient::addconstraint(const ::std::vector<float64_t> & normw2, 00061 const float64_t sumofpositivealphas) 00062 { 00063 normsofsubkernels.push_back(normw2); 00064 sumsofalphas.push_back(sumofpositivealphas); 00065 } 00066 00067 void MKLMultiClassGradient::genbetas( ::std::vector<float64_t> & weights ,const ::std::vector<float64_t> & gammas) 00068 { 00069 00070 assert((int32_t)gammas.size()+1==numkernels); 
00071 00072 double pi4=3.151265358979238/4; 00073 00074 weights.resize(numkernels); 00075 00076 00077 // numkernels-dimensional polar transform 00078 weights[0]=1; 00079 00080 for(int32_t i=0; i< numkernels-1 ;++i) 00081 { 00082 for(int32_t k=0; k< i+1 ;++k) 00083 { 00084 weights[k]*=cos( std::min(std::max(0.0,gammas[i]),pi4) ); 00085 } 00086 weights[i+1]=sin( std::min(std::max(0.0,gammas[i]),pi4) ); 00087 } 00088 00089 // pnorm- manifold adjustment 00090 if(pnorm!=2.0) 00091 { 00092 for(int32_t i=0; i< numkernels ;++i) 00093 weights[i]=pow(weights[i],2.0/pnorm); 00094 } 00095 00096 } 00097 00098 void MKLMultiClassGradient::gengammagradient( ::std::vector<float64_t> & gammagradient ,const ::std::vector<float64_t> & gammas,const int32_t dim) 00099 { 00100 00101 assert((int32_t)gammas.size()+1==numkernels); 00102 00103 double pi4=3.151265358979238/2; 00104 00105 gammagradient.resize(numkernels); 00106 std::fill(gammagradient.begin(),gammagradient.end(),0.0); 00107 00108 // numkernels-dimensional polar transform 00109 gammagradient[0]=1; 00110 00111 for(int32_t i=0; i< numkernels-1 ;++i) 00112 { 00113 if(i!=dim) 00114 { 00115 for(int32_t k=0; k< std::min(i+1,dim+2) ;++k) 00116 { 00117 gammagradient[k]*=pow( cos( std::min(std::max(0.0,gammas[i]),pi4) ), 2.0/pnorm) ; 00118 } 00119 if(i<dim) 00120 gammagradient[i+1]=pow( sin( std::min(std::max(0.0,gammas[i]),pi4) ),2.0/pnorm); 00121 } 00122 else if(i==dim) 00123 { 00124 // i==dim, higher dims are 0 00125 for(int32_t k=0; k< i+1 ;++k) 00126 { 00127 gammagradient[k]*= pow( cos( std::min(std::max(0.0,gammas[i]),pi4) ), 2.0/pnorm-1.0)*(-1)*sin( std::min(std::max(0.0,gammas[i]),pi4) ); 00128 } 00129 gammagradient[i+1]=pow( sin( std::min(std::max(0.0,gammas[i]),pi4) ),2.0/pnorm-1)*cos( std::min(std::max(0.0,gammas[i]),pi4) ); 00130 } 00131 } 00132 00133 00134 } 00135 00136 00137 00138 float64_t MKLMultiClassGradient::objectives(const ::std::vector<float64_t> & weights, const int32_t index) 00139 { 00140 assert(index>=0); 00141 
assert(index < (int32_t) sumsofalphas.size()); 00142 assert(index < (int32_t) normsofsubkernels.size()); 00143 00144 00145 float64_t obj= -sumsofalphas[index]; 00146 for(int32_t i=0; i< numkernels ;++i) 00147 { 00148 obj+=0.5*normsofsubkernels[index][i]*weights[i]; 00149 } 00150 return(obj); 00151 } 00152 00153 00154 void MKLMultiClassGradient::linesearch(std::vector<float64_t> & finalbeta,const std::vector<float64_t> & oldweights) 00155 { 00156 00157 float64_t pi4=3.151265358979238/2; 00158 00159 float64_t fingrad=1e-7; 00160 int32_t maxhalfiter=20; 00161 int32_t totaliters=6; 00162 float64_t maxrelobjdiff=1e-6; 00163 00164 00165 00166 std::vector<float64_t> finalgamma,curgamma; 00167 00168 curgamma.resize(numkernels-1); 00169 if(oldweights.empty()) 00170 { 00171 std::fill(curgamma.begin(),curgamma.end(),pi4/2); 00172 } 00173 else 00174 { 00175 // curgamma from init: arcsin on highest dim ^p/2 !!! and divided everthing by its cos 00176 std::vector<float64_t> tmpbeta(numkernels); 00177 for(int32_t i=numkernels-1; i>= 0 ;--i) 00178 { 00179 tmpbeta[i]=pow(oldweights[i],pnorm/2); 00180 } 00181 00182 for(int32_t i=numkernels-1; i>= 1 ;--i) 00183 { 00184 curgamma[i-1]=asin(tmpbeta[i]); 00185 for(int32_t k=numkernels-2; k>= 1 ;--k) // k==0 not necessary 00186 { 00187 if(cos(curgamma[i-1])>0) 00188 tmpbeta[k]/=cos(curgamma[i-1]); 00189 } 00190 } 00191 00192 } 00193 bool finished=false; 00194 int32_t longiters=0; 00195 while(!finished) 00196 { 00197 ++longiters; 00198 std::vector<float64_t> curbeta; 00199 genbetas( curbeta ,curgamma); 00200 //find smallest objective 00201 int32_t minind=0; 00202 float64_t minval=objectives(curbeta, minind); 00203 for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i) 00204 { 00205 float64_t tmpval=objectives(curbeta, i); 00206 if(tmpval<minval) 00207 { 00208 minval=tmpval; 00209 minind=i; 00210 } 00211 } 00212 float64_t lobj=minval; 00213 //compute gradient for smallest objective 00214 std::vector<float64_t> curgrad; 00215 for(int32_t i=0; 
i< numkernels-1 ;++i) 00216 { 00217 ::std::vector<float64_t> gammagradient; 00218 gengammagradient( gammagradient ,curgamma,i); 00219 00220 curgrad.push_back(objectives(gammagradient, minind)); 00221 } 00222 //find boundary hit point (check for each dim) to [0, pi/4] 00223 std::vector<float64_t> maxalphas(numkernels-1,0); 00224 float64_t maxgrad=0; 00225 for(int32_t i=0; i< numkernels-1 ;++i) 00226 { 00227 maxgrad=std::max(maxgrad,fabs(curgrad[i]) ); 00228 if(curgrad[i]<0) 00229 { 00230 maxalphas[i]=(0-curgamma[i])/curgrad[i]; 00231 } 00232 else if(curgrad[i]>0) 00233 { 00234 maxalphas[i]=(pi4-curgamma[i])/curgrad[i]; 00235 } 00236 else 00237 { 00238 maxalphas[i]=1024*1024; 00239 } 00240 } 00241 00242 float64_t maxalpha=maxalphas[0]; 00243 for(int32_t i=1; i< numkernels-1 ;++i) 00244 { 00245 maxalpha=std::min(maxalpha,maxalphas[i]); 00246 } 00247 00248 if((maxalpha>1024*1023)|| (maxgrad<fingrad)) 00249 { 00250 //curgrad[i] approx 0 for all i terminate 00251 finished=true; 00252 finalgamma=curgamma; 00253 } 00254 else // of if(maxalpha>1024*1023) 00255 { 00256 //linesearch: shrink until min of all objective increases compared to starting point, then check left half and right halve until finish 00257 // curgamma + al*curgrad ,aximizes al in [0, maxal] 00258 float64_t leftalpha=0, rightalpha=maxalpha, midalpha=(leftalpha+rightalpha)/2; 00259 00260 std::vector<float64_t> tmpgamma=curgamma, tmpbeta; 00261 for(int32_t i=1; i< numkernels-1 ;++i) 00262 { 00263 tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i]; 00264 } 00265 genbetas( tmpbeta ,tmpgamma); 00266 float64_t curobj=objectives(tmpbeta, 0); 00267 for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i) 00268 { 00269 curobj=std::min(curobj,objectives(tmpbeta, i)); 00270 } 00271 00272 int curhalfiter=0; 00273 while((curobj < minval)&&(curhalfiter<maxhalfiter)&&(fabs(curobj/minval-1 ) > maxrelobjdiff )) 00274 { 00275 rightalpha=midalpha; 00276 midalpha=(leftalpha+rightalpha)/2; 00277 ++curhalfiter; 00278 00279 
tmpgamma=curgamma; 00280 for(int32_t i=1; i< numkernels-1 ;++i) 00281 { 00282 tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i]; 00283 } 00284 genbetas( tmpbeta ,tmpgamma); 00285 curobj=objectives(tmpbeta, 0); 00286 for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i) 00287 { 00288 curobj=std::min(curobj,objectives(tmpbeta, i)); 00289 } 00290 00291 } 00292 00293 float64_t robj=curobj; 00294 float64_t tmpobj=std::max(lobj,robj); 00295 do 00296 { 00297 00298 tmpobj=std::max(lobj,robj); 00299 00300 00301 00302 tmpgamma=curgamma; 00303 for(int32_t i=1; i< numkernels-1 ;++i) 00304 { 00305 tmpgamma[i]=tmpgamma[i]+midalpha*curgrad[i]; 00306 } 00307 genbetas( tmpbeta ,tmpgamma); 00308 curobj=objectives(tmpbeta, 0); 00309 for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i) 00310 { 00311 curobj=std::min(curobj,objectives(tmpbeta, i)); 00312 } 00313 00314 if(lobj>robj) 00315 { 00316 rightalpha=midalpha; 00317 robj=curobj; 00318 } 00319 else 00320 { 00321 leftalpha=midalpha; 00322 lobj=curobj; 00323 } 00324 midalpha=(leftalpha+rightalpha)/2; 00325 00326 00327 } 00328 while( fabs(curobj/tmpobj-1 ) > maxrelobjdiff ); 00329 finalgamma=tmpgamma; 00330 curgamma=tmpgamma; 00331 } // else // of if(maxalpha>1024*1023) 00332 00333 if(longiters>= totaliters) 00334 { 00335 finished=true; 00336 } 00337 } 00338 genbetas(finalbeta,finalgamma); 00339 float64_t nor=0; 00340 for(int32_t i=0; i< numkernels ;++i) 00341 { 00342 nor+=pow(finalbeta[i],pnorm); 00343 } 00344 if(nor>0) 00345 { 00346 nor=pow(nor,1.0/pnorm); 00347 for(int32_t i=0; i< numkernels ;++i) 00348 { 00349 finalbeta[i]/=nor; 00350 } 00351 } 00352 } 00353 00354 00355 void MKLMultiClassGradient::computeweights(std::vector<float64_t> & weights2) 00356 { 00357 if(pnorm<1 ) 00358 SG_ERROR("MKLMultiClassGradient::computeweights(std::vector<float64_t> & weights2) : parameter pnorm<1"); 00359 00360 SG_SDEBUG("MKLMultiClassGradient::computeweights(...): pnorm %f\n",pnorm); 00361 00362 int maxnumlinesrch=15; 00363 float64_t maxdiff=1e-6; 
00364 00365 bool finished =false; 00366 int numiter=0; 00367 do 00368 { 00369 ++numiter; 00370 std::vector<float64_t> initw(weights2); 00371 linesearch(weights2,initw); 00372 float64_t norm=0; 00373 if(!initw.empty()) 00374 { 00375 for(size_t i=0;i<weights2.size();++i) 00376 { 00377 norm+=(weights2[i]-initw[i])*(weights2[i]-initw[i]); 00378 } 00379 norm=sqrt(norm); 00380 } 00381 else 00382 { 00383 norm=maxdiff+1; 00384 } 00385 00386 if((norm < maxdiff) ||(numiter>=maxnumlinesrch )) 00387 { 00388 finished=true; 00389 } 00390 } 00391 while(false==finished); 00392 00393 00394 00395 00396 }