SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/config.h" 00013 #include "lib/common.h" 00014 #include "lib/io.h" 00015 #include "lib/File.h" 00016 #include "lib/Time.h" 00017 #include "lib/Signal.h" 00018 00019 #include "base/Parallel.h" 00020 00021 #include "kernel/Kernel.h" 00022 #include "kernel/IdentityKernelNormalizer.h" 00023 #include "features/Features.h" 00024 #include "base/Parameter.h" 00025 00026 #include "classifier/svm/SVM.h" 00027 00028 #include <string.h> 00029 #include <unistd.h> 00030 #include <math.h> 00031 00032 #ifndef WIN32 00033 #include <pthread.h> 00034 #endif 00035 00036 using namespace shogun; 00037 00038 CKernel::CKernel() : CSGObject() 00039 { 00040 init(); 00041 } 00042 00043 CKernel::CKernel(int32_t size) : CSGObject() 00044 { 00045 init(); 00046 00047 if (size<10) 00048 size=10; 00049 00050 cache_size=size; 00051 } 00052 00053 00054 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject() 00055 { 00056 init(); 00057 00058 if (size<10) 00059 size=10; 00060 00061 cache_size=size; 00062 00063 set_normalizer(new CIdentityKernelNormalizer()); 00064 init(p_lhs, p_rhs); 00065 } 00066 00067 CKernel::~CKernel() 00068 { 00069 if (get_is_initialized()) 00070 SG_ERROR("Kernel still initialized on destruction.\n"); 00071 00072 remove_lhs_and_rhs(); 00073 SG_UNREF(normalizer); 00074 00075 SG_INFO("Kernel deleted (%p).\n", this); 00076 } 00077 00078 void CKernel::get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n) 00079 { 00080 ASSERT(dst && m && n); 00081 00082 float64_t* result = NULL; 00083 00084 if (has_features()) 00085 { 00086 int32_t num_vec1=get_num_vec_lhs(); 00087 int32_t num_vec2=get_num_vec_rhs(); 00088 *m=num_vec1; 00089 *n=num_vec2; 00090 00091 int64_t total_num = ((int64_t) num_vec1) * num_vec2; 00092 SG_DEBUG( "allocating memory for a kernel matrix" 00093 " of size %dx%d\n", num_vec1, num_vec2); 00094 00095 result=(float64_t*) malloc(sizeof(float64_t)*total_num); 00096 ASSERT(result); 00097 get_kernel_matrix<float64_t>(num_vec1,num_vec2, result); 00098 } 00099 else 00100 SG_ERROR( "no features assigned to kernel\n"); 00101 00102 *dst=result; 00103 } 00104 00105 00106 00107 bool CKernel::init(CFeatures* l, CFeatures* r) 00108 { 00109 //make sure features were indeed supplied 00110 ASSERT(l); 00111 ASSERT(r); 00112 00113 //make sure features are compatible 00114 ASSERT(l->get_feature_class()==r->get_feature_class()); 00115 ASSERT(l->get_feature_type()==r->get_feature_type()); 00116 00117 //remove references to previous features 00118 remove_lhs_and_rhs(); 00119 00120 //increase reference counts 00121 SG_REF(l); 00122 if (l==r) 00123 lhs_equals_rhs=true; 00124 else // l!=r 00125 SG_REF(r); 00126 00127 lhs=l; 00128 rhs=r; 00129 00130 ASSERT(!num_lhs || num_lhs==l->get_num_vectors()); 00131 ASSERT(!num_rhs || num_rhs==l->get_num_vectors()); 00132 00133 num_lhs=l->get_num_vectors(); 00134 num_rhs=r->get_num_vectors(); 00135 00136 return true; 00137 } 00138 00139 bool CKernel::set_normalizer(CKernelNormalizer* n) 00140 { 00141 SG_REF(n); 00142 if (lhs && rhs) 00143 n->init(this); 00144 00145 SG_UNREF(normalizer); 00146 normalizer=n; 00147 00148 return (normalizer!=NULL); 00149 } 00150 00151 CKernelNormalizer* CKernel::get_normalizer() 00152 { 00153 SG_REF(normalizer) 00154 return normalizer; 00155 } 00156 00157 bool CKernel::init_normalizer() 00158 { 00159 return normalizer->init(this); 00160 } 00161 00162 void CKernel::cleanup() 00163 { 00164 remove_lhs_and_rhs(); 00165 } 00166 00167 00168 00169 void CKernel::load(CFile* loader) 00170 { 00171 SG_SET_LOCALE_C; 00172 SG_RESET_LOCALE; 00173 } 00174 00175 void CKernel::save(CFile* writer) 00176 { 00177 int32_t m,n; 00178 float64_t* km=get_kernel_matrix<float64_t>(m,n, NULL); 00179 SG_SET_LOCALE_C; 00180 writer->set_real_matrix(km, m,n); 00181 delete[] km; 00182 SG_RESET_LOCALE; 00183 } 00184 00185 void CKernel::remove_lhs_and_rhs() 00186 { 00187 if (rhs!=lhs) 00188 SG_UNREF(rhs); 00189 rhs = NULL; 00190 num_rhs=0; 00191 00192 SG_UNREF(lhs); 00193 lhs = NULL; 00194 num_lhs=0; 00195 lhs_equals_rhs=false; 00196 00197 00198 } 00199 00200 void CKernel::remove_lhs() 00201 { 00202 if (rhs==lhs) 00203 rhs=NULL; 00204 SG_UNREF(lhs); 00205 lhs = NULL; 00206 num_lhs=NULL; 00207 lhs_equals_rhs=false; 00208 00209 } 00210 00212 void CKernel::remove_rhs() 00213 { 00214 if (rhs!=lhs) 00215 SG_UNREF(rhs); 00216 rhs = NULL; 00217 num_rhs=NULL; 00218 lhs_equals_rhs=false; 00219 00220 00221 } 00222 00223 #define ENUM_CASE(n) case n: SG_INFO(#n " "); break; 00224 00225 void CKernel::list_kernel() 00226 { 00227 SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(), 00228 get_combined_kernel_weight(), 00229 get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : 00230 "SLOWBUTMEMEFFICIENT"); 00231 00232 switch (get_kernel_type()) 00233 { 00234 ENUM_CASE(K_UNKNOWN) 00235 ENUM_CASE(K_LINEAR) 00236 ENUM_CASE(K_POLY) 00237 ENUM_CASE(K_GAUSSIAN) 00238 ENUM_CASE(K_GAUSSIANSHIFT) 00239 ENUM_CASE(K_GAUSSIANMATCH) 00240 ENUM_CASE(K_HISTOGRAM) 00241 ENUM_CASE(K_SALZBERG) 00242 ENUM_CASE(K_LOCALITYIMPROVED) 00243 ENUM_CASE(K_SIMPLELOCALITYIMPROVED) 00244 ENUM_CASE(K_FIXEDDEGREE) 00245 ENUM_CASE(K_WEIGHTEDDEGREE) 00246 ENUM_CASE(K_WEIGHTEDDEGREEPOS) 00247 ENUM_CASE(K_WEIGHTEDDEGREERBF) 00248 ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING) 00249 ENUM_CASE(K_POLYMATCH) 00250 ENUM_CASE(K_ALIGNMENT) 00251 ENUM_CASE(K_COMMWORDSTRING) 00252 ENUM_CASE(K_COMMULONGSTRING) 00253 ENUM_CASE(K_SPECTRUMMISMATCHRBF) 00254 ENUM_CASE(K_COMBINED) 00255 ENUM_CASE(K_AUC) 00256 ENUM_CASE(K_CUSTOM) 00257 ENUM_CASE(K_SIGMOID) 00258 ENUM_CASE(K_CHI2) 00259 ENUM_CASE(K_DIAG) 00260 ENUM_CASE(K_CONST) 00261 ENUM_CASE(K_DISTANCE) 00262 ENUM_CASE(K_LOCALALIGNMENT) 00263 ENUM_CASE(K_PYRAMIDCHI2) 00264 ENUM_CASE(K_OLIGO) 00265 ENUM_CASE(K_MATCHWORD) 00266 ENUM_CASE(K_TPPK) 00267 ENUM_CASE(K_REGULATORYMODULES) 00268 ENUM_CASE(K_SPARSESPATIALSAMPLE) 00269 ENUM_CASE(K_HISTOGRAMINTERSECTION) 00270 } 00271 00272 switch (get_feature_class()) 00273 { 00274 ENUM_CASE(C_UNKNOWN) 00275 ENUM_CASE(C_SIMPLE) 00276 ENUM_CASE(C_SPARSE) 00277 ENUM_CASE(C_STRING) 00278 ENUM_CASE(C_COMBINED) 00279 ENUM_CASE(C_COMBINED_DOT) 00280 ENUM_CASE(C_WD) 00281 ENUM_CASE(C_SPEC) 00282 ENUM_CASE(C_WEIGHTEDSPEC) 00283 ENUM_CASE(C_POLY) 00284 ENUM_CASE(C_ANY) 00285 } 00286 00287 switch (get_feature_type()) 00288 { 00289 ENUM_CASE(F_UNKNOWN) 00290 ENUM_CASE(F_BOOL) 00291 ENUM_CASE(F_CHAR) 00292 ENUM_CASE(F_BYTE) 00293 ENUM_CASE(F_SHORT) 00294 ENUM_CASE(F_WORD) 00295 ENUM_CASE(F_INT) 00296 ENUM_CASE(F_UINT) 00297 ENUM_CASE(F_LONG) 00298 ENUM_CASE(F_ULONG) 00299 ENUM_CASE(F_SHORTREAL) 00300 ENUM_CASE(F_DREAL) 00301 ENUM_CASE(F_LONGREAL) 00302 ENUM_CASE(F_ANY) 00303 } 00304 SG_INFO( "\n"); 00305 } 00306 #undef ENUM_CASE 00307 00308 bool CKernel::init_optimization( 00309 int32_t count, int32_t *IDX, float64_t * weights) 00310 { 00311 SG_ERROR( "kernel does not support linadd optimization\n"); 00312 return false ; 00313 } 00314 00315 bool CKernel::delete_optimization() 00316 { 00317 SG_ERROR( "kernel does not support linadd optimization\n"); 00318 return false; 00319 } 00320 00321 float64_t CKernel::compute_optimized(int32_t vector_idx) 00322 { 00323 SG_ERROR( "kernel does not support linadd optimization\n"); 00324 return 0; 00325 } 00326 00327 void CKernel::compute_batch( 00328 int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec, 00329 int32_t* IDX, float64_t* weights, float64_t factor) 00330 { 00331 SG_ERROR( "kernel does not support batch computation\n"); 00332 } 00333 00334 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight) 00335 { 00336 SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n"); 00337 } 00338 00339 void CKernel::clear_normal() 00340 { 00341 SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n"); 00342 } 00343 00344 int32_t CKernel::get_num_subkernels() 00345 { 00346 return 1; 00347 } 00348 00349 void CKernel::compute_by_subkernel( 00350 int32_t vector_idx, float64_t * subkernel_contrib) 00351 { 00352 SG_ERROR( "kernel compute_by_subkernel not implemented\n"); 00353 } 00354 00355 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights) 00356 { 00357 num_weights=1 ; 00358 return &combined_kernel_weight ; 00359 } 00360 00361 void CKernel::set_subkernel_weights(float64_t* weights, int32_t num_weights) 00362 { 00363 combined_kernel_weight = weights[0] ; 00364 if (num_weights!=1) 00365 SG_ERROR( "number of subkernel weights should be one ...\n"); 00366 } 00367 00368 bool CKernel::init_optimization_svm(CSVM * svm) 00369 { 00370 int32_t num_suppvec=svm->get_num_support_vectors(); 00371 int32_t* sv_idx=new int32_t[num_suppvec]; 00372 float64_t* sv_weight=new float64_t[num_suppvec]; 00373 00374 for (int32_t i=0; i<num_suppvec; i++) 00375 { 00376 sv_idx[i] = svm->get_support_vector(i); 00377 sv_weight[i] = svm->get_alpha(i); 00378 } 00379 bool ret = init_optimization(num_suppvec, sv_idx, sv_weight); 00380 00381 delete[] sv_idx; 00382 delete[] sv_weight; 00383 return ret; 00384 } 00385 00386 void CKernel::load_serializable_post() throw (ShogunException) 00387 { 00388 CSGObject::load_serializable_post(); 00389 if (lhs_equals_rhs) 00390 rhs=lhs; 00391 } 00392 00393 void CKernel::save_serializable_pre() throw (ShogunException) 00394 { 00395 CSGObject::save_serializable_pre(); 00396 00397 if (lhs_equals_rhs) 00398 rhs=NULL; 00399 } 00400 00401 void CKernel::save_serializable_post() throw (ShogunException) 00402 { 00403 CSGObject::save_serializable_post(); 00404 00405 if (lhs_equals_rhs) 00406 rhs=lhs; 00407 } 00408 00409 void CKernel::init() 00410 { 00411 cache_size=10; 00412 kernel_matrix=NULL; 00413 lhs=NULL; 00414 rhs=NULL; 00415 num_lhs=0; 00416 num_rhs=0; 00417 combined_kernel_weight=1; 00418 optimization_initialized=false; 00419 opt_type=FASTBUTMEMHUNGRY; 00420 properties=KP_NONE; 00421 normalizer=NULL; 00422 00423 00424 00425 set_normalizer(new CIdentityKernelNormalizer()); 00426 00427 m_parameters->add(&cache_size, "cache_size", 00428 "Cache size in MB."); 00429 m_parameters->add((CSGObject**) &lhs, "lhs", 00430 "Feature vectors to occur on left hand side."); 00431 m_parameters->add((CSGObject**) &rhs, "rhs", 00432 "Feature vectors to occur on right hand side."); 00433 m_parameters->add(&lhs_equals_rhs, "lhs_equals_rhs", 00434 "If features on lhs are the same as on rhs."); 00435 m_parameters->add(&num_lhs, "num_lhs", 00436 "Number of feature vectors on left hand side."); 00437 m_parameters->add(&num_rhs, "num_rhs", 00438 "Number of feature vectors on right hand side."); 00439 m_parameters->add(&combined_kernel_weight, "combined_kernel_weight", 00440 "Combined kernel weight."); 00441 m_parameters->add(&optimization_initialized, 00442 "optimization_initialized", 00443 "Optimization is initialized."); 00444 m_parameters->add((machine_int_t*) &opt_type, "opt_type", 00445 "Optimization type."); 00446 m_parameters->add(&properties, "properties", 00447 "Kernel properties."); 00448 m_parameters->add((CSGObject**) &normalizer, "normalizer", 00449 "Normalize the kernel."); 00450 }