Kernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/config.h"
00013 #include "lib/common.h"
00014 #include "lib/io.h"
00015 #include "lib/File.h"
00016 #include "lib/Time.h"
00017 #include "lib/Signal.h"
00018 
00019 #include "base/Parallel.h"
00020 
00021 #include "kernel/Kernel.h"
00022 #include "kernel/IdentityKernelNormalizer.h"
00023 #include "features/Features.h"
00024 
00025 #include "classifier/svm/SVM.h"
00026 
00027 #include <string.h>
00028 #include <unistd.h>
00029 #include <math.h>
00030 
00031 #ifndef WIN32
00032 #include <pthread.h>
00033 #endif
00034 
00035 using namespace shogun;
00036 
00037 CKernel::CKernel()
00038 : CSGObject(), cache_size(10), kernel_matrix(NULL), lhs(NULL),
00039     rhs(NULL), num_lhs(0), num_rhs(0), combined_kernel_weight(1),
00040     optimization_initialized(false), opt_type(FASTBUTMEMHUNGRY),
00041     properties(KP_NONE), normalizer(NULL)
00042 {
00043 
00044 
00045 
00046     set_normalizer(new CIdentityKernelNormalizer());
00047 }
00048 
00049 CKernel::CKernel(int32_t size)
00050 : CSGObject(), kernel_matrix(NULL), lhs(NULL), rhs(NULL), num_lhs(0),
00051     num_rhs(0), combined_kernel_weight(1), optimization_initialized(false),
00052     opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE), normalizer(NULL)
00053 {
00054     if (size<10)
00055         size=10;
00056 
00057     cache_size=size;
00058 
00059 
00060     if (get_is_initialized())
00061         SG_ERROR( "COptimizableKernel still initialized on destruction");
00062 
00063     set_normalizer(new CIdentityKernelNormalizer());
00064 }
00065 
00066 
00067 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject(),
00068     kernel_matrix(NULL), lhs(NULL), rhs(NULL), num_lhs(0), num_rhs(0),
00069     combined_kernel_weight(1), optimization_initialized(false),
00070     opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE), normalizer(NULL)
00071 {
00072     if (size<10)
00073         size=10;
00074 
00075     cache_size=size;
00076 
00077     if (get_is_initialized())
00078         SG_ERROR("Kernel initialized on construction.\n");
00079 
00080     set_normalizer(new CIdentityKernelNormalizer());
00081     init(p_lhs, p_rhs);
00082 }
00083 
00084 CKernel::~CKernel()
00085 {
00086     if (get_is_initialized())
00087         SG_ERROR("Kernel still initialized on destruction.\n");
00088 
00089     remove_lhs_and_rhs();
00090     SG_UNREF(normalizer);
00091 
00092     SG_INFO("Kernel deleted (%p).\n", this);
00093 }
00094 
00095 void CKernel::get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n)
00096 {
00097     ASSERT(dst && m && n);
00098 
00099     float64_t* result = NULL;
00100 
00101     if (has_features())
00102     {
00103         int32_t num_vec1=get_num_vec_lhs();
00104         int32_t num_vec2=get_num_vec_rhs();
00105         *m=num_vec1;
00106         *n=num_vec2;
00107 
00108         int64_t total_num = ((int64_t) num_vec1) * num_vec2;
00109         SG_DEBUG( "allocating memory for a kernel matrix"
00110                 " of size %dx%d\n", num_vec1, num_vec2);
00111 
00112         result=(float64_t*) malloc(sizeof(float64_t)*total_num);
00113         ASSERT(result);
00114         get_kernel_matrix<float64_t>(num_vec1,num_vec2, result);
00115     }
00116     else
00117         SG_ERROR( "no features assigned to kernel\n");
00118 
00119     *dst=result;
00120 }
00121 
00122 
00123 
00124 bool CKernel::init(CFeatures* l, CFeatures* r)
00125 {
00126     //make sure features were indeed supplied
00127     ASSERT(l);
00128     ASSERT(r);
00129 
00130     //make sure features are compatible
00131     ASSERT(l->get_feature_class()==r->get_feature_class());
00132     ASSERT(l->get_feature_type()==r->get_feature_type());
00133 
00134     //remove references to previous features
00135     remove_lhs_and_rhs();
00136 
00137     //increase reference counts
00138     SG_REF(l);
00139     if (l!=r)
00140         SG_REF(r);
00141 
00142     lhs=l;
00143     rhs=r;
00144 
00145     ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00146     ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00147 
00148     num_lhs=l->get_num_vectors();
00149     num_rhs=r->get_num_vectors();
00150 
00151     return true;
00152 }
00153 
00154 bool CKernel::set_normalizer(CKernelNormalizer* n)
00155 {
00156     SG_REF(n);
00157     SG_UNREF(normalizer);
00158     normalizer=n;
00159 
00160     return (normalizer!=NULL);
00161 }
00162 
00163 CKernelNormalizer* CKernel::get_normalizer()
00164 {
00165     SG_REF(normalizer)
00166     return normalizer;
00167 }
00168 
00169 bool CKernel::init_normalizer()
00170 {
00171     return normalizer->init(this);
00172 }
00173 
00174 void CKernel::cleanup()
00175 {
00176     remove_lhs_and_rhs();
00177 }
00178 
00179 
00180 
00181 bool CKernel::load(char* fname)
00182 {
00183     return false;
00184 }
00185 
00186 bool CKernel::save(char* fname)
00187 {
00188     int32_t i=0;
00189     int32_t num_left=get_num_vec_lhs();
00190     int32_t num_right=rhs->get_num_vectors();
00191     KERNELCACHE_IDX num_total=num_left*num_right;
00192 
00193     CFile f(fname, 'w', F_DREAL);
00194 
00195     for (int32_t l=0; l< (int32_t) num_left && f.is_ok(); l++)
00196     {
00197         for (int32_t r=0; r< (int32_t) num_right && f.is_ok(); r++)
00198         {
00199              if (!(i % (num_total/200+1)))
00200                 SG_PROGRESS(i, 0, num_total-1);
00201 
00202             float64_t k=kernel(l,r);
00203             f.save_real_data(&k, 1);
00204 
00205             i++;
00206         }
00207     }
00208     SG_DONE();
00209 
00210     if (f.is_ok())
00211         SG_INFO( "kernel matrix of size %ld x %ld written (filesize: %ld)\n", num_left, num_right, num_total*sizeof(KERNELCACHE_ELEM));
00212 
00213     return (f.is_ok());
00214 }
00215 
00216 void CKernel::remove_lhs_and_rhs()
00217 {
00218     if (rhs!=lhs)
00219         SG_UNREF(rhs);
00220     rhs = NULL;
00221     num_rhs=0;
00222 
00223     SG_UNREF(lhs);
00224     lhs = NULL;
00225     num_lhs=0;
00226 
00227 
00228 }
00229 
00230 void CKernel::remove_lhs()
00231 {
00232     if (rhs==lhs)
00233         rhs=NULL;
00234     SG_UNREF(lhs);
00235     lhs = NULL;
00236     num_lhs=NULL;
00237 
00238 
00239 }
00240 
00242 void CKernel::remove_rhs()
00243 {
00244     if (rhs!=lhs)
00245         SG_UNREF(rhs);
00246     rhs = NULL;
00247     num_rhs=NULL;
00248 
00249 
00250 }
00251 
00252 
00253 void CKernel::list_kernel()
00254 {
00255     SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00256             get_combined_kernel_weight(),
00257             get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00258             "SLOWBUTMEMEFFICIENT");
00259 
00260     switch (get_kernel_type())
00261     {
00262         case K_UNKNOWN:
00263             SG_INFO( "K_UNKNOWN ");
00264             break;
00265         case K_LINEAR:
00266             SG_INFO( "K_LINEAR ");
00267             break;
00268         case K_SPARSELINEAR:
00269             SG_INFO( "K_SPARSELINEAR ");
00270             break;
00271         case K_POLY:
00272             SG_INFO( "K_POLY ");
00273             break;
00274         case K_GAUSSIAN:
00275             SG_INFO( "K_GAUSSIAN ");
00276             break;
00277         case K_SPARSEGAUSSIAN:
00278             SG_INFO( "K_SPARSEGAUSSIAN ");
00279             break;
00280         case K_GAUSSIANSHIFT:
00281             SG_INFO( "K_GAUSSIANSHIFT ");
00282             break;
00283         case K_HISTOGRAM:
00284             SG_INFO( "K_HISTOGRAM ");
00285             break;
00286         case K_SALZBERG:
00287             SG_INFO( "K_SALZBERG ");
00288             break;
00289         case K_LOCALITYIMPROVED:
00290             SG_INFO( "K_LOCALITYIMPROVED ");
00291             break;
00292         case K_SIMPLELOCALITYIMPROVED:
00293             SG_INFO( "K_SIMPLELOCALITYIMPROVED ");
00294             break;
00295         case K_FIXEDDEGREE:
00296             SG_INFO( "K_FIXEDDEGREE ");
00297             break;
00298         case K_WEIGHTEDDEGREE:
00299             SG_INFO( "K_WEIGHTEDDEGREE ");
00300             break;
00301         case K_WEIGHTEDDEGREEPOS:
00302             SG_INFO( "K_WEIGHTEDDEGREEPOS ");
00303             break;
00304         case K_WEIGHTEDCOMMWORDSTRING:
00305             SG_INFO( "K_WEIGHTEDCOMMWORDSTRING ");
00306             break;
00307         case K_POLYMATCH:
00308             SG_INFO( "K_POLYMATCH ");
00309             break;
00310         case K_ALIGNMENT:
00311             SG_INFO( "K_ALIGNMENT ");
00312             break;
00313         case K_COMMWORDSTRING:
00314             SG_INFO( "K_COMMWORDSTRING ");
00315             break;
00316         case K_COMMULONGSTRING:
00317             SG_INFO( "K_COMMULONGSTRING ");
00318             break;
00319         case K_COMBINED:
00320             SG_INFO( "K_COMBINED ");
00321             break;
00322         case K_AUC:
00323             SG_INFO( "K_AUC ");
00324             break;
00325         case K_CUSTOM:
00326             SG_INFO( "K_CUSTOM ");
00327             break;
00328         case K_SIGMOID:
00329             SG_INFO( "K_SIGMOID ");
00330             break;
00331         case K_CHI2:
00332             SG_INFO( "K_CHI2 ");
00333             break;
00334         case K_DIAG:
00335             SG_INFO( "K_DIAG ");
00336             break;
00337         case K_CONST:
00338             SG_INFO( "K_CONST ");
00339             break;
00340         case K_DISTANCE:
00341             SG_INFO( "K_DISTANCE ");
00342             break;
00343         case K_LOCALALIGNMENT:
00344             SG_INFO( "K_LOCALALIGNMENT ");
00345             break;
00346         case K_TPPK:
00347             SG_INFO( "K_TPPK ");
00348             break;
00349         default:
00350          SG_ERROR( "ERROR UNKNOWN KERNEL TYPE");
00351             break;
00352     }
00353 
00354     switch (get_feature_class())
00355     {
00356         case C_UNKNOWN:
00357             SG_INFO( "C_UNKNOWN ");
00358             break;
00359         case C_SIMPLE:
00360             SG_INFO( "C_SIMPLE ");
00361             break;
00362         case C_SPARSE:
00363             SG_INFO( "C_SPARSE ");
00364             break;
00365         case C_STRING:
00366             SG_INFO( "C_STRING ");
00367             break;
00368         case C_COMBINED:
00369             SG_INFO( "C_COMBINED ");
00370             break;
00371         case C_ANY:
00372             SG_INFO( "C_ANY ");
00373             break;
00374         default:
00375          SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
00376     }
00377 
00378     switch (get_feature_type())
00379     {
00380         case F_UNKNOWN:
00381             SG_INFO( "F_UNKNOWN ");
00382             break;
00383         case F_DREAL:
00384             SG_INFO( "F_REAL ");
00385             break;
00386         case F_SHORT:
00387             SG_INFO( "F_SHORT ");
00388             break;
00389         case F_CHAR:
00390             SG_INFO( "F_CHAR ");
00391             break;
00392         case F_INT:
00393             SG_INFO( "F_INT ");
00394             break;
00395         case F_BYTE:
00396             SG_INFO( "F_BYTE ");
00397             break;
00398         case F_WORD:
00399             SG_INFO( "F_WORD ");
00400             break;
00401         case F_ULONG:
00402             SG_INFO( "F_ULONG ");
00403             break;
00404         case F_ANY:
00405             SG_INFO( "F_ANY ");
00406             break;
00407         default:
00408          SG_ERROR( "ERROR UNKNOWN FEATURE TYPE");
00409             break;
00410     }
00411     SG_INFO( "\n");
00412 }
00413 
00414 bool CKernel::init_optimization(
00415     int32_t count, int32_t *IDX, float64_t * weights)
00416 {
00417    SG_ERROR( "kernel does not support linadd optimization\n");
00418     return false ;
00419 }
00420 
00421 bool CKernel::delete_optimization()
00422 {
00423    SG_ERROR( "kernel does not support linadd optimization\n");
00424     return false;
00425 }
00426 
00427 float64_t CKernel::compute_optimized(int32_t vector_idx)
00428 {
00429    SG_ERROR( "kernel does not support linadd optimization\n");
00430     return 0;
00431 }
00432 
00433 void CKernel::compute_batch(
00434     int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00435     int32_t* IDX, float64_t* weights, float64_t factor)
00436 {
00437    SG_ERROR( "kernel does not support batch computation\n");
00438 }
00439 
00440 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00441 {
00442    SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00443 }
00444 
00445 void CKernel::clear_normal()
00446 {
00447    SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00448 }
00449 
00450 int32_t CKernel::get_num_subkernels()
00451 {
00452     return 1;
00453 }
00454 
00455 void CKernel::compute_by_subkernel(
00456     int32_t vector_idx, float64_t * subkernel_contrib)
00457 {
00458    SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00459 }
00460 
00461 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00462 {
00463     num_weights=1 ;
00464     return &combined_kernel_weight ;
00465 }
00466 
00467 void CKernel::set_subkernel_weights(float64_t* weights, int32_t num_weights)
00468 {
00469     combined_kernel_weight = weights[0] ;
00470     if (num_weights!=1)
00471       SG_ERROR( "number of subkernel weights should be one ...\n");
00472 }
00473 
00474 bool CKernel::init_optimization_svm(CSVM * svm)
00475 {
00476     int32_t num_suppvec=svm->get_num_support_vectors();
00477     int32_t* sv_idx=new int32_t[num_suppvec];
00478     float64_t* sv_weight=new float64_t[num_suppvec];
00479 
00480     for (int32_t i=0; i<num_suppvec; i++)
00481     {
00482         sv_idx[i]    = svm->get_support_vector(i);
00483         sv_weight[i] = svm->get_alpha(i);
00484     }
00485     bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00486 
00487     delete[] sv_idx;
00488     delete[] sv_weight;
00489     return ret;
00490 }
00491 

SHOGUN Machine Learning Toolbox - Documentation