SHOGUN v0.9.0
Kernel.cpp
浏览该文件的文档。
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/config.h"
00013 #include "lib/common.h"
00014 #include "lib/io.h"
00015 #include "lib/File.h"
00016 #include "lib/Time.h"
00017 #include "lib/Signal.h"
00018 
00019 #include "base/Parallel.h"
00020 
00021 #include "kernel/Kernel.h"
00022 #include "kernel/IdentityKernelNormalizer.h"
00023 #include "features/Features.h"
00024 #include "base/Parameter.h"
00025 
00026 #include "classifier/svm/SVM.h"
00027 
00028 #include <string.h>
00029 #include <unistd.h>
00030 #include <math.h>
00031 
00032 #ifndef WIN32
00033 #include <pthread.h>
00034 #endif
00035 
00036 using namespace shogun;
00037 
00038 CKernel::CKernel() : CSGObject()
00039 {
00040     init();
00041 }
00042 
00043 CKernel::CKernel(int32_t size) : CSGObject()
00044 {
00045     init();
00046 
00047     if (size<10)
00048         size=10;
00049 
00050     cache_size=size;
00051 }
00052 
00053 
00054 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject()
00055 {
00056     init();
00057 
00058     if (size<10)
00059         size=10;
00060 
00061     cache_size=size;
00062 
00063     set_normalizer(new CIdentityKernelNormalizer());
00064     init(p_lhs, p_rhs);
00065 }
00066 
00067 CKernel::~CKernel()
00068 {
00069     if (get_is_initialized())
00070         SG_ERROR("Kernel still initialized on destruction.\n");
00071 
00072     remove_lhs_and_rhs();
00073     SG_UNREF(normalizer);
00074 
00075     SG_INFO("Kernel deleted (%p).\n", this);
00076 }
00077 
00078 void CKernel::get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n)
00079 {
00080     ASSERT(dst && m && n);
00081 
00082     float64_t* result = NULL;
00083 
00084     if (has_features())
00085     {
00086         int32_t num_vec1=get_num_vec_lhs();
00087         int32_t num_vec2=get_num_vec_rhs();
00088         *m=num_vec1;
00089         *n=num_vec2;
00090 
00091         int64_t total_num = ((int64_t) num_vec1) * num_vec2;
00092         SG_DEBUG( "allocating memory for a kernel matrix"
00093                 " of size %dx%d\n", num_vec1, num_vec2);
00094 
00095         result=(float64_t*) malloc(sizeof(float64_t)*total_num);
00096         ASSERT(result);
00097         get_kernel_matrix<float64_t>(num_vec1,num_vec2, result);
00098     }
00099     else
00100         SG_ERROR( "no features assigned to kernel\n");
00101 
00102     *dst=result;
00103 }
00104 
00105 
00106 
00107 bool CKernel::init(CFeatures* l, CFeatures* r)
00108 {
00109     //make sure features were indeed supplied
00110     ASSERT(l);
00111     ASSERT(r);
00112 
00113     //make sure features are compatible
00114     ASSERT(l->get_feature_class()==r->get_feature_class());
00115     ASSERT(l->get_feature_type()==r->get_feature_type());
00116 
00117     //remove references to previous features
00118     remove_lhs_and_rhs();
00119 
00120     //increase reference counts
00121     SG_REF(l);
00122     if (l==r)
00123         lhs_equals_rhs=true;
00124     else // l!=r
00125         SG_REF(r);
00126 
00127     lhs=l;
00128     rhs=r;
00129 
00130     ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00131     ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00132 
00133     num_lhs=l->get_num_vectors();
00134     num_rhs=r->get_num_vectors();
00135 
00136     return true;
00137 }
00138 
00139 bool CKernel::set_normalizer(CKernelNormalizer* n)
00140 {
00141     SG_REF(n);
00142     if (lhs && rhs)
00143         n->init(this);
00144 
00145     SG_UNREF(normalizer);
00146     normalizer=n;
00147 
00148     return (normalizer!=NULL);
00149 }
00150 
00151 CKernelNormalizer* CKernel::get_normalizer()
00152 {
00153     SG_REF(normalizer)
00154     return normalizer;
00155 }
00156 
00157 bool CKernel::init_normalizer()
00158 {
00159     return normalizer->init(this);
00160 }
00161 
00162 void CKernel::cleanup()
00163 {
00164     remove_lhs_and_rhs();
00165 }
00166 
00167 
00168 
00169 void CKernel::load(CFile* loader)
00170 {
00171     SG_SET_LOCALE_C;
00172     SG_RESET_LOCALE;
00173 }
00174 
00175 void CKernel::save(CFile* writer)
00176 {
00177     int32_t m,n;
00178     float64_t* km=get_kernel_matrix<float64_t>(m,n, NULL);
00179     SG_SET_LOCALE_C;
00180     writer->set_real_matrix(km, m,n);
00181     delete[] km;
00182     SG_RESET_LOCALE;
00183 }
00184 
00185 void CKernel::remove_lhs_and_rhs()
00186 {
00187     if (rhs!=lhs)
00188         SG_UNREF(rhs);
00189     rhs = NULL;
00190     num_rhs=0;
00191 
00192     SG_UNREF(lhs);
00193     lhs = NULL;
00194     num_lhs=0;
00195     lhs_equals_rhs=false;
00196 
00197 
00198 }
00199 
00200 void CKernel::remove_lhs()
00201 {
00202     if (rhs==lhs)
00203         rhs=NULL;
00204     SG_UNREF(lhs);
00205     lhs = NULL;
00206     num_lhs=NULL;
00207     lhs_equals_rhs=false;
00208 
00209 }
00210 
00212 void CKernel::remove_rhs()
00213 {
00214     if (rhs!=lhs)
00215         SG_UNREF(rhs);
00216     rhs = NULL;
00217     num_rhs=NULL;
00218     lhs_equals_rhs=false;
00219 
00220 
00221 }
00222 
00223 #define ENUM_CASE(n) case n: SG_INFO(#n " "); break;
00224 
00225 void CKernel::list_kernel()
00226 {
00227     SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00228             get_combined_kernel_weight(),
00229             get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00230             "SLOWBUTMEMEFFICIENT");
00231 
00232     switch (get_kernel_type())
00233     {
00234         ENUM_CASE(K_UNKNOWN)
00235         ENUM_CASE(K_LINEAR)
00236         ENUM_CASE(K_POLY)
00237         ENUM_CASE(K_GAUSSIAN)
00238         ENUM_CASE(K_GAUSSIANSHIFT)
00239         ENUM_CASE(K_GAUSSIANMATCH)
00240         ENUM_CASE(K_HISTOGRAM)
00241         ENUM_CASE(K_SALZBERG)
00242         ENUM_CASE(K_LOCALITYIMPROVED)
00243         ENUM_CASE(K_SIMPLELOCALITYIMPROVED)
00244         ENUM_CASE(K_FIXEDDEGREE)
00245         ENUM_CASE(K_WEIGHTEDDEGREE)
00246         ENUM_CASE(K_WEIGHTEDDEGREEPOS)
00247         ENUM_CASE(K_WEIGHTEDDEGREERBF)
00248         ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING)
00249         ENUM_CASE(K_POLYMATCH)
00250         ENUM_CASE(K_ALIGNMENT)
00251         ENUM_CASE(K_COMMWORDSTRING)
00252         ENUM_CASE(K_COMMULONGSTRING)
00253         ENUM_CASE(K_SPECTRUMMISMATCHRBF)
00254         ENUM_CASE(K_COMBINED)
00255         ENUM_CASE(K_AUC)
00256         ENUM_CASE(K_CUSTOM)
00257         ENUM_CASE(K_SIGMOID)
00258         ENUM_CASE(K_CHI2)
00259         ENUM_CASE(K_DIAG)
00260         ENUM_CASE(K_CONST)
00261         ENUM_CASE(K_DISTANCE)
00262         ENUM_CASE(K_LOCALALIGNMENT)
00263         ENUM_CASE(K_PYRAMIDCHI2)
00264         ENUM_CASE(K_OLIGO)
00265         ENUM_CASE(K_MATCHWORD)
00266         ENUM_CASE(K_TPPK)
00267         ENUM_CASE(K_REGULATORYMODULES)
00268         ENUM_CASE(K_SPARSESPATIALSAMPLE)
00269         ENUM_CASE(K_HISTOGRAMINTERSECTION)
00270     }
00271 
00272     switch (get_feature_class())
00273     {
00274         ENUM_CASE(C_UNKNOWN)
00275         ENUM_CASE(C_SIMPLE)
00276         ENUM_CASE(C_SPARSE)
00277         ENUM_CASE(C_STRING)
00278         ENUM_CASE(C_COMBINED)
00279         ENUM_CASE(C_COMBINED_DOT)
00280         ENUM_CASE(C_WD)
00281         ENUM_CASE(C_SPEC)
00282         ENUM_CASE(C_WEIGHTEDSPEC)
00283         ENUM_CASE(C_POLY)
00284         ENUM_CASE(C_ANY)
00285     }
00286 
00287     switch (get_feature_type())
00288     {
00289         ENUM_CASE(F_UNKNOWN)
00290         ENUM_CASE(F_BOOL)
00291         ENUM_CASE(F_CHAR)
00292         ENUM_CASE(F_BYTE)
00293         ENUM_CASE(F_SHORT)
00294         ENUM_CASE(F_WORD)
00295         ENUM_CASE(F_INT)
00296         ENUM_CASE(F_UINT)
00297         ENUM_CASE(F_LONG)
00298         ENUM_CASE(F_ULONG)
00299         ENUM_CASE(F_SHORTREAL)
00300         ENUM_CASE(F_DREAL)
00301         ENUM_CASE(F_LONGREAL)
00302         ENUM_CASE(F_ANY)
00303     }
00304     SG_INFO( "\n");
00305 }
00306 #undef ENUM_CASE
00307 
00308 bool CKernel::init_optimization(
00309     int32_t count, int32_t *IDX, float64_t * weights)
00310 {
00311    SG_ERROR( "kernel does not support linadd optimization\n");
00312     return false ;
00313 }
00314 
00315 bool CKernel::delete_optimization()
00316 {
00317    SG_ERROR( "kernel does not support linadd optimization\n");
00318     return false;
00319 }
00320 
00321 float64_t CKernel::compute_optimized(int32_t vector_idx)
00322 {
00323    SG_ERROR( "kernel does not support linadd optimization\n");
00324     return 0;
00325 }
00326 
00327 void CKernel::compute_batch(
00328     int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00329     int32_t* IDX, float64_t* weights, float64_t factor)
00330 {
00331    SG_ERROR( "kernel does not support batch computation\n");
00332 }
00333 
00334 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00335 {
00336    SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00337 }
00338 
00339 void CKernel::clear_normal()
00340 {
00341    SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00342 }
00343 
00344 int32_t CKernel::get_num_subkernels()
00345 {
00346     return 1;
00347 }
00348 
00349 void CKernel::compute_by_subkernel(
00350     int32_t vector_idx, float64_t * subkernel_contrib)
00351 {
00352    SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00353 }
00354 
00355 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00356 {
00357     num_weights=1 ;
00358     return &combined_kernel_weight ;
00359 }
00360 
00361 void CKernel::set_subkernel_weights(float64_t* weights, int32_t num_weights)
00362 {
00363     combined_kernel_weight = weights[0] ;
00364     if (num_weights!=1)
00365       SG_ERROR( "number of subkernel weights should be one ...\n");
00366 }
00367 
00368 bool CKernel::init_optimization_svm(CSVM * svm)
00369 {
00370     int32_t num_suppvec=svm->get_num_support_vectors();
00371     int32_t* sv_idx=new int32_t[num_suppvec];
00372     float64_t* sv_weight=new float64_t[num_suppvec];
00373 
00374     for (int32_t i=0; i<num_suppvec; i++)
00375     {
00376         sv_idx[i]    = svm->get_support_vector(i);
00377         sv_weight[i] = svm->get_alpha(i);
00378     }
00379     bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00380 
00381     delete[] sv_idx;
00382     delete[] sv_weight;
00383     return ret;
00384 }
00385 
00386 void CKernel::load_serializable_post() throw (ShogunException)
00387 {
00388     CSGObject::load_serializable_post();
00389     if (lhs_equals_rhs)
00390         rhs=lhs;
00391 }
00392 
00393 void CKernel::save_serializable_pre() throw (ShogunException)
00394 {
00395     CSGObject::save_serializable_pre();
00396 
00397     if (lhs_equals_rhs)
00398         rhs=NULL;
00399 }
00400 
00401 void CKernel::save_serializable_post() throw (ShogunException)
00402 {
00403     CSGObject::save_serializable_post();
00404 
00405     if (lhs_equals_rhs)
00406         rhs=lhs;
00407 }
00408 
00409 void CKernel::init()
00410 {
00411     cache_size=10;
00412     kernel_matrix=NULL;
00413     lhs=NULL;
00414     rhs=NULL;
00415     num_lhs=0;
00416     num_rhs=0;
00417     combined_kernel_weight=1;
00418     optimization_initialized=false;
00419     opt_type=FASTBUTMEMHUNGRY;
00420     properties=KP_NONE;
00421     normalizer=NULL;
00422 
00423 
00424 
00425     set_normalizer(new CIdentityKernelNormalizer());
00426 
00427     m_parameters->add(&cache_size, "cache_size",
00428                       "Cache size in MB.");
00429     m_parameters->add((CSGObject**) &lhs, "lhs",
00430                       "Feature vectors to occur on left hand side.");
00431     m_parameters->add((CSGObject**) &rhs, "rhs",
00432                       "Feature vectors to occur on right hand side.");
00433     m_parameters->add(&lhs_equals_rhs, "lhs_equals_rhs",
00434                       "If features on lhs are the same as on rhs.");
00435     m_parameters->add(&num_lhs, "num_lhs",
00436                       "Number of feature vectors on left hand side.");
00437     m_parameters->add(&num_rhs, "num_rhs",
00438                       "Number of feature vectors on right hand side.");
00439     m_parameters->add(&combined_kernel_weight, "combined_kernel_weight",
00440                       "Combined kernel weight.");
00441     m_parameters->add(&optimization_initialized,
00442                       "optimization_initialized",
00443                       "Optimization is initialized.");
00444     m_parameters->add((machine_int_t*) &opt_type, "opt_type",
00445                       "Optimization type.");
00446     m_parameters->add(&properties, "properties",
00447                       "Kernel properties.");
00448     m_parameters->add((CSGObject**) &normalizer, "normalizer",
00449                       "Normalize the kernel.");
00450 }

SHOGUN Machine Learning Toolbox - Documentation