00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _KERNEL_H___
00013 #define _KERNEL_H___
00014
00015 #include "lib/common.h"
00016 #include "lib/Signal.h"
00017 #include "lib/Mathematics.h"
00018 #include "base/SGObject.h"
00019 #include "features/Features.h"
00020 #include "kernel/KernelNormalizer.h"
00021
00022 namespace shogun
00023 {
00024 class CFeatures;
00025 class CKernelNormalizer;
00026 enum EFeatureType;
00027 enum EFeatureClass;
00028
00029 #ifdef USE_SHORTREAL_KERNELCACHE
00030 typedef float32_t KERNELCACHE_ELEM;
00031 #else
00032 typedef float64_t KERNELCACHE_ELEM;
00033 #endif
00034
00035 typedef int64_t KERNELCACHE_IDX;
00036
00037
/** Optimization mode stored in CKernel::opt_type and accessed through
 * CKernel::get_optimization_type() / CKernel::set_optimization_type().
 *
 * NOTE(review): the enumerator names suggest a speed vs. memory trade-off;
 * the code that interprets them is not part of this header.
 */
enum EOptimizationType
{
    FASTBUTMEMHUNGRY    = 0,
    SLOWBUTMEMEFFICIENT = 1
};
00043
/** Identifier of a concrete kernel implementation, as reported by
 * CKernel::get_kernel_type().
 *
 * The numeric values are fixed explicitly; they are spaced in steps of ten
 * with closely related kernels sharing a decade (e.g. 10/11, 30/31/32).
 * The values 130 and 210 are not assigned in this header.
 */
enum EKernelType
{
    K_UNKNOWN                = 0,

    /* linear / polynomial */
    K_LINEAR                 = 10,
    K_SPARSELINEAR           = 11,
    K_POLY                   = 20,

    /* gaussian family */
    K_GAUSSIAN               = 30,
    K_SPARSEGAUSSIAN         = 31,
    K_GAUSSIANSHIFT          = 32,

    K_HISTOGRAM              = 40,
    K_SALZBERG               = 41,
    K_LOCALITYIMPROVED       = 50,
    K_SIMPLELOCALITYIMPROVED = 60,
    K_FIXEDDEGREE            = 70,

    /* weighted degree family */
    K_WEIGHTEDDEGREE         = 80,
    K_WEIGHTEDDEGREEPOS      = 81,

    K_WEIGHTEDCOMMWORDSTRING = 90,
    K_POLYMATCH              = 100,
    K_ALIGNMENT              = 110,
    K_COMMWORDSTRING         = 120,
    K_COMMULONGSTRING        = 121,
    K_COMBINED               = 140,
    K_AUC                    = 150,
    K_CUSTOM                 = 160,
    K_SIGMOID                = 170,
    K_CHI2                   = 180,
    K_DIAG                   = 190,
    K_CONST                  = 200,
    K_DISTANCE               = 220,
    K_LOCALALIGNMENT         = 230,
    K_PYRAMIDCHI2            = 240,
    K_OLIGO                  = 250,
    K_MATCHWORD              = 260,
    K_TPPK                   = 270,
    K_REGULATORYMODULES      = 280
};
00080
/** Capability flags of a kernel.
 *
 * Each enumerator occupies its own bit so that several of them can be
 * OR-ed together in CKernel::properties (see CKernel::set_property(),
 * CKernel::unset_property() and CKernel::has_property()).
 */
enum EKernelProperty
{
    KP_NONE            = 0,
    KP_LINADD          = 1 << 0,
    KP_KERNCOMBINATION = 1 << 1,
    KP_BATCHEVALUATION = 1 << 2
};
00088
// Forward declaration so that the non-dependent name CKernel is known at the
// point where the template below is defined, independent of include order.
class CKernel;

/** Work description handed to CKernel::get_kernel_matrix_helper(), either
 * directly or as the argument of a thread started with pthread_create().
 *
 * @tparam T element type of the output kernel matrix
 */
template <class T> struct K_THREAD_PARAM
{
    /** kernel whose kernel(i,j) is evaluated */
    CKernel* kernel;
    /** first matrix row handled by this worker (inclusive) */
    int32_t start;
    /** end of the row range handled by this worker (exclusive) */
    int32_t end;
    /** progress counter value at which this worker starts, in units of
     * matrix elements; 64 bit because it is derived from m*n, which may
     * exceed the int32_t range */
    int64_t total_start;
    /** progress counter value at which this worker is finished, in units
     * of matrix elements (64 bit for the same reason as total_start) */
    int64_t total_end;
    /** number of matrix rows; also the stride used when writing
     * result[i + j*m] */
    int32_t m;
    /** number of matrix columns */
    int32_t n;
    /** output buffer shared by all workers (not owned by this struct) */
    T* result;
    /** if true only entries with j>=i are computed and mirrored */
    bool symmetric;
    /** if true this worker reports progress and polls for cancellation */
    bool verbose;
};
00113
00114 class CSVM;
00115
00141 class CKernel : public CSGObject
00142 {
00143 friend class CVarianceKernelNormalizer;
00144 friend class CSqrtDiagKernelNormalizer;
00145 friend class CAvgDiagKernelNormalizer;
00146 friend class CRidgeKernelNormalizer;
00147 friend class CFirstElementKernelNormalizer;
00148 friend class CTanimotoKernelNormalizer;
00149 friend class CDiceKernelNormalizer;
00150
00151 public:
00152
00156 CKernel();
00157
00158
00163 CKernel(int32_t size);
00164
00171 CKernel(CFeatures* l, CFeatures* r, int32_t size);
00172
00173 virtual ~CKernel();
00174
00182 inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00183 {
00184 if (idx_a<0 || idx_b<0 || idx_a>=num_lhs || idx_b>=num_rhs)
00185 {
00186 SG_ERROR("Index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
00187 idx_a,num_lhs, idx_b,num_rhs);
00188 }
00189
00190 return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00191 }
00192
00199 void get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n);
00200
00208 template <class T>
00209 T* get_kernel_matrix(int32_t &m, int32_t &n, T* target)
00210 {
00211 T* result = NULL;
00212
00213 if (!has_features())
00214 SG_ERROR( "no features assigned to kernel\n");
00215
00216 if (target && (m!=get_num_vec_lhs() ||
00217 n!=get_num_vec_rhs()) )
00218 {
00219 SG_ERROR( "kernel matrix size mismatch\n");
00220 }
00221
00222 m=get_num_vec_lhs();
00223 n=get_num_vec_rhs();
00224
00225 int64_t total_num = int64_t(m)*n;
00226
00227
00228 bool symmetric= (lhs && lhs==rhs && m==n);
00229
00230 SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n);
00231
00232 if (target)
00233 result=target;
00234 else
00235 result=new T[total_num];
00236
00237 int32_t num_threads=parallel->get_num_threads();
00238 if (num_threads < 2)
00239 {
00240 K_THREAD_PARAM<T> params;
00241 params.kernel=this;
00242 params.result=result;
00243 params.start=0;
00244 params.end=m;
00245 params.total_start=0;
00246 params.total_end=total_num;
00247 params.n=n;
00248 params.m=m;
00249 params.symmetric=symmetric;
00250 params.verbose=true;
00251 get_kernel_matrix_helper<T>((void*) ¶ms);
00252 }
00253 else
00254 {
00255 pthread_t* threads = new pthread_t[num_threads-1];
00256 K_THREAD_PARAM<T>* params = new K_THREAD_PARAM<T>[num_threads];
00257 int64_t step= total_num/num_threads;
00258
00259 int32_t t;
00260
00261 for (t=0; t<num_threads-1; t++)
00262 {
00263 params[t].kernel = this;
00264 params[t].result = result;
00265 params[t].start = compute_row_start(t*step, n, symmetric);
00266 params[t].end = compute_row_start((t+1)*step, n, symmetric);
00267 params[t].total_start=t*step;
00268 params[t].total_end=(t+1)*step;
00269 params[t].n=n;
00270 params[t].m=m;
00271 params[t].symmetric=symmetric;
00272 params[t].verbose=false;
00273 pthread_create(&threads[t], NULL,
00274 CKernel::get_kernel_matrix_helper<T>, (void*)¶ms[t]);
00275 }
00276
00277 params[t].kernel = this;
00278 params[t].result = result;
00279 params[t].start = compute_row_start(t*step, n, symmetric);
00280 params[t].end = m;
00281 params[t].total_start=t*step;
00282 params[t].total_end=total_num;
00283 params[t].n=n;
00284 params[t].m=m;
00285 params[t].symmetric=symmetric;
00286 params[t].verbose=true;
00287 get_kernel_matrix_helper<T>(¶ms[t]);
00288
00289 for (t=0; t<num_threads-1; t++)
00290 pthread_join(threads[t], NULL);
00291
00292 delete[] params;
00293 delete[] threads;
00294 }
00295
00296 SG_DONE();
00297
00298 return result;
00299 }
00300
00301
00312 virtual bool init(CFeatures* lhs, CFeatures* rhs);
00313
00318 virtual bool set_normalizer(CKernelNormalizer* normalizer);
00319
00324 virtual CKernelNormalizer* get_normalizer();
00325
00329 virtual bool init_normalizer();
00330
00337 virtual void cleanup();
00338
00344 bool load(char* fname);
00345
00351 bool save(char* fname);
00352
00357 inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00358
00363 inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00364
00369 virtual inline int32_t get_num_vec_lhs()
00370 {
00371 return num_lhs;
00372 }
00373
00378 virtual inline int32_t get_num_vec_rhs()
00379 {
00380 return num_rhs;
00381 }
00382
00387 virtual inline bool has_features()
00388 {
00389 return lhs && rhs;
00390 }
00391
00396 inline bool lhs_equals_rhs()
00397 {
00398 return lhs==rhs;
00399 }
00400
00402 virtual void remove_lhs_and_rhs();
00403
00405 virtual void remove_lhs();
00406
00408 virtual void remove_rhs();
00409
00417 virtual EKernelType get_kernel_type()=0 ;
00418
00425 virtual EFeatureType get_feature_type()=0;
00426
00433 virtual EFeatureClass get_feature_class()=0;
00434
00439 inline void set_cache_size(int32_t size)
00440 {
00441 cache_size = size;
00442
00443 }
00444
00449 inline int32_t get_cache_size() { return cache_size; }
00450
00451
00452
00454 void list_kernel();
00455
00461 inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00462
00466 virtual void clear_normal();
00467
00473 virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00474
00479 inline EOptimizationType get_optimization_type() { return opt_type; }
00480
00485 virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00486
00491 inline bool get_is_initialized() { return optimization_initialized; }
00492
00500 virtual bool init_optimization(
00501 int32_t count, int32_t *IDX, float64_t *weights);
00502
00507 virtual bool delete_optimization();
00508
00514 bool init_optimization_svm(CSVM * svm) ;
00515
00521 virtual float64_t compute_optimized(int32_t vector_idx);
00522
00531 virtual void compute_batch(
00532 int32_t num_vec, int32_t* vec_idx, float64_t* target,
00533 int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00534 float64_t factor=1.0);
00535
00540 inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00541
00546 inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00547
00552 virtual int32_t get_num_subkernels();
00553
00559 virtual void compute_by_subkernel(
00560 int32_t vector_idx, float64_t * subkernel_contrib);
00561
00567 virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00568
00574 virtual void set_subkernel_weights(
00575 float64_t* weights, int32_t num_weights);
00576
00577 protected:
00582 inline void set_property(EKernelProperty p)
00583 {
00584 properties |= p;
00585 }
00586
00591 inline void unset_property(EKernelProperty p)
00592 {
00593 properties &= (properties | p) ^ p;
00594 }
00595
00600 inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00601
00612 virtual float64_t compute(int32_t x, int32_t y)=0;
00613
00614
00621 int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
00622 {
00623 int32_t i_start;
00624
00625 if (symmetric)
00626 i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
00627 else
00628 i_start=(int32_t) (offs/int64_t(n));
00629
00630 return i_start;
00631 }
00632
00633
00638 template <class T>
00639 static void* get_kernel_matrix_helper(void* p)
00640 {
00641 K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
00642 int32_t i_start=params->start;
00643 int32_t i_end=params->end;
00644 CKernel* k=params->kernel;
00645 T* result=params->result;
00646 bool symmetric=params->symmetric;
00647 int32_t n=params->n;
00648 int32_t m=params->m;
00649 bool verbose=params->verbose;
00650 int64_t total_start=params->total_start;
00651 int64_t total_end=params->total_end;
00652 int64_t total=total_start;
00653
00654 for (int32_t i=i_start; i<i_end; i++)
00655 {
00656 int32_t j_start=0;
00657
00658 if (symmetric)
00659 j_start=i;
00660
00661 for (int32_t j=j_start; j<n; j++)
00662 {
00663 float64_t v=k->kernel(i,j);
00664 result[i+j*m]=v;
00665
00666 if (symmetric && i!=j)
00667 result[j+i*m]=v;
00668
00669 if (verbose)
00670 {
00671 total++;
00672
00673 if (symmetric && i!=j)
00674 total++;
00675
00676 if (total%100 == 0)
00677 k->SG_PROGRESS(total, total_start, total_end);
00678
00679 if (CSignal::cancel_computations())
00680 break;
00681 }
00682 }
00683
00684 }
00685
00686 return NULL;
00687 }
00688
00689
00691
00692
00693 #ifdef HAVE_BOOST_SERIALIZATION
00694 private:
00695
00696 friend class ::boost::serialization::access;
00697 template<class Archive>
00698 void serialize(Archive & ar, const unsigned int archive_version)
00699 {
00700
00701 SG_DEBUG("archiving CKernel\n");
00702
00703 ar & ::boost::serialization::base_object<CSGObject>(*this);
00704
00705 ar & cache_size;
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717 ar & rhs;
00718 ar & lhs;
00719
00720 ar & combined_kernel_weight;
00721
00722 ar & optimization_initialized;
00723
00724 ar & opt_type;
00725
00726 ar & properties;
00727
00728 SG_DEBUG("done with CKernel\n");
00729
00730 }
00731
00732 #endif //HAVE_BOOST_SERIALIZATION
00733
00734
00735
00736 protected:
00738 int32_t cache_size;
00739
00740
00741
00744 KERNELCACHE_ELEM* kernel_matrix;
00745
00747 CFeatures* lhs;
00749 CFeatures* rhs;
00750
00752 int32_t num_lhs;
00754 int32_t num_rhs;
00755
00757 float64_t combined_kernel_weight;
00758
00760 bool optimization_initialized;
00764 EOptimizationType opt_type;
00765
00767 uint64_t properties;
00768
00771 CKernelNormalizer* normalizer;
00772 };
00773
00774 }
00775 #endif