SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___ 00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___ 00014 00015 #include "lib/common.h" 00016 #include "lib/Trie.h" 00017 #include "kernel/StringKernel.h" 00018 #include "kernel/MultitaskKernelMklNormalizer.h" 00019 #include "features/StringFeatures.h" 00020 00021 namespace shogun 00022 { 00023 00024 enum EWDKernType 00025 { 00026 E_WD=0, 00027 E_EXTERNAL=1, 00028 00029 E_BLOCK_CONST=2, 00030 E_BLOCK_LINEAR=3, 00031 E_BLOCK_SQPOLY=4, 00032 E_BLOCK_CUBICPOLY=5, 00033 E_BLOCK_EXP=6, 00034 E_BLOCK_LOG=7, 00035 }; 00036 00037 00052 class CWeightedDegreeStringKernel: public CStringKernel<char> 00053 { 00054 public: 00055 00059 CWeightedDegreeStringKernel(); 00060 00061 00067 CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD); 00068 00074 CWeightedDegreeStringKernel(float64_t* weights, int32_t degree); 00075 00082 CWeightedDegreeStringKernel( 00083 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree); 00084 00085 virtual ~CWeightedDegreeStringKernel(); 00086 00093 virtual bool init(CFeatures* l, CFeatures* r); 00094 00096 virtual void cleanup(); 00097 00105 EWDKernType get_type() const 00106 { 00107 return type; 00108 } 00109 00114 int32_t get_degree() const 00115 { 00116 return degree; 00117 } 00118 00124 int32_t get_max_mismatch() const 00125 { 00126 return max_mismatch; 00127 } 00128 00133 virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; } 00134 00139 virtual const char* get_name() const { 00140 return "WeightedDegreeStringKernel"; 00141 } 00142 00150 inline virtual bool init_optimization( 00151 int32_t count, int32_t *IDX, float64_t* alphas) 00152 { 00153 return init_optimization(count, IDX, alphas, -1); 00154 } 00155 00166 virtual bool init_optimization( 00167 int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num); 00168 00173 virtual bool delete_optimization(); 00174 00180 virtual float64_t compute_optimized(int32_t idx) 00181 { 00182 if (get_is_initialized()) 00183 return compute_by_tree(idx); 00184 00185 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n"); 00186 return 0; 00187 } 00188 00193 static void* compute_batch_helper(void* p); 00194 00205 virtual void compute_batch( 00206 int32_t num_vec, int32_t* vec_idx, float64_t* target, 00207 int32_t num_suppvec, int32_t* IDX, float64_t* alphas, 00208 float64_t factor=1.0); 00209 00213 inline virtual void clear_normal() 00214 { 00215 if (get_is_initialized()) 00216 { 00217 00218 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00219 SG_ERROR("not implemented"); 00220 00221 tries->delete_trees(max_mismatch==0); 00222 set_is_initialized(false); 00223 } 00224 } 00225 00231 inline virtual void add_to_normal(int32_t idx, float64_t weight) 00232 { 00233 00234 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00235 SG_ERROR("not implemented"); 00236 00237 if (max_mismatch==0) 00238 add_example_to_tree(idx, weight); 00239 else 00240 add_example_to_tree_mismatch(idx, weight); 00241 00242 set_is_initialized(true); 00243 } 00244 00249 inline virtual int32_t get_num_subkernels() 00250 { 00251 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00252 return ((CMultitaskKernelMklNormalizer*)normalizer)->get_num_betas(); 00253 if (position_weights!=NULL) 00254 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ; 00255 if (length==0) 00256 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize); 00257 return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ; 00258 } 00259 00265 inline void compute_by_subkernel( 00266 int32_t idx, float64_t * subkernel_contrib) 00267 { 00268 00269 if (get_is_initialized()) 00270 { 00271 00272 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00273 SG_ERROR("not implemented"); 00274 00275 compute_by_tree(idx, subkernel_contrib); 00276 return ; 00277 } 00278 00279 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n"); 00280 } 00281 00287 inline const float64_t* get_subkernel_weights(int32_t& num_weights) 00288 { 00289 00290 num_weights = get_num_subkernels(); 00291 00292 delete[] weights_buffer ; 00293 weights_buffer = new float64_t[num_weights]; 00294 00295 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00296 for (int32_t i=0; i<num_weights; i++) 00297 weights_buffer[i] = ((CMultitaskKernelMklNormalizer*)normalizer)->get_beta(i); 00298 else if (position_weights!=NULL) 00299 for (int32_t i=0; i<num_weights; i++) 00300 weights_buffer[i] = position_weights[i*mkl_stepsize]; 00301 else 00302 for (int32_t i=0; i<num_weights; i++) 00303 weights_buffer[i] = weights[i*mkl_stepsize]; 00304 00305 return weights_buffer; 00306 } 00307 00313 inline void set_subkernel_weights( 00314 float64_t* weights2, int32_t num_weights2) 00315 { 00316 int32_t num_weights = get_num_subkernels(); 00317 if (num_weights!=num_weights2) 00318 SG_ERROR( "number of weights do not match\n"); 00319 00320 00321 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00322 for (int32_t i=0; i<num_weights; i++) 00323 ((CMultitaskKernelMklNormalizer*)normalizer)->set_beta(i, weights2[i]); 00324 else if (position_weights!=NULL) 00325 { 00326 for (int32_t i=0; i<num_weights; i++) 00327 { 00328 for (int32_t j=0; j<mkl_stepsize; j++) 00329 { 00330 if (i*mkl_stepsize+j<seq_length) 00331 position_weights[i*mkl_stepsize+j] = weights2[i]; 00332 } 00333 } 00334 } 00335 else if (length==0) 00336 { 00337 for (int32_t i=0; i<num_weights; i++) 00338 { 00339 for (int32_t j=0; j<mkl_stepsize; j++) 00340 { 00341 if (i*mkl_stepsize+j<get_degree()) 00342 weights[i*mkl_stepsize+j] = weights2[i]; 00343 } 00344 } 00345 } 00346 else 00347 { 00348 for (int32_t i=0; i<num_weights; i++) 00349 { 00350 for (int32_t j=0; j<mkl_stepsize; j++) 00351 { 00352 if (i*mkl_stepsize+j<get_degree()*length) 00353 weights[i*mkl_stepsize+j] = weights2[i]; 00354 } 00355 } 00356 } 00357 } 00358 00363 virtual bool set_normalizer(CKernelNormalizer* normalizer_) { 00364 00365 if (normalizer_ && strcmp(normalizer_->get_name(),"MultitaskKernelTreeNormalizer")==0) { 00366 unset_property(KP_LINADD); 00367 unset_property(KP_BATCHEVALUATION); 00368 } 00369 else 00370 { 00371 set_property(KP_LINADD); 00372 set_property(KP_BATCHEVALUATION); 00373 } 00374 00375 00376 return CStringKernel<char>::set_normalizer(normalizer_); 00377 00378 } 00379 00380 // other kernel tree operations 00386 float64_t *compute_abs_weights(int32_t & len); 00387 00394 void compute_by_tree(int32_t idx, float64_t *LevelContrib); 00395 00400 bool is_tree_initialized() { return tree_initialized; } 00401 00407 inline float64_t *get_degree_weights(int32_t& d, int32_t& len) 00408 { 00409 d=degree; 00410 len=length; 00411 return weights; 00412 } 00413 00419 inline float64_t *get_weights(int32_t& num_weights) 00420 { 00421 00422 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00423 SG_ERROR("not implemented"); 00424 00425 if (position_weights!=NULL) 00426 { 00427 num_weights = seq_length ; 00428 return position_weights ; 00429 } 00430 if (length==0) 00431 num_weights = degree ; 00432 else 00433 num_weights = degree*length ; 00434 return weights; 00435 } 00436 00442 inline float64_t *get_position_weights(int32_t& len) 00443 { 00444 len=seq_length; 00445 return position_weights; 00446 } 00447 00453 bool set_wd_weights_by_type(EWDKernType type); 00454 00461 void set_wd_weights(float64_t* p_weights, int32_t d) 00462 { 00463 set_weights(p_weights,d,0); 00464 } 00465 00472 bool set_weights(float64_t* weights, int32_t d, int32_t len); 00473 00480 bool set_position_weights(float64_t* pws, int32_t len=0); 00481 00486 bool init_block_weights(); 00487 00492 bool init_block_weights_from_wd(); 00493 00498 bool init_block_weights_from_wd_external(); 00499 00504 bool init_block_weights_const(); 00505 00510 bool init_block_weights_linear(); 00511 00516 bool init_block_weights_sqpoly(); 00517 00522 bool init_block_weights_cubicpoly(); 00523 00528 bool init_block_weights_exp(); 00529 00534 bool init_block_weights_log(); 00535 00540 bool delete_position_weights() 00541 { 00542 delete[] position_weights; 00543 position_weights=NULL; 00544 return true; 00545 } 00546 00552 bool set_max_mismatch(int32_t max); 00553 00558 inline int32_t get_max_mismatch() { return max_mismatch; } 00559 00565 inline bool set_degree(int32_t deg) { degree=deg; return true; } 00566 00571 inline int32_t get_degree() { return degree; } 00572 00578 inline bool set_use_block_computation(bool block) 00579 { 00580 block_computation=block; 00581 return true; 00582 } 00583 00588 inline bool get_use_block_computation() { return block_computation; } 00589 00595 inline bool set_mkl_stepsize(int32_t step) 00596 { 00597 if (step<1) 00598 SG_ERROR("Stepsize must be a positive integer\n"); 00599 mkl_stepsize=step; 00600 return true; 00601 } 00602 00607 inline int32_t get_mkl_stepsize() { return mkl_stepsize; } 00608 00614 inline bool set_which_degree(int32_t which) 00615 { 00616 which_degree=which; 00617 return true; 00618 } 00619 00624 inline int32_t get_which_degree() { return which_degree; } 00625 00626 protected: 00628 void create_empty_tries(); 00629 00635 void add_example_to_tree(int32_t idx, float64_t weight); 00636 00643 void add_example_to_single_tree( 00644 int32_t idx, float64_t weight, int32_t tree_num); 00645 00651 void add_example_to_tree_mismatch(int32_t idx, float64_t weight); 00652 00659 void add_example_to_single_tree_mismatch( 00660 int32_t idx, float64_t weight, int32_t tree_num); 00661 00667 float64_t compute_by_tree(int32_t idx); 00668 00677 float64_t compute(int32_t idx_a, int32_t idx_b); 00678 00687 float64_t compute_with_mismatch( 00688 char* avec, int32_t alen, char* bvec, int32_t blen); 00689 00698 float64_t compute_without_mismatch( 00699 char* avec, int32_t alen, char* bvec, int32_t blen); 00700 00709 float64_t compute_without_mismatch_matrix( 00710 char* avec, int32_t alen, char* bvec, int32_t blen); 00711 00720 float64_t compute_using_block(char* avec, int32_t alen, 00721 char* bvec, int32_t blen); 00722 00724 virtual void remove_lhs(); 00725 00726 private: 00729 void init(); 00730 00731 protected: 00735 float64_t* weights; 00737 int32_t weights_degree; 00739 int32_t weights_length; 00740 00741 00743 float64_t* position_weights; 00745 int32_t position_weights_len; 00747 float64_t* weights_buffer; 00749 int32_t mkl_stepsize; 00751 int32_t degree; 00753 int32_t length; 00754 00756 int32_t max_mismatch; 00758 int32_t seq_length; 00759 00761 bool initialized; 00762 00764 bool block_computation; 00765 00767 float64_t* block_weights; 00769 EWDKernType type; 00771 int32_t which_degree; 00772 00774 CTrie<DNATrie>* tries; 00775 00777 bool tree_initialized; 00778 00780 CAlphabet* alphabet; 00781 }; 00782 00783 } 00784 00785 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H__ */