The CommWordString kernel may be used to compute the spectrum kernel from strings that have been mapped into unsigned 16bit integers.
These 16-bit integers correspond to k-mers. To be applicable in this kernel they need to be sorted (e.g. via the SortWordString pre-processor).
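It basically uses the algorithm in the unix "comm" command (hence the name) to compute:
\[ k({\bf x},{\bf x'}) = \Phi_k({\bf x})\cdot \Phi_k({\bf x'}) \]
where \(\Phi_k\) maps a sequence \({\bf x}\) that consists of letters in \(\Sigma\) to a feature vector of size \(|\Sigma|^k\). In this feature vector each entry denotes how often the corresponding k-mer appears in \({\bf x}\).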
Note that this representation is especially tuned to small alphabets (like the 2-bit alphabet DNA), for which it enables spectrum kernels of order up to 8.
For this kernel the linadd speedups are quite efficiently implemented using direct maps.
在文件CommWordStringKernel.h第46行定义。
公有成员 | |
CCommWordStringKernel (int32_t size, bool use_sign) | |
CCommWordStringKernel (CStringFeatures< uint16_t > *l, CStringFeatures< uint16_t > *r, bool use_sign=false, int32_t size=10) | |
virtual | ~CCommWordStringKernel () |
virtual bool | init (CFeatures *l, CFeatures *r) |
virtual void | cleanup () |
virtual EKernelType | get_kernel_type () |
virtual const char * | get_name () const |
virtual bool | init_dictionary (int32_t size) |
virtual bool | init_optimization (int32_t count, int32_t *IDX, float64_t *weights) |
virtual bool | delete_optimization () |
virtual float64_t | compute_optimized (int32_t idx) |
virtual void | add_to_normal (int32_t idx, float64_t weight) |
virtual void | clear_normal () |
virtual EFeatureType | get_feature_type () |
void | get_dictionary (int32_t &dsize, float64_t *&dweights) |
virtual float64_t * | compute_scoring (int32_t max_degree, int32_t &num_feat, int32_t &num_sym, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, bool do_init=true) |
char * | compute_consensus (int32_t &num_feat, int32_t num_suppvec, int32_t *IDX, float64_t *alphas) |
void | set_use_dict_diagonal_optimization (bool flag) |
bool | get_use_dict_diagonal_optimization () |
保护成员 | |
virtual float64_t | compute (int32_t idx_a, int32_t idx_b) |
virtual float64_t | compute_helper (int32_t idx_a, int32_t idx_b, bool do_sort) |
virtual float64_t | compute_diag (int32_t idx_a) |
保护属性 | |
int32_t | dictionary_size |
float64_t * | dictionary_weights |
bool | use_sign |
bool | use_dict_diagonal_optimization |
int32_t * | dict_diagonal_optimization |
友元 | |
class | CVarianceKernelNormalizer |
class | CSqrtDiagKernelNormalizer |
class | CAvgDiagKernelNormalizer |
class | CRidgeKernelNormalizer |
class | CFirstElementKernelNormalizer |
class | CTanimotoKernelNormalizer |
class | CDiceKernelNormalizer |
CCommWordStringKernel | ( | int32_t | size, | |
bool | use_sign | |||
) |
CCommWordStringKernel | ( | CStringFeatures< uint16_t > * | l, | |
CStringFeatures< uint16_t > * | r, | |||
bool | use_sign = false , |
|||
int32_t | size = 10 | |||
) |
constructor
l | features of left-hand side | |
r | features of right-hand side | |
use_sign | if sign shall be used | |
size | cache size |
在文件CommWordStringKernel.cpp第28行定义。
~CCommWordStringKernel | ( | ) | [virtual] |
在文件CommWordStringKernel.cpp第53行定义。
void add_to_normal | ( | int32_t | idx, | |
float64_t | weight | |||
) | [virtual] |
add to normal
idx | where to add | |
weight | what to add |
重载CKernel。
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.cpp第237行定义。
void cleanup | ( | ) | [virtual] |
void clear_normal | ( | ) | [virtual] |
virtual float64_t compute | ( | int32_t | idx_a, | |
int32_t | idx_b | |||
) | [protected, virtual] |
compute the kernel function for features a and b; idx_{a,b} denote the indices of the feature vectors in the corresponding feature objects
idx_a | index a | |
idx_b | index b |
实现了CKernel。
在文件CommWordStringKernel.h第212行定义。
char * compute_consensus | ( | int32_t & | num_feat, | |
int32_t | num_suppvec, | |||
int32_t * | IDX, | |||
float64_t * | alphas | |||
) |
compute consensus
num_feat | number of features | |
num_suppvec | number of support vectors | |
IDX | IDX | |
alphas | alphas |
在文件CommWordStringKernel.cpp第494行定义。
float64_t compute_diag | ( | int32_t | idx_a | ) | [protected, virtual] |
helper to compute only diagonal normalization for training
idx_a | index a |
在文件CommWordStringKernel.cpp第81行定义。
float64_t compute_helper | ( | int32_t | idx_a, | |
int32_t | idx_b, | |||
bool | do_sort | |||
) | [protected, virtual] |
helper for compute
idx_a | index a | |
idx_b | index b | |
do_sort | if sorting shall be performed |
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.cpp第125行定义。
float64_t compute_optimized | ( | int32_t | idx | ) | [virtual] |
compute optimized
idx | index to compute |
重载CKernel。
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.cpp第322行定义。
float64_t * compute_scoring | ( | int32_t | max_degree, | |
int32_t & | num_feat, | |||
int32_t & | num_sym, | |||
float64_t * | target, | |||
int32_t | num_suppvec, | |||
int32_t * | IDX, | |||
float64_t * | alphas, | |||
bool | do_init = true | |||
) | [virtual] |
compute scoring
max_degree | maximum degree | |
num_feat | number of features | |
num_sym | number of symbols | |
target | target | |
num_suppvec | number of support vectors | |
IDX | IDX | |
alphas | alphas | |
do_init | if initialization shall be performed |
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.cpp第371行定义。
bool delete_optimization | ( | ) | [virtual] |
void get_dictionary | ( | int32_t & | dsize, | |
float64_t *& | dweights | |||
) |
get dictionary
dsize | dictionary size will be stored in here | |
dweights | dictionary weights will be stored in here |
在文件CommWordStringKernel.h第150行定义。
virtual EFeatureType get_feature_type | ( | ) | [virtual] |
return feature type the kernel can deal with
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.h第143行定义。
virtual EKernelType get_kernel_type | ( | ) | [virtual] |
return what type of kernel we are
实现了CKernel。
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.h第92行定义。
virtual const char* get_name | ( | ) | const [virtual] |
return the kernel's name
实现了CSGObject。
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.h第98行定义。
bool get_use_dict_diagonal_optimization | ( | ) |
get_use_dict_diagonal_optimization
在文件CommWordStringKernel.h第198行定义。
bool init | ( | CFeatures * | l, | |
CFeatures * | r | |||
) | [virtual] |
initialize kernel
l | features of left-hand side | |
r | features of right-hand side |
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.cpp第61行定义。
bool init_dictionary | ( | int32_t | size | ) | [virtual] |
bool init_optimization | ( | int32_t | count, | |
int32_t * | IDX, | |||
float64_t * | weights | |||
) | [virtual] |
initialize optimization
count | count | |
IDX | index | |
weights | weights |
重载CKernel。
在文件CommWordStringKernel.cpp第288行定义。
void set_use_dict_diagonal_optimization | ( | bool | flag | ) |
set_use_dict_diagonal_optimization
flag | enable diagonal optimization |
在文件CommWordStringKernel.h第189行定义。
friend class CAvgDiagKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第50行定义。
friend class CDiceKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第54行定义。
friend class CFirstElementKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第52行定义。
friend class CRidgeKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第51行定义。
friend class CSqrtDiagKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第49行定义。
friend class CTanimotoKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第53行定义。
friend class CVarianceKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第48行定义。
int32_t* dict_diagonal_optimization [protected] |
array to hold counters for all strings
在文件CommWordStringKernel.h第247行定义。
int32_t dictionary_size [protected] |
size of dictionary (number of possible strings)
在文件CommWordStringKernel.h第236行定义。
float64_t* dictionary_weights [protected] |
dictionary weights - array to hold counters for all possible strings
在文件CommWordStringKernel.h第239行定义。
bool use_dict_diagonal_optimization [protected] |
whether diagonal optimization shall be used
在文件CommWordStringKernel.h第245行定义。
bool use_sign [protected] |
if sign shall be used
在文件CommWordStringKernel.h第242行定义。