SHOGUN v0.9.0
|
The CommWordString kernel may be used to compute the spectrum kernel from strings that have been mapped into unsigned 16bit integers.
These 16bit integers correspond to k-mers. To be applicable in this kernel they need to be sorted (e.g. via the SortWordString pre-processor).
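It basically uses the algorithm in the unix "comm" command (hence the name) to compute:
\[ k({\bf x},{\bf x'}) = \Phi_k({\bf x}) \cdot \Phi_k({\bf x'}) \]
where $\Phi_k$ maps a sequence ${\bf x}$ that consists of letters in $\Sigma$ to a feature vector of size $|\Sigma|^k$. In this feature vector each entry denotes how often the corresponding k-mer appears in ${\bf x}$.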
Note that this representation is especially tuned to small alphabets (like the 2-bit alphabet DNA), for which it enables spectrum kernels of order up to 8.
For this kernel the linadd speedups are quite efficiently implemented using direct maps.
在文件CommWordStringKernel.h第46行定义。
公有成员 | |
CCommWordStringKernel () | |
CCommWordStringKernel (int32_t size, bool use_sign) | |
CCommWordStringKernel (CStringFeatures< uint16_t > *l, CStringFeatures< uint16_t > *r, bool use_sign=false, int32_t size=10) | |
virtual | ~CCommWordStringKernel () |
virtual bool | init (CFeatures *l, CFeatures *r) |
virtual void | cleanup () |
virtual EKernelType | get_kernel_type () |
virtual const char * | get_name () const |
virtual bool | init_dictionary (int32_t size) |
virtual bool | init_optimization (int32_t count, int32_t *IDX, float64_t *weights) |
virtual bool | delete_optimization () |
virtual float64_t | compute_optimized (int32_t idx) |
virtual void | add_to_normal (int32_t idx, float64_t weight) |
virtual void | clear_normal () |
virtual EFeatureType | get_feature_type () |
void | get_dictionary (int32_t &dsize, float64_t *&dweights) |
virtual float64_t * | compute_scoring (int32_t max_degree, int32_t &num_feat, int32_t &num_sym, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, bool do_init=true) |
char * | compute_consensus (int32_t &num_feat, int32_t num_suppvec, int32_t *IDX, float64_t *alphas) |
void | set_use_dict_diagonal_optimization (bool flag) |
bool | get_use_dict_diagonal_optimization () |
保护成员 | |
virtual float64_t | compute (int32_t idx_a, int32_t idx_b) |
virtual float64_t | compute_helper (int32_t idx_a, int32_t idx_b, bool do_sort) |
virtual float64_t | compute_diag (int32_t idx_a) |
保护属性 | |
int32_t | dictionary_size |
float64_t * | dictionary_weights |
bool | use_sign |
bool | use_dict_diagonal_optimization |
int32_t * | dict_diagonal_optimization |
友元 | |
class | CVarianceKernelNormalizer |
class | CSqrtDiagKernelNormalizer |
class | CAvgDiagKernelNormalizer |
class | CRidgeKernelNormalizer |
class | CFirstElementKernelNormalizer |
class | CTanimotoKernelNormalizer |
class | CDiceKernelNormalizer |
default constructor
在文件CommWordStringKernel.cpp第22行定义。
CCommWordStringKernel | ( | int32_t | size, |
bool | use_sign | ||
) |
CCommWordStringKernel | ( | CStringFeatures< uint16_t > * | l, |
CStringFeatures< uint16_t > * | r, | ||
bool | use_sign = false , |
||
int32_t | size = 10 |
||
) |
constructor
l | features of left-hand side |
r | features of right-hand side |
use_sign | if sign shall be used |
size | cache size |
在文件CommWordStringKernel.cpp第35行定义。
~CCommWordStringKernel | ( | ) | [virtual] |
在文件CommWordStringKernel.cpp第57行定义。
void add_to_normal | ( | int32_t | idx, |
float64_t | weight | ||
) | [virtual] |
add to normal
idx | where to add |
weight | what to add |
重载CKernel。
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.cpp第241行定义。
void cleanup | ( | ) | [virtual] |
void clear_normal | ( | ) | [virtual] |
virtual float64_t compute | ( | int32_t | idx_a, |
int32_t | idx_b | ||
) | [protected, virtual] |
compute kernel function for features a and b; idx_{a,b} denote the indices of the feature vectors in the corresponding feature object
idx_a | index a |
idx_b | index b |
实现了CKernel。
在文件CommWordStringKernel.h第215行定义。
char * compute_consensus | ( | int32_t & | num_feat, |
int32_t | num_suppvec, | ||
int32_t * | IDX, | ||
float64_t * | alphas | ||
) |
compute consensus
num_feat | number of features |
num_suppvec | number of support vectors |
IDX | IDX |
alphas | alphas |
在文件CommWordStringKernel.cpp第498行定义。
float64_t compute_diag | ( | int32_t | idx_a | ) | [protected, virtual] |
helper to compute only diagonal normalization for training
idx_a | index a |
在文件CommWordStringKernel.cpp第85行定义。
float64_t compute_helper | ( | int32_t | idx_a, |
int32_t | idx_b, | ||
bool | do_sort | ||
) | [protected, virtual] |
helper for compute
idx_a | index a |
idx_b | index b |
do_sort | if sorting shall be performed |
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.cpp第129行定义。
float64_t compute_optimized | ( | int32_t | idx | ) | [virtual] |
compute optimized
idx | index to compute |
重载CKernel。
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.cpp第326行定义。
float64_t * compute_scoring | ( | int32_t | max_degree, |
int32_t & | num_feat, | ||
int32_t & | num_sym, | ||
float64_t * | target, | ||
int32_t | num_suppvec, | ||
int32_t * | IDX, | ||
float64_t * | alphas, | ||
bool | do_init = true |
||
) | [virtual] |
compute scoring
max_degree | maximum degree |
num_feat | number of features |
num_sym | number of symbols |
target | target |
num_suppvec | number of support vectors |
IDX | IDX |
alphas | alphas |
do_init | if initialization shall be performed |
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.cpp第375行定义。
bool delete_optimization | ( | ) | [virtual] |
void get_dictionary | ( | int32_t & | dsize, |
float64_t *& | dweights | ||
) |
get dictionary
dsize | dictionary size will be stored in here |
dweights | dictionary weights will be stored in here |
在文件CommWordStringKernel.h第153行定义。
virtual EFeatureType get_feature_type | ( | ) | [virtual] |
return feature type the kernel can deal with
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.h第146行定义。
virtual EKernelType get_kernel_type | ( | ) | [virtual] |
return what type of kernel we are
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.h第95行定义。
virtual const char* get_name | ( | void | ) | const [virtual] |
return the kernel's name
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.h第101行定义。
bool get_use_dict_diagonal_optimization | ( | ) |
get_use_dict_diagonal_optimization
在文件CommWordStringKernel.h第201行定义。
bool init | ( | CFeatures * | l, |
CFeatures * | r | ||
) | [virtual] |
initialize kernel
l | features of left-hand side |
r | features of right-hand side |
被CWeightedCommWordStringKernel重载。
在文件CommWordStringKernel.cpp第65行定义。
bool init_dictionary | ( | int32_t | size | ) | [virtual] |
bool init_optimization | ( | int32_t | count, |
int32_t * | IDX, | ||
float64_t * | weights | ||
) | [virtual] |
initialize optimization
count | count |
IDX | index |
weights | weights |
重载CKernel。
在文件CommWordStringKernel.cpp第292行定义。
void set_use_dict_diagonal_optimization | ( | bool | flag | ) |
set_use_dict_diagonal_optimization
flag | enable diagonal optimization |
在文件CommWordStringKernel.h第192行定义。
friend class CAvgDiagKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第50行定义。
friend class CDiceKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第54行定义。
friend class CFirstElementKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第52行定义。
friend class CRidgeKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第51行定义。
friend class CSqrtDiagKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第49行定义。
friend class CTanimotoKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第53行定义。
friend class CVarianceKernelNormalizer [friend] |
重载CKernel。
在文件CommWordStringKernel.h第48行定义。
int32_t* dict_diagonal_optimization [protected] |
array to hold counters for all strings
在文件CommWordStringKernel.h第253行定义。
int32_t dictionary_size [protected] |
size of dictionary (number of possible strings)
在文件CommWordStringKernel.h第242行定义。
float64_t* dictionary_weights [protected] |
dictionary weights - array to hold counters for all possible strings
在文件CommWordStringKernel.h第245行定义。
bool use_dict_diagonal_optimization [protected] |
whether diagonal optimization shall be used
在文件CommWordStringKernel.h第251行定义。
bool use_sign [protected] |
if sign shall be used
在文件CommWordStringKernel.h第248行定义。