SHOGUN v0.9.0
|
The WeightedCommWordString kernel may be used to compute the weighted spectrum kernel (i.e. a spectrum kernel over k-mers of length 1 to K, where each k-mer length k is weighted by some coefficient \(\beta_k\)) from strings that have been mapped into unsigned 16bit integers.
These 16bit integers correspond to k-mers. To be applicable in this kernel they need to be sorted (e.g. via the SortWordString pre-processor).
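It basically uses the algorithm in the unix "comm" command (hence the name) to compute:
\[ k(\mathbf{x}, \mathbf{x}') = \sum_{k=1}^{K} \beta_k \, \Phi_k(\mathbf{x}) \cdot \Phi_k(\mathbf{x}') \]
where \(\beta_k\) is the weight for k-mers of length \(k\) and \(\Phi_k\) maps a sequence \(\mathbf{x}\)
that consists of letters in \(\Sigma\)
to a feature vector of size \(|\Sigma|^k\).
In this feature vector each entry denotes how often the corresponding k-mer appears in that sequence \(\mathbf{x}\).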
Note that this representation is especially tuned to small alphabets (like the 2-bit alphabet DNA), for which it enables spectrum kernels of order 8.
For this kernel the linadd speedups are quite efficiently implemented using direct maps.
Definition at line 50 of file WeightedCommWordStringKernel.h.
Public Member Functions | |
CWeightedCommWordStringKernel () | |
CWeightedCommWordStringKernel (int32_t size, bool use_sign) | |
CWeightedCommWordStringKernel (CStringFeatures< uint16_t > *l, CStringFeatures< uint16_t > *r, bool use_sign=false, int32_t size=10) | |
virtual | ~CWeightedCommWordStringKernel () |
virtual bool | init (CFeatures *l, CFeatures *r) |
virtual void | cleanup () |
virtual float64_t | compute_optimized (int32_t idx) |
virtual void | add_to_normal (int32_t idx, float64_t weight) |
void | merge_normal () |
bool | set_wd_weights () |
bool | set_weights (float64_t *w, int32_t d) |
virtual EKernelType | get_kernel_type () |
virtual const char * | get_name () const |
virtual EFeatureType | get_feature_type () |
virtual float64_t * | compute_scoring (int32_t max_degree, int32_t &num_feat, int32_t &num_sym, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, bool do_init=true) |
Protected Member Functions | |
virtual float64_t | compute_helper (int32_t idx_a, int32_t idx_b, bool do_sort) |
Protected Attributes | |
int32_t | degree |
float64_t * | weights |
CWeightedCommWordStringKernel | ( | ) |
default constructor
CWeightedCommWordStringKernel | ( | int32_t | size, |
bool | use_sign | ||
) |
constructor
size | cache size |
use_sign | if sign shall be used |
CWeightedCommWordStringKernel | ( | CStringFeatures< uint16_t > * | l, |
CStringFeatures< uint16_t > * | r, | ||
bool | use_sign = false , |
||
int32_t | size = 10 |
||
) |
constructor
l | features of left-hand side |
r | features of right-hand side |
use_sign | if sign shall be used |
size | cache size |
~CWeightedCommWordStringKernel | ( | ) | [virtual] |
void add_to_normal | ( | int32_t | idx, |
float64_t | weight | ||
) | [virtual] |
add to normal
idx | where to add |
weight | what to add |
void cleanup | ( | ) | [virtual] |
float64_t compute_helper | ( | int32_t | idx_a, |
int32_t | idx_b, | ||
bool | do_sort | ||
) | [protected, virtual] |
helper for compute
idx_a | index a |
idx_b | index b |
do_sort | if sorting shall be performed |
float64_t compute_optimized | ( | int32_t | idx | ) | [virtual] |
compute optimized
idx | index to compute |
float64_t * compute_scoring | ( | int32_t | max_degree, |
int32_t & | num_feat, | ||
int32_t & | num_sym, | ||
float64_t * | target, | ||
int32_t | num_suppvec, | ||
int32_t * | IDX, | ||
float64_t * | alphas, | ||
bool | do_init = true |
||
) | [virtual] |
compute scoring
max_degree | maximum degree |
num_feat | number of features |
num_sym | number of symbols |
target | target |
num_suppvec | number of support vectors |
IDX | IDX |
alphas | alphas |
do_init | if initialization shall be performed |
virtual EFeatureType get_feature_type | ( | ) | [virtual] |
return feature type the kernel can deal with
virtual EKernelType get_kernel_type | ( | ) | [virtual] |
return what type of kernel we are
virtual const char* get_name | ( | void | ) | const [virtual] |
return the kernel's name
bool init | ( | CFeatures * | l, |
CFeatures * | r | ||
) | [virtual] |
initialize kernel
l | features of left-hand side |
r | features of right-hand side |
void merge_normal | ( | ) |
merge normal
bool set_wd_weights | ( | ) |
bool set_weights | ( | float64_t * | w, |
int32_t | d | ||
) |
set custom weights (swig compatible)
w | weights |
d | degree (must match number of weights) |
int32_t degree [protected] |
degree
float64_t* weights [protected] |
weights for each of the subkernels of degree 1...d