SHOGUN v0.9.0
|
The CommUlongString kernel may be used to compute the spectrum kernel from strings that have been mapped into unsigned 64bit integers.
These 64bit integers correspond to k-mers. To be applicable in this kernel they need to be sorted (e.g. via the SortUlongString pre-processor).
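It basically uses the algorithm in the unix "comm" command (hence the name) to compute:
$$k(\mathbf{x}, \mathbf{x}') = \Phi_k(\mathbf{x}) \cdot \Phi_k(\mathbf{x}')$$
where $\Phi_k$ maps a sequence $\mathbf{x}$
that consists of letters in $\Sigma$
to a feature vector of size $|\Sigma|^k$.
In this feature vector each entry denotes how often the corresponding k-mer appears in that sequence $\mathbf{x}$.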
Note that this representation enables spectrum kernels of order 8 for 8bit alphabets (like binaries) and order 32 for 2-bit alphabets like DNA.
For this kernel the linadd speedups are implemented (though there is room for improvement here when a whole set of sequences is ADDed) using sorted lists.
在文件CommUlongStringKernel.h第48行定义。
公有成员 | |
CCommUlongStringKernel (int32_t size=10, bool use_sign=false) | |
CCommUlongStringKernel (CStringFeatures< uint64_t > *l, CStringFeatures< uint64_t > *r, bool use_sign=false, int32_t size=10) | |
virtual | ~CCommUlongStringKernel () |
virtual bool | init (CFeatures *l, CFeatures *r) |
virtual void | cleanup () |
virtual EKernelType | get_kernel_type () |
virtual const char * | get_name () const |
virtual bool | init_optimization (int32_t count, int32_t *IDX, float64_t *weights) |
virtual bool | delete_optimization () |
virtual float64_t | compute_optimized (int32_t idx) |
void | merge_dictionaries (int32_t &t, int32_t j, int32_t &k, uint64_t *vec, uint64_t *dic, float64_t *dic_weights, float64_t weight, int32_t vec_idx) |
virtual void | add_to_normal (int32_t idx, float64_t weight) |
virtual void | clear_normal () |
virtual void | remove_lhs () |
virtual void | remove_rhs () |
virtual EFeatureType | get_feature_type () |
void | get_dictionary (int32_t &dsize, uint64_t *&dict, float64_t *&dweights) |
保护成员 | |
float64_t | compute (int32_t idx_a, int32_t idx_b) |
保护属性 | |
CDynamicArray< uint64_t > | dictionary |
CDynamicArray< float64_t > | dictionary_weights |
bool | use_sign |
CCommUlongStringKernel | ( | int32_t | size = 10 , |
bool | use_sign = false |
||
) |
CCommUlongStringKernel | ( | CStringFeatures< uint64_t > * | l, |
CStringFeatures< uint64_t > * | r, | ||
bool | use_sign = false , |
||
int32_t | size = 10 |
||
) |
constructor
l | features of left-hand side |
r | features of right-hand side |
use_sign | if sign shall be used |
size | cache size |
在文件CommUlongStringKernel.cpp第28行定义。
~CCommUlongStringKernel | ( | ) | [virtual] |
在文件CommUlongStringKernel.cpp第39行定义。
void add_to_normal | ( | int32_t | idx, |
float64_t | weight | ||
) | [virtual] |
void cleanup | ( | ) | [virtual] |
void clear_normal | ( | ) | [virtual] |
float64_t compute | ( | int32_t | idx_a, |
int32_t | idx_b | ||
) | [protected, virtual] |
Compute the kernel function for features a and b. idx_{a,b} denote the indices of the feature vectors in the corresponding feature object.
idx_a | index a |
idx_b | index b |
实现了CKernel。
在文件CommUlongStringKernel.cpp第80行定义。
float64_t compute_optimized | ( | int32_t | idx | ) | [virtual] |
compute optimized
idx | index to compute |
重载CKernel。
在文件CommUlongStringKernel.cpp第257行定义。
bool delete_optimization | ( | ) | [virtual] |
void get_dictionary | ( | int32_t & | dsize, |
uint64_t *& | dict, | ||
float64_t *& | dweights | ||
) |
get dictionary
dsize | dictionary size will be stored in here |
dict | dictionary will be stored in here |
dweights | dictionary weights will be stored in here |
在文件CommUlongStringKernel.h第183行定义。
virtual EFeatureType get_feature_type | ( | ) | [virtual] |
return feature type the kernel can deal with
在文件CommUlongStringKernel.h第175行定义。
virtual EKernelType get_kernel_type | ( | ) | [virtual] |
virtual const char* get_name | ( | void | ) | const [virtual] |
bool init | ( | CFeatures * | l, |
CFeatures * | r | ||
) | [virtual] |
initialize kernel
l | features of left-hand side |
r | features of right-hand side |
在文件CommUlongStringKernel.cpp第67行定义。
bool init_optimization | ( | int32_t | count, |
int32_t * | IDX, | ||
float64_t * | weights | ||
) | [virtual] |
initialize optimization
count | count |
IDX | index |
weights | weights |
重载CKernel。
在文件CommUlongStringKernel.cpp第220行定义。
void merge_dictionaries | ( | int32_t & | t, |
int32_t | j, | ||
int32_t & | k, | ||
uint64_t * | vec, | ||
uint64_t * | dic, | ||
float64_t * | dic_weights, | ||
float64_t | weight, | ||
int32_t | vec_idx | ||
) |
merge dictionaries
t | t |
j | j |
k | k |
vec | vector |
dic | dictionary |
dic_weights | dictionary weights |
weight | weight |
vec_idx | vector index |
在文件CommUlongStringKernel.h第129行定义。
void remove_lhs | ( | ) | [virtual] |
void remove_rhs | ( | ) | [virtual] |
CDynamicArray<uint64_t> dictionary [protected] |
dictionary
在文件CommUlongStringKernel.h第204行定义。
CDynamicArray<float64_t> dictionary_weights [protected] |
dictionary weights
在文件CommUlongStringKernel.h第206行定义。
bool use_sign [protected] |
if sign shall be used
在文件CommUlongStringKernel.h第209行定义。