SHOGUN
v1.1.0
|
The class SimpleFeatures implements dense feature matrices.
The feature matrices are stored en bloc in memory in Fortran order, i.e. column-by-column, where a column denotes a feature vector.
There are get_num_vectors() many feature vectors, of dimension get_num_features(). To access a feature vector call get_feature_vector() and when you are done treating it call free_feature_vector(). While free_feature_vector() is a NOP in most cases, feature vectors might have been generated on the fly (due to a number of preprocessors being attached to them).
From this template class the following dense feature matrix types are used and supported:
Definition at line 58 of file SimpleFeatures.h.
Public Member Functions | |
CSimpleFeatures (int32_t size=0) | |
CSimpleFeatures (const CSimpleFeatures &orig) | |
CSimpleFeatures (SGMatrix< ST > matrix) | |
CSimpleFeatures (ST *src, int32_t num_feat, int32_t num_vec) | |
CSimpleFeatures (CFile *loader) | |
virtual CFeatures * | duplicate () const |
virtual | ~CSimpleFeatures () |
void | free_feature_matrix () |
void | free_features () |
ST * | get_feature_vector (int32_t num, int32_t &len, bool &dofree) |
void | set_feature_vector (SGVector< ST > vector, int32_t num) |
SGVector< ST > | get_feature_vector (int32_t num) |
void | free_feature_vector (ST *feat_vec, int32_t num, bool dofree) |
void | free_feature_vector (SGVector< ST > vec, int32_t num) |
void | vector_subset (int32_t *idx, int32_t idx_len) |
void | feature_subset (int32_t *idx, int32_t idx_len) |
void | get_feature_matrix (ST **dst, int32_t *num_feat, int32_t *num_vec) |
SGMatrix< ST > | get_feature_matrix () |
SGMatrix< ST > | steal_feature_matrix () |
void | set_feature_matrix (SGMatrix< ST > matrix) |
ST * | get_feature_matrix (int32_t &num_feat, int32_t &num_vec) |
CSimpleFeatures< ST > * | get_transposed () |
ST * | get_transposed (int32_t &num_feat, int32_t &num_vec) |
virtual void | set_feature_matrix (ST *fm, int32_t num_feat, int32_t num_vec) |
virtual void | copy_feature_matrix (SGMatrix< ST > src) |
void | obtain_from_dot (CDotFeatures *df) |
virtual bool | apply_preprocessor (bool force_preprocessing=false) |
virtual int32_t | get_size () |
virtual int32_t | get_num_vectors () const |
int32_t | get_num_features () |
void | set_num_features (int32_t num) |
void | set_num_vectors (int32_t num) |
void | initialize_cache () |
virtual EFeatureClass | get_feature_class () |
virtual EFeatureType | get_feature_type () |
virtual bool | reshape (int32_t p_num_features, int32_t p_num_vectors) |
virtual int32_t | get_dim_feature_space () const |
virtual float64_t | dot (int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2) |
virtual float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
virtual void | add_to_dense_vec (float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false) |
virtual int32_t | get_nnz_features_for_vector (int32_t num) |
virtual bool | Align_char_features (CStringFeatures< char > *cf, CStringFeatures< char > *Ref, float64_t gapCost) |
virtual void | load (CFile *loader) |
virtual void | save (CFile *saver) |
virtual void * | get_feature_iterator (int32_t vector_index) |
virtual bool | get_next_feature (int32_t &index, float64_t &value, void *iterator) |
virtual void | free_feature_iterator (void *iterator) |
virtual CFeatures * | copy_subset (SGVector< index_t > indices) |
virtual const char * | get_name () const |
template<> | |
bool | Align_char_features (CStringFeatures< char > *cf, CStringFeatures< char > *Ref, float64_t gapCost) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
template<> | |
float64_t | dense_dot (int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len) |
![]() | |
CDotFeatures (int32_t size=0) | |
CDotFeatures (const CDotFeatures &orig) | |
CDotFeatures (CFile *loader) | |
virtual | ~CDotFeatures () |
virtual void | dense_dot_range (float64_t *output, int32_t start, int32_t stop, float64_t *alphas, float64_t *vec, int32_t dim, float64_t b) |
virtual void | dense_dot_range_subset (int32_t *sub_index, int32_t num, float64_t *output, float64_t *alphas, float64_t *vec, int32_t dim, float64_t b) |
float64_t | get_combined_feature_weight () |
void | set_combined_feature_weight (float64_t nw) |
SGMatrix< float64_t > | get_computed_dot_feature_matrix () |
SGVector< float64_t > | get_computed_dot_feature_vector (int32_t num) |
void | benchmark_add_to_dense_vector (int32_t repeats=5) |
void | benchmark_dense_dot_range (int32_t repeats=5) |
virtual SGVector< float64_t > | get_mean () |
virtual SGMatrix< float64_t > | get_cov () |
![]() | |
CFeatures (int32_t size=0) | |
CFeatures (const CFeatures &orig) | |
CFeatures (CFile *loader) | |
virtual | ~CFeatures () |
virtual int32_t | add_preprocessor (CPreprocessor *p) |
set preprocessor | |
virtual CPreprocessor * | del_preprocessor (int32_t num) |
del current preprocessor | |
CPreprocessor * | get_preprocessor (int32_t num) |
get current preprocessor | |
void | set_preprocessed (int32_t num) |
bool | is_preprocessed (int32_t num) |
int32_t | get_num_preprocessed () |
get whether specified preprocessor (or all if num=1) was/were already applied | |
int32_t | get_num_preprocessors () const |
void | clean_preprocessors () |
int32_t | get_cache_size () |
void | list_feature_obj () |
bool | check_feature_compatibility (CFeatures *f) |
bool | has_property (EFeatureProperty p) |
void | set_property (EFeatureProperty p) |
void | unset_property (EFeatureProperty p) |
virtual void | set_subset (CSubset *subset) |
virtual void | remove_subset () |
virtual void | subset_changed_post () |
index_t | subset_idx_conversion (index_t idx) const |
bool | has_subset () const |
![]() | |
CSGObject () | |
CSGObject (const CSGObject &orig) | |
virtual | ~CSGObject () |
virtual bool | is_generic (EPrimitiveType *generic) const |
template<class T > | |
void | set_generic () |
void | unset_generic () |
virtual void | print_serializable (const char *prefix="") |
virtual bool | save_serializable (CSerializableFile *file, const char *prefix="") |
virtual bool | load_serializable (CSerializableFile *file, const char *prefix="") |
void | set_global_io (SGIO *io) |
SGIO * | get_global_io () |
void | set_global_parallel (Parallel *parallel) |
Parallel * | get_global_parallel () |
void | set_global_version (Version *version) |
Version * | get_global_version () |
SGVector< char * > | get_modelsel_names () |
char * | get_modsel_param_descr (const char *param_name) |
index_t | get_modsel_param_index (const char *param_name) |
Protected Member Functions | |
virtual ST * | compute_feature_vector (int32_t num, int32_t &len, ST *target=NULL) |
![]() | |
void | display_progress (int32_t start, int32_t stop, int32_t v) |
Protected Attributes | |
int32_t | num_vectors |
number of vectors in cache | |
int32_t | num_features |
number of features in cache | |
ST * | feature_matrix |
int32_t | feature_matrix_num_vectors |
int32_t | feature_matrix_num_features |
CCache< ST > * | feature_cache |
![]() | |
float64_t | combined_weight |
feature weighting in combined dot features | |
![]() | |
CSubset * | m_subset |
Additional Inherited Members | |
![]() | |
static void * | dense_dot_range_helper (void *p) |
![]() | |
SGIO * | io |
Parallel * | parallel |
Version * | version |
Parameter * | m_parameters |
Parameter * | m_model_selection_parameters |
CSimpleFeatures | ( | int32_t | size = 0 | ) |
CSimpleFeatures | ( | const CSimpleFeatures< ST > & | orig | ) |
copy constructor
Definition at line 16 of file SimpleFeatures.cpp.
CSimpleFeatures | ( | SGMatrix< ST > | matrix | ) |
CSimpleFeatures | ( | ST * | src, |
int32_t | num_feat, | ||
int32_t | num_vec | ||
) |
constructor
src | feature matrix |
num_feat | number of features in matrix |
num_vec | number of vectors in matrix |
Definition at line 32 of file SimpleFeatures.cpp.
CSimpleFeatures | ( | CFile * | loader | ) |
constructor loading features from file
loader | File object via which to load data |
Definition at line 38 of file SimpleFeatures.cpp.
|
virtual |
Definition at line 49 of file SimpleFeatures.cpp.
|
virtual |
add vector 1 multiplied with alpha to dense vector2
possible with subset
alpha | scalar alpha |
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
abs_val | if true add the absolute value |
Implements CDotFeatures.
Definition at line 520 of file SimpleFeatures.cpp.
|
virtual |
align char features
cf | char features |
Ref | other char features |
gapCost | gap cost |
Definition at line 551 of file SimpleFeatures.cpp.
bool Align_char_features | ( | CStringFeatures< char > * | cf, |
CStringFeatures< char > * | Ref, | ||
float64_t | gapCost | ||
) |
align strings and compute empirical kernel map based on alignment scores
non functional code - needs updating
cf | strings to be aligned to reference |
Ref | reference strings to be aligned to |
gapCost | costs for a gap |
Definition at line 669 of file SimpleFeatures.cpp.
|
virtual |
apply preprocessor
applies preprocessors to ALL features (subset removed before and restored afterwards)
not possible with subset
force_preprocessing | if preprocessing shall be forced |
Definition at line 400 of file SimpleFeatures.cpp.
|
protectedvirtual |
compute feature vector for sample num; if target is set, the vector is written to target. len is returned by reference
NOT IMPLEMENTED!
num | num |
len | len |
target |
Reimplemented in CTOPFeatures, CFKFeatures, and CRealFileFeatures.
Definition at line 611 of file SimpleFeatures.cpp.
|
virtual |
copy feature matrix: store a copy of feature_matrix, where num_features is the column offset and columns are linear in memory; see below for definition of feature_matrix
not possible with subset
src | feature matrix to copy |
Definition at line 352 of file SimpleFeatures.cpp.
Creates a new CFeatures instance containing copies of the elements which are specified by the provided indices.
possible with subset
indices | indices of feature elements to copy |
Reimplemented from CFeatures.
Definition at line 596 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
possible with subset TODO: where?
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 702 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 722 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 742 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 762 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 782 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 802 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 822 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 842 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 862 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 882 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 902 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 922 of file SimpleFeatures.cpp.
compute dot product between vector1 and a dense vector
vec_idx1 | index of first vector |
vec2 | pointer to real valued vector |
vec2_len | length of real valued vector |
Implements CDotFeatures.
Definition at line 939 of file SimpleFeatures.cpp.
|
virtual |
compute dot product between vector1 and vector2, appointed by their indices
possible with subset
vec_idx1 | index of first vector |
df | DotFeatures (of same kind) to compute dot product with |
vec_idx2 | index of second vector |
Implements CDotFeatures.
Definition at line 498 of file SimpleFeatures.cpp.
|
virtual |
duplicate feature object
Implements CFeatures.
Definition at line 44 of file SimpleFeatures.cpp.
void feature_subset | ( | int32_t * | idx, |
int32_t | idx_len | ||
) |
Extracts the features mentioned in idx and replaces them in feature matrix in place.
It does not resize the allocated memory block.
Not possible with subset.
idx | index with features that shall remain in the feature matrix |
idx_len | length of the index |
Note: assumes idx is sorted
Definition at line 211 of file SimpleFeatures.cpp.
|
virtual |
clean up iterator; call this function with the iterator returned by get_feature_iterator
iterator | as returned by get_feature_iterator |
Implements CDotFeatures.
Definition at line 586 of file SimpleFeatures.cpp.
void free_feature_matrix | ( | ) |
void free_feature_vector | ( | ST * | feat_vec, |
int32_t | num, | ||
bool | dofree | ||
) |
free feature vector
possible with subset
feat_vec | feature vector to free |
num | index in feature cache |
dofree | if vector should be really deleted |
Definition at line 166 of file SimpleFeatures.cpp.
void free_feature_vector | ( | SGVector< ST > | vec, |
int32_t | num | ||
) |
free feature vector
possible with subset
vec | feature vector to free |
num | index in feature cache |
Definition at line 175 of file SimpleFeatures.cpp.
void free_features | ( | ) |
free feature matrix and cache
Any subset is removed
Definition at line 51 of file SimpleFeatures.cpp.
|
virtual |
obtain the dimensionality of the feature space
(do not mix this up with the dimensionality of the input space, usually obtained via get_num_features())
Implements CDotFeatures.
Definition at line 496 of file SimpleFeatures.cpp.
|
virtual |
get feature class
Implements CFeatures.
Definition at line 479 of file SimpleFeatures.cpp.
|
virtual |
iterate over the non-zero features
call get_feature_iterator first, followed by get_next_feature and free_feature_iterator to cleanup
possible with subset
vector_index | the index of the vector over whose components to iterate over |
Implements CDotFeatures.
Definition at line 557 of file SimpleFeatures.cpp.
void get_feature_matrix | ( | ST ** | dst, |
int32_t * | num_feat, | ||
int32_t * | num_vec | ||
) |
get a copy of the feature matrix; num_feat and num_vec are returned by reference
possible with subset
dst | destination to store matrix in |
num_feat | number of features (rows of matrix) |
num_vec | number of vectors (columns of matrix) |
Definition at line 241 of file SimpleFeatures.cpp.
SGMatrix< ST > get_feature_matrix | ( | ) |
Getter for feature matrix
subset is ignored
Definition at line 268 of file SimpleFeatures.cpp.
ST * get_feature_matrix | ( | int32_t & | num_feat, |
int32_t & | num_vec | ||
) |
get the pointer to the feature matrix; num_feat and num_vec are returned by reference
subset is ignored
num_feat | number of features in matrix |
num_vec | number of vectors in matrix |
Definition at line 299 of file SimpleFeatures.cpp.
|
virtual |
ST * get_feature_vector | ( | int32_t | num, |
int32_t & | len, | ||
bool & | dofree | ||
) |
get feature vector for sample num from the matrix as it is, if the matrix is initialized; otherwise return the preprocessed result of compute_feature_vector (not implemented)
num | index of feature vector |
len | length is returned by reference |
dofree | whether returned vector must be freed by caller via free_feature_vector |
Definition at line 69 of file SimpleFeatures.cpp.
SGVector< ST > get_feature_vector | ( | int32_t | num | ) |
get feature vector num
possible with subset
num | index of vector |
Definition at line 150 of file SimpleFeatures.cpp.
|
virtual |
Implements CSGObject.
Reimplemented in CTOPFeatures, CFKFeatures, and CRealFileFeatures.
Definition at line 500 of file SimpleFeatures.h.
|
virtual |
iterate over the non-zero features
call this function with the iterator returned by get_feature_iterator and call free_feature_iterator to clean up
possible with subset
index | is returned by reference (-1 when not available) |
value | is returned by reference |
iterator | as returned by get_feature_iterator |
Implements CDotFeatures.
Definition at line 573 of file SimpleFeatures.cpp.
|
virtual |
get number of non-zero features in vector
num | which vector |
Implements CDotFeatures.
Definition at line 545 of file SimpleFeatures.cpp.
int32_t get_num_features | ( | ) |
get number of features (of possible subset)
Definition at line 448 of file SimpleFeatures.cpp.
|
virtual |
get number of feature vectors
Implements CFeatures.
Definition at line 443 of file SimpleFeatures.cpp.
|
virtual |
get memory footprint of one feature
Implements CFeatures.
Definition at line 441 of file SimpleFeatures.cpp.
CSimpleFeatures< ST > * get_transposed | ( | ) |
get a transposed copy of the features
possible with subset
Definition at line 306 of file SimpleFeatures.cpp.
ST * get_transposed | ( | int32_t & | num_feat, |
int32_t & | num_vec | ||
) |
compute and return the transpose of the feature matrix, which will be preprocessed. num_feat and num_vec are returned by reference; the caller has to clean up
possible with subset
num_feat | number of features in matrix |
num_vec | number of vectors in matrix |
Definition at line 315 of file SimpleFeatures.cpp.
void initialize_cache | ( | ) |
|
virtual |
load features from file
loader | File object via which to load data |
Reimplemented from CFeatures.
void obtain_from_dot | ( | CDotFeatures * | df | ) |
obtain simple features from other dotfeatures
removes any subset before
df | dotfeatures to obtain features from |
Definition at line 372 of file SimpleFeatures.cpp.
|
virtual |
reshape
not possible with subset
p_num_features | new number of features |
p_num_vectors | new number of vectors |
Reimplemented from CFeatures.
Definition at line 481 of file SimpleFeatures.cpp.
|
virtual |
save features to file
saver | File object via which to save data |
Reimplemented from CFeatures.
void set_feature_matrix | ( | SGMatrix< ST > | matrix | ) |
Setter for feature matrix
any subset is removed
matrix | feature matrix to set |
Definition at line 288 of file SimpleFeatures.cpp.
|
virtual |
set feature matrix: it is necessary to set feature_matrix, num_features and num_vectors, where num_features is the column offset and columns are linear in memory; see below for definition of feature_matrix
not possible with subset
fm | feature matrix to set |
num_feat | number of features in matrix |
num_vec | number of vectors in matrix |
Definition at line 337 of file SimpleFeatures.cpp.
void set_feature_vector | ( | SGVector< ST > | vector, |
int32_t | num | ||
) |
set feature vector num
possible with subset
vector | vector |
num | index of vector to set |
Definition at line 128 of file SimpleFeatures.cpp.
void set_num_features | ( | int32_t | num | ) |
set number of features
num | number to set |
Definition at line 450 of file SimpleFeatures.cpp.
void set_num_vectors | ( | int32_t | num | ) |
set number of vectors
not possible with subset
num | number to set |
Definition at line 456 of file SimpleFeatures.cpp.
SGMatrix< ST > steal_feature_matrix | ( | ) |
steals feature matrix, i.e. returns the matrix and forgets about it; subset is ignored
Definition at line 273 of file SimpleFeatures.cpp.
void vector_subset | ( | int32_t * | idx, |
int32_t | idx_len | ||
) |
Extracts the feature vectors mentioned in idx and replaces them in feature matrix in place.
It does not resize the allocated memory block.
not possible with subset
idx | index with examples that shall remain in the feature matrix |
idx_len | length of the index |
Note: assumes idx is sorted
Definition at line 180 of file SimpleFeatures.cpp.
|
protected |
feature cache
Definition at line 540 of file SimpleFeatures.h.
|
protected |
Feature matrix and its associated number of vectors and features. Note that num_vectors / num_features above have the same sizes if feature_matrix != NULL
Definition at line 531 of file SimpleFeatures.h.
|
protected |
number of features in feature matrix
Definition at line 537 of file SimpleFeatures.h.
|
protected |
number of vectors in feature matrix
Definition at line 534 of file SimpleFeatures.h.
|
protected |
number of features in cache
Definition at line 525 of file SimpleFeatures.h.
|
protected |
number of vectors in cache
Definition at line 522 of file SimpleFeatures.h.