SHOGUN v0.9.0
|
Dynamic Programming Class.
Structure and Function collection. This class implements dynamic programming functions.
公有成员 | |
CDynProg (int32_t p_num_svms=8) | |
virtual | ~CDynProg () |
void | set_num_states (int32_t N) |
int32_t | get_num_states () |
int32_t | get_num_svms () |
void | init_content_svm_value_array (const int32_t p_num_svms) |
void | init_tiling_data (int32_t *probe_pos, float64_t *intensities, const int32_t num_probes) |
void | precompute_tiling_plifs (CPlif **PEN, const int32_t *tiling_plif_ids, const int32_t num_tiling_plifs) |
void | resize_lin_feat (int32_t num_new_feat) |
void | set_p_vector (float64_t *p, int32_t N) |
void | set_q_vector (float64_t *q, int32_t N) |
void | set_a (float64_t *a, int32_t M, int32_t N) |
void | set_a_id (int32_t *a, int32_t M, int32_t N) |
void | set_a_trans_matrix (float64_t *a_trans, int32_t num_trans, int32_t N) |
void | init_mod_words_array (int32_t *p_mod_words_array, int32_t num_elem, int32_t num_columns) |
bool | check_svm_arrays () |
void | set_observation_matrix (float64_t *seq, int32_t *dims, int32_t ndims) |
int32_t | get_num_positions () |
void | set_content_type_array (float64_t *seg_path, int32_t rows, int32_t cols) |
void | set_pos (int32_t *pos, int32_t seq_len) |
void | set_orf_info (int32_t *orf_info, int32_t m, int32_t n) |
void | set_gene_string (char *genestr, int32_t genestr_len) |
void | set_dict_weights (float64_t *dictionary_weights, int32_t dict_len, int32_t n) |
void | best_path_set_segment_loss (float64_t *segment_loss, int32_t num_segment_id1, int32_t num_segment_id2) |
void | best_path_set_segment_ids_mask (int32_t *segment_ids, float64_t *segment_mask, int32_t m) |
void | set_sparse_features (CSparseFeatures< float64_t > *seq_sparse1, CSparseFeatures< float64_t > *seq_sparse2) |
void | set_plif_matrices (CPlifMatrix *pm) |
void | get_scores (float64_t **scores, int32_t *n) |
void | get_states (int32_t **states, int32_t *m, int32_t *n) |
void | get_positions (int32_t **positions, int32_t *m, int32_t *n) |
void | compute_nbest_paths (int32_t max_num_signals, bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences) |
void | best_path_trans_deriv (int32_t *my_state_seq, int32_t *my_pos_seq, int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals) |
void | set_my_state_seq (int32_t *my_state_seq) |
void | set_my_pos_seq (int32_t *my_pos_seq) |
void | get_path_scores (float64_t **my_scores, int32_t *seq_len) |
void | get_path_losses (float64_t **my_losses, int32_t *seq_len) |
T_STATES | get_N () const |
access function for number of states N | |
void | set_q (T_STATES offset, float64_t value) |
void | set_p (T_STATES offset, float64_t value) |
void | set_a (T_STATES line_, T_STATES column, float64_t value) |
float64_t | get_q (T_STATES offset) const |
float64_t | get_q_deriv (T_STATES offset) const |
float64_t | get_p (T_STATES offset) const |
float64_t | get_p_deriv (T_STATES offset) const |
void | precompute_content_values () |
float64_t * | get_lin_feat (int32_t &dim1, int32_t &dim2) |
void | set_lin_feat (float64_t *p_lin_feat, int32_t p_num_svms, int32_t p_seq_len) |
void | create_word_string () |
void | precompute_stop_codons () |
float64_t | get_a (T_STATES line_, T_STATES column) const |
float64_t | get_a_deriv (T_STATES line_, T_STATES column) const |
void | set_intron_list (CIntronList *intron_list, int32_t num_plifs) |
CSegmentLoss * | get_segment_loss_object () |
void | long_transition_settings (bool use_long_transitions, int32_t threshold, int32_t max_len) |
保护成员 | |
void | lookup_content_svm_values (const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t *svm_values, int32_t frame) |
void | lookup_tiling_plif_values (const int32_t from_state, const int32_t to_state, const int32_t len, float64_t *svm_values) |
int32_t | find_frame (const int32_t from_state) |
int32_t | raw_intensities_interval_query (const int32_t from_pos, const int32_t to_pos, float64_t *intensities, int32_t type) |
bool | extend_orf (int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to) |
virtual const char * | get_name () const |
保护属性 | |
int32_t | m_num_degrees |
int32_t | m_num_svms |
CArray< int32_t > | m_word_degree |
CArray< int32_t > | m_cum_num_words |
int32_t * | m_cum_num_words_array |
CArray< int32_t > | m_num_words |
int32_t * | m_num_words_array |
CArray2< int32_t > | m_mod_words |
int32_t * | m_mod_words_array |
CArray< bool > | m_sign_words |
bool * | m_sign_words_array |
CArray< int32_t > | m_string_words |
int32_t * | m_string_words_array |
CArray< int32_t > | m_num_unique_words |
bool | m_svm_arrays_clean |
int32_t | m_max_a_id |
CArray3< float64_t > | m_observation_matrix |
CArray< int32_t > | m_pos |
int32_t | m_seq_len |
CArray2< int32_t > | m_orf_info |
CArray2< float64_t > | m_segment_sum_weights |
CArray< CPlifBase * > | m_plif_list |
CArray2< CPlifBase * > | m_PEN |
CArray2< CPlifBase * > | m_PEN_state_signals |
CArray< char > | m_genestr |
uint16_t *** | m_wordstr |
CArray2< float64_t > | m_dict_weights |
CArray3< float64_t > | m_segment_loss |
CArray< int32_t > | m_segment_ids |
CArray< float64_t > | m_segment_mask |
CArray< int32_t > | m_my_state_seq |
CArray< int32_t > | m_my_pos_seq |
CArray< float64_t > | m_my_scores |
CArray< float64_t > | m_my_losses |
CSegmentLoss * | m_seg_loss_obj |
CArray< float64_t > | m_scores |
CArray2< int32_t > | m_states |
CArray2< int32_t > | m_positions |
CSparseFeatures< float64_t > * | m_seq_sparse1 |
CSparseFeatures< float64_t > * | m_seq_sparse2 |
CPlifMatrix * | m_plif_matrices |
CArray< bool > | m_genestr_stop |
CIntronList * | m_intron_list |
int32_t | m_num_intron_plifs |
CArray2< float64_t > | m_lin_feat |
float64_t * | m_raw_intensities |
int32_t * | m_probe_pos |
int32_t * | m_num_probes_cum |
int32_t * | m_num_lin_feat_plifs_cum |
int32_t | m_num_raw_data |
bool | m_long_transitions |
int32_t | m_long_transition_threshold |
model specific variables. | |
these are p,q,a,b,N,M etc | |
int32_t | m_N |
number of states | |
CArray2< int32_t > | m_transition_matrix_a_id |
transition matrix | |
CArray2< float64_t > | m_transition_matrix_a |
CArray2< float64_t > | m_transition_matrix_a_deriv |
CArray< float64_t > | m_initial_state_distribution_p |
initial distribution of states | |
CArray< float64_t > | m_initial_state_distribution_p_deriv |
CArray< float64_t > | m_end_state_distribution_q |
distribution of end-states | |
CArray< float64_t > | m_end_state_distribution_q_deriv |
静态保护属性 | |
static int32_t | word_degree_default [4] = {3,4,5,6} |
static int32_t | cum_num_words_default [5] = {0,64,320,1344,5440} |
static int32_t | frame_plifs [3] = {4,5,6} |
static int32_t | num_words_default [4] = {64,256,1024,4096} |
static int32_t | mod_words_default [32] |
static bool | sign_words_default [16] |
static int32_t | string_words_default [16] |
CDynProg | ( | int32_t | p_num_svms = 8 | ) |
~CDynProg | ( | ) | [virtual] |
在文件DynProg.cpp第147行定义。
void best_path_set_segment_ids_mask | ( | int32_t * | segment_ids, |
float64_t * | segment_mask, | ||
int32_t | m | ||
) |
set best path segment ids mask
segment_ids | segment ids |
segment_mask | segment mask |
m | dimension m |
在文件DynProg.cpp第820行定义。
void best_path_set_segment_loss | ( | float64_t * | segment_loss, |
int32_t | num_segment_id1, | ||
int32_t | num_segment_id2 | ||
) |
set best path segment loss
segment_loss | segment loss |
num_segment_id1 | number of segment id1 |
num_segment_id2 | number of segment id2 |
在文件DynProg.cpp第804行定义。
void best_path_trans_deriv | ( | int32_t * | my_state_seq, |
int32_t * | my_pos_seq, | ||
int32_t | my_seq_len, | ||
const float64_t * | seq_array, | ||
int32_t | max_num_signals | ||
) |
given a path through the state model and the corresponding positions, compute the features. This can be seen as the derivative of the score (output of the dynamic program) with respect to the parameters
my_state_seq | state sequence of the path |
my_pos_seq | sequence of positions |
my_seq_len | length of state and position sequences |
seq_array | array of features |
max_num_signals | maximal number of signals |
在文件DynProg.cpp第2080行定义。
bool check_svm_arrays | ( | ) |
check SVM arrays; call this function to check consistency
在文件DynProg.cpp第606行定义。
void compute_nbest_paths | ( | int32_t | max_num_signals, |
bool | use_orf, | ||
int16_t | nbest, | ||
bool | with_loss, | ||
bool | with_multiple_sequences | ||
) |
run the viterbi algorithm to compute the n best viterbi paths
max_num_signals | maximal number of signals for a single state |
use_orf | whether orf shall be used |
nbest | number of best paths (n) |
with_loss | use loss |
with_multiple_sequences | !!!not functional set to false!!! |
在文件DynProg.cpp第966行定义。
void create_word_string | ( | ) |
create word string from char* Jonas
在文件DynProg.cpp第366行定义。
bool extend_orf | ( | int32_t | orf_from, |
int32_t | orf_to, | ||
int32_t | start, | ||
int32_t & | last_pos, | ||
int32_t | to | ||
) | [protected] |
extend orf
orf_from | orf from |
orf_to | orf to |
start | start |
last_pos | last position |
to | to |
在文件DynProg.cpp第915行定义。
int32_t find_frame | ( | const int32_t | from_state | ) | [protected] |
find frame
from_state | from state |
float64_t* get_lin_feat | ( | int32_t & | dim1, |
int32_t & | dim2 | ||
) |
virtual const char* get_name | ( | void | ) | const [protected, virtual] |
int32_t get_num_positions | ( | ) |
get number of positions; the dynamic program is sparse encoded and this function gives the number of positions that can actually be part of a predicted path
在文件DynProg.cpp第684行定义。
int32_t get_num_states | ( | ) |
get num states
在文件DynProg.cpp第239行定义。
int32_t get_num_svms | ( | ) |
get num svms
在文件DynProg.cpp第195行定义。
void get_path_losses | ( | float64_t ** | my_losses, |
int32_t * | seq_len | ||
) |
get path losses
best_path_trans_deriv result retrieval functions
my_losses | my losses |
seq_len | length of sequence |
在文件DynProg.cpp第899行定义。
void get_path_scores | ( | float64_t ** | my_scores, |
int32_t * | seq_len | ||
) |
get path scores
best_path_trans_deriv result retrieval functions
my_scores | scores |
seq_len | length of sequence |
在文件DynProg.cpp第885行定义。
void get_positions | ( | int32_t ** | positions, |
int32_t * | m, | ||
int32_t * | n | ||
) |
void get_scores | ( | float64_t ** | scores, |
int32_t * | n | ||
) |
CSegmentLoss* get_segment_loss_object | ( | ) |
void get_states | ( | int32_t ** | states, |
int32_t * | m, | ||
int32_t * | n | ||
) |
void init_content_svm_value_array | ( | const int32_t | p_num_svms | ) |
init CArray for precomputed content svm values with size seq_len x num_svms
p_num_svms | number of svm weight vectors for content prediction |
在文件DynProg.cpp第274行定义。
void init_mod_words_array | ( | int32_t * | p_mod_words_array, |
int32_t | num_elem, | ||
int32_t | num_columns | ||
) |
init mod words array
p_mod_words_array | new mod words array |
num_elem | number of array elements |
num_columns | number of columns |
在文件DynProg.cpp第583行定义。
void init_tiling_data | ( | int32_t * | probe_pos, |
float64_t * | intensities, | ||
const int32_t | num_probes | ||
) |
init CArray for precomputed tiling intensity-plif-values with size seq_len x num_svms
probe_pos | local positions of probes |
intensities | intensities of probes |
num_probes | number of probes |
在文件DynProg.cpp第244行定义。
void long_transition_settings | ( | bool | use_long_transitions, |
int32_t | threshold, | ||
int32_t | max_len | ||
) |
void lookup_content_svm_values | ( | const int32_t | from_state, |
const int32_t | to_state, | ||
const int32_t | from_pos, | ||
const int32_t | to_pos, | ||
float64_t * | svm_values, | ||
int32_t | frame | ||
) | [protected] |
lookup content SVM values
from_state | from state |
to_state | to state |
from_pos | from position |
to_pos | to position |
svm_values | SVM values |
frame | frame |
在文件DynProg.cpp第2508行定义。
void lookup_tiling_plif_values | ( | const int32_t | from_state, |
const int32_t | to_state, | ||
const int32_t | len, | ||
float64_t * | svm_values | ||
) | [protected] |
lookup tiling Plif values
from_state | from state |
to_state | to state |
len | length |
svm_values | SVM values |
void precompute_content_values | ( | ) |
create array of precomputed content svm values
在文件DynProg.cpp第397行定义。
void precompute_stop_codons | ( | ) |
precompute stop codons
在文件DynProg.cpp第200行定义。
void precompute_tiling_plifs | ( | CPlif ** | PEN, |
const int32_t * | tiling_plif_ids, | ||
const int32_t | num_tiling_plifs | ||
) |
precompute tiling Plifs
PEN | Plif PEN |
tiling_plif_ids | tiling plif ids |
num_tiling_plifs | number of tiling plifs |
在文件DynProg.cpp第317行定义。
int32_t raw_intensities_interval_query | ( | const int32_t | from_pos, |
const int32_t | to_pos, | ||
float64_t * | intensities, | ||
int32_t | type | ||
) | [protected] |
raw intensities interval query
from_pos | from position |
to_pos | to position |
intensities | intensities |
type | type |
在文件DynProg.cpp第2482行定义。
void resize_lin_feat | ( | int32_t | num_new_feat | ) |
append rows to linear features array
num_new_feat | number of new rows to add |
在文件DynProg.cpp第284行定义。
void set_a | ( | float64_t * | a, |
int32_t | M, | ||
int32_t | N | ||
) |
set matrix a
a | new matrix a |
M | dimension M of matrix a |
N | dimension N of matrix a |
在文件DynProg.cpp第462行定义。
void set_a_id | ( | int32_t * | a, |
int32_t | M, | ||
int32_t | N | ||
) |
set a id
a | new a id (identity?) |
M | dimension M of matrix a |
N | dimension N of matrix a |
在文件DynProg.cpp第470行定义。
void set_a_trans_matrix | ( | float64_t * | a_trans, |
int32_t | num_trans, | ||
int32_t | N | ||
) |
set a transition matrix
a_trans | transition matrix a |
num_trans | number of transitions |
N | dimension N of matrix a |
在文件DynProg.cpp第483行定义。
void set_content_type_array | ( | float64_t * | seg_path, |
int32_t | rows, | ||
int32_t | cols | ||
) |
set an array of length #(candidate positions) which specifies the content type of each pos and a mask that determines to which extent the loss should be applied to this position; this is a way to encode label confidence via weights between zero and one
seg_path | seg path |
rows | rows |
cols | cols |
在文件DynProg.cpp第689行定义。
void set_dict_weights | ( | float64_t * | dictionary_weights, |
int32_t | dict_len, | ||
int32_t | n | ||
) |
set best path dict weights
dictionary_weights | dictionary weights |
dict_len | length of dictionary weights |
n | dimension n |
在文件DynProg.cpp第787行定义。
void set_gene_string | ( | char * | genestr, |
int32_t | genestr_len | ||
) |
set best path genestr
genestr | gene string |
genestr_len | length of gene string |
在文件DynProg.cpp第763行定义。
void set_intron_list | ( | CIntronList * | intron_list, |
int32_t | num_plifs | ||
) |
void set_lin_feat | ( | float64_t * | p_lin_feat, |
int32_t | p_num_svms, | ||
int32_t | p_seq_len | ||
) |
void set_my_pos_seq | ( | int32_t * | my_pos_seq | ) |
void set_my_state_seq | ( | int32_t * | my_state_seq | ) |
void set_num_states | ( | int32_t | N | ) |
void set_observation_matrix | ( | float64_t * | seq, |
int32_t * | dims, | ||
int32_t | ndims | ||
) |
set best path seq
seq | signal features |
dims | dimensions |
ndims | number of dimensions |
在文件DynProg.cpp第662行定义。
void set_orf_info | ( | int32_t * | orf_info, |
int32_t | m, | ||
int32_t | n | ||
) |
set best path orf info; only for compute_nbest_paths
orf_info | the orf info |
m | dimension m |
n | dimension n |
在文件DynProg.cpp第731行定义。
void set_p_vector | ( | float64_t * | p, |
int32_t | N | ||
) |
void set_plif_matrices | ( | CPlifMatrix * | pm | ) |
void set_pos | ( | int32_t * | pos, |
int32_t | seq_len | ||
) |
void set_q_vector | ( | float64_t * | q, |
int32_t | N | ||
) |
void set_sparse_features | ( | CSparseFeatures< float64_t > * | seq_sparse1, |
CSparseFeatures< float64_t > * | seq_sparse2 | ||
) |
set sparse feature matrices
在文件DynProg.cpp第740行定义。
int32_t cum_num_words_default = {0,64,320,1344,5440} [static, protected] |
int32_t frame_plifs = {4,5,6} [static, protected] |
CArray<int32_t> m_cum_num_words [protected] |
int32_t* m_cum_num_words_array [protected] |
CArray2<float64_t> m_dict_weights [protected] |
CArray<float64_t> m_end_state_distribution_q [protected] |
CArray<float64_t> m_end_state_distribution_q_deriv [protected] |
CArray<bool> m_genestr_stop [protected] |
CArray<float64_t> m_initial_state_distribution_p [protected] |
CArray<float64_t> m_initial_state_distribution_p_deriv [protected] |
CIntronList* m_intron_list [protected] |
CArray2<float64_t> m_lin_feat [protected] |
int32_t m_long_transition_threshold [protected] |
bool m_long_transitions [protected] |
int32_t m_max_a_id [protected] |
CArray2<int32_t> m_mod_words [protected] |
int32_t* m_mod_words_array [protected] |
CArray<float64_t> m_my_losses [protected] |
CArray<int32_t> m_my_pos_seq [protected] |
CArray<float64_t> m_my_scores [protected] |
CArray<int32_t> m_my_state_seq [protected] |
int32_t m_num_degrees [protected] |
int32_t m_num_intron_plifs [protected] |
int32_t* m_num_lin_feat_plifs_cum [protected] |
int32_t* m_num_probes_cum [protected] |
int32_t m_num_raw_data [protected] |
int32_t m_num_svms [protected] |
CArray<int32_t> m_num_unique_words [protected] |
CArray<int32_t> m_num_words [protected] |
int32_t* m_num_words_array [protected] |
CArray3<float64_t> m_observation_matrix [protected] |
CArray2<int32_t> m_orf_info [protected] |
CArray2<CPlifBase*> m_PEN_state_signals [protected] |
CArray<CPlifBase*> m_plif_list [protected] |
CPlifMatrix* m_plif_matrices [protected] |
CArray2<int32_t> m_positions [protected] |
int32_t* m_probe_pos [protected] |
float64_t* m_raw_intensities [protected] |
CSegmentLoss* m_seg_loss_obj [protected] |
CArray<int32_t> m_segment_ids [protected] |
CArray3<float64_t> m_segment_loss [protected] |
CArray<float64_t> m_segment_mask [protected] |
CArray2<float64_t> m_segment_sum_weights [protected] |
CSparseFeatures<float64_t>* m_seq_sparse1 [protected] |
CSparseFeatures<float64_t>* m_seq_sparse2 [protected] |
CArray<bool> m_sign_words [protected] |
bool* m_sign_words_array [protected] |
CArray<int32_t> m_string_words [protected] |
int32_t* m_string_words_array [protected] |
bool m_svm_arrays_clean [protected] |
CArray2<float64_t> m_transition_matrix_a [protected] |
CArray2<float64_t> m_transition_matrix_a_deriv [protected] |
CArray2<int32_t> m_transition_matrix_a_id [protected] |
CArray<int32_t> m_word_degree [protected] |
uint16_t*** m_wordstr [protected] |
wordstr is a vector of L n-gram indices, with wordstr(i) representing a number between 0 and 4095 corresponding to the 6-mer in genestr(i-5:i). pos is a vector of candidate transition positions (it is input to compute_nbest_paths). t_end is some index in pos.
svs has been initialized by init_svm_values
At the end of this procedure, svs.svm_values[i+s*svs.seqlen] has the value of the s-th SVM on genestr(pos(t_end-i):pos(t_end)) for every i satisfying pos(t_end)-pos(t_end-i) <= svs.maxlookback
The SVM weights are precomputed in m_dict_weights
int32_t mod_words_default [static, protected] |
int32_t num_words_default = {64,256,1024,4096} [static, protected] |
bool sign_words_default [static, protected] |
int32_t string_words_default [static, protected] |
int32_t word_degree_default = {3,4,5,6} [static, protected] |