00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "structure/DynProg.h"
00014 #include "lib/Mathematics.h"
00015 #include "lib/io.h"
00016 #include "lib/config.h"
00017 #include "features/StringFeatures.h"
00018 #include "features/Alphabet.h"
00019 #include "structure/Plif.h"
00020 #include "structure/IntronList.h"
00021 #include "lib/Array.h"
00022 #include "lib/Array2.h"
00023 #include "lib/Array3.h"
00024
00025 #include <stdlib.h>
00026 #include <stdio.h>
00027 #include <time.h>
00028 #include <ctype.h>
00029
00030 using namespace shogun;
00031
00032
00033
00034
00035 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ;
00036 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ;
00037 int32_t CDynProg::frame_plifs[3]={4,5,6};
00038 int32_t CDynProg::num_words_default[4]= {64,256,1024,4096} ;
00039 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1,
00040 1,1,1,1,1,1,1,1,
00041 0,0,0,0,0,0,0,0,
00042 0,0,0,0,0,0,0,0} ;
00043 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true,
00044 false,false,false,false,false,false,false,false} ;
00045 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0,
00046 1,1,1,1,1,1,1,1} ;
00047
00048 CDynProg::CDynProg(int32_t num_svms )
00049 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1),
00050 m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1),
00051 m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1),
00052 m_end_state_distribution_q_deriv(1),
00053
00054
00055 m_num_degrees(4),
00056 m_num_svms(num_svms),
00057 m_word_degree(word_degree_default, m_num_degrees, true, true),
00058 m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true),
00059 m_cum_num_words_array(m_cum_num_words.get_array()),
00060 m_num_words(num_words_default, m_num_degrees, true, true),
00061 m_num_words_array(m_num_words.get_array()),
00062 m_mod_words(mod_words_default, m_num_svms, 2, true, true),
00063 m_mod_words_array(m_mod_words.get_array()),
00064 m_sign_words(sign_words_default, m_num_svms, true, true),
00065 m_sign_words_array(m_sign_words.get_array()),
00066 m_string_words(string_words_default, m_num_svms, true, true),
00067 m_string_words_array(m_string_words.get_array()),
00068
00069 m_num_unique_words(m_num_degrees),
00070 m_svm_arrays_clean(true),
00071
00072 m_max_a_id(0), m_observation_matrix(1,1,1),
00073 m_pos(1),
00074 m_seq_len(0),
00075 m_orf_info(1,2),
00076 m_plif_list(1),
00077 m_PEN(1,1),
00078 m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2),
00079 m_segment_ids(1),
00080 m_segment_mask(1),
00081 m_my_state_seq(1),
00082 m_my_pos_seq(1),
00083 m_my_scores(1),
00084 m_my_losses(1),
00085 m_scores(1),
00086 m_states(1,1),
00087 m_positions(1,1),
00088
00089 m_seq_sparse1(NULL),
00090 m_seq_sparse2(NULL),
00091 m_plif_matrices(NULL),
00092
00093 m_genestr_stop(1),
00094 m_intron_list(NULL),
00095 m_num_intron_plifs(0),
00096 m_lin_feat(1,1),
00097 m_raw_intensities(NULL),
00098 m_probe_pos(NULL),
00099 m_num_probes_cum(NULL),
00100 m_num_lin_feat_plifs_cum(NULL),
00101 m_num_raw_data(0),
00102
00103 m_long_transitions(true),
00104 m_long_transition_threshold(1000),
00105 m_long_transition_max(100000)
00106 {
00107 trans_list_forward = NULL ;
00108 trans_list_forward_cnt = NULL ;
00109 trans_list_forward_val = NULL ;
00110 trans_list_forward_id = NULL ;
00111 trans_list_len = 0 ;
00112
00113 mem_initialized = true ;
00114
00115 m_N=1;
00116
00117 m_raw_intensities = NULL;
00118 m_probe_pos = NULL;
00119 m_num_probes_cum = new int32_t[100];
00120 m_num_probes_cum[0] = 0;
00121
00122 m_num_lin_feat_plifs_cum = new int32_t[100];
00123 m_num_lin_feat_plifs_cum[0] = m_num_svms;
00124 m_num_raw_data = 0;
00125 #ifdef ARRAY_STATISTICS
00126 m_word_degree.set_name("word_degree");
00127 #endif
00128
00129 m_transition_matrix_a_id.set_name("transition_matrix_a_id");
00130 m_transition_matrix_a.set_name("transition_matrix_a");
00131 m_transition_matrix_a_deriv.set_name("transition_matrix_a_deriv");
00132 m_mod_words.set_name("mod_words");
00133 m_orf_info.set_name("orf_info");
00134 m_segment_sum_weights.set_name("segment_sum_weights");
00135 m_PEN.set_name("PEN");
00136 m_PEN_state_signals.set_name("PEN_state_signals");
00137 m_dict_weights.set_name("dict_weights");
00138 m_states.set_name("states");
00139 m_positions.set_name("positions");
00140 m_lin_feat.set_name("lin_feat");
00141
00142
00143 m_observation_matrix.set_name("m_observation_matrix");
00144 m_segment_loss.set_name("m_segment_loss");
00145 m_seg_loss_obj = new CSegmentLoss();
00146 }
00147
00148 CDynProg::~CDynProg()
00149 {
00150 if (trans_list_forward_cnt)
00151 delete[] trans_list_forward_cnt;
00152 if (trans_list_forward)
00153 {
00154 for (int32_t i=0; i<trans_list_len; i++)
00155 {
00156 if (trans_list_forward[i])
00157 delete[] trans_list_forward[i];
00158 }
00159 delete[] trans_list_forward;
00160 }
00161 if (trans_list_forward_val)
00162 {
00163 for (int32_t i=0; i<trans_list_len; i++)
00164 {
00165 if (trans_list_forward_val[i])
00166 delete[] trans_list_forward_val[i];
00167 }
00168 delete[] trans_list_forward_val;
00169 }
00170 if (trans_list_forward_id)
00171 {
00172 for (int32_t i=0; i<trans_list_len; i++)
00173 {
00174 if (trans_list_forward_id[i])
00175 delete[] trans_list_forward_id[i];
00176 }
00177 delete[] trans_list_forward_id;
00178 }
00179 if (m_raw_intensities)
00180 delete[] m_raw_intensities;
00181 if (m_probe_pos)
00182 delete[] m_probe_pos;
00183 if (m_num_probes_cum)
00184 delete[] m_num_probes_cum ;
00185 if (m_num_lin_feat_plifs_cum)
00186 delete[] m_num_lin_feat_plifs_cum ;
00187
00188 delete m_intron_list;
00189
00190 SG_UNREF(m_seq_sparse1);
00191 SG_UNREF(m_seq_sparse2);
00192 SG_UNREF(m_plif_matrices);
00193 }
00194
00196 int32_t CDynProg::get_num_svms()
00197 {
00198 return m_num_svms;
00199 }
00200
00201 void CDynProg::precompute_stop_codons()
00202 {
00203 int32_t length=m_genestr.get_dim1();
00204
00205 m_genestr_stop.resize_array(length) ;
00206 m_genestr_stop.zero() ;
00207 m_genestr_stop.set_name("genestr_stop") ;
00208 {
00209 for (int32_t i=0; i<length-2; i++)
00210 if ((m_genestr[i]=='t' || m_genestr[i]=='T') &&
00211 (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') &&
00212 (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) ||
00213 ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') )))
00214 {
00215 m_genestr_stop.element(i)=true ;
00216 }
00217 else
00218 m_genestr_stop.element(i)=false ;
00219 m_genestr_stop.element(length-2)=false ;
00220 m_genestr_stop.element(length-1)=false ;
00221 }
00222 }
00223
00224 void CDynProg::set_num_states(int32_t p_N)
00225 {
00226 m_N=p_N ;
00227
00228 m_transition_matrix_a_id.resize_array(m_N,m_N) ;
00229 m_transition_matrix_a.resize_array(m_N,m_N) ;
00230 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
00231 m_initial_state_distribution_p.resize_array(m_N) ;
00232 m_initial_state_distribution_p_deriv.resize_array(m_N) ;
00233 m_end_state_distribution_q.resize_array(m_N);
00234 m_end_state_distribution_q_deriv.resize_array(m_N) ;
00235
00236 m_orf_info.resize_array(m_N,2) ;
00237 m_PEN.resize_array(m_N,m_N) ;
00238 }
00239
00240 int32_t CDynProg::get_num_states()
00241 {
00242 return m_N;
00243 }
00244
00245 void CDynProg::init_tiling_data(
00246 int32_t* probe_pos, float64_t* intensities, const int32_t num_probes)
00247 {
00248 m_num_raw_data++;
00249 m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes;
00250
00251 int32_t* tmp_probe_pos = new int32_t[m_num_probes_cum[m_num_raw_data]];
00252 float64_t* tmp_raw_intensities = new float64_t[m_num_probes_cum[m_num_raw_data]];
00253
00254
00255 if (m_num_raw_data==1){
00256 memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00257 memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t));
00258
00259 }else{
00260 memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t));
00261 memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t));
00262 memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t));
00263 memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t));
00264 }
00265 delete[] m_probe_pos;
00266 delete[] m_raw_intensities;
00267 m_probe_pos = tmp_probe_pos;
00268 m_raw_intensities = tmp_raw_intensities;
00269
00270
00271
00272
00273 }
00274
00275 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms)
00276 {
00277 m_lin_feat.resize_array(p_num_svms, m_seq_len);
00278
00279
00280 for (int s=0; s<p_num_svms; s++)
00281 for (int p=0; p<m_seq_len; p++)
00282 m_lin_feat.set_element(0.0, s, p) ;
00283 }
00284
00285 void CDynProg::resize_lin_feat(const int32_t num_new_feat)
00286 {
00287 int32_t dim1, dim2;
00288 m_lin_feat.get_array_size(dim1, dim2);
00289 ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]);
00290 ASSERT(dim2==m_seq_len);
00291
00292
00293
00294 float64_t* arr = m_lin_feat.get_array();
00295 float64_t* tmp = new float64_t[(dim1+num_new_feat)*dim2];
00296 memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ;
00297 for(int32_t j=0;j<m_seq_len;j++)
00298 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++)
00299 tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
00300
00301 m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);
00302 delete[] tmp;
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316 }
00317
00318 void CDynProg::precompute_tiling_plifs(
00319 CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs)
00320 {
00321 m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs;
00322 float64_t* tiling_plif = new float64_t[num_tiling_plifs];
00323 float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
00324 for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; i++)
00325 svm_value[i]=0.0;
00326 int32_t* tiling_rows = new int32_t[num_tiling_plifs];
00327 for (int32_t i=0; i<num_tiling_plifs; i++)
00328 {
00329 tiling_plif[i]=0.0;
00330 CPlif * plif = PEN[tiling_plif_ids[i]];
00331 tiling_rows[i] = plif->get_use_svm();
00332
00333 ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i)
00334 }
00335 resize_lin_feat(num_tiling_plifs);
00336
00337
00338 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]];
00339 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]];
00340 int32_t num=m_num_probes_cum[m_num_raw_data-1];
00341
00342 for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++)
00343 {
00344 while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx])
00345 {
00346 for (int32_t i=0; i<num_tiling_plifs; i++)
00347 {
00348 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data;
00349 CPlif * plif = PEN[tiling_plif_ids[i]];
00350 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1);
00351 plif->set_do_calc(true);
00352 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00353 plif->set_do_calc(false);
00354 }
00355 p_tiling_data++;
00356 p_tiling_pos++;
00357 num++;
00358 }
00359 for (int32_t i=0; i<num_tiling_plifs; i++)
00360 m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx);
00361 }
00362 delete[] svm_value;
00363 delete[] tiling_plif;
00364 delete[] tiling_rows;
00365 }
00366
00367 void CDynProg::create_word_string()
00368 {
00369 delete[] m_wordstr;
00370 m_wordstr=new uint16_t**[5440];
00371 int32_t k=0;
00372 int32_t genestr_len=m_genestr.get_dim1();
00373
00374 m_wordstr[k]=new uint16_t*[m_num_degrees] ;
00375 for (int32_t j=0; j<m_num_degrees; j++)
00376 {
00377 m_wordstr[k][j]=NULL ;
00378 {
00379 m_wordstr[k][j]=new uint16_t[genestr_len] ;
00380 for (int32_t i=0; i<genestr_len; i++)
00381 switch (m_genestr[i])
00382 {
00383 case 'A':
00384 case 'a': m_wordstr[k][j][i]=0 ; break ;
00385 case 'C':
00386 case 'c': m_wordstr[k][j][i]=1 ; break ;
00387 case 'G':
00388 case 'g': m_wordstr[k][j][i]=2 ; break ;
00389 case 'T':
00390 case 't': m_wordstr[k][j][i]=3 ; break ;
00391 default: ASSERT(0) ;
00392 }
00393 CAlphabet::translate_from_single_order(m_wordstr[k][j], genestr_len, m_word_degree[j]-1, m_word_degree[j], 2) ;
00394 }
00395 }
00396 }
00397
00398 void CDynProg::precompute_content_values()
00399 {
00400 for (int32_t s=0; s<m_num_svms; s++)
00401 m_lin_feat.set_element(0.0, s, 0);
00402
00403 for (int32_t p=0 ; p<m_seq_len-1 ; p++)
00404 {
00405 int32_t from_pos = m_pos[p];
00406 int32_t to_pos = m_pos[p+1];
00407 float64_t* my_svm_values_unnormalized = new float64_t[m_num_svms];
00408
00409
00410 ASSERT(from_pos<=m_genestr.get_dim1());
00411 ASSERT(to_pos<=m_genestr.get_dim1());
00412
00413 for (int32_t s=0; s<m_num_svms; s++)
00414 my_svm_values_unnormalized[s]=0.0;
00415
00416 for (int32_t i=from_pos; i<to_pos; i++)
00417 {
00418 for (int32_t j=0; j<m_num_degrees; j++)
00419 {
00420 uint16_t word = m_wordstr[0][j][i] ;
00421 for (int32_t s=0; s<m_num_svms; s++)
00422 {
00423
00424 if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1))
00425 continue;
00426 my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ;
00427 }
00428 }
00429 }
00430 for (int32_t s=0; s<m_num_svms; s++)
00431 {
00432 float64_t prev = m_lin_feat.get_element(s, p);
00433
00434 if (prev<-1e20 || prev>1e20)
00435 {
00436 SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev) ;
00437 prev=0 ;
00438 }
00439 m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1);
00440 }
00441 delete[] my_svm_values_unnormalized;
00442 }
00443
00444
00445
00446 }
00447
00448 void CDynProg::set_p_vector(float64_t *p, int32_t N)
00449 {
00450 if (!(N==m_N))
00451 SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",N, m_N);
00452
00453 m_initial_state_distribution_p.set_array(p, N, true, true);
00454 }
00455
00456 void CDynProg::set_q_vector(float64_t *q, int32_t N)
00457 {
00458 if (!(N==m_N))
00459 SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",N, m_N);
00460 m_end_state_distribution_q.set_array(q, N, true, true);
00461 }
00462
00463 void CDynProg::set_a(float64_t *a, int32_t M, int32_t N)
00464 {
00465 ASSERT(N==m_N);
00466 ASSERT(M==N);
00467 m_transition_matrix_a.set_array(a, N, N, true, true);
00468 m_transition_matrix_a_deriv.resize_array(N, N);
00469 }
00470
00471 void CDynProg::set_a_id(int32_t *a, int32_t M, int32_t N)
00472 {
00473 ASSERT(N==m_N);
00474 ASSERT(M==N);
00475 m_transition_matrix_a_id.set_array(a, N, N, true, true);
00476 m_max_a_id = 0;
00477 for (int32_t i=0; i<N; i++)
00478 {
00479 for (int32_t j=0; j<N; j++)
00480 m_max_a_id=CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j));
00481 }
00482 }
00483
00484 void CDynProg::set_a_trans_matrix(
00485 float64_t *a_trans, int32_t num_trans, int32_t num_cols)
00486 {
00487
00488
00489
00490 if (!((num_cols==3) || (num_cols==4)))
00491 SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols);
00492
00493 delete[] trans_list_forward ;
00494 delete[] trans_list_forward_cnt ;
00495 delete[] trans_list_forward_val ;
00496 delete[] trans_list_forward_id ;
00497
00498 trans_list_forward = NULL ;
00499 trans_list_forward_cnt = NULL ;
00500 trans_list_forward_val = NULL ;
00501 trans_list_len = 0 ;
00502
00503 m_transition_matrix_a.zero() ;
00504 m_transition_matrix_a_id.zero() ;
00505
00506 mem_initialized = true ;
00507
00508 trans_list_forward_cnt=NULL ;
00509 trans_list_len = m_N ;
00510 trans_list_forward = new T_STATES*[m_N] ;
00511 trans_list_forward_cnt = new T_STATES[m_N] ;
00512 trans_list_forward_val = new float64_t*[m_N] ;
00513 trans_list_forward_id = new int32_t*[m_N] ;
00514
00515 int32_t start_idx=0;
00516 for (int32_t j=0; j<m_N; j++)
00517 {
00518 int32_t old_start_idx=start_idx;
00519
00520 while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
00521 {
00522 start_idx++;
00523
00524 if (start_idx>1 && start_idx<num_trans)
00525 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00526 }
00527
00528 if (start_idx>1 && start_idx<num_trans)
00529 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00530
00531 int32_t len=start_idx-old_start_idx;
00532 ASSERT(len>=0);
00533
00534 trans_list_forward_cnt[j] = 0 ;
00535
00536 if (len>0)
00537 {
00538 trans_list_forward[j] = new T_STATES[len] ;
00539 trans_list_forward_val[j] = new float64_t[len] ;
00540 trans_list_forward_id[j] = new int32_t[len] ;
00541 }
00542 else
00543 {
00544 trans_list_forward[j] = NULL;
00545 trans_list_forward_val[j] = NULL;
00546 trans_list_forward_id[j] = NULL;
00547 }
00548 }
00549
00550 for (int32_t i=0; i<num_trans; i++)
00551 {
00552 int32_t from_state = (int32_t)a_trans[i] ;
00553 int32_t to_state = (int32_t)a_trans[i+num_trans] ;
00554 float64_t val = a_trans[i+num_trans*2] ;
00555 int32_t id = 0 ;
00556 if (num_cols==4)
00557 id = (int32_t)a_trans[i+num_trans*3] ;
00558
00559
00560 ASSERT(to_state>=0 && to_state<m_N);
00561 ASSERT(from_state>=0 && from_state<m_N);
00562
00563 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00564 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00565 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00566 trans_list_forward_cnt[to_state]++ ;
00567 m_transition_matrix_a.element(from_state, to_state) = val ;
00568 m_transition_matrix_a_id.element(from_state, to_state) = id ;
00569
00570 } ;
00571
00572 m_max_a_id = 0 ;
00573 for (int32_t i=0; i<m_N; i++)
00574 for (int32_t j=0; j<m_N; j++)
00575 {
00576
00577
00578 m_max_a_id = CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)) ;
00579 }
00580
00581 }
00582
00583
00584 void CDynProg::init_mod_words_array(
00585 int32_t * mod_words_input, int32_t num_elem, int32_t num_columns)
00586 {
00587
00588
00589
00590
00591
00592
00593 m_svm_arrays_clean=false ;
00594
00595 ASSERT(m_num_svms==num_elem);
00596 ASSERT(num_columns==2);
00597
00598 m_mod_words.set_array(mod_words_input, num_elem, 2, true, true) ;
00599 m_mod_words_array = m_mod_words.get_array() ;
00600
00601
00602
00603
00604
00605 }
00606
00607 bool CDynProg::check_svm_arrays()
00608 {
00609
00610 if ((m_word_degree.get_dim1()==m_num_degrees) &&
00611 (m_cum_num_words.get_dim1()==m_num_degrees+1) &&
00612 (m_num_words.get_dim1()==m_num_degrees) &&
00613
00614
00615
00616
00617
00618
00619 (m_num_unique_words.get_dim1()==m_num_degrees) &&
00620 (m_mod_words.get_dim1()==m_num_svms) &&
00621 (m_mod_words.get_dim2()==2) &&
00622 (m_sign_words.get_dim1()==m_num_svms) &&
00623 (m_string_words.get_dim1()==m_num_svms))
00624 {
00625 m_svm_arrays_clean=true ;
00626 return true ;
00627 }
00628 else
00629 {
00630 if ((m_num_unique_words.get_dim1()==m_num_degrees) &&
00631 (m_mod_words.get_dim1()==m_num_svms) &&
00632 (m_mod_words.get_dim2()==2) &&
00633 (m_sign_words.get_dim1()==m_num_svms) &&
00634 (m_string_words.get_dim1()==m_num_svms))
00635 SG_PRINT("OK\n") ;
00636 else
00637 SG_PRINT("not OK\n") ;
00638
00639 if (!(m_word_degree.get_dim1()==m_num_degrees))
00640 SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees") ;
00641 if (!(m_cum_num_words.get_dim1()==m_num_degrees+1))
00642 SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1") ;
00643 if (!(m_num_words.get_dim1()==m_num_degrees))
00644 SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees") ;
00645
00646
00647 if (!(m_num_unique_words.get_dim1()==m_num_degrees))
00648 SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees") ;
00649 if (!(m_mod_words.get_dim1()==m_num_svms))
00650 SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms") ;
00651 if (!(m_mod_words.get_dim2()==2))
00652 SG_WARNING("SVM array: m_mod_words.get_dim2()!=2") ;
00653 if (!(m_sign_words.get_dim1()==m_num_svms))
00654 SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms") ;
00655 if (!(m_string_words.get_dim1()==m_num_svms))
00656 SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms") ;
00657
00658 m_svm_arrays_clean=false ;
00659 return false ;
00660 }
00661 }
00662
00663 void CDynProg::set_observation_matrix(float64_t* seq, int32_t* dims, int32_t ndims)
00664 {
00665 if (ndims!=3)
00666 SG_ERROR("Expected 3-dimensional Matrix\n");
00667
00668 int32_t N=dims[0];
00669 int32_t cand_pos=dims[1];
00670 int32_t max_num_features=dims[2];
00671
00672 if (!m_svm_arrays_clean)
00673 {
00674 SG_ERROR( "SVM arrays not clean") ;
00675 return ;
00676 } ;
00677
00678 ASSERT(N==m_N);
00679 ASSERT(cand_pos==m_seq_len);
00680 ASSERT(m_initial_state_distribution_p.get_dim1()==N);
00681 ASSERT(m_end_state_distribution_q.get_dim1()==N);
00682
00683 m_observation_matrix.set_array(seq, N, m_seq_len, max_num_features, true, true) ;
00684 }
00685 int32_t CDynProg::get_num_positions()
00686 {
00687 return m_seq_len;
00688 }
00689
00690 void CDynProg::set_content_type_array(float64_t* seg_path, int32_t rows, int32_t cols)
00691 {
00692 ASSERT(rows==2);
00693 ASSERT(cols==m_seq_len);
00694
00695 if (seg_path!=NULL)
00696 {
00697 int32_t *segment_ids = new int32_t[m_seq_len] ;
00698 float64_t *segment_mask = new float64_t[m_seq_len] ;
00699 for (int32_t i=0; i<m_seq_len; i++)
00700 {
00701 segment_ids[i] = (int32_t)seg_path[2*i] ;
00702 segment_mask[i] = seg_path[2*i+1] ;
00703 }
00704 best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ;
00705 delete[] segment_ids;
00706 delete[] segment_mask;
00707 }
00708 else
00709 {
00710 int32_t *izeros = new int32_t[m_seq_len] ;
00711 float64_t *dzeros = new float64_t[m_seq_len] ;
00712 for (int32_t i=0; i<m_seq_len; i++)
00713 {
00714 izeros[i]=0 ;
00715 dzeros[i]=0.0 ;
00716 }
00717 best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ;
00718 delete[] izeros ;
00719 delete[] dzeros ;
00720 }
00721 }
00722
00723 void CDynProg::set_pos(int32_t* pos, int32_t seq_len)
00724 {
00725
00726
00727
00728 m_pos.set_array(pos, seq_len, true, true) ;
00729 m_seq_len = seq_len;
00730 }
00731
00732 void CDynProg::set_orf_info(int32_t* orf_info, int32_t m, int32_t n)
00733 {
00734 if (n!=2)
00735 SG_ERROR( "orf_info size incorrect %i!=2\n", n) ;
00736
00737 m_orf_info.set_array(orf_info, m, n, true, true) ;
00738 m_orf_info.set_name("orf_info") ;
00739 }
00740
00741 void CDynProg::set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2)
00742 {
00743 if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2))
00744 SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n");
00745
00746 SG_UNREF(m_seq_sparse1);
00747 SG_UNREF(m_seq_sparse2);
00748
00749 m_seq_sparse1=seq_sparse1;
00750 m_seq_sparse2=seq_sparse2;
00751 SG_REF(m_seq_sparse1);
00752 SG_REF(m_seq_sparse2);
00753 }
00754
00755 void CDynProg::set_plif_matrices(CPlifMatrix* pm)
00756 {
00757 SG_UNREF(m_plif_matrices);
00758
00759 m_plif_matrices=pm;
00760
00761 SG_REF(m_plif_matrices);
00762 }
00763
00764 void CDynProg::set_gene_string(char* genestr, int32_t genestr_len)
00765 {
00766 ASSERT(genestr);
00767 ASSERT(genestr_len>0);
00768
00769 m_genestr.set_array(genestr, genestr_len, true, true) ;
00770 }
00771
00772 void CDynProg::set_my_state_seq(int32_t* my_state_seq)
00773 {
00774 ASSERT(my_state_seq && m_seq_len>0);
00775 m_my_state_seq.resize_array(m_seq_len);
00776 for (int32_t i=0; i<m_seq_len; i++)
00777 m_my_state_seq[i]=my_state_seq[i];
00778 }
00779
00780 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq)
00781 {
00782 ASSERT(my_pos_seq && m_seq_len>0);
00783 m_my_pos_seq.resize_array(m_seq_len);
00784 for (int32_t i=0; i<m_seq_len; i++)
00785 m_my_pos_seq[i]=my_pos_seq[i];
00786 }
00787
00788 void CDynProg::set_dict_weights(
00789 float64_t* dictionary_weights, int32_t dict_len, int32_t n)
00790 {
00791 if (m_num_svms!=n)
00792 SG_ERROR( "m_dict_weights array does not match num_svms=%i!=%i\n", m_num_svms, n) ;
00793
00794 m_dict_weights.set_array(dictionary_weights, dict_len, m_num_svms, true, true) ;
00795
00796
00797 m_segment_loss.resize_array(m_max_a_id+1, m_max_a_id+1, 2) ;
00798 m_segment_loss.zero() ;
00799 m_segment_ids.resize_array(m_observation_matrix.get_dim2()) ;
00800 m_segment_mask.resize_array(m_observation_matrix.get_dim2()) ;
00801 m_segment_ids.zero() ;
00802 m_segment_mask.zero() ;
00803 }
00804
00805 void CDynProg::best_path_set_segment_loss(
00806 float64_t* segment_loss, int32_t m, int32_t n)
00807 {
00808
00809 if (2*m!=n)
00810 SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ;
00811
00812 if (m!=m_max_a_id+1)
00813 SG_ERROR( "segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1) ;
00814
00815 m_segment_loss.set_array(segment_loss, m, n/2, 2, true, true) ;
00816
00817
00818
00819 }
00820
00821 void CDynProg::best_path_set_segment_ids_mask(
00822 int32_t* segment_ids, float64_t* segment_mask, int32_t m)
00823 {
00824 int32_t max_id = 0;
00825 for (int32_t i=1;i<m;i++)
00826 max_id = CMath::max(max_id,segment_ids[i]);
00827
00828 m_segment_ids.set_array(segment_ids, m, true, true) ;
00829 m_segment_ids.set_name("m_segment_ids");
00830 m_segment_mask.set_array(segment_mask, m, true, true) ;
00831 m_segment_mask.set_name("m_segment_mask");
00832
00833 m_seg_loss_obj->set_segment_mask(&m_segment_mask);
00834 m_seg_loss_obj->set_segment_ids(&m_segment_ids);
00835 m_seg_loss_obj->compute_loss(m_pos.get_array(), m_seq_len);
00836 }
00837
00838 void CDynProg::get_scores(float64_t **scores, int32_t *m)
00839 {
00840 ASSERT(scores && m);
00841
00842
00843 *m=m_scores.get_dim1();
00844
00845 int32_t sz = sizeof(float64_t)*(*m);
00846
00847 *scores = (float64_t*) malloc(sz);
00848 ASSERT(*scores);
00849
00850 memcpy(*scores,m_scores.get_array(),sz);
00851 }
00852
00853 void CDynProg::get_states(int32_t **states, int32_t *m, int32_t *n)
00854 {
00855 ASSERT(states && m && n);
00856
00857 *m=m_states.get_dim1() ;
00858 *n=m_states.get_dim2() ;
00859
00860 int32_t sz = sizeof(int32_t)*( (*m) * (*n) );
00861
00862 *states = (int32_t*) malloc(sz);
00863 ASSERT(*states);
00864
00865 memcpy(*states,m_states.get_array(),sz);
00866 }
00867
00868 void CDynProg::get_positions(int32_t **positions, int32_t *m, int32_t *n)
00869 {
00870 ASSERT(positions && m && n);
00871
00872 *m=m_positions.get_dim1() ;
00873 *n=m_positions.get_dim2() ;
00874
00875 int32_t sz = sizeof(int32_t)*( (*m) * (*n) );
00876
00877 *positions = (int32_t*) malloc(sz);
00878 ASSERT(*positions);
00879
00880 memcpy(*positions,m_positions.get_array(),sz);
00881 }
00882
00883 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len)
00884 {
00885 ASSERT(scores && seq_len);
00886
00887 *seq_len=m_my_scores.get_dim1();
00888
00889 int32_t sz = sizeof(float64_t)*(*seq_len);
00890
00891 *scores = (float64_t*) malloc(sz);
00892 ASSERT(*scores);
00893
00894 memcpy(*scores,m_my_scores.get_array(),sz);
00895 }
00896
00897 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len)
00898 {
00899 ASSERT(losses && seq_len);
00900
00901 *seq_len=m_my_losses.get_dim1();
00902
00903 int32_t sz = sizeof(float64_t)*(*seq_len);
00904
00905 *losses = (float64_t*) malloc(sz);
00906 ASSERT(*losses);
00907
00908 memcpy(*losses,m_my_losses.get_array(),sz);
00909 }
00910
00912
00913 float64_t CDynProg::best_path_no_b(
00914 int32_t max_iter, int32_t &best_iter, int32_t *my_path)
00915 {
00916 CArray2<T_STATES> psi(max_iter, m_N) ;
00917 psi.set_name("psi");
00918 CArray<float64_t>* delta = new CArray<float64_t>(m_N) ;
00919 CArray<float64_t>* delta_new = new CArray<float64_t>(m_N) ;
00920
00921 {
00922 for (int32_t i=0; i<m_N; i++)
00923 {
00924 delta->element(i) = get_p(i) ;
00925 psi.element(0, i)= 0 ;
00926 }
00927 }
00928
00929 float64_t best_iter_prob = CMath::ALMOST_NEG_INFTY ;
00930 best_iter = 0 ;
00931
00932
00933 for (int32_t t=1; t<max_iter; t++)
00934 {
00935 CArray<float64_t>* dummy;
00936 int32_t NN=m_N ;
00937 for (int32_t j=0; j<NN; j++)
00938 {
00939 float64_t maxj = delta->element(0) + m_transition_matrix_a.element(0,j);
00940 int32_t argmax=0;
00941
00942 for (int32_t i=1; i<NN; i++)
00943 {
00944 float64_t temp = delta->element(i) + m_transition_matrix_a.element(i,j);
00945
00946 if (temp>maxj)
00947 {
00948 maxj=temp;
00949 argmax=i;
00950 }
00951 }
00952 delta_new->element(j)=maxj ;
00953 psi.element(t, j)=argmax ;
00954 }
00955
00956 dummy=delta;
00957 delta=delta_new;
00958 delta_new=dummy;
00959
00960 {
00961 float64_t maxj=delta->element(0)+get_q(0);
00962 int32_t argmax=0;
00963
00964 for (int32_t i=1; i<m_N; i++)
00965 {
00966 float64_t temp=delta->element(i)+get_q(i);
00967
00968 if (temp>maxj)
00969 {
00970 maxj=temp;
00971 argmax=i;
00972 }
00973 }
00974
00975
00976 if (maxj>best_iter_prob)
00977 {
00978 my_path[t]=argmax;
00979 best_iter=t ;
00980 best_iter_prob = maxj ;
00981 } ;
00982 } ;
00983 }
00984
00985
00986 {
00987 for (int32_t t = best_iter; t>0; t--)
00988 {
00989 my_path[t-1]=psi.element(t, my_path[t]);
00990 }
00991 }
00992
00993 delete delta ;
00994 delete delta_new ;
00995
00996 return best_iter_prob ;
00997 }
00998
00999 void CDynProg::best_path_no_b_trans(
01000 int32_t max_iter, int32_t &max_best_iter, int16_t nbest,
01001 float64_t *prob_nbest, int32_t *my_paths)
01002 {
01003
01004 CArray3<T_STATES> psi(max_iter, m_N, nbest) ;
01005 psi.set_name("psi");
01006 CArray3<int16_t> ktable(max_iter, m_N, nbest) ;
01007 ktable.set_name("ktable");
01008 CArray2<int16_t> ktable_ends(max_iter, nbest) ;
01009 ktable_ends.set_name("ktable_ends");
01010
01011 CArray<float64_t> tempvv(nbest*m_N) ;
01012 CArray<int32_t> tempii(nbest*m_N) ;
01013
01014 CArray2<T_STATES> path_ends(max_iter, nbest) ;
01015 path_ends.set_name("path_ends");
01016 CArray2<float64_t> *delta=new CArray2<float64_t>(m_N, nbest) ;
01017 delta->set_name("delta");
01018 CArray2<float64_t> *delta_new=new CArray2<float64_t>(m_N, nbest) ;
01019 delta_new->set_name("delta_new");
01020 CArray2<float64_t> delta_end(max_iter, nbest) ;
01021 delta_end.set_name("delta_end");
01022
01023 CArray2<int32_t> paths(max_iter, nbest) ;
01024 paths.set_name("paths");
01025 paths.set_array(my_paths, max_iter, nbest, false, false) ;
01026
01027 {
01028 for (T_STATES i=0; i<m_N; i++)
01029 {
01030 delta->element(i,0) = get_p(i) ;
01031 for (int16_t k=1; k<nbest; k++)
01032 {
01033 delta->element(i,k)=-CMath::INFTY ;
01034 ktable.element(0,i,k)=0 ;
01035 }
01036 }
01037 }
01038
01039
01040 for (int32_t t=1; t<max_iter; t++)
01041 {
01042 CArray2<float64_t>* dummy=NULL;
01043
01044 for (T_STATES j=0; j<m_N; j++)
01045 {
01046 const T_STATES num_elem = trans_list_forward_cnt[j] ;
01047 const T_STATES *elem_list = trans_list_forward[j] ;
01048 const float64_t *elem_val = trans_list_forward_val[j] ;
01049
01050 int32_t list_len=0 ;
01051 for (int16_t diff=0; diff<nbest; diff++)
01052 {
01053 for (int32_t i=0; i<num_elem; i++)
01054 {
01055 T_STATES ii = elem_list[i] ;
01056
01057 tempvv.element(list_len) = -(delta->element(ii,diff) + elem_val[i]) ;
01058 tempii.element(list_len) = diff*m_N + ii ;
01059 list_len++ ;
01060 }
01061 }
01062 CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01063
01064 for (int16_t k=0; k<nbest; k++)
01065 {
01066 if (k<list_len)
01067 {
01068 delta_new->element(j,k) = -tempvv[k] ;
01069 psi.element(t,j,k) = (tempii[k]%m_N) ;
01070 ktable.element(t,j,k) = (tempii[k]-(tempii[k]%m_N))/m_N ;
01071 }
01072 else
01073 {
01074 delta_new->element(j,k) = -CMath::INFTY ;
01075 psi.element(t,j,k) = 0 ;
01076 ktable.element(t,j,k) = 0 ;
01077 }
01078 }
01079 }
01080
01081 dummy=delta;
01082 delta=delta_new;
01083 delta_new=dummy;
01084
01085 {
01086 int32_t list_len = 0 ;
01087 for (int16_t diff=0; diff<nbest; diff++)
01088 {
01089 for (T_STATES i=0; i<m_N; i++)
01090 {
01091 tempvv.element(list_len) = -(delta->element(i,diff)+get_q(i));
01092 tempii.element(list_len) = diff*m_N + i ;
01093 list_len++ ;
01094 }
01095 }
01096 CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01097
01098 for (int16_t k=0; k<nbest; k++)
01099 {
01100 delta_end.element(t-1,k) = -tempvv[k] ;
01101 path_ends.element(t-1,k) = (tempii[k]%m_N) ;
01102 ktable_ends.element(t-1,k) = (tempii[k]-(tempii[k]%m_N))/m_N ;
01103 }
01104 }
01105 }
01106
01107 {
01108 max_best_iter=0 ;
01109
01110 CArray<float64_t> sort_delta_end(max_iter*nbest) ;
01111 CArray<int16_t> sort_k(max_iter*nbest) ;
01112 CArray<int32_t> sort_t(max_iter*nbest) ;
01113 CArray<int32_t> sort_idx(max_iter*nbest) ;
01114
01115 int32_t i=0 ;
01116 for (int32_t iter=0; iter<max_iter-1; iter++)
01117 for (int16_t k=0; k<nbest; k++)
01118 {
01119 sort_delta_end[i]=-delta_end.element(iter,k) ;
01120 sort_k[i]=k ;
01121 sort_t[i]=iter+1 ;
01122 sort_idx[i]=i ;
01123 i++ ;
01124 }
01125
01126 CMath::qsort_index(sort_delta_end.get_array(), sort_idx.get_array(), (max_iter-1)*nbest) ;
01127
01128 for (int16_t n=0; n<nbest; n++)
01129 {
01130 int16_t k=sort_k[sort_idx[n]] ;
01131 int32_t iter=sort_t[sort_idx[n]] ;
01132 prob_nbest[n]=-sort_delta_end[n] ;
01133
01134 if (iter>max_best_iter)
01135 max_best_iter=iter ;
01136
01137 ASSERT(k<nbest) ;
01138 ASSERT(iter<max_iter) ;
01139
01140 paths.element(iter,n) = path_ends.element(iter-1, k) ;
01141 int16_t q = ktable_ends.element(iter-1, k) ;
01142
01143 for (int32_t t = iter; t>0; t--)
01144 {
01145 paths.element(t-1,n)=psi.element(t, paths.element(t,n), q);
01146 q = ktable.element(t, paths.element(t,n), q) ;
01147 }
01148 }
01149 }
01150
01151 delete delta ;
01152 delete delta_new ;
01153 }
01154
01155 bool CDynProg::extend_orf(
01156 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
01157 int32_t to)
01158 {
01159 #ifdef DYNPROG_TIMING_DETAIL
01160 MyTime.start() ;
01161 #endif
01162
01163 if (start<0)
01164 start=0 ;
01165 if (to<0)
01166 to=0 ;
01167
01168 int32_t orf_target = orf_to-orf_from ;
01169 if (orf_target<0) orf_target+=3 ;
01170
01171 int32_t pos ;
01172 if (last_pos==to)
01173 pos = to-orf_to-3 ;
01174 else
01175 pos=last_pos ;
01176
01177 if (pos<0)
01178 {
01179 #ifdef DYNPROG_TIMING_DETAIL
01180 MyTime.stop() ;
01181 orf_time += MyTime.time_diff_sec() ;
01182 #endif
01183 return true ;
01184 }
01185
01186 for (; pos>=start; pos-=3)
01187 if (m_genestr_stop[pos])
01188 {
01189 #ifdef DYNPROG_TIMING_DETAIL
01190 MyTime.stop() ;
01191 orf_time += MyTime.time_diff_sec() ;
01192 #endif
01193 return false ;
01194 }
01195
01196
01197 last_pos = CMath::min(pos+3,to-orf_to-3) ;
01198
01199 #ifdef DYNPROG_TIMING_DETAIL
01200 MyTime.stop() ;
01201 orf_time += MyTime.time_diff_sec() ;
01202 #endif
01203 return true ;
01204 }
01205
01206 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf,
01207 int16_t nbest, bool with_loss, bool with_multiple_sequences)
01208 {
01209
01210
01211
01212
01213
01214
01215
01216
01217 const float64_t* seq_array = m_observation_matrix.get_array();
01218 m_scores.resize_array(nbest) ;
01219 m_states.resize_array(nbest, m_observation_matrix.get_dim2()) ;
01220 m_positions.resize_array(nbest, m_observation_matrix.get_dim2()) ;
01221
01222 for (int32_t i=0; i<nbest; i++)
01223 {
01224 m_scores[i]=-1;
01225 for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++)
01226 {
01227 m_states.element(i,j)=-1;
01228 m_positions.element(i,j)=-1;
01229 }
01230 }
01231 float64_t* prob_nbest=m_scores.get_array();
01232 int32_t* my_state_seq=m_states.get_array();
01233 int32_t* my_pos_seq=m_positions.get_array();
01234
01235 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
01236 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
01237
01238
01239
01240 #ifdef DYNPROG_TIMING
01241 segment_init_time = 0.0 ;
01242 segment_pos_time = 0.0 ;
01243 segment_extend_time = 0.0 ;
01244 segment_clean_time = 0.0 ;
01245 orf_time = 0.0 ;
01246 svm_init_time = 0.0 ;
01247 svm_pos_time = 0.0 ;
01248 svm_clean_time = 0.0 ;
01249 inner_loop_time = 0.0 ;
01250 content_svm_values_time = 0.0 ;
01251 content_plifs_time = 0.0 ;
01252 inner_loop_max_time = 0.0 ;
01253 long_transition_time = 0.0 ;
01254
01255 MyTime2.start() ;
01256 #endif
01257
01258 if (!m_svm_arrays_clean)
01259 {
01260 SG_ERROR( "SVM arrays not clean") ;
01261 return ;
01262 }
01263
01264 #ifdef DYNPROG_DEBUG
01265 m_transition_matrix_a.set_name("transition_matrix");
01266 m_transition_matrix_a.display_array();
01267 m_mod_words.display_array() ;
01268 m_sign_words.display_array() ;
01269 m_string_words.display_array() ;
01270
01271 #endif
01272
01273 int32_t max_look_back = 1000 ;
01274 bool use_svm = false ;
01275
01276 SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals) ;
01277
01278
01279
01280
01281 CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ;
01282 PEN.set_name("PEN");
01283 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ;
01284 PEN_state_signals.set_name("state_signals");
01285
01286 CArray2<float64_t> seq(m_N, m_seq_len) ;
01287 seq.set_name("seq") ;
01288 seq.zero() ;
01289
01290 #ifdef DYNPROG_DEBUG
01291 SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data);
01292 SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs);
01293 SG_PRINT("m_num_svms: %i\n", m_num_svms);
01294 SG_PRINT("m_num_lin_feat_plifs_cum: ");
01295 for (int i=0; i<=m_num_raw_data; i++)
01296 SG_PRINT(" %i ",m_num_lin_feat_plifs_cum[i]);
01297 SG_PRINT("\n");
01298 #endif
01299
01300 float64_t* svm_value = new float64_t [m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
01301 {
01302 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01303 svm_value[s]=0 ;
01304 }
01305
01306 {
01307
01308
01309
01310 CArray3<float64_t> *seq_input=NULL ;
01311 if (seq_array!=NULL)
01312 {
01313
01314
01315 seq_input=new CArray3<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ;
01316 seq_input->set_name("seq_input") ;
01317
01318
01319 ASSERT(m_seq_sparse1==NULL) ;
01320 ASSERT(m_seq_sparse2==NULL) ;
01321 } else
01322 {
01323 SG_PRINT("using sparse seq_array\n") ;
01324
01325 ASSERT(m_seq_sparse1!=NULL) ;
01326 ASSERT(m_seq_sparse2!=NULL) ;
01327 ASSERT(max_num_signals==2) ;
01328 }
01329
01330 for (int32_t i=0; i<m_N; i++)
01331 for (int32_t j=0; j<m_seq_len; j++)
01332 seq.element(i,j) = 0 ;
01333
01334 for (int32_t i=0; i<m_N; i++)
01335 for (int32_t j=0; j<m_seq_len; j++)
01336 for (int32_t k=0; k<max_num_signals; k++)
01337 {
01338 if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
01339 {
01340
01341 if (seq_input!=NULL)
01342 seq.element(i,j) = seq_input->element(i,j,k) ;
01343 else
01344 {
01345 if (k==0)
01346 seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ;
01347 if (k==1)
01348 seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ;
01349 }
01350 break ;
01351 }
01352 if (PEN_state_signals.element(i,k)!=NULL)
01353 {
01354 if (seq_input!=NULL)
01355 {
01356
01357 if (CMath::is_finite(seq_input->element(i,j,k)))
01358 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(seq_input->element(i,j,k), svm_value) ;
01359 else
01360
01361 seq.element(i,j) = seq_input->element(i, j, k) ;
01362 }
01363 else
01364 {
01365 if (k==0)
01366 {
01367
01368 if (CMath::is_finite(m_seq_sparse1->get_feature(i,j)))
01369 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ;
01370 else
01371
01372 seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ;
01373 }
01374 if (k==1)
01375 {
01376
01377 if (CMath::is_finite(m_seq_sparse2->get_feature(i,j)))
01378 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ;
01379 else
01380
01381 seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ;
01382 }
01383 }
01384 }
01385 else
01386 break ;
01387 }
01388 delete seq_input;
01389 delete svm_value;
01390 }
01391
01392
01393 bool long_transitions = m_long_transitions ;
01394 CArray2<int32_t> long_transition_content_start_position(m_N,m_N) ;
01395 long_transition_content_start_position.set_name("long_transition_content_start_position");
01396 #ifdef DYNPROG_DEBUG
01397 CArray2<int32_t> long_transition_content_end_position(m_N,m_N) ;
01398 long_transition_content_end_position.set_name("long_transition_content_end_position");
01399 #endif
01400 CArray2<int32_t> long_transition_content_start(m_N,m_N) ;
01401 long_transition_content_start.set_name("long_transition_content_start");
01402 CArray2<float64_t> long_transition_content_scores(m_N,m_N) ;
01403 long_transition_content_scores.set_name("long_transition_content_scores");
01404 #ifdef DYNPROG_DEBUG
01405 CArray2<float64_t> long_transition_content_scores_pen(m_N,m_N) ;
01406 long_transition_content_scores_pen.set_name("long_transition_content_scores_pen");
01407 CArray2<float64_t> long_transition_content_scores_prev(m_N,m_N) ;
01408 long_transition_content_scores_prev.set_name("long_transition_content_scores_prev");
01409 CArray2<float64_t> long_transition_content_scores_elem(m_N,m_N) ;
01410 long_transition_content_scores_elem.set_name("long_transition_content_scores_elem");
01411 #endif
01412 CArray2<float64_t> long_transition_content_scores_loss(m_N,m_N) ;
01413 long_transition_content_scores_loss.set_name("long_transition_content_scores_loss");
01414
01415 if (nbest!=1)
01416 {
01417 SG_ERROR("Long transitions are not supported for nbest!=1") ;
01418 long_transitions = false ;
01419 }
01420 long_transition_content_scores.set_const(-CMath::INFTY);
01421 #ifdef DYNPROG_DEBUG
01422 long_transition_content_scores_pen.set_const(0) ;
01423 long_transition_content_scores_elem.set_const(0) ;
01424 long_transition_content_scores_prev.set_const(0) ;
01425 #endif
01426 if (with_loss)
01427 long_transition_content_scores_loss.set_const(0) ;
01428 long_transition_content_start.zero() ;
01429 long_transition_content_start_position.zero() ;
01430 #ifdef DYNPROG_DEBUG
01431 long_transition_content_end_position.zero() ;
01432 #endif
01433
01434 svm_value = new float64_t [m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
01435 {
01436 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01437 svm_value[s]=0 ;
01438 }
01439
01440 CArray2<int32_t> look_back(m_N,m_N) ;
01441 look_back.set_name("look_back");
01442 CArray2<int32_t> look_back_orig(m_N,m_N) ;
01443 look_back.set_name("look_back_orig");
01444
01445 {
01446 for (int32_t i=0; i<m_N; i++)
01447 for (int32_t j=0; j<m_N; j++)
01448 {
01449 look_back.set_element(INT_MAX, i, j) ;
01450 look_back_orig.set_element(INT_MAX, i, j) ;
01451 }
01452
01453 for (int32_t j=0; j<m_N; j++)
01454 {
01455
01456 const T_STATES num_elem = trans_list_forward_cnt[j] ;
01457 const T_STATES *elem_list = trans_list_forward[j] ;
01458
01459 for (int32_t i=0; i<num_elem; i++)
01460 {
01461 T_STATES ii = elem_list[i] ;
01462
01463 CPlifBase *penij=PEN.element(j, ii) ;
01464 if (penij==NULL)
01465 {
01466 if (long_transitions)
01467 {
01468 look_back.set_element(m_long_transition_threshold, j, ii) ;
01469 look_back_orig.set_element(m_long_transition_max, j, ii) ;
01470 }
01471 continue ;
01472 }
01473
01474
01475 if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions))
01476 {
01477 look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01478 look_back_orig.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01479 if (CMath::ceil(penij->get_max_value()) > max_look_back)
01480 {
01481 SG_DEBUG( "%d %d -> value: %f\n", ii,j,penij->get_max_value());
01482 max_look_back = (int32_t) (CMath::ceil(penij->get_max_value()));
01483 }
01484 }
01485 else
01486 {
01487 look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ;
01488 look_back_orig.set_element( (int32_t)CMath::ceil(penij->get_max_value()), j, ii) ;
01489 }
01490
01491 if (penij->uses_svm_values())
01492 use_svm=true ;
01493 }
01494 }
01495
01496 if (long_transitions)
01497 max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ;
01498
01499
01500 max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ;
01501
01502 int32_t num_long_transitions = 0 ;
01503 for (int32_t i=0; i<m_N; i++)
01504 for (int32_t j=0; j<m_N; j++)
01505 {
01506 if (look_back.get_element(i,j)==m_long_transition_threshold)
01507 num_long_transitions++ ;
01508 if (look_back.get_element(i,j)==INT_MAX)
01509 {
01510 if (long_transitions)
01511 {
01512 look_back.set_element(m_long_transition_threshold, i, j) ;
01513 look_back_orig.set_element(m_long_transition_max, i, j) ;
01514 }
01515 else
01516 {
01517 look_back.set_element(max_look_back, i, j) ;
01518 look_back_orig.set_element(m_long_transition_max, i, j) ;
01519 }
01520 }
01521 }
01522 SG_DEBUG("Using %i long transitions\n", num_long_transitions) ;
01523 }
01524
01525
01526 SG_DEBUG("use_svm=%i\n", use_svm) ;
01527
01528 SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest);
01529 const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ;
01530 SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
01531
01532
01533
01534
01535
01536
01537
01538
01539
01540
01541
01542
01543
01544
01545 ASSERT(nbest<32000) ;
01546
01547
01548
01549 CArray3<float64_t> delta(m_seq_len, m_N, nbest) ;
01550 delta.set_name("delta");
01551 float64_t* delta_array = delta.get_array() ;
01552
01553
01554 CArray3<T_STATES> psi(m_seq_len, m_N, nbest) ;
01555 psi.set_name("psi");
01556
01557
01558 CArray3<int16_t> ktable(m_seq_len, m_N, nbest) ;
01559 ktable.set_name("ktable");
01560
01561
01562 CArray3<int32_t> ptable(m_seq_len, m_N, nbest) ;
01563 ptable.set_name("ptable");
01564
01565
01566 CArray<float64_t> delta_end(nbest) ;
01567 delta_end.set_name("delta_end");
01568
01569
01570 CArray<T_STATES> path_ends(nbest) ;
01571 path_ends.set_name("path_ends");
01572
01573
01574 CArray<int16_t> ktable_end(nbest) ;
01575 ktable_end.set_name("ktable_end");
01576
01577
01578 float64_t * fixedtempvv=new float64_t[look_back_buflen] ;
01579 memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
01580 int32_t * fixedtempii=new int32_t[look_back_buflen] ;
01581 memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
01582
01583 CArray<float64_t> oldtempvv(look_back_buflen) ;
01584 oldtempvv.set_name("oldtempvv");
01585 CArray<float64_t> oldtempvv2(look_back_buflen) ;
01586 oldtempvv2.set_name("oldtempvv2");
01587
01588
01589
01590 CArray<int32_t> oldtempii(look_back_buflen) ;
01591 oldtempii.set_name("oldtempii");
01592 CArray<int32_t> oldtempii2(look_back_buflen) ;
01593 oldtempii2.set_name("oldtempii2");
01594
01595
01596 CArray<T_STATES> state_seq(m_seq_len) ;
01597 state_seq.set_name("state_seq");
01598
01599
01600 CArray<int32_t> pos_seq(m_seq_len) ;
01601 pos_seq.set_name("pos_seq");
01602
01603
01604
01605 m_dict_weights.set_name("dict_weights") ;
01606 m_word_degree.set_name("word_degree") ;
01607 m_cum_num_words.set_name("cum_num_words") ;
01608 m_num_words.set_name("num_words") ;
01609
01610
01611
01612 m_num_unique_words.set_name("num_unique_words") ;
01613
01614 PEN.set_name("PEN") ;
01615 seq.set_name("seq") ;
01616
01617 delta.set_name("delta") ;
01618 psi.set_name("psi") ;
01619 ktable.set_name("ktable") ;
01620 ptable.set_name("ptable") ;
01621 delta_end.set_name("delta_end") ;
01622 path_ends.set_name("path_ends") ;
01623 ktable_end.set_name("ktable_end") ;
01624
01625 #ifdef USE_TMP_ARRAYCLASS
01626 fixedtempvv.set_name("fixedtempvv") ;
01627 fixedtempii.set_name("fixedtempvv") ;
01628 #endif
01629
01630 oldtempvv.set_name("oldtempvv") ;
01631 oldtempvv2.set_name("oldtempvv2") ;
01632 oldtempii.set_name("oldtempii") ;
01633 oldtempii2.set_name("oldtempii2") ;
01634
01635
01637
01638 #ifdef DYNPROG_DEBUG
01639 state_seq.display_size() ;
01640 pos_seq.display_size() ;
01641
01642 m_dict_weights.display_size() ;
01643 m_word_degree.display_array() ;
01644 m_cum_num_words.display_array() ;
01645 m_num_words.display_array() ;
01646
01647
01648
01649 m_num_unique_words.display_array() ;
01650
01651 PEN.display_size() ;
01652 PEN_state_signals.display_size() ;
01653 seq.display_size() ;
01654 m_orf_info.display_size() ;
01655
01656
01657 delta.display_size() ;
01658 psi.display_size() ;
01659 ktable.display_size() ;
01660 ptable.display_size() ;
01661 delta_end.display_size() ;
01662 path_ends.display_size() ;
01663 ktable_end.display_size() ;
01664
01665 #ifdef USE_TMP_ARRAYCLASS
01666 fixedtempvv.display_size() ;
01667 fixedtempii.display_size() ;
01668 #endif
01669
01670
01671
01672
01673 state_seq.display_size() ;
01674 pos_seq.display_size() ;
01675
01676
01677
01678 #endif //DYNPROG_DEBUG
01679
01681
01682
01683
01684 {
01685 for (int32_t s=0; s<m_num_svms; s++)
01686 ASSERT(m_string_words_array[s]<1) ;
01687 }
01688
01689
01690
01691
01692
01693 {
01694
01695 for (T_STATES i=0; i<m_N; i++)
01696 {
01697
01698 delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ;
01699 psi.element(0,i,0) = 0 ;
01700 if (nbest>1)
01701 ktable.element(0,i,0) = 0 ;
01702 ptable.element(0,i,0) = 0 ;
01703 for (int16_t k=1; k<nbest; k++)
01704 {
01705 int32_t dim1, dim2, dim3 ;
01706 delta.get_array_size(dim1, dim2, dim3) ;
01707
01708
01709 delta.element(delta_array, 0, i, k, m_seq_len, m_N) = -CMath::INFTY ;
01710 psi.element(0,i,0) = 0 ;
01711 if (nbest>1)
01712 ktable.element(0,i,k) = 0 ;
01713 ptable.element(0,i,k) = 0 ;
01714 }
01715
01716
01717
01718
01719
01720
01721
01722
01723
01724
01725
01726
01727
01728
01729
01730
01731
01732
01733 }
01734 }
01735
01736
01737 SG_DEBUG("START_RECURSION \n\n");
01738
01739
01740 for (int32_t t=1; t<m_seq_len; t++)
01741 {
01742
01743
01744
01745
01746
01747 for (T_STATES j=0; j<m_N; j++)
01748 {
01749 if (seq.element(j,t)<=-1e20)
01750 {
01751 for (int16_t k=0; k<nbest; k++)
01752 {
01753 delta.element(delta_array, t, j, k, m_seq_len, m_N) = seq.element(j,t) ;
01754 psi.element(t,j,k) = 0 ;
01755 if (nbest>1)
01756 ktable.element(t,j,k) = 0 ;
01757 ptable.element(t,j,k) = 0 ;
01758 }
01759 }
01760 else
01761 {
01762 const T_STATES num_elem = trans_list_forward_cnt[j] ;
01763 const T_STATES *elem_list = trans_list_forward[j] ;
01764 const float64_t *elem_val = trans_list_forward_val[j] ;
01765 const int32_t *elem_id = trans_list_forward_id[j] ;
01766
01767 int32_t fixed_list_len = 0 ;
01768 float64_t fixedtempvv_ = CMath::INFTY ;
01769 int32_t fixedtempii_ = 0 ;
01770 bool fixedtemplong = false ;
01771
01772 for (int32_t i=0; i<num_elem; i++)
01773 {
01774 T_STATES ii = elem_list[i] ;
01775
01776 const CPlifBase * penalty = PEN.element(j,ii) ;
01777
01778
01779
01780
01781
01782
01783
01784
01785
01786
01787
01788
01789 int32_t look_back_ = look_back.element(j, ii) ;
01790
01791 int32_t orf_from = m_orf_info.element(ii,0) ;
01792 int32_t orf_to = m_orf_info.element(j,1) ;
01793 if((orf_from!=-1)!=(orf_to!=-1))
01794 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01795 ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01796
01797 int32_t orf_target = -1 ;
01798 if (orf_from!=-1)
01799 {
01800 orf_target=orf_to-orf_from ;
01801 if (orf_target<0)
01802 orf_target+=3 ;
01803 ASSERT(orf_target>=0 && orf_target<3) ;
01804 }
01805
01806 int32_t orf_last_pos = m_pos[t] ;
01807
01808
01809
01810 #ifdef DYNPROG_TIMING
01811 MyTime3.start() ;
01812 #endif
01813 int32_t num_ok_pos = 0 ;
01814 float64_t last_mval=0 ;
01815 int32_t last_ts = 0 ;
01816
01817 for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--)
01818 {
01819 bool ok ;
01820
01821
01822
01823
01824
01825
01826
01827
01828
01829 if (orf_target==-1)
01830 ok=true ;
01831 else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target)
01832 ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ;
01833 else
01834 ok=false ;
01835
01836 if (ok)
01837 {
01838
01839 float64_t segment_loss = 0.0 ;
01840 if (with_loss)
01841 {
01842 segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]);
01843
01844
01845 }
01847
01849
01850 int32_t frame = orf_from;
01851 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
01852
01853 float64_t pen_val = 0.0 ;
01854 if (penalty)
01855 {
01856 #ifdef DYNPROG_TIMING_DETAIL
01857 MyTime.start() ;
01858 #endif
01859 pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01860
01861 #ifdef DYNPROG_TIMING_DETAIL
01862 MyTime.stop() ;
01863 content_plifs_time += MyTime.time_diff_sec() ;
01864 #endif
01865 }
01866
01867 #ifdef DYNPROG_TIMING_DETAIL
01868 MyTime.start() ;
01869 #endif
01870 num_ok_pos++ ;
01871
01872 if (nbest==1)
01873 {
01874 float64_t val = elem_val[i] + pen_val ;
01875 if (with_loss)
01876 val += segment_loss ;
01877
01878 float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ;
01879
01880 if (mval<fixedtempvv_)
01881 {
01882 fixedtempvv_ = mval ;
01883 fixedtempii_ = ii + ts*m_N;
01884 fixed_list_len = 1 ;
01885 fixedtemplong = false ;
01886 }
01887 last_mval = mval ;
01888 last_ts = ts ;
01889 }
01890 else
01891 {
01892 for (int16_t diff=0; diff<nbest; diff++)
01893 {
01894 float64_t val = elem_val[i] ;
01895 val += pen_val ;
01896 if (with_loss)
01897 val += segment_loss ;
01898
01899 float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ;
01900
01901
01902
01903
01904
01905 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
01906 {
01907 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
01908 {
01909 fixedtempvv[fixed_list_len] = mval ;
01910 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest;
01911 fixed_list_len++ ;
01912 }
01913 else
01914 {
01915 int32_t addhere = fixed_list_len;
01916 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
01917 addhere--;
01918
01919
01920 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
01921 {
01922 fixedtempvv[jj] = fixedtempvv[jj-1];
01923 fixedtempii[jj] = fixedtempii[jj-1];
01924 }
01925
01926 fixedtempvv[addhere] = mval;
01927 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest;
01928
01929 if (fixed_list_len < nbest)
01930 fixed_list_len++;
01931 }
01932 }
01933 }
01934 }
01935 #ifdef DYNPROG_TIMING_DETAIL
01936 MyTime.stop() ;
01937 inner_loop_max_time += MyTime.time_diff_sec() ;
01938 #endif
01939 }
01940 }
01941 #ifdef DYNPROG_TIMING
01942 MyTime3.stop() ;
01943 inner_loop_time += MyTime3.time_diff_sec() ;
01944 #endif
01945 }
01946 for (int32_t i=0; i<num_elem; i++)
01947 {
01948 T_STATES ii = elem_list[i] ;
01949
01950 const CPlifBase * penalty = PEN.element(j,ii) ;
01951
01952
01953
01954
01955
01956
01957
01958
01959
01960
01961
01962
01963 int32_t look_back_ = look_back.element(j, ii) ;
01964 int32_t look_back_orig_ = look_back_orig.element(j, ii) ;
01965
01966 int32_t orf_from = m_orf_info.element(ii,0) ;
01967 int32_t orf_to = m_orf_info.element(j,1) ;
01968 if((orf_from!=-1)!=(orf_to!=-1))
01969 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01970 ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01971
01972 int32_t orf_target = -1 ;
01973 if (orf_from!=-1)
01974 {
01975 orf_target=orf_to-orf_from ;
01976 if (orf_target<0)
01977 orf_target+=3 ;
01978 ASSERT(orf_target>=0 && orf_target<3) ;
01979 }
01980
01981
01982
01983
01984 #ifdef DYNPROG_TIMING
01985 MyTime3.start() ;
01986 #endif
01987
01988
01989
01990
01991
01992
01993
01994
01995 #ifdef DYNPROG_TIMING
01996 MyTime3.start() ;
01997 #endif
01998
01999 if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold )
02000 {
02001
02002 int32_t start = long_transition_content_start.get_element(ii, j) ;
02003 int32_t end_3p_part = start ;
02004 for (int32_t start_3p_part=start; m_pos[t]-m_pos[start_3p_part] > m_long_transition_threshold ; start_3p_part++)
02005 {
02006
02007 while (end_3p_part<=t && m_pos[end_3p_part+1]-m_pos[start_3p_part]<=m_long_transition_threshold)
02008 end_3p_part++ ;
02009
02010 ASSERT(m_pos[end_3p_part+1]-m_pos[start_3p_part] > m_long_transition_threshold || end_3p_part==t) ;
02011 ASSERT(m_pos[end_3p_part]-m_pos[start_3p_part] <= m_long_transition_threshold) ;
02012
02013 float64_t pen_val = 0.0;
02014
02015 if (penalty)
02016 {
02017 int32_t frame = m_orf_info.element(ii,0);
02018 lookup_content_svm_values(start_3p_part, end_3p_part, m_pos[start_3p_part], m_pos[end_3p_part], svm_value, frame);
02019 pen_val = penalty->lookup_penalty(m_pos[end_3p_part]-m_pos[start_3p_part], svm_value) ;
02020 }
02021
02022
02023
02024
02025
02026
02027
02028 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_3p_part, ii, 0, m_seq_len, m_N) ) ;
02029
02030
02031 float64_t segment_loss_part1=0.0 ;
02032 if (with_loss)
02033 {
02034
02035 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_3p_part , end_3p_part, elem_id[i]);
02036
02037 mval_trans -= segment_loss_part1 ;
02038 }
02039
02040
02041 if (0)
02042 {
02043
02044
02045
02046
02047 long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ;
02048 long_transition_content_start_position.set_element(0, ii, j) ;
02049 if (with_loss)
02050 long_transition_content_scores_loss.set_element(0.0, ii, j) ;
02051 #ifdef DYNPROG_DEBUG
02052 long_transition_content_scores_pen.set_element(0.0, ii, j) ;
02053 long_transition_content_scores_elem.set_element(0.0, ii, j) ;
02054 long_transition_content_scores_prev.set_element(0.0, ii, j) ;
02055 long_transition_content_end_position.set_element(0, ii, j) ;
02056 #endif
02057 }
02058 if (with_loss)
02059 {
02060 float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ;
02061 float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_3p_part, elem_id[i]);
02062 float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ;
02063 long_transition_content_scores.set_element(score, ii, j) ;
02064 long_transition_content_scores_loss.set_element(new_loss, ii, j) ;
02065 #ifdef DYNPROG_DEBUG
02066 long_transition_content_end_position.set_element(end_3p_part, ii, j) ;
02067 #endif
02068
02069 }
02070 if (-long_transition_content_scores.get_element(ii, j) > mval_trans )
02071 {
02072
02073 long_transition_content_scores.set_element(-mval_trans, ii, j) ;
02074 long_transition_content_start_position.set_element(start_3p_part, ii, j) ;
02075 if (with_loss)
02076 long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ;
02077 #ifdef DYNPROG_DEBUG
02078 long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ;
02079 long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ;
02080 long_transition_content_scores_prev.set_element(delta.element(delta_array, start_3p_part, ii, 0, m_seq_len, m_N), ii, j) ;
02081
02082
02083
02084 long_transition_content_end_position.set_element(end_3p_part, ii, j) ;
02085 #endif
02086 }
02087
02088 long_transition_content_start.set_element(start_3p_part, ii, j) ;
02089 }
02090
02091
02092
02093
02094
02095
02096
02097 int ts = t;
02098 while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold)
02099 ts-- ;
02100
02101 if (ts>0)
02102 {
02103 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) ;
02104
02105
02106
02107 float pen_val = 0.0 ;
02108 if (penalty)
02109 {
02110 int32_t frame = orf_from ;
02111 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
02112 pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
02113 }
02114
02115 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val*0.5) ;
02116
02117 {
02118 #ifdef DYNPROG_DEBUG
02119 float64_t segment_loss_part2=0.0 ;
02120 float64_t segment_loss_part1=0.0 ;
02121 #endif
02122 float64_t segment_loss_total=0.0 ;
02123
02124 if (with_loss)
02125 {
02126
02127
02128 #ifdef DYNPROG_DEBUG
02129
02130 segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]);
02131
02132 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]);
02133 #endif
02134 segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]);
02135 mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ;
02136 }
02137
02138 #ifdef DYNPROG_DEBUG
02139 if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561)
02140 {
02141 SG_PRINT("Part2: %i,%i,%i: val=%1.6f pen_val*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i); scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n",
02142 m_pos[t], j, ii, -mval, 0.5*pen_val, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1],
02143 long_transition_content_scores.get_element(ii, j),
02144 long_transition_content_scores_pen.get_element(ii, j),
02145 long_transition_content_scores_prev.get_element(ii, j),
02146 long_transition_content_scores_elem.get_element(ii, j),
02147 long_transition_content_scores_loss.get_element(ii, j),
02148 m_pos[long_transition_content_start_position.get_element(ii,j)],
02149 m_pos[long_transition_content_end_position.get_element(ii,j)],
02150 m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ;
02151 SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] );
02152 }
02153
02154 if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3)
02155 {
02156 SG_ERROR("LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n",
02157 segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t],
02158 long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)],
02159 segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t],
02160 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j),
02161 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ;
02162 }
02163 #endif
02164 }
02165
02166
02167 if ((mval < fixedtempvv_) &&
02168 (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_ )
02169 {
02170
02171 int32_t fromtjk = fixedtempii_ ;
02172
02173
02174
02175
02176
02177 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) ;
02178
02179 fixedtempvv_ = mval ;
02180 fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ;
02181 fixed_list_len = 1 ;
02182 fixedtemplong = true ;
02183 }
02184
02185
02186
02187
02188
02189
02190 }
02191 }
02192 }
02193 #ifdef DYNPROG_TIMING
02194 MyTime3.stop() ;
02195 long_transition_time += MyTime3.time_diff_sec() ;
02196 #endif
02197
02198
02199 int32_t numEnt = fixed_list_len;
02200
02201 float64_t minusscore;
02202 int64_t fromtjk;
02203
02204 for (int16_t k=0; k<nbest; k++)
02205 {
02206 if (k<numEnt)
02207 {
02208 if (nbest==1)
02209 {
02210 minusscore = fixedtempvv_ ;
02211 fromtjk = fixedtempii_ ;
02212 }
02213 else
02214 {
02215 minusscore = fixedtempvv[k];
02216 fromtjk = fixedtempii[k];
02217 }
02218
02219 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -minusscore + seq.element(j,t);
02220 psi.element(t,j,k) = (fromtjk%m_N) ;
02221 if (nbest>1)
02222 ktable.element(t,j,k) = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ;
02223 ptable.element(t,j,k) = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ;
02224 }
02225 else
02226 {
02227 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -CMath::INFTY ;
02228 psi.element(t,j,k) = 0 ;
02229 if (nbest>1)
02230 ktable.element(t,j,k) = 0 ;
02231 ptable.element(t,j,k) = 0 ;
02232 }
02233 }
02234 }
02235 }
02236 }
02237
02238 {
02239 int32_t list_len = 0 ;
02240 for (int16_t diff=0; diff<nbest; diff++)
02241 {
02242 for (T_STATES i=0; i<m_N; i++)
02243 {
02244 oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ;
02245 oldtempii[list_len] = i + diff*m_N ;
02246 list_len++ ;
02247 }
02248 }
02249
02250 CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
02251
02252 #ifdef nonsense
02253 int t = m_seq_len-1;
02254 for (T_STATES j=0; j<m_N; j++)
02255 {
02256 if (seq.element(j,t)<=-1e20)
02257 {
02258 for (int16_t k=0; k<nbest; k++)
02259 {
02260 delta.element(delta_array, t, j, k, m_seq_len, m_N) = seq.element(j,t) ;
02261 psi.element(t,j,k) = 0 ;
02262 if (nbest>1)
02263 ktable.element(t,j,k) = 0 ;
02264 ptable.element(t,j,k) = 0 ;
02265 }
02266 }
02267 else
02268 {
02269 const T_STATES num_elem = trans_list_forward_cnt[j] ;
02270 const T_STATES *elem_list = trans_list_forward[j] ;
02271 const float64_t *elem_val = trans_list_forward_val[j] ;
02272 const int32_t *elem_id = trans_list_forward_id[j] ;
02273
02274 int32_t fixed_list_len = 0 ;
02275 float64_t fixedtempvv_ = CMath::INFTY ;
02276 int32_t fixedtempii_ = 0 ;
02277 bool fixedtemplong = false ;
02278
02279 for (int32_t i=0; i<num_elem; i++)
02280 {
02281 T_STATES ii = elem_list[i] ;
02282
02283 const CPlifBase * penalty = PEN.element(j,ii) ;
02284
02285
02286
02287
02288
02289
02290
02291
02292
02293
02294
02295
02296 int32_t look_back_ = look_back.element(j, ii) ;
02297 int32_t look_back_orig_ = look_back_orig.element(j, ii) ;
02298
02299 int32_t orf_from = m_orf_info.element(ii,0) ;
02300 int32_t orf_to = m_orf_info.element(j,1) ;
02301 if((orf_from!=-1)!=(orf_to!=-1))
02302 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
02303 ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
02304
02305 int32_t orf_target = -1 ;
02306 if (orf_from!=-1)
02307 {
02308 orf_target=orf_to-orf_from ;
02309 if (orf_target<0)
02310 orf_target+=3 ;
02311 ASSERT(orf_target>=0 && orf_target<3) ;
02312 }
02313
02314 int32_t orf_last_pos = m_pos[t] ;
02315
02316 int ts = t;
02317 while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold)
02318 ts-- ;
02319
02320 if (ts>0)
02321 {
02322 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) ;
02323
02324
02325 float pen_val = 0.0 ;
02326 if (penalty)
02327 {
02328 int32_t frame = orf_from ;
02329 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
02330 pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
02331 }
02332
02333 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val*0.5) ;
02334
02335 {
02336 #ifdef DYNPROG_DEBUG
02337 float64_t segment_loss_part2=0.0 ;
02338 float64_t segment_loss_part1=0.0 ;
02339 #endif
02340 float64_t segment_loss_total=0.0 ;
02341
02342 if (with_loss)
02343 {
02344
02345
02346 #ifdef DYNPROG_DEBUG
02347
02348 segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]);
02349
02350 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]);
02351 #endif
02352 segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]);
02353 mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ;
02354 }
02355
02356 #ifdef DYNPROG_DEBUG
02357 if (1)
02358 {
02359 SG_PRINT("Part2: %i,%i,%i: val=%1.6f pen_val*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i); scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n",
02360 m_pos[t], j, ii, -mval, 0.5*pen_val, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1],
02361 long_transition_content_scores.get_element(ii, j),
02362 long_transition_content_scores_pen.get_element(ii, j),
02363 long_transition_content_scores_prev.get_element(ii, j),
02364 long_transition_content_scores_elem.get_element(ii, j),
02365 long_transition_content_scores_loss.get_element(ii, j),
02366 m_pos[long_transition_content_start_position.get_element(ii,j)],
02367 m_pos[long_transition_content_end_position.get_element(ii,j)],
02368 m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ;
02369 SG_PRINT("oldtempvv: %1.6f, from_state:%i from_pos:%i\n ",-oldtempvv[0], (oldtempii[0]%m_N), m_pos[(oldtempii[0]-(oldtempii[0]%(m_N*nbest)))/(m_N*nbest)] );
02370 }
02371
02372 if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3)
02373 {
02374 SG_ERROR("LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n",
02375 segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t],
02376 long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)],
02377 segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t],
02378 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j),
02379 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ;
02380 }
02381 #endif
02382 }
02383
02384
02385 if ((mval < oldtempvv[0]) &&
02386 (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_ )
02387 {
02388
02389 int32_t fromtjk = fixedtempii_ ;
02390
02391
02392
02393
02394
02395 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) ;
02396
02397 oldtempvv[0] = mval ;
02398 oldtempii[0] = ii ;
02399 }
02400 }
02401
02402 }
02403 }
02404 }
02405 #endif
02406 for (int16_t k=0; k<nbest; k++)
02407 {
02408 delta_end.element(k) = -oldtempvv[k] ;
02409 path_ends.element(k) = (oldtempii[k]%m_N) ;
02410 if (nbest>1)
02411 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ;
02412 }
02413
02414
02415 }
02416
02417 {
02418 for (int16_t k=0; k<nbest; k++)
02419 {
02420 prob_nbest[k]= delta_end.element(k) ;
02421
02422 int32_t i = 0 ;
02423 state_seq[i] = path_ends.element(k) ;
02424 int16_t q = 0 ;
02425 if (nbest>1)
02426 q=ktable_end.element(k) ;
02427 pos_seq[i] = m_seq_len-1 ;
02428
02429 while (pos_seq[i]>0)
02430 {
02431 ASSERT(i+1<m_seq_len);
02432
02433 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
02434 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ;
02435 if (nbest>1)
02436 q = ktable.element(pos_seq[i], state_seq[i], q) ;
02437 i++ ;
02438 }
02439
02440 int32_t num_states = i+1 ;
02441 for (i=0; i<num_states;i++)
02442 {
02443 my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ;
02444 my_pos_seq[i+k*m_seq_len] = pos_seq[num_states-i-1] ;
02445 }
02446 if (num_states<m_seq_len)
02447 {
02448 my_state_seq[num_states+k*m_seq_len]=-1 ;
02449 my_pos_seq[num_states+k*m_seq_len]=-1 ;
02450 }
02451 }
02452 }
02453
02454
02455
02456
02457
02458 #ifdef DYNPROG_TIMING
02459 MyTime2.stop() ;
02460
02461
02462 SG_PRINT("Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n content_svm_values_time=%1.2f content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec()) ;
02463 #endif
02464
02465 delete[] fixedtempvv ;
02466 delete[] fixedtempii ;
02467 }
02468
02469 void CDynProg::best_path_trans_deriv(
02470 int32_t *my_state_seq, int32_t *my_pos_seq,
02471 int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
02472 {
02473 m_initial_state_distribution_p_deriv.resize_array(m_N) ;
02474 m_end_state_distribution_q_deriv.resize_array(m_N) ;
02475 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
02476
02477
02478 m_my_scores.resize_array(my_seq_len);
02479 m_my_losses.resize_array(my_seq_len);
02480 float64_t* my_scores=m_my_scores.get_array();
02481 float64_t* my_losses=m_my_losses.get_array();
02482 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
02483 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
02484
02485 if (!m_svm_arrays_clean)
02486 {
02487 SG_ERROR( "SVM arrays not clean") ;
02488 return ;
02489 } ;
02490
02491
02492
02493
02494
02495 bool use_svm = false ;
02496
02497 CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ;
02498 PEN.set_name("PEN");
02499 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ;
02500 PEN_state_signals.set_name("PEN_state_signals");
02501 CArray3<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ;
02502 seq_input.set_name("seq_input");
02503
02504 {
02505 for (int32_t i=0; i<m_N; i++)
02506 for (int32_t j=0; j<m_N; j++)
02507 {
02508 CPlifBase *penij=PEN.element(i,j) ;
02509 if (penij==NULL)
02510 continue ;
02511
02512 if (penij->uses_svm_values())
02513 use_svm=true ;
02514 penij->penalty_clear_derivative() ;
02515 }
02516 for (int32_t i=0; i<m_N; i++)
02517 for (int32_t j=0; j<max_num_signals; j++)
02518 {
02519 CPlifBase *penij=PEN_state_signals.element(i,j) ;
02520 if (penij==NULL)
02521 continue ;
02522 if (penij->uses_svm_values())
02523 use_svm=true ;
02524 penij->penalty_clear_derivative() ;
02525 }
02526 }
02527
02528 {
02529
02530 for (int32_t i=0; i<m_N; i++)
02531 {
02532 m_initial_state_distribution_p_deriv.element(i)=0 ;
02533 m_end_state_distribution_q_deriv.element(i)=0 ;
02534 for (int32_t j=0; j<m_N; j++)
02535 m_transition_matrix_a_deriv.element(i,j)=0 ;
02536 }
02537 }
02538
02539 {
02540 for (int32_t i=0; i<my_seq_len; i++)
02541 {
02542 my_scores[i]=0.0 ;
02543 my_losses[i]=0.0 ;
02544 }
02545 }
02546
02547
02548
02549
02550
02551
02552
02553 float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
02554 float64_t* svm_value_part1 = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
02555 float64_t* svm_value_part2 = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
02556 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02557 {
02558 svm_value[s]=0 ;
02559 svm_value_part1[s]=0 ;
02560 svm_value_part2[s]=0 ;
02561 }
02562
02563
02564 float64_t total_score = 0.0 ;
02565 float64_t total_loss = 0.0 ;
02566
02567
02568 ASSERT(my_state_seq[0]>=0) ;
02569 m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
02570 my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ;
02571
02572 ASSERT(my_state_seq[my_seq_len-1]>=0) ;
02573 m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
02574 my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
02575
02576
02577 total_score += my_scores[0] + my_scores[my_seq_len-1] ;
02578
02579
02580 SG_DEBUG( "m_seq_len=%i\n", my_seq_len) ;
02581 for (int32_t i=0; i<my_seq_len-1; i++)
02582 {
02583 if (my_state_seq[i+1]==-1)
02584 break ;
02585 int32_t from_state = my_state_seq[i] ;
02586 int32_t to_state = my_state_seq[i+1] ;
02587 int32_t from_pos = my_pos_seq[i] ;
02588 int32_t to_pos = my_pos_seq[i+1] ;
02589
02590
02591
02592 int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ;
02593 my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id);
02594
02595 #ifdef DYNPROG_DEBUG
02596
02597
02598 if (i>0)
02599 {
02600 float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id);
02601 float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id);
02602 float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id);
02603 SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3);
02604 if (CMath::abs(loss1+loss2-loss3)>0)
02605 {
02606 SG_PRINT( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02607 }
02608 }
02609 io->set_loglevel(M_DEBUG) ;
02610 SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02611 #endif
02612
02613 m_transition_matrix_a_deriv.element(from_state, to_state)++ ;
02614 my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ;
02615
02616 #ifdef DYNPROG_DEBUG
02617 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02618 #endif
02619
02620
02621
02622
02623
02624 bool is_long_transition = false ;
02625 if (m_long_transitions)
02626 {
02627 if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold)
02628 is_long_transition = true ;
02629 if (m_orf_info.element(from_state,0)!=-1)
02630 is_long_transition = false ;
02631 }
02632
02633 int32_t from_pos_thresh = from_pos ;
02634 int32_t to_pos_thresh = to_pos ;
02635
02636 if (use_svm)
02637 {
02638 if (is_long_transition)
02639 {
02640 while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold)
02641 from_pos_thresh++ ;
02642 ASSERT(from_pos_thresh<to_pos) ;
02643 ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold);
02644 ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold);
02645
02646 int32_t frame = m_orf_info.element(from_state,0);
02647 lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame);
02648
02649 #ifdef DYNPROG_DEBUG
02650 SG_PRINT("part1: pos1: %i pos2: %i pos3: %i \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1]) ;
02651 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02652 SG_PRINT("%1.4f ", svm_value_part1[s]);
02653 SG_PRINT("\n");
02654 #endif
02655
02656 while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold)
02657 to_pos_thresh-- ;
02658 ASSERT(to_pos_thresh>0) ;
02659 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold) ;
02660 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold) ;
02661
02662 lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame);
02663
02664 #ifdef DYNPROG_DEBUG
02665 SG_PRINT("part2: pos1: %i pos2: %i pos3: %i \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1]) ;
02666 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02667 SG_PRINT("%1.4f ", svm_value_part2[s]);
02668 SG_PRINT("\n");
02669 #endif
02670 }
02671 else
02672 {
02673
02674
02675
02676 int32_t frame = m_orf_info.element(from_state,0);
02677 if (false)
02678 {
02679 int32_t num_current_svms=0;
02680 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02681 SG_PRINT("penalties(%i, %i), frame:%i ", from_state, to_state, frame);
02682 PEN.element(to_state, from_state)->get_used_svms(&num_current_svms, svm_ids);
02683 SG_PRINT("\n");
02684 }
02685
02686 lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame);
02687 #ifdef DYNPROG_DEBUG
02688 SG_PRINT("part2: pos1: %i pos2: %i \nsvm_values: ", m_pos[from_pos], m_pos[to_pos]) ;
02689 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02690 SG_PRINT("%1.4f ", svm_value[s]);
02691 SG_PRINT("\n");
02692 #endif
02693 }
02694 }
02695
02696 if (PEN.element(to_state, from_state)!=NULL)
02697 {
02698 float64_t nscore = 0 ;
02699 if (is_long_transition)
02700 {
02701 float64_t pen_value_part1 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ;
02702 float64_t pen_value_part2 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ;
02703 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ;
02704
02705 }
02706 else
02707 nscore = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ;
02708 my_scores[i] += nscore ;
02709
02710 for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)
02711 {
02712 svm_value[s]=-CMath::INFTY;
02713 svm_value_part1[s]=-CMath::INFTY;
02714 svm_value_part2[s]=-CMath::INFTY;
02715 }
02716
02717 #ifdef DYNPROG_DEBUG
02718
02719 #endif
02720 if (is_long_transition)
02721 {
02722 float64_t sum_score = 0.0 ;
02723
02724 for (int kk=0; kk<i; kk++)
02725 sum_score += my_scores[i] ;
02726
02727 #ifdef DYNPROG_DEBUG
02728 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i) 2: %1.6f (%i-%i) \n",
02729 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state,
02730 nscore, sum_score,
02731 PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh],
02732 PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ;
02733 #endif
02734 }
02735
02736 if (is_long_transition)
02737 {
02738 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ;
02739 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ;
02740 }
02741 else
02742 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ;
02743
02744
02745
02746
02747
02748
02749
02750 if (is_long_transition)
02751 {
02752 for (int32_t d=1; d<=m_num_raw_data; d++)
02753 {
02754 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02755 svm_value[s]=-CMath::INFTY;
02756 float64_t* intensities = new float64_t[m_num_probes_cum[d]];
02757 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d);
02758 for (int32_t k=0;k<num_intensities;k++)
02759 {
02760 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02761 svm_value[j]=intensities[k];
02762
02763 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
02764
02765 }
02766 num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d);
02767 for (int32_t k=0;k<num_intensities;k++)
02768 {
02769 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02770 svm_value[j]=intensities[k];
02771
02772 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
02773
02774 }
02775 delete[] intensities;
02776
02777 }
02778 }
02779 else
02780 {
02781 for (int32_t d=1; d<=m_num_raw_data; d++)
02782 {
02783 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02784 svm_value[s]=-CMath::INFTY;
02785 float64_t* intensities = new float64_t[m_num_probes_cum[d]];
02786 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d);
02787
02788 for (int32_t k=0;k<num_intensities;k++)
02789 {
02790 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02791 svm_value[j]=intensities[k];
02792
02793 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ;
02794
02795 }
02796 delete[] intensities;
02797 }
02798 }
02799
02800 }
02801 #ifdef DYNPROG_DEBUG
02802 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02803 #endif
02804
02805
02806 for (int32_t k=0; k<max_num_signals; k++)
02807 {
02808 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
02809 {
02810 #ifdef DYNPROG_DEBUG
02811 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ;
02812 #endif
02813 my_scores[i] += seq_input.element(to_state, to_pos, k) ;
02814
02815
02816 break ;
02817 }
02818 if (PEN_state_signals.element(to_state, k)!=NULL)
02819 {
02820 float64_t nscore = PEN_state_signals.element(to_state,k)->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ;
02821 my_scores[i] += nscore ;
02822
02823
02824
02825
02826
02827
02828
02829
02830 #ifdef DYNPROG_DEBUG
02831 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
02832 #endif
02833 PEN_state_signals.element(to_state,k)->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ;
02834 } else
02835 break ;
02836 }
02837
02838
02839
02840
02841 total_score += my_scores[i] ;
02842 total_loss += my_losses[i] ;
02843
02844 }
02845
02846
02847
02848
02849 delete[] svm_value;
02850 delete[] svm_value_part1 ;
02851 delete[] svm_value_part2 ;
02852 }
02853
02854 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type)
02855 {
02856 ASSERT(from_pos<to_pos);
02857 int32_t num_intensities = 0;
02858 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[type-1]];
02859 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]];
02860 int32_t last_pos;
02861 int32_t num = m_num_probes_cum[type-1];
02862 while (*p_tiling_pos<to_pos)
02863 {
02864 if (*p_tiling_pos>=from_pos)
02865 {
02866 intensities[num_intensities] = *p_tiling_data;
02867 num_intensities++;
02868 }
02869 num++;
02870 if (num>=m_num_probes_cum[type])
02871 break;
02872 last_pos = *p_tiling_pos;
02873 p_tiling_pos++;
02874 p_tiling_data++;
02875 ASSERT(last_pos<*p_tiling_pos);
02876 }
02877 return num_intensities;
02878 }
02879
02880 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame)
02881 {
02882 #ifdef DYNPROG_TIMING_DETAIL
02883 MyTime.start() ;
02884 #endif
02885
02886
02887
02888 for (int32_t i=0;i<m_num_svms;i++)
02889 {
02890 float64_t to_val = m_lin_feat.get_element(i, to_state);
02891 float64_t from_val = m_lin_feat.get_element(i, from_state);
02892 svm_values[i] = (to_val-from_val)/(to_pos-from_pos);
02893 }
02894 for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++)
02895 {
02896 float64_t to_val = m_lin_feat.get_element(i, to_state);
02897 float64_t from_val = m_lin_feat.get_element(i, from_state);
02898 svm_values[i] = to_val-from_val ;
02899 }
02900 if (m_intron_list)
02901 {
02902 int32_t* support = new int32_t[m_num_intron_plifs];
02903 m_intron_list->get_intron_support(support, from_state, to_state);
02904 int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data];
02905 int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;
02906 int32_t cnt = 0;
02907 for (int32_t i=intron_list_start; i<intron_list_end;i++)
02908 {
02909 svm_values[i] = (float64_t) (support[cnt]);
02910 cnt++;
02911 }
02912
02913
02914 delete[] support;
02915 }
02916
02917 if (frame!=-1)
02918 {
02919 svm_values[frame_plifs[0]] = 1e10;
02920 svm_values[frame_plifs[1]] = 1e10;
02921 svm_values[frame_plifs[2]] = 1e10;
02922 int32_t global_frame = from_pos%3;
02923 int32_t row = ((global_frame+frame)%3)+4;
02924 float64_t to_val = m_lin_feat.get_element(row, to_state);
02925 float64_t from_val = m_lin_feat.get_element(row, from_state);
02926 svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos);
02927 }
02928 #ifdef DYNPROG_TIMING_DETAIL
02929 MyTime.stop() ;
02930 content_svm_values_time += MyTime.time_diff_sec() ;
02931 #endif
02932 }
02933 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs)
02934 {
02935 m_intron_list = intron_list;
02936 m_num_intron_plifs = num_plifs;
02937 }
02938