SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 2 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2007 Soeren Sonnenburg 00008 * Written (W) 1999-2007 Gunnar Raetsch 00009 * Written (W) 2008-2009 Jonas Behr 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 */ 00012 00013 #include "structure/DynProg.h" 00014 #include "lib/Mathematics.h" 00015 #include "lib/io.h" 00016 #include "lib/config.h" 00017 #include "features/StringFeatures.h" 00018 #include "features/Alphabet.h" 00019 #include "structure/Plif.h" 00020 #include "structure/IntronList.h" 00021 #include "lib/Array.h" 00022 #include "lib/Array2.h" 00023 #include "lib/Array3.h" 00024 00025 #include <stdlib.h> 00026 #include <stdio.h> 00027 #include <time.h> 00028 #include <ctype.h> 00029 00030 using namespace shogun; 00031 00032 //#define USE_TMP_ARRAYCLASS 00033 //#define DYNPROG_DEBUG 00034 00035 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ; 00036 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ; 00037 int32_t CDynProg::frame_plifs[3]={4,5,6}; 00038 int32_t CDynProg::num_words_default[4]= {64,256,1024,4096} ; 00039 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1, 00040 1,1,1,1,1,1,1,1, 00041 0,0,0,0,0,0,0,0, 00042 0,0,0,0,0,0,0,0} ; 00043 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true, 00044 false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts 00045 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0, 00046 1,1,1,1,1,1,1,1} ; // which string should be used 00047 00048 CDynProg::CDynProg(int32_t num_svms /*= 8 */) 00049 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1), 00050 m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1), 00051 m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1), 00052 m_end_state_distribution_q_deriv(1), 00053 00054 // multi svm 00055 m_num_degrees(4), 00056 m_num_svms(num_svms), 00057 m_word_degree(word_degree_default, m_num_degrees, true, true), 00058 m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true), 00059 m_cum_num_words_array(m_cum_num_words.get_array()), 00060 m_num_words(num_words_default, m_num_degrees, true, true), 00061 m_num_words_array(m_num_words.get_array()), 00062 m_mod_words(mod_words_default, m_num_svms, 2, true, true), 00063 m_mod_words_array(m_mod_words.get_array()), 00064 m_sign_words(sign_words_default, m_num_svms, true, true), 00065 m_sign_words_array(m_sign_words.get_array()), 00066 m_string_words(string_words_default, m_num_svms, true, true), 00067 m_string_words_array(m_string_words.get_array()), 00068 //m_svm_pos_start(m_num_degrees), 00069 m_num_unique_words(m_num_degrees), 00070 m_svm_arrays_clean(true), 00071 00072 m_max_a_id(0), m_observation_matrix(1,1,1), 00073 m_pos(1), 00074 m_seq_len(0), 00075 m_orf_info(1,2), 00076 m_plif_list(1), 00077 m_PEN(1,1), 00078 m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2), 00079 m_segment_ids(1), 00080 m_segment_mask(1), 00081 m_my_state_seq(1), 00082 m_my_pos_seq(1), 00083 m_my_scores(1), 00084 m_my_losses(1), 00085 m_scores(1), 00086 m_states(1,1), 00087 m_positions(1,1), 00088 00089 m_seq_sparse1(NULL), 00090 m_seq_sparse2(NULL), 00091 m_plif_matrices(NULL), 00092 00093 m_genestr_stop(1), 00094 m_intron_list(NULL), 00095 m_num_intron_plifs(0), 00096 m_lin_feat(1,1), //by Jonas 00097 m_raw_intensities(NULL), 00098 m_probe_pos(NULL), 00099 m_num_probes_cum(NULL), 00100 m_num_lin_feat_plifs_cum(NULL), 00101 m_num_raw_data(0), 00102 00103 m_long_transitions(true), 00104 m_long_transition_threshold(1000) 00105 { 00106 trans_list_forward = NULL ; 00107 trans_list_forward_cnt = NULL ; 00108 trans_list_forward_val = NULL ; 00109 trans_list_forward_id = NULL ; 00110 trans_list_len = 0 ; 00111 00112 mem_initialized = true ; 00113 00114 m_N=1; 00115 00116 m_raw_intensities = NULL; 00117 m_probe_pos = NULL; 00118 m_num_probes_cum = new int32_t[100]; 00119 m_num_probes_cum[0] = 0; 00120 //m_use_tiling=false; 00121 m_num_lin_feat_plifs_cum = new int32_t[100]; 00122 m_num_lin_feat_plifs_cum[0] = m_num_svms; 00123 m_num_raw_data = 0; 00124 #ifdef ARRAY_STATISTICS 00125 m_word_degree.set_name("word_degree"); 00126 #endif 00127 00128 m_transition_matrix_a_id.set_name("transition_matrix_a_id"); 00129 m_transition_matrix_a.set_name("transition_matrix_a"); 00130 m_transition_matrix_a_deriv.set_name("transition_matrix_a_deriv"); 00131 m_mod_words.set_name("mod_words"); 00132 m_orf_info.set_name("orf_info"); 00133 m_segment_sum_weights.set_name("segment_sum_weights"); 00134 m_PEN.set_name("PEN"); 00135 m_PEN_state_signals.set_name("PEN_state_signals"); 00136 m_dict_weights.set_name("dict_weights"); 00137 m_states.set_name("states"); 00138 m_positions.set_name("positions"); 00139 m_lin_feat.set_name("lin_feat"); 00140 00141 00142 m_observation_matrix.set_name("m_observation_matrix"); 00143 m_segment_loss.set_name("m_segment_loss"); 00144 m_seg_loss_obj = new CSegmentLoss(); 00145 } 00146 00147 CDynProg::~CDynProg() 00148 { 00149 if (trans_list_forward_cnt) 00150 delete[] trans_list_forward_cnt; 00151 if (trans_list_forward) 00152 { 00153 for (int32_t i=0; i<trans_list_len; i++) 00154 { 00155 if (trans_list_forward[i]) 00156 delete[] trans_list_forward[i]; 00157 } 00158 delete[] trans_list_forward; 00159 } 00160 if (trans_list_forward_val) 00161 { 00162 for (int32_t i=0; i<trans_list_len; i++) 00163 { 00164 if (trans_list_forward_val[i]) 00165 delete[] trans_list_forward_val[i]; 00166 } 00167 delete[] trans_list_forward_val; 00168 } 00169 if (trans_list_forward_id) 00170 { 00171 for (int32_t i=0; i<trans_list_len; i++) 00172 { 00173 if (trans_list_forward_id[i]) 00174 delete[] trans_list_forward_id[i]; 00175 } 00176 delete[] trans_list_forward_id; 00177 } 00178 if (m_raw_intensities) 00179 delete[] m_raw_intensities; 00180 if (m_probe_pos) 00181 delete[] m_probe_pos; 00182 if (m_num_probes_cum) 00183 delete[] m_num_probes_cum ; 00184 if (m_num_lin_feat_plifs_cum) 00185 delete[] m_num_lin_feat_plifs_cum ; 00186 00187 delete m_intron_list; 00188 00189 SG_UNREF(m_seq_sparse1); 00190 SG_UNREF(m_seq_sparse2); 00191 SG_UNREF(m_plif_matrices); 00192 } 00193 00195 int32_t CDynProg::get_num_svms() 00196 { 00197 return m_num_svms; 00198 } 00199 00200 void CDynProg::precompute_stop_codons() 00201 { 00202 int32_t length=m_genestr.get_dim1(); 00203 00204 m_genestr_stop.resize_array(length) ; 00205 m_genestr_stop.zero() ; 00206 m_genestr_stop.set_name("genestr_stop") ; 00207 { 00208 for (int32_t i=0; i<length-2; i++) 00209 if ((m_genestr[i]=='t' || m_genestr[i]=='T') && 00210 (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') && 00211 (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) || 00212 ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') ))) 00213 { 00214 m_genestr_stop.element(i)=true ; 00215 } 00216 else 00217 m_genestr_stop.element(i)=false ; 00218 m_genestr_stop.element(length-2)=false ; 00219 m_genestr_stop.element(length-1)=false ; 00220 } 00221 } 00222 00223 void CDynProg::set_num_states(int32_t p_N) 00224 { 00225 m_N=p_N ; 00226 00227 m_transition_matrix_a_id.resize_array(m_N,m_N) ; 00228 m_transition_matrix_a.resize_array(m_N,m_N) ; 00229 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ; 00230 m_initial_state_distribution_p.resize_array(m_N) ; 00231 m_initial_state_distribution_p_deriv.resize_array(m_N) ; 00232 m_end_state_distribution_q.resize_array(m_N); 00233 m_end_state_distribution_q_deriv.resize_array(m_N) ; 00234 00235 m_orf_info.resize_array(m_N,2) ; 00236 m_PEN.resize_array(m_N,m_N) ; 00237 } 00238 00239 int32_t CDynProg::get_num_states() 00240 { 00241 return m_N; 00242 } 00243 00244 void CDynProg::init_tiling_data( 00245 int32_t* probe_pos, float64_t* intensities, const int32_t num_probes) 00246 { 00247 m_num_raw_data++; 00248 m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes; 00249 00250 int32_t* tmp_probe_pos = new int32_t[m_num_probes_cum[m_num_raw_data]]; 00251 float64_t* tmp_raw_intensities = new float64_t[m_num_probes_cum[m_num_raw_data]]; 00252 00253 00254 if (m_num_raw_data==1){ 00255 memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t)); 00256 memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t)); 00257 //SG_PRINT("raw_intens:%f \n",*tmp_raw_intensities+2); 00258 }else{ 00259 memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t)); 00260 memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t)); 00261 memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t)); 00262 memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t)); 00263 } 00264 delete[] m_probe_pos; 00265 delete[] m_raw_intensities; 00266 m_probe_pos = tmp_probe_pos; //new int32_t[num_probes]; 00267 m_raw_intensities = tmp_raw_intensities;//new float64_t[num_probes]; 00268 00269 //memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t)); 00270 //memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t)); 00271 00272 } 00273 00274 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms) 00275 { 00276 m_lin_feat.resize_array(p_num_svms, m_seq_len); 00277 00278 // initialize array 00279 for (int s=0; s<p_num_svms; s++) 00280 for (int p=0; p<m_seq_len; p++) 00281 m_lin_feat.set_element(0.0, s, p) ; 00282 } 00283 00284 void CDynProg::resize_lin_feat(const int32_t num_new_feat) 00285 { 00286 int32_t dim1, dim2; 00287 m_lin_feat.get_array_size(dim1, dim2); 00288 ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]); 00289 ASSERT(dim2==m_seq_len); // == number of candidate positions 00290 00291 00292 00293 float64_t* arr = m_lin_feat.get_array(); 00294 float64_t* tmp = new float64_t[(dim1+num_new_feat)*dim2]; 00295 memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ; 00296 for(int32_t j=0;j<m_seq_len;j++) 00297 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++) 00298 tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k]; 00299 00300 m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);// copy array and free it later 00301 delete[] tmp; 00302 00303 /*for(int32_t j=0;j<5;j++) 00304 { 00305 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data];k++) 00306 { 00307 SG_PRINT("(%i,%i)%f ",k,j,m_lin_feat.get_element(k,j)); 00308 } 00309 SG_PRINT("\n"); 00310 } 00311 m_lin_feat.get_array_size(dim1,dim2); 00312 SG_PRINT("resize_lin_feat: dim1:%i, dim2:%i\n",dim1,dim2);*/ 00313 00314 //SG_PRINT("resize_lin_feat: done\n"); 00315 } 00316 00317 void CDynProg::precompute_tiling_plifs( 00318 CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs) 00319 { 00320 m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs; 00321 float64_t* tiling_plif = new float64_t[num_tiling_plifs]; 00322 float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs]; 00323 for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; i++) 00324 svm_value[i]=0.0; 00325 int32_t* tiling_rows = new int32_t[num_tiling_plifs]; 00326 for (int32_t i=0; i<num_tiling_plifs; i++) 00327 { 00328 tiling_plif[i]=0.0; 00329 CPlif * plif = PEN[tiling_plif_ids[i]]; 00330 tiling_rows[i] = plif->get_use_svm(); 00331 00332 ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i) 00333 } 00334 resize_lin_feat(num_tiling_plifs); 00335 00336 00337 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]]; 00338 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]]; 00339 int32_t num=m_num_probes_cum[m_num_raw_data-1]; 00340 00341 for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++) 00342 { 00343 while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx]) 00344 { 00345 for (int32_t i=0; i<num_tiling_plifs; i++) 00346 { 00347 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data; 00348 CPlif * plif = PEN[tiling_plif_ids[i]]; 00349 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1); 00350 plif->set_do_calc(true); 00351 tiling_plif[i]+=plif->lookup_penalty(0,svm_value); 00352 plif->set_do_calc(false); 00353 } 00354 p_tiling_data++; 00355 p_tiling_pos++; 00356 num++; 00357 } 00358 for (int32_t i=0; i<num_tiling_plifs; i++) 00359 m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx); 00360 } 00361 delete[] svm_value; 00362 delete[] tiling_plif; 00363 delete[] tiling_rows; 00364 } 00365 00366 void CDynProg::create_word_string() 00367 { 00368 delete[] m_wordstr; 00369 m_wordstr=new uint16_t**[5440]; 00370 int32_t k=0; 00371 int32_t genestr_len=m_genestr.get_dim1(); 00372 00373 m_wordstr[k]=new uint16_t*[m_num_degrees] ; 00374 for (int32_t j=0; j<m_num_degrees; j++) 00375 { 00376 m_wordstr[k][j]=NULL ; 00377 { 00378 m_wordstr[k][j]=new uint16_t[genestr_len] ; 00379 for (int32_t i=0; i<genestr_len; i++) 00380 switch (m_genestr[i]) 00381 { 00382 case 'A': 00383 case 'a': m_wordstr[k][j][i]=0 ; break ; 00384 case 'C': 00385 case 'c': m_wordstr[k][j][i]=1 ; break ; 00386 case 'G': 00387 case 'g': m_wordstr[k][j][i]=2 ; break ; 00388 case 'T': 00389 case 't': m_wordstr[k][j][i]=3 ; break ; 00390 default: ASSERT(0) ; 00391 } 00392 CAlphabet::translate_from_single_order(m_wordstr[k][j], genestr_len, m_word_degree[j]-1, m_word_degree[j], 2) ; 00393 } 00394 } 00395 } 00396 00397 void CDynProg::precompute_content_values() 00398 { 00399 for (int32_t s=0; s<m_num_svms; s++) 00400 m_lin_feat.set_element(0.0, s, 0); 00401 00402 for (int32_t p=0 ; p<m_seq_len-1 ; p++) 00403 { 00404 int32_t from_pos = m_pos[p]; 00405 int32_t to_pos = m_pos[p+1]; 00406 float64_t* my_svm_values_unnormalized = new float64_t[m_num_svms]; 00407 //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos); 00408 00409 ASSERT(from_pos<=m_genestr.get_dim1()); 00410 ASSERT(to_pos<=m_genestr.get_dim1()); 00411 00412 for (int32_t s=0; s<m_num_svms; s++) 00413 my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p); 00414 00415 for (int32_t i=from_pos; i<to_pos; i++) 00416 { 00417 for (int32_t j=0; j<m_num_degrees; j++) 00418 { 00419 uint16_t word = m_wordstr[0][j][i] ; 00420 for (int32_t s=0; s<m_num_svms; s++) 00421 { 00422 // check if this k-mer should be considered for this SVM 00423 if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1)) 00424 continue; 00425 my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ; 00426 } 00427 } 00428 } 00429 for (int32_t s=0; s<m_num_svms; s++) 00430 { 00431 float64_t prev = m_lin_feat.get_element(s, p); 00432 //SG_PRINT("elem (%i, %i, %f)\n", s, p, prev) ; 00433 if (prev<-1e20 || prev>1e20) 00434 { 00435 SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev) ; 00436 prev=0 ; 00437 } 00438 m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1); 00439 } 00440 delete[] my_svm_values_unnormalized; 00441 } 00442 //for (int32_t j=0; j<m_num_degrees; j++) 00443 // delete[] m_wordstr[0][j] ; 00444 //delete[] m_wordstr[0] ; 00445 } 00446 00447 void CDynProg::set_p_vector(float64_t *p, int32_t N) 00448 { 00449 if (!(N==m_N)) 00450 SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",N, m_N); 00451 00452 m_initial_state_distribution_p.set_array(p, N, true, true); 00453 } 00454 00455 void CDynProg::set_q_vector(float64_t *q, int32_t N) 00456 { 00457 if (!(N==m_N)) 00458 SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",N, m_N); 00459 m_end_state_distribution_q.set_array(q, N, true, true); 00460 } 00461 00462 void CDynProg::set_a(float64_t *a, int32_t M, int32_t N) 00463 { 00464 ASSERT(N==m_N); 00465 ASSERT(M==N); 00466 m_transition_matrix_a.set_array(a, N, N, true, true); 00467 m_transition_matrix_a_deriv.resize_array(N, N); 00468 } 00469 00470 void CDynProg::set_a_id(int32_t *a, int32_t M, int32_t N) 00471 { 00472 ASSERT(N==m_N); 00473 ASSERT(M==N); 00474 m_transition_matrix_a_id.set_array(a, N, N, true, true); 00475 m_max_a_id = 0; 00476 for (int32_t i=0; i<N; i++) 00477 { 00478 for (int32_t j=0; j<N; j++) 00479 m_max_a_id=CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)); 00480 } 00481 } 00482 00483 void CDynProg::set_a_trans_matrix( 00484 float64_t *a_trans, int32_t num_trans, int32_t num_cols) 00485 { 00486 00487 //CMath::display_matrix(a_trans,num_trans, num_cols,"a_trans"); 00488 00489 if (!((num_cols==3) || (num_cols==4))) 00490 SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols); 00491 00492 delete[] trans_list_forward ; 00493 delete[] trans_list_forward_cnt ; 00494 delete[] trans_list_forward_val ; 00495 delete[] trans_list_forward_id ; 00496 00497 trans_list_forward = NULL ; 00498 trans_list_forward_cnt = NULL ; 00499 trans_list_forward_val = NULL ; 00500 trans_list_len = 0 ; 00501 00502 m_transition_matrix_a.zero() ; 00503 m_transition_matrix_a_id.zero() ; 00504 00505 mem_initialized = true ; 00506 00507 trans_list_forward_cnt=NULL ; 00508 trans_list_len = m_N ; 00509 trans_list_forward = new T_STATES*[m_N] ; 00510 trans_list_forward_cnt = new T_STATES[m_N] ; 00511 trans_list_forward_val = new float64_t*[m_N] ; 00512 trans_list_forward_id = new int32_t*[m_N] ; 00513 00514 int32_t start_idx=0; 00515 for (int32_t j=0; j<m_N; j++) 00516 { 00517 int32_t old_start_idx=start_idx; 00518 00519 while (start_idx<num_trans && a_trans[start_idx+num_trans]==j) 00520 { 00521 start_idx++; 00522 00523 if (start_idx>1 && start_idx<num_trans) 00524 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]); 00525 } 00526 00527 if (start_idx>1 && start_idx<num_trans) 00528 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]); 00529 00530 int32_t len=start_idx-old_start_idx; 00531 ASSERT(len>=0); 00532 00533 trans_list_forward_cnt[j] = 0 ; 00534 00535 if (len>0) 00536 { 00537 trans_list_forward[j] = new T_STATES[len] ; 00538 trans_list_forward_val[j] = new float64_t[len] ; 00539 trans_list_forward_id[j] = new int32_t[len] ; 00540 } 00541 else 00542 { 00543 trans_list_forward[j] = NULL; 00544 trans_list_forward_val[j] = NULL; 00545 trans_list_forward_id[j] = NULL; 00546 } 00547 } 00548 00549 for (int32_t i=0; i<num_trans; i++) 00550 { 00551 int32_t from_state = (int32_t)a_trans[i] ; 00552 int32_t to_state = (int32_t)a_trans[i+num_trans] ; 00553 float64_t val = a_trans[i+num_trans*2] ; 00554 int32_t id = 0 ; 00555 if (num_cols==4) 00556 id = (int32_t)a_trans[i+num_trans*3] ; 00557 //SG_DEBUG( "id=%i\n", id) ; 00558 00559 ASSERT(to_state>=0 && to_state<m_N); 00560 ASSERT(from_state>=0 && from_state<m_N); 00561 00562 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ; 00563 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ; 00564 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ; 00565 trans_list_forward_cnt[to_state]++ ; 00566 m_transition_matrix_a.element(from_state, to_state) = val ; 00567 m_transition_matrix_a_id.element(from_state, to_state) = id ; 00568 //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,m_transition_matrix_a_id.element(from_state, to_state)); 00569 } ; 00570 00571 m_max_a_id = 0 ; 00572 for (int32_t i=0; i<m_N; i++) 00573 for (int32_t j=0; j<m_N; j++) 00574 { 00575 //if (m_transition_matrix_a_id.element(i,j)) 00576 //SG_DEBUG( "(%i,%i)=%i\n", i,j, m_transition_matrix_a_id.element(i,j)) ; 00577 m_max_a_id = CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)) ; 00578 } 00579 //SG_DEBUG( "m_max_a_id=%i\n", m_max_a_id) ; 00580 } 00581 00582 00583 void CDynProg::init_mod_words_array( 00584 int32_t * mod_words_input, int32_t num_elem, int32_t num_columns) 00585 { 00586 //for (int32_t i=0; i<num_columns; i++) 00587 //{ 00588 // for (int32_t j=0; j<num_elem; j++) 00589 // SG_PRINT("%i ",mod_words_input[i*num_elem+j]); 00590 // SG_PRINT("\n"); 00591 //} 00592 m_svm_arrays_clean=false ; 00593 00594 ASSERT(m_num_svms==num_elem); 00595 ASSERT(num_columns==2); 00596 00597 m_mod_words.set_array(mod_words_input, num_elem, 2, true, true) ; 00598 m_mod_words_array = m_mod_words.get_array() ; 00599 00600 /*SG_DEBUG( "m_mod_words=[") ; 00601 for (int32_t i=0; i<num_elem; i++) 00602 SG_DEBUG( "%i, ", p_mod_words_array[i]) ; 00603 SG_DEBUG( "]\n") ;*/ 00604 } 00605 00606 bool CDynProg::check_svm_arrays() 00607 { 00608 //SG_DEBUG( "wd_dim1=%d, m_cum_num_words=%d, m_num_words=%d, m_svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n m_num_degrees=%d, m_num_svms=%d, m_num_strings=%d", m_word_degree.get_dim1(), m_cum_num_words.get_dim1(), m_num_words.get_dim1(), m_svm_pos_start.get_dim1(), m_num_unique_words.get_dim1(), m_mod_words.get_dim1(), m_mod_words.get_dim2(), m_sign_words.get_dim1(), m_string_words.get_dim1(), m_num_degrees, m_num_svms, m_num_strings); 00609 if ((m_word_degree.get_dim1()==m_num_degrees) && 00610 (m_cum_num_words.get_dim1()==m_num_degrees+1) && 00611 (m_num_words.get_dim1()==m_num_degrees) && 00612 //(word_used.get_dim1()==m_num_degrees) && 00613 //(word_used.get_dim2()==m_num_words[m_num_degrees-1]) && 00614 //(word_used.get_dim3()==m_num_strings) && 00615 // (svm_values_unnormalized.get_dim1()==m_num_degrees) && 00616 // (svm_values_unnormalized.get_dim2()==m_num_svms) && 00617 //(m_svm_pos_start.get_dim1()==m_num_degrees) && 00618 (m_num_unique_words.get_dim1()==m_num_degrees) && 00619 (m_mod_words.get_dim1()==m_num_svms) && 00620 (m_mod_words.get_dim2()==2) && 00621 (m_sign_words.get_dim1()==m_num_svms) && 00622 (m_string_words.get_dim1()==m_num_svms)) 00623 { 00624 m_svm_arrays_clean=true ; 00625 return true ; 00626 } 00627 else 00628 { 00629 if ((m_num_unique_words.get_dim1()==m_num_degrees) && 00630 (m_mod_words.get_dim1()==m_num_svms) && 00631 (m_mod_words.get_dim2()==2) && 00632 (m_sign_words.get_dim1()==m_num_svms) && 00633 (m_string_words.get_dim1()==m_num_svms)) 00634 SG_PRINT("OK\n") ; 00635 else 00636 SG_PRINT("not OK\n") ; 00637 00638 if (!(m_word_degree.get_dim1()==m_num_degrees)) 00639 SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees") ; 00640 if (!(m_cum_num_words.get_dim1()==m_num_degrees+1)) 00641 SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1") ; 00642 if (!(m_num_words.get_dim1()==m_num_degrees)) 00643 SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees") ; 00644 //if (!(m_svm_pos_start.get_dim1()==m_num_degrees)) 00645 // SG_WARNING("SVM array: m_svm_pos_start.get_dim1()!=m_num_degrees") ; 00646 if (!(m_num_unique_words.get_dim1()==m_num_degrees)) 00647 SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees") ; 00648 if (!(m_mod_words.get_dim1()==m_num_svms)) 00649 SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms") ; 00650 if (!(m_mod_words.get_dim2()==2)) 00651 SG_WARNING("SVM array: m_mod_words.get_dim2()!=2") ; 00652 if (!(m_sign_words.get_dim1()==m_num_svms)) 00653 SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms") ; 00654 if (!(m_string_words.get_dim1()==m_num_svms)) 00655 SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms") ; 00656 00657 m_svm_arrays_clean=false ; 00658 return false ; 00659 } 00660 } 00661 00662 void CDynProg::set_observation_matrix(float64_t* seq, int32_t* dims, int32_t ndims) 00663 { 00664 if (ndims!=3) 00665 SG_ERROR("Expected 3-dimensional Matrix\n"); 00666 00667 int32_t N=dims[0]; 00668 int32_t cand_pos=dims[1]; 00669 int32_t max_num_features=dims[2]; 00670 00671 if (!m_svm_arrays_clean) 00672 { 00673 SG_ERROR( "SVM arrays not clean") ; 00674 return ; 00675 } ; 00676 00677 ASSERT(N==m_N); 00678 ASSERT(cand_pos==m_seq_len); 00679 ASSERT(m_initial_state_distribution_p.get_dim1()==N); 00680 ASSERT(m_end_state_distribution_q.get_dim1()==N); 00681 00682 m_observation_matrix.set_array(seq, N, m_seq_len, max_num_features, true, true) ; 00683 } 00684 int32_t CDynProg::get_num_positions() 00685 { 00686 return m_seq_len; 00687 } 00688 00689 void CDynProg::set_content_type_array(float64_t* seg_path, int32_t rows, int32_t cols) 00690 { 00691 ASSERT(rows==2); 00692 ASSERT(cols==m_seq_len); 00693 00694 if (seg_path!=NULL) 00695 { 00696 int32_t *segment_ids = new int32_t[m_seq_len] ; 00697 float64_t *segment_mask = new float64_t[m_seq_len] ; 00698 for (int32_t i=0; i<m_seq_len; i++) 00699 { 00700 segment_ids[i] = (int32_t)seg_path[2*i] ; 00701 segment_mask[i] = seg_path[2*i+1] ; 00702 } 00703 best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ; 00704 delete[] segment_ids; 00705 delete[] segment_mask; 00706 } 00707 else 00708 { 00709 int32_t *izeros = new int32_t[m_seq_len] ; 00710 float64_t *dzeros = new float64_t[m_seq_len] ; 00711 for (int32_t i=0; i<m_seq_len; i++) 00712 { 00713 izeros[i]=0 ; 00714 dzeros[i]=0.0 ; 00715 } 00716 best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ; 00717 delete[] izeros ; 00718 delete[] dzeros ; 00719 } 00720 } 00721 00722 void CDynProg::set_pos(int32_t* pos, int32_t seq_len) 00723 { 00724 //if (seq_len!=m_observation_matrix.get_dim2()) 00725 // SG_ERROR( "pos size does not match previous info %i!=%i\n", seq_len, m_observation_matrix.get_dim2()) ; 00726 00727 m_pos.set_array(pos, seq_len, true, true) ; 00728 m_seq_len = seq_len; 00729 } 00730 00731 void CDynProg::set_orf_info(int32_t* orf_info, int32_t m, int32_t n) 00732 { 00733 if (n!=2) 00734 SG_ERROR( "orf_info size incorrect %i!=2\n", n) ; 00735 00736 m_orf_info.set_array(orf_info, m, n, true, true) ; 00737 m_orf_info.set_name("orf_info") ; 00738 } 00739 00740 void CDynProg::set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2) 00741 { 00742 if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2)) 00743 SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n"); 00744 00745 SG_UNREF(m_seq_sparse1); 00746 SG_UNREF(m_seq_sparse2); 00747 00748 m_seq_sparse1=seq_sparse1; 00749 m_seq_sparse2=seq_sparse2; 00750 SG_REF(m_seq_sparse1); 00751 SG_REF(m_seq_sparse2); 00752 } 00753 00754 void CDynProg::set_plif_matrices(CPlifMatrix* pm) 00755 { 00756 SG_UNREF(m_plif_matrices); 00757 00758 m_plif_matrices=pm; 00759 00760 SG_REF(m_plif_matrices); 00761 } 00762 00763 void CDynProg::set_gene_string(char* genestr, int32_t genestr_len) 00764 { 00765 ASSERT(genestr); 00766 ASSERT(genestr_len>0); 00767 00768 m_genestr.set_array(genestr, genestr_len, true, true) ; 00769 } 00770 00771 void CDynProg::set_my_state_seq(int32_t* my_state_seq) 00772 { 00773 ASSERT(my_state_seq && m_seq_len>0); 00774 m_my_state_seq.resize_array(m_seq_len); 00775 for (int32_t i=0; i<m_seq_len; i++) 00776 m_my_state_seq[i]=my_state_seq[i]; 00777 } 00778 00779 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq) 00780 { 00781 ASSERT(my_pos_seq && m_seq_len>0); 00782 m_my_pos_seq.resize_array(m_seq_len); 00783 for (int32_t i=0; i<m_seq_len; i++) 00784 m_my_pos_seq[i]=my_pos_seq[i]; 00785 } 00786 00787 void CDynProg::set_dict_weights( 00788 float64_t* dictionary_weights, int32_t dict_len, int32_t n) 00789 { 00790 if (m_num_svms!=n) 00791 SG_ERROR( "m_dict_weights array does not match num_svms=%i!=%i\n", m_num_svms, n) ; 00792 00793 m_dict_weights.set_array(dictionary_weights, dict_len, m_num_svms, true, true) ; 00794 00795 // initialize, so it does not bother when not used 00796 m_segment_loss.resize_array(m_max_a_id+1, m_max_a_id+1, 2) ; 00797 m_segment_loss.zero() ; 00798 m_segment_ids.resize_array(m_observation_matrix.get_dim2()) ; 00799 m_segment_mask.resize_array(m_observation_matrix.get_dim2()) ; 00800 m_segment_ids.zero() ; 00801 m_segment_mask.zero() ; 00802 } 00803 00804 void CDynProg::best_path_set_segment_loss( 00805 float64_t* segment_loss, int32_t m, int32_t n) 00806 { 00807 // here we need two matrices. Store it in one: 2N x N 00808 if (2*m!=n) 00809 SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ; 00810 00811 if (m!=m_max_a_id+1) 00812 SG_ERROR( "segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1) ; 00813 00814 m_segment_loss.set_array(segment_loss, m, n/2, 2, true, true) ; 00815 /*for (int32_t i=0; i<n; i++) 00816 for (int32_t j=0; j<n; j++) 00817 SG_DEBUG( "loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) ;*/ 00818 } 00819 00820 void CDynProg::best_path_set_segment_ids_mask( 00821 int32_t* segment_ids, float64_t* segment_mask, int32_t m) 00822 { 00823 00824 if (m!=m_observation_matrix.get_dim2()) 00825 SG_ERROR("size of segment_ids or segment_mask (%i) does not match the size of the feature matrix (%i)", m, m_observation_matrix.get_dim2()); 00826 int32_t max_id = 0; 00827 for (int32_t i=1;i<m;i++) 00828 max_id = CMath::max(max_id,segment_ids[i]); 00829 //SG_PRINT("max_id: %i, m:%i\n",max_id, m); 00830 m_segment_ids.set_array(segment_ids, m, true, true) ; 00831 m_segment_ids.set_name("m_segment_ids"); 00832 m_segment_mask.set_array(segment_mask, m, true, true) ; 00833 m_segment_mask.set_name("m_segment_mask"); 00834 00835 m_seg_loss_obj->set_segment_mask(&m_segment_mask); 00836 m_seg_loss_obj->set_segment_ids(&m_segment_ids); 00837 m_seg_loss_obj->compute_loss(m_pos.get_array(), m_seq_len); 00838 } 00839 00840 void CDynProg::get_scores(float64_t **scores, int32_t *m) 00841 { 00842 ASSERT(scores && m); 00843 00844 //*scores=m_scores.get_array(); 00845 *m=m_scores.get_dim1(); 00846 00847 int32_t sz = sizeof(float64_t)*(*m); 00848 00849 *scores = (float64_t*) malloc(sz); 00850 ASSERT(*scores); 00851 00852 memcpy(*scores,m_scores.get_array(),sz); 00853 } 00854 00855 void CDynProg::get_states(int32_t **states, int32_t *m, int32_t *n) 00856 { 00857 ASSERT(states && m && n); 00858 00859 *m=m_states.get_dim1() ; 00860 *n=m_states.get_dim2() ; 00861 00862 int32_t sz = sizeof(int32_t)*( (*m) * (*n) ); 00863 00864 *states = (int32_t*) malloc(sz); 00865 ASSERT(*states); 00866 00867 memcpy(*states,m_states.get_array(),sz); 00868 } 00869 00870 void CDynProg::get_positions(int32_t **positions, int32_t *m, int32_t *n) 00871 { 00872 ASSERT(positions && m && n); 00873 00874 *m=m_positions.get_dim1() ; 00875 *n=m_positions.get_dim2() ; 00876 00877 int32_t sz = sizeof(int32_t)*( (*m) * (*n) ); 00878 00879 *positions = (int32_t*) malloc(sz); 00880 ASSERT(*positions); 00881 00882 memcpy(*positions,m_positions.get_array(),sz); 00883 } 00884 00885 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len) 00886 { 00887 ASSERT(scores && seq_len); 00888 00889 *seq_len=m_my_scores.get_dim1(); 00890 00891 int32_t sz = sizeof(float64_t)*(*seq_len); 00892 00893 *scores = (float64_t*) malloc(sz); 00894 ASSERT(*scores); 00895 00896 memcpy(*scores,m_my_scores.get_array(),sz); 00897 } 00898 00899 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len) 00900 { 00901 ASSERT(losses && seq_len); 00902 00903 *seq_len=m_my_losses.get_dim1(); 00904 00905 int32_t sz = sizeof(float64_t)*(*seq_len); 00906 00907 *losses = (float64_t*) malloc(sz); 00908 ASSERT(*losses); 00909 00910 memcpy(*losses,m_my_losses.get_array(),sz); 00911 } 00912 00914 00915 bool CDynProg::extend_orf( 00916 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, 00917 int32_t to) 00918 { 00919 #ifdef DYNPROG_TIMING_DETAIL 00920 MyTime.start() ; 00921 #endif 00922 00923 if (start<0) 00924 start=0 ; 00925 if (to<0) 00926 to=0 ; 00927 00928 int32_t orf_target = orf_to-orf_from ; 00929 if (orf_target<0) orf_target+=3 ; 00930 00931 int32_t pos ; 00932 if (last_pos==to) 00933 pos = to-orf_to-3 ; 00934 else 00935 pos=last_pos ; 00936 00937 if (pos<0) 00938 { 00939 #ifdef DYNPROG_TIMING_DETAIL 00940 MyTime.stop() ; 00941 orf_time += MyTime.time_diff_sec() ; 00942 #endif 00943 return true ; 00944 } 00945 00946 for (; pos>=start; pos-=3) 00947 if (m_genestr_stop[pos]) 00948 { 00949 #ifdef DYNPROG_TIMING_DETAIL 00950 MyTime.stop() ; 00951 orf_time += MyTime.time_diff_sec() ; 00952 #endif 00953 return false ; 00954 } 00955 00956 00957 last_pos = CMath::min(pos+3,to-orf_to-3) ; 00958 00959 #ifdef DYNPROG_TIMING_DETAIL 00960 MyTime.stop() ; 00961 orf_time += MyTime.time_diff_sec() ; 00962 #endif 00963 return true ; 00964 } 00965 00966 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf, 00967 int16_t nbest, bool with_loss, bool with_multiple_sequences) 00968 { 00969 00970 //FIXME we need checks here if all the fields are of right size 00971 //SG_PRINT("m_seq_len: %i\n", m_seq_len); 00972 //SG_PRINT("m_pos[0]: %i\n", m_pos[0]); 00973 //SG_PRINT("\n"); 00974 00975 //FIXME these variables can go away when compute_nbest_paths uses them 00976 //instead of the local pointers below 00977 const float64_t* seq_array = m_observation_matrix.get_array(); 00978 m_scores.resize_array(nbest) ; 00979 m_states.resize_array(nbest, m_observation_matrix.get_dim2()) ; 00980 m_positions.resize_array(nbest, m_observation_matrix.get_dim2()) ; 00981 00982 for (int32_t i=0; i<nbest; i++) 00983 { 00984 m_scores[i]=-1; 00985 for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++) 00986 { 00987 m_states.element(i,j)=-1; 00988 m_positions.element(i,j)=-1; 00989 } 00990 } 00991 float64_t* prob_nbest=m_scores.get_array(); 00992 int32_t* my_state_seq=m_states.get_array(); 00993 int32_t* my_pos_seq=m_positions.get_array(); 00994 00995 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix(); 00996 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals(); 00997 //END FIXME 00998 00999 01000 #ifdef DYNPROG_TIMING 01001 segment_init_time = 0.0 ; 01002 segment_pos_time = 0.0 ; 01003 segment_extend_time = 0.0 ; 01004 segment_clean_time = 0.0 ; 01005 orf_time = 0.0 ; 01006 svm_init_time = 0.0 ; 01007 svm_pos_time = 0.0 ; 01008 svm_clean_time = 0.0 ; 01009 inner_loop_time = 0.0 ; 01010 content_svm_values_time = 0.0 ; 01011 content_plifs_time = 0.0 ; 01012 inner_loop_max_time = 0.0 ; 01013 long_transition_time = 0.0 ; 01014 01015 MyTime2.start() ; 01016 #endif 01017 01018 if (!m_svm_arrays_clean) 01019 { 01020 SG_ERROR( "SVM arrays not clean") ; 01021 return ; 01022 } 01023 01024 #ifdef DYNPROG_DEBUG 01025 m_transition_matrix_a.set_name("transition_matrix"); 01026 m_transition_matrix_a.display_array(); 01027 m_mod_words.display_array() ; 01028 m_sign_words.display_array() ; 01029 m_string_words.display_array() ; 01030 //SG_PRINT("use_orf = %i\n", use_orf) ; 01031 #endif 01032 01033 int32_t max_look_back = 1000 ; 01034 bool use_svm = false ; 01035 01036 SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals) ; 01037 01038 //for (int32_t i=0;i<m_N*m_seq_len*max_num_signals;i++) 01039 // SG_PRINT("(%i)%0.2f ",i,seq_array[i]); 01040 01041 CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ; 01042 PEN.set_name("PEN"); 01043 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ; 01044 PEN_state_signals.set_name("state_signals"); 01045 01046 CArray2<float64_t> seq(m_N, m_seq_len) ; 01047 seq.set_name("seq") ; 01048 seq.zero() ; 01049 01050 #ifdef DYNPROG_DEBUG 01051 SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data); 01052 SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs); 01053 SG_PRINT("m_num_svms: %i\n", m_num_svms); 01054 SG_PRINT("m_num_lin_feat_plifs_cum: "); 01055 for (int i=0; i<=m_num_raw_data; i++) 01056 SG_PRINT(" %i ",m_num_lin_feat_plifs_cum[i]); 01057 SG_PRINT("\n"); 01058 #endif 01059 01060 float64_t* svm_value = new float64_t [m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs]; 01061 { // initialize svm_svalue 01062 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 01063 svm_value[s]=0 ; 01064 } 01065 01066 { // convert seq_input to seq 01067 // this is independent of the svm values 01068 01069 //CArray3<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ; 01070 CArray3<float64_t> *seq_input=NULL ; 01071 if (seq_array!=NULL) 01072 { 01073 //SG_PRINT("using dense seq_array\n") ; 01074 01075 seq_input=new CArray3<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ; 01076 seq_input->set_name("seq_input") ; 01077 //seq_input.display_array() ; 01078 01079 ASSERT(m_seq_sparse1==NULL) ; 01080 ASSERT(m_seq_sparse2==NULL) ; 01081 } else 01082 { 01083 SG_PRINT("using sparse seq_array\n") ; 01084 01085 ASSERT(m_seq_sparse1!=NULL) ; 01086 ASSERT(m_seq_sparse2!=NULL) ; 01087 ASSERT(max_num_signals==2) ; 01088 } 01089 01090 for (int32_t i=0; i<m_N; i++) 01091 for (int32_t j=0; j<m_seq_len; j++) 01092 seq.element(i,j) = 0 ; 01093 01094 for (int32_t i=0; i<m_N; i++) 01095 for (int32_t j=0; j<m_seq_len; j++) 01096 for (int32_t k=0; k<max_num_signals; k++) 01097 { 01098 if ((PEN_state_signals.element(i,k)==NULL) && (k==0)) 01099 { 01100 // no plif 01101 if (seq_input!=NULL) 01102 seq.element(i,j) = seq_input->element(i,j,k) ; 01103 else 01104 { 01105 if (k==0) 01106 seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ; 01107 if (k==1) 01108 seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ; 01109 } 01110 break ; 01111 } 01112 if (PEN_state_signals.element(i,k)!=NULL) 01113 { 01114 if (seq_input!=NULL) 01115 { 01116 // just one plif 01117 if (CMath::is_finite(seq_input->element(i,j,k))) 01118 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(seq_input->element(i,j,k), svm_value) ; 01119 else 01120 // keep infinity values 01121 seq.element(i,j) = seq_input->element(i, j, k) ; 01122 } 01123 else 01124 { 01125 if (k==0) 01126 { 01127 // just one plif 01128 if (CMath::is_finite(m_seq_sparse1->get_feature(i,j))) 01129 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ; 01130 else 01131 // keep infinity values 01132 seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ; 01133 } 01134 if (k==1) 01135 { 01136 // just one plif 01137 if (CMath::is_finite(m_seq_sparse2->get_feature(i,j))) 01138 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ; 01139 else 01140 // keep infinity values 01141 seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ; 01142 } 01143 } 01144 } 01145 else 01146 break ; 01147 } 01148 delete seq_input; 01149 delete[] svm_value; 01150 } 01151 01152 // allow longer transitions than look_back 01153 bool long_transitions = m_long_transitions ; 01154 CArray2<int32_t> long_transition_content_start_position(m_N,m_N) ; 01155 long_transition_content_start_position.set_name("long_transition_content_start_position"); 01156 #ifdef DYNPROG_DEBUG 01157 CArray2<int32_t> long_transition_content_end_position(m_N,m_N) ; 01158 long_transition_content_end_position.set_name("long_transition_content_end_position"); 01159 #endif 01160 CArray2<int32_t> long_transition_content_start(m_N,m_N) ; 01161 long_transition_content_start.set_name("long_transition_content_start"); 01162 CArray2<float64_t> long_transition_content_scores(m_N,m_N) ; 01163 long_transition_content_scores.set_name("long_transition_content_scores"); 01164 #ifdef DYNPROG_DEBUG 01165 CArray2<float64_t> long_transition_content_scores_pen(m_N,m_N) ; 01166 long_transition_content_scores_pen.set_name("long_transition_content_scores_pen"); 01167 CArray2<float64_t> long_transition_content_scores_prev(m_N,m_N) ; 01168 long_transition_content_scores_prev.set_name("long_transition_content_scores_prev"); 01169 CArray2<float64_t> long_transition_content_scores_elem(m_N,m_N) ; 01170 long_transition_content_scores_elem.set_name("long_transition_content_scores_elem"); 01171 #endif 01172 CArray2<float64_t> long_transition_content_scores_loss(m_N,m_N) ; 01173 long_transition_content_scores_loss.set_name("long_transition_content_scores_loss"); 01174 01175 if (nbest!=1) 01176 { 01177 SG_ERROR("Long transitions are not supported for nbest!=1") ; 01178 long_transitions = false ; 01179 } 01180 long_transition_content_scores.set_const(-CMath::INFTY); 01181 #ifdef DYNPROG_DEBUG 01182 long_transition_content_scores_pen.set_const(0) ; 01183 long_transition_content_scores_elem.set_const(0) ; 01184 long_transition_content_scores_prev.set_const(0) ; 01185 #endif 01186 if (with_loss) 01187 long_transition_content_scores_loss.set_const(0) ; 01188 long_transition_content_start.zero() ; 01189 long_transition_content_start_position.zero() ; 01190 #ifdef DYNPROG_DEBUG 01191 long_transition_content_end_position.zero() ; 01192 #endif 01193 01194 svm_value = new float64_t [m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs]; 01195 { // initialize svm_svalue 01196 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 01197 svm_value[s]=0 ; 01198 } 01199 01200 CArray2<int32_t> look_back(m_N,m_N) ; 01201 look_back.set_name("look_back"); 01202 //CArray2<int32_t> look_back_orig(m_N,m_N) ; 01203 //look_back.set_name("look_back_orig"); 01204 01205 01206 { // determine maximal length of look-back 01207 for (int32_t i=0; i<m_N; i++) 01208 for (int32_t j=0; j<m_N; j++) 01209 { 01210 look_back.set_element(INT_MAX, i, j) ; 01211 //look_back_orig.set_element(INT_MAX, i, j) ; 01212 } 01213 01214 for (int32_t j=0; j<m_N; j++) 01215 { 01216 // only consider transitions that are actually allowed 01217 const T_STATES num_elem = trans_list_forward_cnt[j] ; 01218 const T_STATES *elem_list = trans_list_forward[j] ; 01219 01220 for (int32_t i=0; i<num_elem; i++) 01221 { 01222 T_STATES ii = elem_list[i] ; 01223 01224 CPlifBase *penij=PEN.element(j, ii) ; 01225 if (penij==NULL) 01226 { 01227 if (long_transitions) 01228 { 01229 look_back.set_element(m_long_transition_threshold, j, ii) ; 01230 //look_back_orig.set_element(m_long_transition_max, j, ii) ; 01231 } 01232 continue ; 01233 } 01234 01235 /* if the transition is an ORF or we do computation with loss, we have to disable long transitions */ 01236 if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions)) 01237 { 01238 look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ; 01239 //look_back_orig.set_element(CMath::ceil(penij->get_max_value()), j, ii) ; 01240 if (CMath::ceil(penij->get_max_value()) > max_look_back) 01241 { 01242 SG_DEBUG( "%d %d -> value: %f\n", ii,j,penij->get_max_value()); 01243 max_look_back = (int32_t) (CMath::ceil(penij->get_max_value())); 01244 } 01245 } 01246 else 01247 { 01248 look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ; 01249 //look_back_orig.set_element( (int32_t)CMath::ceil(penij->get_max_value()), j, ii) ; 01250 } 01251 01252 if (penij->uses_svm_values()) 01253 use_svm=true ; 01254 } 01255 } 01256 /* make sure max_look_back is at least as long as a long transition */ 01257 if (long_transitions) 01258 max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ; 01259 01260 /* make sure max_look_back is not longer than the whole string */ 01261 max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ; 01262 01263 int32_t num_long_transitions = 0 ; 01264 for (int32_t i=0; i<m_N; i++) 01265 for (int32_t j=0; j<m_N; j++) 01266 { 01267 if (look_back.get_element(i,j)==m_long_transition_threshold) 01268 num_long_transitions++ ; 01269 if (look_back.get_element(i,j)==INT_MAX) 01270 { 01271 if (long_transitions) 01272 { 01273 look_back.set_element(m_long_transition_threshold, i, j) ; 01274 //look_back_orig.set_element(m_long_transition_max, i, j) ; 01275 } 01276 else 01277 { 01278 look_back.set_element(max_look_back, i, j) ; 01279 //look_back_orig.set_element(m_long_transition_max, i, j) ; 01280 } 01281 } 01282 } 01283 SG_DEBUG("Using %i long transitions\n", num_long_transitions) ; 01284 } 01285 //SG_PRINT("max_look_back: %i \n", max_look_back) ; 01286 01287 //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr.get_dim1()) ; 01288 SG_DEBUG("use_svm=%i\n", use_svm) ; 01289 01290 SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest); 01291 const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ; 01292 SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ; 01293 /*const float64_t mem_use = (float64_t)(m_seq_len*m_N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+ 01294 look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+ 01295 m_seq_len*(sizeof(T_STATES)+sizeof(int32_t))+ 01296 m_genestr.get_dim1()*sizeof(bool))/(1024*1024);*/ 01297 01298 //bool is_big = (mem_use>200) || (m_seq_len>5000) ; 01299 01300 /*if (is_big) 01301 { 01302 SG_DEBUG("calling compute_nbest_paths: m_seq_len=%i, m_N=%i, lookback=%i nbest=%i\n", 01303 m_seq_len, m_N, max_look_back, nbest) ; 01304 SG_DEBUG("allocating %1.2fMB of memory\n", 01305 mem_use) ; 01306 }*/ 01307 ASSERT(nbest<32000) ; 01308 01309 01310 01311 CArray3<float64_t> delta(m_seq_len, m_N, nbest) ; 01312 delta.set_name("delta"); 01313 float64_t* delta_array = delta.get_array() ; 01314 //delta.zero() ; 01315 01316 CArray3<T_STATES> psi(m_seq_len, m_N, nbest) ; 01317 psi.set_name("psi"); 01318 //psi.zero() ; 01319 01320 CArray3<int16_t> ktable(m_seq_len, m_N, nbest) ; 01321 ktable.set_name("ktable"); 01322 //ktable.zero() ; 01323 01324 CArray3<int32_t> ptable(m_seq_len, m_N, nbest) ; 01325 ptable.set_name("ptable"); 01326 //ptable.zero() ; 01327 01328 CArray<float64_t> delta_end(nbest) ; 01329 delta_end.set_name("delta_end"); 01330 //delta_end.zero() ; 01331 01332 CArray<T_STATES> path_ends(nbest) ; 01333 path_ends.set_name("path_ends"); 01334 //path_ends.zero() ; 01335 01336 CArray<int16_t> ktable_end(nbest) ; 01337 ktable_end.set_name("ktable_end"); 01338 //ktable_end.zero() ; 01339 01340 float64_t * fixedtempvv=new float64_t[look_back_buflen] ; 01341 memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ; 01342 int32_t * fixedtempii=new int32_t[look_back_buflen] ; 01343 memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ; 01344 01345 CArray<float64_t> oldtempvv(look_back_buflen) ; 01346 oldtempvv.set_name("oldtempvv"); 01347 CArray<float64_t> oldtempvv2(look_back_buflen) ; 01348 oldtempvv2.set_name("oldtempvv2"); 01349 //oldtempvv.zero() ; 01350 //oldtempvv.display_size() ; 01351 01352 CArray<int32_t> oldtempii(look_back_buflen) ; 01353 oldtempii.set_name("oldtempii"); 01354 CArray<int32_t> oldtempii2(look_back_buflen) ; 01355 oldtempii2.set_name("oldtempii2"); 01356 //oldtempii.zero() ; 01357 01358 CArray<T_STATES> state_seq(m_seq_len) ; 01359 state_seq.set_name("state_seq"); 01360 //state_seq.zero() ; 01361 01362 CArray<int32_t> pos_seq(m_seq_len) ; 01363 pos_seq.set_name("pos_seq"); 01364 //pos_seq.zero() ; 01365 01366 01367 m_dict_weights.set_name("dict_weights") ; 01368 m_word_degree.set_name("word_degree") ; 01369 m_cum_num_words.set_name("cum_num_words") ; 01370 m_num_words.set_name("num_words") ; 01371 //word_used.set_name("word_used") ; 01372 //svm_values_unnormalized.set_name("svm_values_unnormalized") ; 01373 //m_svm_pos_start.set_name("svm_pos_start") ; 01374 m_num_unique_words.set_name("num_unique_words") ; 01375 01376 PEN.set_name("PEN") ; 01377 seq.set_name("seq") ; 01378 01379 delta.set_name("delta") ; 01380 psi.set_name("psi") ; 01381 ktable.set_name("ktable") ; 01382 ptable.set_name("ptable") ; 01383 delta_end.set_name("delta_end") ; 01384 path_ends.set_name("path_ends") ; 01385 ktable_end.set_name("ktable_end") ; 01386 01387 #ifdef USE_TMP_ARRAYCLASS 01388 fixedtempvv.set_name("fixedtempvv") ; 01389 fixedtempii.set_name("fixedtempvv") ; 01390 #endif 01391 01392 oldtempvv.set_name("oldtempvv") ; 01393 oldtempvv2.set_name("oldtempvv2") ; 01394 oldtempii.set_name("oldtempii") ; 01395 oldtempii2.set_name("oldtempii2") ; 01396 01397 01399 01400 #ifdef DYNPROG_DEBUG 01401 state_seq.display_size() ; 01402 pos_seq.display_size() ; 01403 01404 m_dict_weights.display_size() ; 01405 m_word_degree.display_array() ; 01406 m_cum_num_words.display_array() ; 01407 m_num_words.display_array() ; 01408 //word_used.display_size() ; 01409 //svm_values_unnormalized.display_size() ; 01410 //m_svm_pos_start.display_array() ; 01411 m_num_unique_words.display_array() ; 01412 01413 PEN.display_size() ; 01414 PEN_state_signals.display_size() ; 01415 seq.display_size() ; 01416 m_orf_info.display_size() ; 01417 01418 //m_genestr_stop.display_size() ; 01419 delta.display_size() ; 01420 psi.display_size() ; 01421 ktable.display_size() ; 01422 ptable.display_size() ; 01423 delta_end.display_size() ; 01424 path_ends.display_size() ; 01425 ktable_end.display_size() ; 01426 01427 #ifdef USE_TMP_ARRAYCLASS 01428 fixedtempvv.display_size() ; 01429 fixedtempii.display_size() ; 01430 #endif 01431 01432 //oldtempvv.display_size() ; 01433 //oldtempii.display_size() ; 01434 01435 state_seq.display_size() ; 01436 pos_seq.display_size() ; 01437 01438 //seq.zero() ; 01439 01440 #endif //DYNPROG_DEBUG 01441 01443 01444 01445 01446 { 01447 for (int32_t s=0; s<m_num_svms; s++) 01448 ASSERT(m_string_words_array[s]<1) ; 01449 } 01450 01451 01452 //CArray2<int32_t*> trans_matrix_svms(m_N,m_N); 01453 //CArray2<int32_t> trans_matrix_num_svms(m_N,m_N); 01454 01455 { // initialization 01456 01457 for (T_STATES i=0; i<m_N; i++) 01458 { 01459 //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ; // get_p defined in HMM.h to be equiv to initial_state_distribution 01460 delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ; // get_p defined in HMM.h to be equiv to initial_state_distribution 01461 psi.element(0,i,0) = 0 ; 01462 if (nbest>1) 01463 ktable.element(0,i,0) = 0 ; 01464 ptable.element(0,i,0) = 0 ; 01465 for (int16_t k=1; k<nbest; k++) 01466 { 01467 int32_t dim1, dim2, dim3 ; 01468 delta.get_array_size(dim1, dim2, dim3) ; 01469 //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3) ; 01470 //delta.element(0, i, k) = -CMath::INFTY ; 01471 delta.element(delta_array, 0, i, k, m_seq_len, m_N) = -CMath::INFTY ; 01472 psi.element(0,i,0) = 0 ; // <--- what's this for? 01473 if (nbest>1) 01474 ktable.element(0,i,k) = 0 ; 01475 ptable.element(0,i,k) = 0 ; 01476 } 01477 /* 01478 for (T_STATES j=0; j<m_N; j++) 01479 { 01480 CPlifBase * penalty = PEN.element(i,j) ; 01481 int32_t num_current_svms=0; 01482 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 01483 if (penalty) 01484 { 01485 SG_PRINT("trans %i -> %i \n",i,j); 01486 penalty->get_used_svms(&num_current_svms, svm_ids); 01487 trans_matrix_svms.set_element(svm_ids,i,j); 01488 for (int32_t l=0;l<num_current_svms;l++) 01489 SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l]); 01490 trans_matrix_num_svms.set_element(num_current_svms,i,j); 01491 } 01492 } 01493 */ 01494 01495 } 01496 } 01497 01498 SG_DEBUG("START_RECURSION \n\n"); 01499 01500 // recursion 01501 for (int32_t t=1; t<m_seq_len; t++) 01502 { 01503 //if (is_big && t%(1+(m_seq_len/1000))==1) 01504 // SG_PROGRESS(t, 0, m_seq_len); 01505 //SG_PRINT("%i\n", t) ; 01506 01507 for (T_STATES j=0; j<m_N; j++) 01508 { 01509 if (seq.element(j,t)<=-1e20) 01510 { // if we cannot observe the symbol here, then we can omit the rest 01511 for (int16_t k=0; k<nbest; k++) 01512 { 01513 delta.element(delta_array, t, j, k, m_seq_len, m_N) = seq.element(j,t) ; 01514 psi.element(t,j,k) = 0 ; 01515 if (nbest>1) 01516 ktable.element(t,j,k) = 0 ; 01517 ptable.element(t,j,k) = 0 ; 01518 } 01519 } 01520 else 01521 { 01522 const T_STATES num_elem = trans_list_forward_cnt[j] ; 01523 const T_STATES *elem_list = trans_list_forward[j] ; 01524 const float64_t *elem_val = trans_list_forward_val[j] ; 01525 const int32_t *elem_id = trans_list_forward_id[j] ; 01526 01527 int32_t fixed_list_len = 0 ; 01528 float64_t fixedtempvv_ = CMath::INFTY ; 01529 int32_t fixedtempii_ = 0 ; 01530 bool fixedtemplong = false ; 01531 01532 for (int32_t i=0; i<num_elem; i++) 01533 { 01534 T_STATES ii = elem_list[i] ; 01535 01536 const CPlifBase * penalty = PEN.element(j,ii) ; 01537 01538 /*int32_t look_back = max_look_back ; 01539 if (0) 01540 { // find lookback length 01541 CPlifBase *pen = (CPlifBase*) penalty ; 01542 if (pen!=NULL) 01543 look_back=(int32_t) (CMath::ceil(pen->get_max_value())); 01544 if (look_back>=1e6) 01545 SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ; 01546 ASSERT(look_back<1e6); 01547 } */ 01548 01549 int32_t look_back_ = look_back.element(j, ii) ; 01550 01551 int32_t orf_from = m_orf_info.element(ii,0) ; 01552 int32_t orf_to = m_orf_info.element(j,1) ; 01553 if((orf_from!=-1)!=(orf_to!=-1)) 01554 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ; 01555 ASSERT((orf_from!=-1)==(orf_to!=-1)) ; 01556 01557 int32_t orf_target = -1 ; 01558 if (orf_from!=-1) 01559 { 01560 orf_target=orf_to-orf_from ; 01561 if (orf_target<0) 01562 orf_target+=3 ; 01563 ASSERT(orf_target>=0 && orf_target<3) ; 01564 } 01565 01566 int32_t orf_last_pos = m_pos[t] ; 01567 #ifdef DYNPROG_TIMING 01568 MyTime3.start() ; 01569 #endif 01570 int32_t num_ok_pos = 0 ; 01571 float64_t last_mval=0 ; 01572 int32_t last_ts = 0 ; 01573 01574 for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--) 01575 { 01576 bool ok ; 01577 //int32_t plen=t-ts; 01578 01579 /*for (int32_t s=0; s<m_num_svms; s++) 01580 if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) || 01581 (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6)) 01582 { 01583 SG_DEBUG( "s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen]); 01584 }*/ 01585 01586 if (orf_target==-1) 01587 ok=true ; 01588 else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target) 01589 ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ; 01590 else 01591 ok=false ; 01592 01593 if (ok) 01594 { 01595 01596 float64_t segment_loss = 0.0 ; 01597 if (with_loss) 01598 { 01599 segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]); 01600 //if (segment_loss!=segment_loss2) 01601 //SG_PRINT("segment_loss:%f segment_loss2:%f\n", segment_loss, segment_loss2); 01602 } 01604 // BEST_PATH_TRANS 01606 01607 int32_t frame = orf_from;//m_orf_info.element(ii,0); 01608 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame); 01609 01610 float64_t pen_val = 0.0 ; 01611 if (penalty) 01612 { 01613 #ifdef DYNPROG_TIMING_DETAIL 01614 MyTime.start() ; 01615 #endif 01616 pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ; 01617 01618 #ifdef DYNPROG_TIMING_DETAIL 01619 MyTime.stop() ; 01620 content_plifs_time += MyTime.time_diff_sec() ; 01621 #endif 01622 } 01623 01624 #ifdef DYNPROG_TIMING_DETAIL 01625 MyTime.start() ; 01626 #endif 01627 num_ok_pos++ ; 01628 01629 if (nbest==1) 01630 { 01631 float64_t val = elem_val[i] + pen_val ; 01632 if (with_loss) 01633 val += segment_loss ; 01634 01635 float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ; 01636 01637 if (mval<fixedtempvv_) 01638 { 01639 fixedtempvv_ = mval ; 01640 fixedtempii_ = ii + ts*m_N; 01641 fixed_list_len = 1 ; 01642 fixedtemplong = false ; 01643 } 01644 last_mval = mval ; 01645 last_ts = ts ; 01646 } 01647 else 01648 { 01649 for (int16_t diff=0; diff<nbest; diff++) 01650 { 01651 float64_t val = elem_val[i] ; 01652 val += pen_val ; 01653 if (with_loss) 01654 val += segment_loss ; 01655 01656 float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ; 01657 01658 /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */ 01659 /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/ 01660 /* fixed_list_len has the number of elements in fixedtempvv */ 01661 01662 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1]))) 01663 { 01664 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) ) 01665 { 01666 fixedtempvv[fixed_list_len] = mval ; 01667 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest; 01668 fixed_list_len++ ; 01669 } 01670 else // must have mval < fixedtempvv[fixed_list_len-1] 01671 { 01672 int32_t addhere = fixed_list_len; 01673 while ((addhere > 0) && (mval < fixedtempvv[addhere-1])) 01674 addhere--; 01675 01676 // move everything from addhere+1 one forward 01677 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--) 01678 { 01679 fixedtempvv[jj] = fixedtempvv[jj-1]; 01680 fixedtempii[jj] = fixedtempii[jj-1]; 01681 } 01682 01683 fixedtempvv[addhere] = mval; 01684 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest; 01685 01686 if (fixed_list_len < nbest) 01687 fixed_list_len++; 01688 } 01689 } 01690 } 01691 } 01692 #ifdef DYNPROG_TIMING_DETAIL 01693 MyTime.stop() ; 01694 inner_loop_max_time += MyTime.time_diff_sec() ; 01695 #endif 01696 } 01697 } 01698 #ifdef DYNPROG_TIMING 01699 MyTime3.stop() ; 01700 inner_loop_time += MyTime3.time_diff_sec() ; 01701 #endif 01702 } 01703 for (int32_t i=0; i<num_elem; i++) 01704 { 01705 T_STATES ii = elem_list[i] ; 01706 01707 const CPlifBase * penalty = PEN.element(j,ii) ; 01708 01709 /*int32_t look_back = max_look_back ; 01710 if (0) 01711 { // find lookback length 01712 CPlifBase *pen = (CPlifBase*) penalty ; 01713 if (pen!=NULL) 01714 look_back=(int32_t) (CMath::ceil(pen->get_max_value())); 01715 if (look_back>=1e6) 01716 SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ; 01717 ASSERT(look_back<1e6); 01718 } */ 01719 01720 int32_t look_back_ = look_back.element(j, ii) ; 01721 //int32_t look_back_orig_ = look_back_orig.element(j, ii) ; 01722 01723 int32_t orf_from = m_orf_info.element(ii,0) ; 01724 int32_t orf_to = m_orf_info.element(j,1) ; 01725 if((orf_from!=-1)!=(orf_to!=-1)) 01726 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ; 01727 ASSERT((orf_from!=-1)==(orf_to!=-1)) ; 01728 01729 int32_t orf_target = -1 ; 01730 if (orf_from!=-1) 01731 { 01732 orf_target=orf_to-orf_from ; 01733 if (orf_target<0) 01734 orf_target+=3 ; 01735 ASSERT(orf_target>=0 && orf_target<3) ; 01736 } 01737 01738 //int32_t loss_last_pos = t ; 01739 //float64_t last_loss = 0.0 ; 01740 01741 #ifdef DYNPROG_TIMING 01742 MyTime3.start() ; 01743 #endif 01744 01745 /* long transition stuff */ 01746 /* only do this, if 01747 * this feature is enabled 01748 * this is not a transition with ORF restrictions 01749 * the loss is switched off 01750 * nbest=1 01751 */ 01752 #ifdef DYNPROG_TIMING 01753 MyTime3.start() ; 01754 #endif 01755 // long transitions, only when not considering ORFs 01756 if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold ) 01757 { 01758 01759 // update table for 5' part of the long segment 01760 01761 int32_t start = long_transition_content_start.get_element(ii, j) ; 01762 int32_t end_5p_part = start ; 01763 for (int32_t start_5p_part=start; m_pos[t]-m_pos[start_5p_part] > m_long_transition_threshold ; start_5p_part++) 01764 { 01765 // find end_5p_part, which is greater than start_5p_part and at least m_long_transition_threshold away 01766 while (end_5p_part<=t && m_pos[end_5p_part+1]-m_pos[start_5p_part]<=m_long_transition_threshold) 01767 end_5p_part++ ; 01768 01769 ASSERT(m_pos[end_5p_part+1]-m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t) ; 01770 ASSERT(m_pos[end_5p_part]-m_pos[start_5p_part] <= m_long_transition_threshold) ; 01771 01772 float64_t pen_val = 0.0; 01773 /* recompute penalty, if necessary */ 01774 if (penalty) 01775 { 01776 int32_t frame = m_orf_info.element(ii,0); 01777 lookup_content_svm_values(start_5p_part, end_5p_part, m_pos[start_5p_part], m_pos[end_5p_part], svm_value, frame); // * t -> end_5p_part 01778 pen_val = penalty->lookup_penalty(m_pos[end_5p_part]-m_pos[start_5p_part], svm_value) ; 01779 } 01780 01781 /*if (m_pos[start_5p_part]==1003) 01782 { 01783 SG_PRINT("Part1: %i - %i vs %i - %i\n", m_pos[t], m_pos[ts], m_pos[end_5p_part], m_pos[start_5p_part]) ; 01784 SG_PRINT("Part1: ts=%i t=%i start_5p_part=%i m_seq_len=%i\n", m_pos[ts], m_pos[t], m_pos[start_5p_part], m_seq_len) ; 01785 }*/ 01786 01787 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ; 01788 //float64_t mval_trans = -( elem_val[i] + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ; // enable this for the incomplete extra check 01789 01790 float64_t segment_loss_part1=0.0 ; 01791 if (with_loss) 01792 { // this is the loss from the start of the long segment (5' part + middle section) 01793 01794 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_5p_part /*long_transition_content_start_position.get_element(ii,j)*/, end_5p_part, elem_id[i]); // * unsure 01795 01796 mval_trans -= segment_loss_part1 ; 01797 } 01798 01799 01800 if (0)//m_pos[end_5p_part] - m_pos[long_transition_content_start_position.get_element(ii, j)] > look_back_orig_/*m_long_transition_max*/) 01801 { 01802 // this restricts the maximal length of segments, 01803 // but the current implementation is not valid since the 01804 // long transition is discarded without loocking if there 01805 // is a second best long transition in between 01806 long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ; 01807 long_transition_content_start_position.set_element(0, ii, j) ; 01808 if (with_loss) 01809 long_transition_content_scores_loss.set_element(0.0, ii, j) ; 01810 #ifdef DYNPROG_DEBUG 01811 long_transition_content_scores_pen.set_element(0.0, ii, j) ; 01812 long_transition_content_scores_elem.set_element(0.0, ii, j) ; 01813 long_transition_content_scores_prev.set_element(0.0, ii, j) ; 01814 long_transition_content_end_position.set_element(0, ii, j) ; 01815 #endif 01816 } 01817 if (with_loss) 01818 { 01819 float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ; 01820 float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_5p_part, elem_id[i]); 01821 float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ; 01822 long_transition_content_scores.set_element(score, ii, j) ; 01823 long_transition_content_scores_loss.set_element(new_loss, ii, j) ; 01824 #ifdef DYNPROG_DEBUG 01825 long_transition_content_end_position.set_element(end_5p_part, ii, j) ; 01826 #endif 01827 01828 } 01829 if (-long_transition_content_scores.get_element(ii, j) > mval_trans ) 01830 { 01831 /* then the old long transition is either too far away or worse than the current one */ 01832 long_transition_content_scores.set_element(-mval_trans, ii, j) ; 01833 long_transition_content_start_position.set_element(start_5p_part, ii, j) ; 01834 if (with_loss) 01835 long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ; 01836 #ifdef DYNPROG_DEBUG 01837 long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ; 01838 long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ; 01839 long_transition_content_scores_prev.set_element(delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ; 01840 /*ASSERT(fabs(long_transition_content_scores.get_element(ii, j)-(long_transition_content_scores_pen.get_element(ii, j) + 01841 long_transition_content_scores_elem.get_element(ii, j) + 01842 long_transition_content_scores_prev.get_element(ii, j)))<1e-6) ;*/ 01843 long_transition_content_end_position.set_element(end_5p_part, ii, j) ; 01844 #endif 01845 } 01846 // 01847 // this sets the position where the search for better 5'parts is started the next time 01848 // whithout this the prediction takes ages 01849 // 01850 long_transition_content_start.set_element(start_5p_part, ii, j) ; 01851 } 01852 01853 // consider the 3' part at the end of the long segment: 01854 // * with length = m_long_transition_threshold 01855 // * content prediction and loss only for this part 01856 01857 // find ts > 0 with distance from m_pos[t] greater m_long_transition_threshold 01858 // precompute: only depends on t 01859 int ts = t; 01860 while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold) 01861 ts-- ; 01862 01863 if (ts>0) 01864 { 01865 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) ; 01866 01867 01868 /* only consider this transition, if the right position was found */ 01869 float pen_val_3p = 0.0 ; 01870 if (penalty) 01871 { 01872 int32_t frame = orf_from ; //m_orf_info.element(ii, 0); 01873 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame); 01874 pen_val_3p = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ; 01875 } 01876 01877 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val_3p*0.5) ; 01878 01879 { 01880 #ifdef DYNPROG_DEBUG 01881 float64_t segment_loss_part2=0.0 ; 01882 float64_t segment_loss_part1=0.0 ; 01883 #endif 01884 float64_t segment_loss_total=0.0 ; 01885 01886 if (with_loss) 01887 { // this is the loss for the 3' end fragment of the segment 01888 // (the 5' end and the middle section loss is already contained in mval) 01889 01890 #ifdef DYNPROG_DEBUG 01891 // this is an alternative, which should be identical, if the loss is additive 01892 segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]); 01893 //mval -= segment_loss_part2 ; 01894 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]); 01895 #endif 01896 segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]); 01897 mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ; 01898 } 01899 01900 #ifdef DYNPROG_DEBUG 01901 if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561) 01902 { 01903 SG_PRINT("Part2: %i,%i,%i: val=%1.6f pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i); scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n", 01904 m_pos[t], j, ii, -mval, 0.5*pen_val_3p, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1], 01905 long_transition_content_scores.get_element(ii, j), 01906 long_transition_content_scores_pen.get_element(ii, j), 01907 long_transition_content_scores_prev.get_element(ii, j), 01908 long_transition_content_scores_elem.get_element(ii, j), 01909 long_transition_content_scores_loss.get_element(ii, j), 01910 m_pos[long_transition_content_start_position.get_element(ii,j)], 01911 m_pos[long_transition_content_end_position.get_element(ii,j)], 01912 m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ; 01913 SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] ); 01914 } 01915 01916 if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3) 01917 { 01918 SG_ERROR("LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n", 01919 segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t], 01920 long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)], 01921 segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t], 01922 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j), 01923 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ; 01924 } 01925 #endif 01926 } 01927 01928 // prefer simpler version to guarantee optimality 01929 // 01930 // original: 01931 /* if ((mval < fixedtempvv_) && 01932 (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_) */ 01933 if (mval < fixedtempvv_) 01934 { 01935 /* then the long transition is better than the short one => replace it */ 01936 int32_t fromtjk = fixedtempii_ ; 01937 /*SG_PRINT("%i,%i: Long transition (%1.5f=-(%1.5f+%1.5f+%1.5f+%1.5f), %i) to m_pos %i better than short transition (%1.5f,%i) to m_pos %i \n", 01938 m_pos[t], j, 01939 mval, pen_val_3p*0.5, long_transition_content_scores_pen.get_element(ii, j), long_transition_content_scores_elem.get_element(ii, j), long_transition_content_scores_prev.get_element(ii, j), ii, 01940 m_pos[long_transition_content_position.get_element(ii, j)], 01941 fixedtempvv_, (fromtjk%m_N), m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]) ;*/ 01942 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) ; 01943 01944 fixedtempvv_ = mval ; 01945 fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ; 01946 fixed_list_len = 1 ; 01947 fixedtemplong = true ; 01948 } 01949 01950 /* // extra check 01951 float64_t mval_trans2 = -( elem_val[i] + pen_val_3p + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ; 01952 if (last_ts==ts && fabs(last_mval-mval_trans2)>1e-5) 01953 SG_PRINT("last_mval=%1.2f at m_pos %i vs. mval_trans2=%1.2f at m_pos %i (diff=%f)\n", last_mval, m_pos[last_ts], mval_trans2, m_pos[ts], last_mval-mval_trans2) ; 01954 */ 01955 } 01956 } 01957 } 01958 #ifdef DYNPROG_TIMING 01959 MyTime3.stop() ; 01960 long_transition_time += MyTime3.time_diff_sec() ; 01961 #endif 01962 01963 01964 int32_t numEnt = fixed_list_len; 01965 01966 float64_t minusscore; 01967 int64_t fromtjk; 01968 01969 for (int16_t k=0; k<nbest; k++) 01970 { 01971 if (k<numEnt) 01972 { 01973 if (nbest==1) 01974 { 01975 minusscore = fixedtempvv_ ; 01976 fromtjk = fixedtempii_ ; 01977 } 01978 else 01979 { 01980 minusscore = fixedtempvv[k]; 01981 fromtjk = fixedtempii[k]; 01982 } 01983 01984 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -minusscore + seq.element(j,t); 01985 psi.element(t,j,k) = (fromtjk%m_N) ; 01986 if (nbest>1) 01987 ktable.element(t,j,k) = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ; 01988 ptable.element(t,j,k) = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ; 01989 } 01990 else 01991 { 01992 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -CMath::INFTY ; 01993 psi.element(t,j,k) = 0 ; 01994 if (nbest>1) 01995 ktable.element(t,j,k) = 0 ; 01996 ptable.element(t,j,k) = 0 ; 01997 } 01998 } 01999 } 02000 } 02001 } 02002 { //termination 02003 int32_t list_len = 0 ; 02004 for (int16_t diff=0; diff<nbest; diff++) 02005 { 02006 for (T_STATES i=0; i<m_N; i++) 02007 { 02008 oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ; 02009 oldtempii[list_len] = i + diff*m_N ; 02010 list_len++ ; 02011 } 02012 } 02013 02014 CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ; 02015 02016 for (int16_t k=0; k<nbest; k++) 02017 { 02018 delta_end.element(k) = -oldtempvv[k] ; 02019 path_ends.element(k) = (oldtempii[k]%m_N) ; 02020 if (nbest>1) 02021 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ; 02022 } 02023 02024 02025 } 02026 02027 { //state sequence backtracking 02028 for (int16_t k=0; k<nbest; k++) 02029 { 02030 prob_nbest[k]= delta_end.element(k) ; 02031 02032 int32_t i = 0 ; 02033 state_seq[i] = path_ends.element(k) ; 02034 int16_t q = 0 ; 02035 if (nbest>1) 02036 q=ktable_end.element(k) ; 02037 pos_seq[i] = m_seq_len-1 ; 02038 02039 while (pos_seq[i]>0) 02040 { 02041 ASSERT(i+1<m_seq_len); 02042 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ; 02043 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q); 02044 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ; 02045 if (nbest>1) 02046 q = ktable.element(pos_seq[i], state_seq[i], q) ; 02047 i++ ; 02048 } 02049 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ; 02050 int32_t num_states = i+1 ; 02051 for (i=0; i<num_states;i++) 02052 { 02053 my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ; 02054 my_pos_seq[i+k*m_seq_len] = pos_seq[num_states-i-1] ; 02055 } 02056 if (num_states<m_seq_len) 02057 { 02058 my_state_seq[num_states+k*m_seq_len]=-1 ; 02059 my_pos_seq[num_states+k*m_seq_len]=-1 ; 02060 } 02061 } 02062 } 02063 02064 //if (is_big) 02065 // SG_PRINT( "DONE. \n") ; 02066 02067 02068 #ifdef DYNPROG_TIMING 02069 MyTime2.stop() ; 02070 02071 //if (is_big) 02072 SG_PRINT("Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n content_svm_values_time=%1.2f content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec()) ; 02073 #endif 02074 02075 delete[] fixedtempvv ; 02076 delete[] fixedtempii ; 02077 } 02078 02079 02080 void CDynProg::best_path_trans_deriv( 02081 int32_t *my_state_seq, int32_t *my_pos_seq, 02082 int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals) 02083 { 02084 m_initial_state_distribution_p_deriv.resize_array(m_N) ; 02085 m_end_state_distribution_q_deriv.resize_array(m_N) ; 02086 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ; 02087 //m_my_scores.resize_array(m_my_state_seq.get_array_size()) ; 02088 //m_my_losses.resize_array(m_my_state_seq.get_array_size()) ; 02089 m_my_scores.resize_array(my_seq_len); 02090 m_my_losses.resize_array(my_seq_len); 02091 float64_t* my_scores=m_my_scores.get_array(); 02092 float64_t* my_losses=m_my_losses.get_array(); 02093 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix(); 02094 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals(); 02095 02096 if (!m_svm_arrays_clean) 02097 { 02098 SG_ERROR( "SVM arrays not clean") ; 02099 return ; 02100 } ; 02101 //SG_PRINT( "genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num) ; 02102 //m_mod_words.display() ; 02103 //m_sign_words.display() ; 02104 //m_string_words.display() ; 02105 02106 bool use_svm = false ; 02107 02108 CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ; 02109 PEN.set_name("PEN"); 02110 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ; 02111 PEN_state_signals.set_name("PEN_state_signals"); 02112 CArray3<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ; 02113 seq_input.set_name("seq_input"); 02114 02115 { // determine whether to use svm outputs and clear derivatives 02116 for (int32_t i=0; i<m_N; i++) 02117 for (int32_t j=0; j<m_N; j++) 02118 { 02119 CPlifBase *penij=PEN.element(i,j) ; 02120 if (penij==NULL) 02121 continue ; 02122 02123 if (penij->uses_svm_values()) 02124 use_svm=true ; 02125 penij->penalty_clear_derivative() ; 02126 } 02127 for (int32_t i=0; i<m_N; i++) 02128 for (int32_t j=0; j<max_num_signals; j++) 02129 { 02130 CPlifBase *penij=PEN_state_signals.element(i,j) ; 02131 if (penij==NULL) 02132 continue ; 02133 if (penij->uses_svm_values()) 02134 use_svm=true ; 02135 penij->penalty_clear_derivative() ; 02136 } 02137 } 02138 02139 { // set derivatives of p, q and a to zero 02140 02141 for (int32_t i=0; i<m_N; i++) 02142 { 02143 m_initial_state_distribution_p_deriv.element(i)=0 ; 02144 m_end_state_distribution_q_deriv.element(i)=0 ; 02145 for (int32_t j=0; j<m_N; j++) 02146 m_transition_matrix_a_deriv.element(i,j)=0 ; 02147 } 02148 } 02149 02150 { // clear score vector 02151 for (int32_t i=0; i<my_seq_len; i++) 02152 { 02153 my_scores[i]=0.0 ; 02154 my_losses[i]=0.0 ; 02155 } 02156 } 02157 02158 //int32_t total_len = 0 ; 02159 02160 //m_transition_matrix_a.display_array() ; 02161 //m_transition_matrix_a_id.display_array() ; 02162 02163 // compute derivatives for given path 02164 float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs]; 02165 float64_t* svm_value_part1 = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs]; 02166 float64_t* svm_value_part2 = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs]; 02167 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02168 { 02169 svm_value[s]=0 ; 02170 svm_value_part1[s]=0 ; 02171 svm_value_part2[s]=0 ; 02172 } 02173 02174 //#ifdef DYNPROG_DEBUG 02175 float64_t total_score = 0.0 ; 02176 float64_t total_loss = 0.0 ; 02177 //#endif 02178 02179 ASSERT(my_state_seq[0]>=0) ; 02180 m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ; 02181 my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ; 02182 02183 ASSERT(my_state_seq[my_seq_len-1]>=0) ; 02184 m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ; 02185 my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]); 02186 02187 //#ifdef DYNPROG_DEBUG 02188 total_score += my_scores[0] + my_scores[my_seq_len-1] ; 02189 //#endif 02190 02191 SG_DEBUG( "m_seq_len=%i\n", my_seq_len) ; 02192 for (int32_t i=0; i<my_seq_len-1; i++) 02193 { 02194 if (my_state_seq[i+1]==-1) 02195 break ; 02196 int32_t from_state = my_state_seq[i] ; 02197 int32_t to_state = my_state_seq[i+1] ; 02198 int32_t from_pos = my_pos_seq[i] ; 02199 int32_t to_pos = my_pos_seq[i+1] ; 02200 02201 int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ; 02202 my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id); 02203 02204 #ifdef DYNPROG_DEBUG 02205 02206 02207 if (i>0)// test if segment loss is additive 02208 { 02209 float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id); 02210 float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id); 02211 float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id); 02212 SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3); 02213 if (CMath::abs(loss1+loss2-loss3)>0) 02214 { 02215 SG_PRINT( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ; 02216 } 02217 } 02218 io->set_loglevel(M_DEBUG) ; 02219 SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ; 02220 #endif 02221 // increase usage of this transition 02222 m_transition_matrix_a_deriv.element(from_state, to_state)++ ; 02223 my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ; 02224 //SG_PRINT("m_transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, m_transition_matrix_a.element(from_state, to_state)); 02225 #ifdef DYNPROG_DEBUG 02226 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ; 02227 #endif 02228 02229 /*int32_t last_svm_pos[m_num_degrees] ; 02230 for (int32_t qq=0; qq<m_num_degrees; qq++) 02231 last_svm_pos[qq]=-1 ;*/ 02232 02233 bool is_long_transition = false ; 02234 if (m_long_transitions) 02235 { 02236 if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold) 02237 is_long_transition = true ; 02238 if (m_orf_info.element(from_state,0)!=-1) 02239 is_long_transition = false ; 02240 } 02241 02242 int32_t from_pos_thresh = from_pos ; 02243 int32_t to_pos_thresh = to_pos ; 02244 02245 if (use_svm) 02246 { 02247 if (is_long_transition) 02248 { 02249 02250 while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold) // * 02251 from_pos_thresh++ ; 02252 ASSERT(from_pos_thresh<to_pos) ; 02253 ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold); // * 02254 ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold);// * 02255 02256 int32_t frame = m_orf_info.element(from_state,0); 02257 lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame); 02258 02259 #ifdef DYNPROG_DEBUG 02260 SG_PRINT("part1: pos1: %i pos2: %i pos3: %i \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1]) ; 02261 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02262 SG_PRINT("%1.4f ", svm_value_part1[s]); 02263 SG_PRINT("\n"); 02264 #endif 02265 02266 while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold) // * 02267 to_pos_thresh-- ; 02268 ASSERT(to_pos_thresh>0) ; 02269 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold) ; // * 02270 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold) ; // * 02271 02272 lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame); 02273 02274 #ifdef DYNPROG_DEBUG 02275 SG_PRINT("part2: pos1: %i pos2: %i pos3: %i \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1]) ; 02276 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02277 SG_PRINT("%1.4f ", svm_value_part2[s]); 02278 SG_PRINT("\n"); 02279 #endif 02280 } 02281 else 02282 { 02283 /* normal case */ 02284 02285 //SG_PRINT("from_pos: %i; to_pos: %i; m_pos[to_pos]-m_pos[from_pos]: %i \n",from_pos, to_pos, m_pos[to_pos]-m_pos[from_pos]); 02286 int32_t frame = m_orf_info.element(from_state,0); 02287 if (false)//(frame>=0) 02288 { 02289 int32_t num_current_svms=0; 02290 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 02291 SG_PRINT("penalties(%i, %i), frame:%i ", from_state, to_state, frame); 02292 PEN.element(to_state, from_state)->get_used_svms(&num_current_svms, svm_ids); 02293 SG_PRINT("\n"); 02294 } 02295 02296 lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame); 02297 #ifdef DYNPROG_DEBUG 02298 SG_PRINT("part2: pos1: %i pos2: %i \nsvm_values: ", m_pos[from_pos], m_pos[to_pos]) ; 02299 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02300 SG_PRINT("%1.4f ", svm_value[s]); 02301 SG_PRINT("\n"); 02302 #endif 02303 } 02304 } 02305 02306 if (PEN.element(to_state, from_state)!=NULL) 02307 { 02308 float64_t nscore = 0 ; 02309 if (is_long_transition) 02310 { 02311 float64_t pen_value_part1 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ; 02312 float64_t pen_value_part2 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ; 02313 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ; 02314 } 02315 else 02316 nscore = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ; 02317 02318 if (false)//(nscore<-1e9) 02319 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n", 02320 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, nscore) ; 02321 02322 my_scores[i] += nscore ; 02323 02324 for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/ 02325 { 02326 svm_value[s]=-CMath::INFTY; 02327 svm_value_part1[s]=-CMath::INFTY; 02328 svm_value_part2[s]=-CMath::INFTY; 02329 } 02330 02331 #ifdef DYNPROG_DEBUG 02332 //SG_DEBUG( "%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, m_pos[from_pos], to_pos, m_pos[to_pos], m_pos[to_pos]-m_pos[from_pos]) ; 02333 #endif 02334 if (is_long_transition) 02335 { 02336 #ifdef DYNPROG_DEBUG 02337 float64_t sum_score = 0.0 ; 02338 02339 for (int kk=0; kk<i; kk++) 02340 sum_score += my_scores[i] ; 02341 02342 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i) 2: %1.6f (%i-%i) \n", 02343 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, 02344 nscore, sum_score, 02345 PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh], 02346 PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ; 02347 #endif 02348 } 02349 02350 if (is_long_transition) 02351 { 02352 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ; 02353 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ; 02354 } 02355 else 02356 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ; 02357 02358 //SG_PRINT("m_num_raw_data = %i \n", m_num_raw_data) ; 02359 02360 // for tiling array and rna-seq data every single measurement must be added to the derivative 02361 // in contrast to the content svm predictions where we have a single value per transition; 02362 // content svm predictions have already been added to the derivative, thus we start with d=1 02363 // instead of d=0 02364 if (is_long_transition) 02365 { 02366 for (int32_t d=1; d<=m_num_raw_data; d++) 02367 { 02368 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++) 02369 svm_value[s]=-CMath::INFTY; 02370 float64_t* intensities = new float64_t[m_num_probes_cum[d]]; 02371 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d); 02372 for (int32_t k=0;k<num_intensities;k++) 02373 { 02374 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02375 svm_value[j]=intensities[k]; 02376 02377 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ; 02378 02379 } 02380 num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d); 02381 for (int32_t k=0;k<num_intensities;k++) 02382 { 02383 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02384 svm_value[j]=intensities[k]; 02385 02386 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ; 02387 02388 } 02389 delete[] intensities; 02390 02391 } 02392 } 02393 else 02394 { 02395 for (int32_t d=1; d<=m_num_raw_data; d++) 02396 { 02397 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++) 02398 svm_value[s]=-CMath::INFTY; 02399 float64_t* intensities = new float64_t[m_num_probes_cum[d]]; 02400 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d); 02401 //SG_PRINT("m_pos[from_pos]:%i, m_pos[to_pos]:%i, num_intensities:%i\n",m_pos[from_pos],m_pos[to_pos], num_intensities); 02402 for (int32_t k=0;k<num_intensities;k++) 02403 { 02404 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02405 svm_value[j]=intensities[k]; 02406 02407 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ; 02408 02409 } 02410 delete[] intensities; 02411 } 02412 } 02413 02414 } 02415 #ifdef DYNPROG_DEBUG 02416 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ; 02417 #endif 02418 02419 //SG_DEBUG( "emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0) ; 02420 for (int32_t k=0; k<max_num_signals; k++) 02421 { 02422 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0)) 02423 { 02424 #ifdef DYNPROG_DEBUG 02425 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ; 02426 #endif 02427 my_scores[i] += seq_input.element(to_state, to_pos, k) ; 02428 //if (seq_input.element(to_state, to_pos, k) !=0) 02429 // SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k)); 02430 break ; 02431 } 02432 if (PEN_state_signals.element(to_state, k)!=NULL) 02433 { 02434 float64_t nscore = PEN_state_signals.element(to_state,k)->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ; // this should be ok for long_transitions (svm_value does not matter) 02435 my_scores[i] += nscore ; 02436 #ifdef DYNPROG_DEBUG 02437 if (false)//(nscore<-1e9) 02438 { 02439 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n", 02440 is_long_transition, m_pos[from_pos], from_pos, from_state, m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.element(to_state, to_pos, k)) ; 02441 for (int x=0; x<23; x++) 02442 { 02443 for (int i=-10; i<10; i++) 02444 SG_PRINT("%1.4f\t", seq_input.element(x, to_pos+i, k)); 02445 SG_PRINT("\n"); 02446 } 02447 02448 } 02449 #endif 02450 //break ; 02451 //int32_t num_current_svms=0; 02452 //int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 02453 //SG_PRINT("PEN_state_signals->id: "); 02454 //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids); 02455 //SG_PRINT("\n"); 02456 //if (nscore != 0) 02457 //SG_PRINT( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ; 02458 #ifdef DYNPROG_DEBUG 02459 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ; 02460 #endif 02461 PEN_state_signals.element(to_state,k)->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ; // this should be ok for long_transitions (svm_value does not matter) 02462 } else 02463 break ; 02464 } 02465 02466 //#ifdef DYNPROG_DEBUG 02467 //SG_PRINT( "scores[%i]=%f (final) \n", i, my_scores[i]) ; 02468 //SG_PRINT( "losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss) ; 02469 total_score += my_scores[i] ; 02470 total_loss += my_losses[i] ; 02471 //#endif 02472 } 02473 //#ifdef DYNPROG_DEBUG 02474 //SG_PRINT( "total score = %f \n", total_score) ; 02475 //SG_PRINT( "total loss = %f \n", total_loss) ; 02476 //#endif 02477 delete[] svm_value; 02478 delete[] svm_value_part1 ; 02479 delete[] svm_value_part2 ; 02480 } 02481 02482 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type) 02483 { 02484 ASSERT(from_pos<to_pos); 02485 int32_t num_intensities = 0; 02486 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[type-1]]; 02487 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]]; 02488 int32_t last_pos; 02489 int32_t num = m_num_probes_cum[type-1]; 02490 while (*p_tiling_pos<to_pos) 02491 { 02492 if (*p_tiling_pos>=from_pos) 02493 { 02494 intensities[num_intensities] = *p_tiling_data; 02495 num_intensities++; 02496 } 02497 num++; 02498 if (num>=m_num_probes_cum[type]) 02499 break; 02500 last_pos = *p_tiling_pos; 02501 p_tiling_pos++; 02502 p_tiling_data++; 02503 ASSERT(last_pos<*p_tiling_pos); 02504 } 02505 return num_intensities; 02506 } 02507 02508 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame) 02509 { 02510 #ifdef DYNPROG_TIMING_DETAIL 02511 MyTime.start() ; 02512 #endif 02513 // ASSERT(from_state<to_state); 02514 // if (!(from_pos<to_pos)) 02515 // SG_ERROR("from_pos!<to_pos, from_pos: %i to_pos: %i \n",from_pos,to_pos); 02516 for (int32_t i=0;i<m_num_svms;i++) 02517 { 02518 float64_t to_val = m_lin_feat.get_element(i, to_state); 02519 float64_t from_val = m_lin_feat.get_element(i, from_state); 02520 svm_values[i] = (to_val-from_val)/(to_pos-from_pos); 02521 } 02522 for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++) 02523 { 02524 float64_t to_val = m_lin_feat.get_element(i, to_state); 02525 float64_t from_val = m_lin_feat.get_element(i, from_state); 02526 svm_values[i] = to_val-from_val ; 02527 } 02528 if (m_intron_list) 02529 { 02530 int32_t* support = new int32_t[m_num_intron_plifs]; 02531 m_intron_list->get_intron_support(support, from_state, to_state); 02532 int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data]; 02533 int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; 02534 int32_t cnt = 0; 02535 for (int32_t i=intron_list_start; i<intron_list_end;i++) 02536 { 02537 svm_values[i] = (float64_t) (support[cnt]); 02538 cnt++; 02539 } 02540 //if (to_pos>3990 && to_pos<4010) 02541 // SG_PRINT("from_state:%i to_state:%i support[0]:%i support[1]:%i\n",from_state, to_state, support[0], support[1]); 02542 delete[] support; 02543 } 02544 // find the correct row with precomputed frame predictions 02545 if (frame!=-1) 02546 { 02547 svm_values[frame_plifs[0]] = 1e10; 02548 svm_values[frame_plifs[1]] = 1e10; 02549 svm_values[frame_plifs[2]] = 1e10; 02550 int32_t global_frame = from_pos%3; 02551 int32_t row = ((global_frame+frame)%3)+4; 02552 float64_t to_val = m_lin_feat.get_element(row, to_state); 02553 float64_t from_val = m_lin_feat.get_element(row, from_state); 02554 svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos); 02555 } 02556 #ifdef DYNPROG_TIMING_DETAIL 02557 MyTime.stop() ; 02558 content_svm_values_time += MyTime.time_diff_sec() ; 02559 #endif 02560 } 02561 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs) 02562 { 02563 m_intron_list = intron_list; 02564 m_num_intron_plifs = num_plifs; 02565 } 02566