DynProg.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2007 Soeren Sonnenburg
00008  * Written (W) 1999-2007 Gunnar Raetsch
00009  * Written (W) 2008-2009 Jonas Behr
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #include "structure/DynProg.h"
00014 #include "lib/Mathematics.h"
00015 #include "lib/io.h"
00016 #include "lib/config.h"
00017 #include "features/StringFeatures.h"
00018 #include "features/Alphabet.h"
00019 #include "structure/Plif.h"
00020 #include "structure/IntronList.h"
00021 #include "lib/Array.h"
00022 #include "lib/Array2.h"
00023 #include "lib/Array3.h"
00024 
00025 #include <stdlib.h>
00026 #include <stdio.h>
00027 #include <time.h>
00028 #include <ctype.h>
00029 
00030 using namespace shogun;
00031 
00032 //#define USE_TMP_ARRAYCLASS
00033 //#define DYNPROG_DEBUG
00034 
00035 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ;
00036 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ;
00037 int32_t CDynProg::frame_plifs[3]={4,5,6};
00038 int32_t CDynProg::num_words_default[4]=   {64,256,1024,4096} ;
00039 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1,
00040                                     1,1,1,1,1,1,1,1,
00041                                     0,0,0,0,0,0,0,0,
00042                                     0,0,0,0,0,0,0,0} ;  
00043 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true,
00044                                       false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts
00045 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0,
00046                                        1,1,1,1,1,1,1,1} ; // which string should be used
00047 
00048 CDynProg::CDynProg(int32_t num_svms /*= 8 */)
00049 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1),
00050     m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1),
00051     m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1),
00052     m_end_state_distribution_q_deriv(1),
00053 
00054       // multi svm
00055       m_num_degrees(4), 
00056       m_num_svms(num_svms), 
00057       m_word_degree(word_degree_default, m_num_degrees, true, true),
00058       m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true),
00059       m_cum_num_words_array(m_cum_num_words.get_array()),
00060       m_num_words(num_words_default, m_num_degrees, true, true),
00061       m_num_words_array(m_num_words.get_array()),
00062       m_mod_words(mod_words_default, m_num_svms, 2, true, true),
00063       m_mod_words_array(m_mod_words.get_array()),
00064       m_sign_words(sign_words_default, m_num_svms, true, true),
00065       m_sign_words_array(m_sign_words.get_array()),
00066       m_string_words(string_words_default, m_num_svms, true, true),
00067       m_string_words_array(m_string_words.get_array()),
00068       //m_svm_pos_start(m_num_degrees),
00069       m_num_unique_words(m_num_degrees),
00070       m_svm_arrays_clean(true),
00071 
00072       m_max_a_id(0), m_observation_matrix(1,1,1), 
00073       m_pos(1), 
00074       m_seq_len(0),
00075       m_orf_info(1,2), 
00076       m_plif_list(1), 
00077       m_PEN(1,1),
00078       m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2), 
00079       m_segment_ids(1),
00080       m_segment_mask(1),
00081       m_my_state_seq(1),
00082       m_my_pos_seq(1),
00083       m_my_scores(1),
00084       m_my_losses(1),
00085       m_scores(1),
00086       m_states(1,1),
00087       m_positions(1,1),
00088 
00089       m_seq_sparse1(NULL),
00090       m_seq_sparse2(NULL),
00091       m_plif_matrices(NULL),
00092       
00093       m_genestr_stop(1),
00094       m_intron_list(NULL),
00095       m_num_intron_plifs(0),
00096       m_lin_feat(1,1), //by Jonas
00097       m_raw_intensities(NULL),
00098       m_probe_pos(NULL),
00099       m_num_probes_cum(NULL),
00100       m_num_lin_feat_plifs_cum(NULL),
00101       m_num_raw_data(0),
00102       
00103       m_long_transitions(true),
00104       m_long_transition_threshold(1000),
00105       m_long_transition_max(100000)
00106 {
00107     trans_list_forward = NULL ;
00108     trans_list_forward_cnt = NULL ;
00109     trans_list_forward_val = NULL ;
00110     trans_list_forward_id = NULL ;
00111     trans_list_len = 0 ;
00112 
00113     mem_initialized = true ;
00114 
00115     m_N=1;
00116 
00117     m_raw_intensities = NULL;
00118     m_probe_pos = NULL;
00119     m_num_probes_cum = new int32_t[100];
00120     m_num_probes_cum[0] = 0;
00121     //m_use_tiling=false;
00122     m_num_lin_feat_plifs_cum = new int32_t[100];
00123     m_num_lin_feat_plifs_cum[0] = m_num_svms;
00124     m_num_raw_data = 0;
00125 #ifdef ARRAY_STATISTICS
00126     m_word_degree.set_name("word_degree");
00127 #endif
00128 
00129     m_transition_matrix_a_id.set_name("transition_matrix_a_id");
00130     m_transition_matrix_a.set_name("transition_matrix_a");
00131     m_transition_matrix_a_deriv.set_name("transition_matrix_a_deriv");
00132     m_mod_words.set_name("mod_words");
00133     m_orf_info.set_name("orf_info");
00134     m_segment_sum_weights.set_name("segment_sum_weights");
00135     m_PEN.set_name("PEN");
00136     m_PEN_state_signals.set_name("PEN_state_signals");
00137     m_dict_weights.set_name("dict_weights");
00138     m_states.set_name("states");
00139     m_positions.set_name("positions");
00140     m_lin_feat.set_name("lin_feat");
00141 
00142 
00143     m_observation_matrix.set_name("m_observation_matrix");
00144     m_segment_loss.set_name("m_segment_loss");
00145     m_seg_loss_obj = new CSegmentLoss();
00146 }
00147 
00148 CDynProg::~CDynProg()
00149 {
00150     if (trans_list_forward_cnt)
00151         delete[] trans_list_forward_cnt;
00152     if (trans_list_forward)
00153     {
00154         for (int32_t i=0; i<trans_list_len; i++)
00155         {
00156             if (trans_list_forward[i])
00157                 delete[] trans_list_forward[i];
00158         }
00159         delete[] trans_list_forward;
00160     }
00161     if (trans_list_forward_val)
00162     {
00163         for (int32_t i=0; i<trans_list_len; i++)
00164         {
00165             if (trans_list_forward_val[i])
00166                 delete[] trans_list_forward_val[i];
00167         }
00168         delete[] trans_list_forward_val;
00169     }
00170     if (trans_list_forward_id)
00171     {
00172         for (int32_t i=0; i<trans_list_len; i++)
00173         {
00174             if (trans_list_forward_id[i])
00175                 delete[] trans_list_forward_id[i];
00176         }
00177         delete[] trans_list_forward_id;
00178     }
00179     if (m_raw_intensities)
00180         delete[] m_raw_intensities;
00181     if (m_probe_pos)
00182         delete[] m_probe_pos;
00183     if (m_num_probes_cum)
00184       delete[] m_num_probes_cum ;
00185     if (m_num_lin_feat_plifs_cum)
00186       delete[] m_num_lin_feat_plifs_cum ;
00187 
00188     delete m_intron_list;
00189 
00190     SG_UNREF(m_seq_sparse1);
00191     SG_UNREF(m_seq_sparse2);
00192     SG_UNREF(m_plif_matrices);
00193 }
00194 
00196 int32_t CDynProg::get_num_svms()
00197 {
00198     return m_num_svms;
00199 }
00200 
00201 void CDynProg::precompute_stop_codons()
00202 {
00203     int32_t length=m_genestr.get_dim1();
00204 
00205     m_genestr_stop.resize_array(length) ;
00206     m_genestr_stop.zero() ;
00207     m_genestr_stop.set_name("genestr_stop") ;
00208     {
00209         for (int32_t i=0; i<length-2; i++)
00210             if ((m_genestr[i]=='t' || m_genestr[i]=='T') && 
00211                     (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') && 
00212                       (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) ||
00213                      ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') )))
00214             {
00215                 m_genestr_stop.element(i)=true ;
00216             }
00217             else
00218                 m_genestr_stop.element(i)=false ;
00219         m_genestr_stop.element(length-2)=false ;
00220         m_genestr_stop.element(length-1)=false ;
00221     }
00222 }
00223 
00224 void CDynProg::set_num_states(int32_t p_N)
00225 {
00226     m_N=p_N ;
00227 
00228     m_transition_matrix_a_id.resize_array(m_N,m_N) ;
00229     m_transition_matrix_a.resize_array(m_N,m_N) ;
00230     m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
00231     m_initial_state_distribution_p.resize_array(m_N) ;
00232     m_initial_state_distribution_p_deriv.resize_array(m_N) ;
00233     m_end_state_distribution_q.resize_array(m_N);
00234     m_end_state_distribution_q_deriv.resize_array(m_N) ;
00235 
00236     m_orf_info.resize_array(m_N,2) ;
00237     m_PEN.resize_array(m_N,m_N) ;
00238 }
00239 
00240 int32_t CDynProg::get_num_states()
00241 {
00242     return m_N;
00243 }
00244 
00245 void CDynProg::init_tiling_data(
00246     int32_t* probe_pos, float64_t* intensities, const int32_t num_probes)
00247 {
00248     m_num_raw_data++;
00249     m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes;
00250 
00251     int32_t* tmp_probe_pos = new int32_t[m_num_probes_cum[m_num_raw_data]];
00252     float64_t* tmp_raw_intensities = new float64_t[m_num_probes_cum[m_num_raw_data]];
00253 
00254 
00255     if (m_num_raw_data==1){
00256         memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00257         memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t));
00258         //SG_PRINT("raw_intens:%f \n",*tmp_raw_intensities+2);  
00259     }else{
00260         memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t)); 
00261         memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t));   
00262         memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t));    
00263         memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t));  
00264     }
00265     delete[] m_probe_pos;
00266     delete[] m_raw_intensities;
00267     m_probe_pos = tmp_probe_pos; //new int32_t[num_probes];
00268     m_raw_intensities = tmp_raw_intensities;//new float64_t[num_probes];
00269 
00270     //memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00271     //memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t));
00272 
00273 }
00274 
00275 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms)
00276 {
00277     m_lin_feat.resize_array(p_num_svms, m_seq_len);
00278 
00279     // initialize array
00280     for (int s=0; s<p_num_svms; s++)
00281       for (int p=0; p<m_seq_len; p++)
00282         m_lin_feat.set_element(0.0, s, p) ;
00283 }
00284 
00285 void CDynProg::resize_lin_feat(const int32_t num_new_feat)
00286 {
00287     int32_t dim1, dim2;
00288     m_lin_feat.get_array_size(dim1, dim2);
00289     ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]);
00290     ASSERT(dim2==m_seq_len); // == number of candidate positions
00291 
00292 
00293 
00294     float64_t* arr = m_lin_feat.get_array();
00295     float64_t* tmp = new float64_t[(dim1+num_new_feat)*dim2];   
00296     memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ;
00297     for(int32_t j=0;j<m_seq_len;j++)
00298                 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++)
00299             tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
00300 
00301     m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);// copy array and free it later 
00302     delete[] tmp;
00303 
00304     /*for(int32_t j=0;j<5;j++)
00305     {
00306         for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data];k++)
00307         {
00308             SG_PRINT("(%i,%i)%f ",k,j,m_lin_feat.get_element(k,j));
00309         }
00310         SG_PRINT("\n");
00311     }
00312     m_lin_feat.get_array_size(dim1,dim2);
00313     SG_PRINT("resize_lin_feat: dim1:%i, dim2:%i\n",dim1,dim2);*/
00314 
00315     //SG_PRINT("resize_lin_feat: done\n");
00316 }
00317 
00318 void CDynProg::precompute_tiling_plifs(
00319     CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs)
00320 {
00321     m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs;
00322     float64_t* tiling_plif = new float64_t[num_tiling_plifs];
00323     float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
00324     for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; i++)
00325         svm_value[i]=0.0;
00326     int32_t* tiling_rows = new int32_t[num_tiling_plifs];
00327     for (int32_t i=0; i<num_tiling_plifs; i++)
00328     {
00329         tiling_plif[i]=0.0;
00330         CPlif * plif = PEN[tiling_plif_ids[i]];
00331         tiling_rows[i] = plif->get_use_svm();
00332 
00333         ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i)
00334     }
00335     resize_lin_feat(num_tiling_plifs);
00336 
00337 
00338     int32_t* p_tiling_pos  = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]];
00339     float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]];
00340     int32_t num=m_num_probes_cum[m_num_raw_data-1];
00341 
00342     for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++)
00343     {
00344         while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx])
00345         {
00346             for (int32_t i=0; i<num_tiling_plifs; i++)
00347             {
00348                 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data;
00349                 CPlif * plif = PEN[tiling_plif_ids[i]];
00350                 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1);
00351                 plif->set_do_calc(true);
00352                 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00353                 plif->set_do_calc(false);
00354             }
00355             p_tiling_data++;
00356             p_tiling_pos++;
00357             num++;
00358         }
00359         for (int32_t i=0; i<num_tiling_plifs; i++)
00360             m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx);
00361     }
00362     delete[] svm_value;
00363     delete[] tiling_plif;
00364     delete[] tiling_rows;
00365 }
00366 
00367 void CDynProg::create_word_string()
00368 {
00369     delete[] m_wordstr;
00370     m_wordstr=new uint16_t**[5440];
00371     int32_t k=0;
00372     int32_t genestr_len=m_genestr.get_dim1();
00373 
00374     m_wordstr[k]=new uint16_t*[m_num_degrees] ;
00375     for (int32_t j=0; j<m_num_degrees; j++)
00376     {
00377         m_wordstr[k][j]=NULL ;
00378         {
00379             m_wordstr[k][j]=new uint16_t[genestr_len] ;
00380             for (int32_t i=0; i<genestr_len; i++)
00381                 switch (m_genestr[i])
00382                 {
00383                     case 'A':
00384                     case 'a': m_wordstr[k][j][i]=0 ; break ;
00385                     case 'C':
00386                     case 'c': m_wordstr[k][j][i]=1 ; break ;
00387                     case 'G':
00388                     case 'g': m_wordstr[k][j][i]=2 ; break ;
00389                     case 'T':
00390                     case 't': m_wordstr[k][j][i]=3 ; break ;
00391                     default: ASSERT(0) ;
00392                 }
00393             CAlphabet::translate_from_single_order(m_wordstr[k][j], genestr_len, m_word_degree[j]-1, m_word_degree[j], 2) ;
00394         }
00395     }
00396 }
00397 
00398 void CDynProg::precompute_content_values()
00399 {
00400     for (int32_t s=0; s<m_num_svms; s++)
00401       m_lin_feat.set_element(0.0, s, 0);
00402 
00403     for (int32_t p=0 ; p<m_seq_len-1 ; p++)
00404     {
00405         int32_t from_pos = m_pos[p];
00406         int32_t to_pos = m_pos[p+1];
00407         float64_t* my_svm_values_unnormalized = new float64_t[m_num_svms];
00408         //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos);
00409         
00410         ASSERT(from_pos<=m_genestr.get_dim1());
00411         ASSERT(to_pos<=m_genestr.get_dim1());
00412         
00413         for (int32_t s=0; s<m_num_svms; s++)
00414             my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p);
00415 
00416         for (int32_t i=from_pos; i<to_pos; i++)
00417         {
00418             for (int32_t j=0; j<m_num_degrees; j++)
00419             {
00420                 uint16_t word = m_wordstr[0][j][i] ;
00421                 for (int32_t s=0; s<m_num_svms; s++)
00422                 {
00423                     // check if this k-mer should be considered for this SVM
00424                     if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1))
00425                         continue;
00426                     my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ;
00427                 }
00428             }
00429         }
00430         for (int32_t s=0; s<m_num_svms; s++)
00431         {
00432             float64_t prev = m_lin_feat.get_element(s, p);
00433             //SG_PRINT("elem (%i, %i, %f)\n", s, p, prev) ;
00434             if (prev<-1e20 || prev>1e20)
00435             {
00436                 SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev) ;
00437                 prev=0 ;
00438             }
00439             m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1);
00440         }
00441         delete[] my_svm_values_unnormalized;
00442     }
00443     //for (int32_t j=0; j<m_num_degrees; j++)
00444     //  delete[] m_wordstr[0][j] ;
00445     //delete[] m_wordstr[0] ;
00446 }
00447 
00448 void CDynProg::set_p_vector(float64_t *p, int32_t N)
00449 {
00450     if (!(N==m_N))
00451         SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",N, m_N);
00452 
00453     m_initial_state_distribution_p.set_array(p, N, true, true);
00454 }
00455 
00456 void CDynProg::set_q_vector(float64_t *q, int32_t N)
00457 {
00458     if (!(N==m_N))
00459         SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",N, m_N);
00460     m_end_state_distribution_q.set_array(q, N, true, true);
00461 }
00462 
00463 void CDynProg::set_a(float64_t *a, int32_t M, int32_t N)
00464 {
00465     ASSERT(N==m_N);
00466     ASSERT(M==N);
00467     m_transition_matrix_a.set_array(a, N, N, true, true);
00468     m_transition_matrix_a_deriv.resize_array(N, N);
00469 }
00470 
00471 void CDynProg::set_a_id(int32_t *a, int32_t M, int32_t N)
00472 {
00473     ASSERT(N==m_N);
00474     ASSERT(M==N);
00475     m_transition_matrix_a_id.set_array(a, N, N, true, true);
00476     m_max_a_id = 0;
00477     for (int32_t i=0; i<N; i++)
00478     {
00479         for (int32_t j=0; j<N; j++)
00480             m_max_a_id=CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j));
00481     }
00482 }
00483 
00484 void CDynProg::set_a_trans_matrix(
00485     float64_t *a_trans, int32_t num_trans, int32_t num_cols)
00486 {
00487 
00488     //CMath::display_matrix(a_trans,num_trans, num_cols,"a_trans");
00489 
00490     if (!((num_cols==3) || (num_cols==4)))
00491         SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols);
00492 
00493     delete[] trans_list_forward ;
00494     delete[] trans_list_forward_cnt ;
00495     delete[] trans_list_forward_val ;
00496     delete[] trans_list_forward_id ;
00497 
00498     trans_list_forward = NULL ;
00499     trans_list_forward_cnt = NULL ;
00500     trans_list_forward_val = NULL ;
00501     trans_list_len = 0 ;
00502 
00503     m_transition_matrix_a.zero() ;
00504     m_transition_matrix_a_id.zero() ;
00505 
00506     mem_initialized = true ;
00507 
00508     trans_list_forward_cnt=NULL ;
00509     trans_list_len = m_N ;
00510     trans_list_forward = new T_STATES*[m_N] ;
00511     trans_list_forward_cnt = new T_STATES[m_N] ;
00512     trans_list_forward_val = new float64_t*[m_N] ;
00513     trans_list_forward_id = new int32_t*[m_N] ;
00514     
00515     int32_t start_idx=0;
00516     for (int32_t j=0; j<m_N; j++)
00517     {
00518         int32_t old_start_idx=start_idx;
00519 
00520         while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
00521         {
00522             start_idx++;
00523             
00524             if (start_idx>1 && start_idx<num_trans)
00525                 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00526         }
00527         
00528         if (start_idx>1 && start_idx<num_trans)
00529             ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00530         
00531         int32_t len=start_idx-old_start_idx;
00532         ASSERT(len>=0);
00533         
00534         trans_list_forward_cnt[j] = 0 ;
00535         
00536         if (len>0)
00537         {
00538             trans_list_forward[j]     = new T_STATES[len] ;
00539             trans_list_forward_val[j] = new float64_t[len] ;
00540             trans_list_forward_id[j] = new int32_t[len] ;
00541         }
00542         else
00543         {
00544             trans_list_forward[j]     = NULL;
00545             trans_list_forward_val[j] = NULL;
00546             trans_list_forward_id[j]  = NULL;
00547         }
00548     }
00549     
00550     for (int32_t i=0; i<num_trans; i++)
00551     {
00552         int32_t from_state   = (int32_t)a_trans[i] ;
00553         int32_t to_state = (int32_t)a_trans[i+num_trans] ;
00554         float64_t val = a_trans[i+num_trans*2] ;
00555         int32_t id = 0 ;
00556         if (num_cols==4)
00557             id = (int32_t)a_trans[i+num_trans*3] ;
00558         //SG_DEBUG( "id=%i\n", id) ;
00559             
00560         ASSERT(to_state>=0 && to_state<m_N);
00561         ASSERT(from_state>=0 && from_state<m_N);
00562         
00563         trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00564         trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00565         trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00566         trans_list_forward_cnt[to_state]++ ;
00567         m_transition_matrix_a.element(from_state, to_state) = val ;
00568         m_transition_matrix_a_id.element(from_state, to_state) = id ;
00569         //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,m_transition_matrix_a_id.element(from_state, to_state));
00570     } ;
00571 
00572     m_max_a_id = 0 ;
00573     for (int32_t i=0; i<m_N; i++)
00574         for (int32_t j=0; j<m_N; j++)
00575         {
00576             //if (m_transition_matrix_a_id.element(i,j))
00577             //SG_DEBUG( "(%i,%i)=%i\n", i,j, m_transition_matrix_a_id.element(i,j)) ;
00578             m_max_a_id = CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)) ;
00579         }
00580     //SG_DEBUG( "m_max_a_id=%i\n", m_max_a_id) ;
00581 }
00582 
00583 
00584 void CDynProg::init_mod_words_array(
00585     int32_t * mod_words_input, int32_t num_elem, int32_t num_columns)
00586 {
00587     //for (int32_t i=0; i<num_columns; i++)
00588     //{
00589     //  for (int32_t j=0; j<num_elem; j++)
00590     //      SG_PRINT("%i ",mod_words_input[i*num_elem+j]);
00591     //  SG_PRINT("\n");
00592     //}
00593     m_svm_arrays_clean=false ;
00594 
00595     ASSERT(m_num_svms==num_elem);
00596     ASSERT(num_columns==2);
00597 
00598     m_mod_words.set_array(mod_words_input, num_elem, 2, true, true) ;
00599     m_mod_words_array = m_mod_words.get_array() ;
00600     
00601     /*SG_DEBUG( "m_mod_words=[") ;
00602     for (int32_t i=0; i<num_elem; i++)
00603         SG_DEBUG( "%i, ", p_mod_words_array[i]) ;
00604         SG_DEBUG( "]\n") ;*/
00605 } 
00606 
00607 bool CDynProg::check_svm_arrays()
00608 {
00609     //SG_DEBUG( "wd_dim1=%d, m_cum_num_words=%d, m_num_words=%d, m_svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n m_num_degrees=%d, m_num_svms=%d, m_num_strings=%d", m_word_degree.get_dim1(), m_cum_num_words.get_dim1(), m_num_words.get_dim1(), m_svm_pos_start.get_dim1(), m_num_unique_words.get_dim1(), m_mod_words.get_dim1(), m_mod_words.get_dim2(), m_sign_words.get_dim1(), m_string_words.get_dim1(), m_num_degrees, m_num_svms, m_num_strings);
00610     if ((m_word_degree.get_dim1()==m_num_degrees) &&
00611             (m_cum_num_words.get_dim1()==m_num_degrees+1) &&
00612             (m_num_words.get_dim1()==m_num_degrees) &&
00613             //(word_used.get_dim1()==m_num_degrees) &&
00614             //(word_used.get_dim2()==m_num_words[m_num_degrees-1]) &&
00615             //(word_used.get_dim3()==m_num_strings) &&
00616             //      (svm_values_unnormalized.get_dim1()==m_num_degrees) &&
00617             //      (svm_values_unnormalized.get_dim2()==m_num_svms) &&
00618             //(m_svm_pos_start.get_dim1()==m_num_degrees) &&
00619             (m_num_unique_words.get_dim1()==m_num_degrees) &&
00620             (m_mod_words.get_dim1()==m_num_svms) &&
00621             (m_mod_words.get_dim2()==2) && 
00622             (m_sign_words.get_dim1()==m_num_svms) &&
00623             (m_string_words.get_dim1()==m_num_svms))
00624     {
00625         m_svm_arrays_clean=true ;
00626         return true ;
00627     }
00628     else
00629     {
00630         if ((m_num_unique_words.get_dim1()==m_num_degrees) &&
00631             (m_mod_words.get_dim1()==m_num_svms) &&
00632             (m_mod_words.get_dim2()==2) &&
00633             (m_sign_words.get_dim1()==m_num_svms) &&
00634             (m_string_words.get_dim1()==m_num_svms))
00635             SG_PRINT("OK\n") ;
00636         else
00637             SG_PRINT("not OK\n") ;
00638 
00639         if (!(m_word_degree.get_dim1()==m_num_degrees))
00640             SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees") ;
00641         if (!(m_cum_num_words.get_dim1()==m_num_degrees+1))
00642             SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1") ;
00643         if (!(m_num_words.get_dim1()==m_num_degrees))
00644             SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees") ;
00645         //if (!(m_svm_pos_start.get_dim1()==m_num_degrees))
00646         //  SG_WARNING("SVM array: m_svm_pos_start.get_dim1()!=m_num_degrees") ;
00647         if (!(m_num_unique_words.get_dim1()==m_num_degrees))
00648             SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees") ;
00649         if (!(m_mod_words.get_dim1()==m_num_svms))
00650             SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms") ;
00651         if (!(m_mod_words.get_dim2()==2))
00652             SG_WARNING("SVM array: m_mod_words.get_dim2()!=2") ;
00653         if (!(m_sign_words.get_dim1()==m_num_svms))
00654             SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms") ;
00655         if (!(m_string_words.get_dim1()==m_num_svms))
00656             SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms") ;
00657 
00658         m_svm_arrays_clean=false ;
00659         return false ;  
00660     }
00661 }
00662 
00663 void CDynProg::set_observation_matrix(float64_t* seq, int32_t* dims, int32_t ndims)
00664 {
00665     if (ndims!=3)
00666         SG_ERROR("Expected 3-dimensional Matrix\n");
00667 
00668     int32_t N=dims[0];
00669     int32_t cand_pos=dims[1];
00670     int32_t max_num_features=dims[2];
00671 
00672     if (!m_svm_arrays_clean)
00673     {
00674         SG_ERROR( "SVM arrays not clean") ;
00675         return ;
00676     } ;
00677 
00678     ASSERT(N==m_N);
00679     ASSERT(cand_pos==m_seq_len);
00680     ASSERT(m_initial_state_distribution_p.get_dim1()==N);
00681     ASSERT(m_end_state_distribution_q.get_dim1()==N);
00682     
00683     m_observation_matrix.set_array(seq, N, m_seq_len, max_num_features, true, true) ;
00684 }
00685 int32_t CDynProg::get_num_positions()
00686 {
00687     return m_seq_len;
00688 }
00689 
00690 void CDynProg::set_content_type_array(float64_t* seg_path, int32_t rows, int32_t cols)
00691 {
00692     ASSERT(rows==2);
00693     ASSERT(cols==m_seq_len);
00694 
00695     if (seg_path!=NULL)
00696     {
00697         int32_t *segment_ids = new int32_t[m_seq_len] ;
00698         float64_t *segment_mask = new float64_t[m_seq_len] ;
00699         for (int32_t i=0; i<m_seq_len; i++)
00700         {
00701                 segment_ids[i] = (int32_t)seg_path[2*i] ;
00702                 segment_mask[i] = seg_path[2*i+1] ;
00703         }
00704         best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ;
00705         delete[] segment_ids;
00706         delete[] segment_mask;
00707     }
00708     else
00709     {
00710         int32_t *izeros = new int32_t[m_seq_len] ;
00711         float64_t *dzeros = new float64_t[m_seq_len] ;
00712         for (int32_t i=0; i<m_seq_len; i++)
00713         {
00714             izeros[i]=0 ;
00715             dzeros[i]=0.0 ;
00716         }
00717         best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ;
00718         delete[] izeros ;
00719         delete[] dzeros ;
00720     }
00721 }
00722 
00723 void CDynProg::set_pos(int32_t* pos, int32_t seq_len)  
00724 {
00725     //if (seq_len!=m_observation_matrix.get_dim2())
00726     //  SG_ERROR( "pos size does not match previous info %i!=%i\n", seq_len, m_observation_matrix.get_dim2()) ;
00727     
00728     m_pos.set_array(pos, seq_len, true, true) ;
00729     m_seq_len = seq_len;
00730 }
00731 
00732 void CDynProg::set_orf_info(int32_t* orf_info, int32_t m, int32_t n) 
00733 {
00734     if (n!=2)
00735         SG_ERROR( "orf_info size incorrect %i!=2\n", n) ;
00736 
00737     m_orf_info.set_array(orf_info, m, n, true, true) ;
00738     m_orf_info.set_name("orf_info") ;
00739 }
00740 
00741 void CDynProg::set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2)
00742 {
00743     if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2))
00744         SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n");
00745 
00746     SG_UNREF(m_seq_sparse1);
00747     SG_UNREF(m_seq_sparse2);
00748 
00749     m_seq_sparse1=seq_sparse1;
00750     m_seq_sparse2=seq_sparse2;
00751     SG_REF(m_seq_sparse1);
00752     SG_REF(m_seq_sparse2);
00753 }
00754 
00755 void CDynProg::set_plif_matrices(CPlifMatrix* pm)
00756 {
00757     SG_UNREF(m_plif_matrices);
00758 
00759     m_plif_matrices=pm;
00760 
00761     SG_REF(m_plif_matrices);
00762 }
00763 
00764 void CDynProg::set_gene_string(char* genestr, int32_t genestr_len)
00765 {
00766     ASSERT(genestr);
00767     ASSERT(genestr_len>0);
00768 
00769     m_genestr.set_array(genestr, genestr_len, true, true) ;
00770 }
00771 
00772 void CDynProg::set_my_state_seq(int32_t* my_state_seq)
00773 {
00774     ASSERT(my_state_seq && m_seq_len>0);
00775     m_my_state_seq.resize_array(m_seq_len);
00776     for (int32_t i=0; i<m_seq_len; i++)
00777         m_my_state_seq[i]=my_state_seq[i];
00778 }
00779 
00780 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq)
00781 {
00782     ASSERT(my_pos_seq && m_seq_len>0);
00783     m_my_pos_seq.resize_array(m_seq_len);
00784     for (int32_t i=0; i<m_seq_len; i++)
00785         m_my_pos_seq[i]=my_pos_seq[i];
00786 }
00787 
00788 void CDynProg::set_dict_weights(
00789     float64_t* dictionary_weights, int32_t dict_len, int32_t n)
00790 {
00791     if (m_num_svms!=n)
00792         SG_ERROR( "m_dict_weights array does not match num_svms=%i!=%i\n", m_num_svms, n) ;
00793 
00794     m_dict_weights.set_array(dictionary_weights, dict_len, m_num_svms, true, true) ;
00795 
00796     // initialize, so it does not bother when not used
00797     m_segment_loss.resize_array(m_max_a_id+1, m_max_a_id+1, 2) ;
00798     m_segment_loss.zero() ;
00799     m_segment_ids.resize_array(m_observation_matrix.get_dim2()) ;
00800     m_segment_mask.resize_array(m_observation_matrix.get_dim2()) ;
00801     m_segment_ids.zero() ;
00802     m_segment_mask.zero() ;
00803 }
00804 
00805 void CDynProg::best_path_set_segment_loss(
00806     float64_t* segment_loss, int32_t m, int32_t n)
00807 {
00808     // here we need two matrices. Store it in one: 2N x N
00809     if (2*m!=n)
00810         SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ;
00811 
00812     if (m!=m_max_a_id+1)
00813         SG_ERROR( "segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1) ;
00814 
00815     m_segment_loss.set_array(segment_loss, m, n/2, 2, true, true) ;
00816     /*for (int32_t i=0; i<n; i++)
00817         for (int32_t j=0; j<n; j++)
00818         SG_DEBUG( "loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) ;*/
00819 }
00820 
00821 void CDynProg::best_path_set_segment_ids_mask(
00822     int32_t* segment_ids, float64_t* segment_mask, int32_t m)
00823 {
00824     int32_t max_id = 0;
00825     for (int32_t i=1;i<m;i++)
00826         max_id = CMath::max(max_id,segment_ids[i]);
00827     //SG_PRINT("max_id: %i, m:%i\n",max_id, m);     
00828     m_segment_ids.set_array(segment_ids, m, true, true) ;
00829     m_segment_ids.set_name("m_segment_ids");
00830     m_segment_mask.set_array(segment_mask, m, true, true) ;
00831     m_segment_mask.set_name("m_segment_mask");
00832     
00833     m_seg_loss_obj->set_segment_mask(&m_segment_mask);
00834     m_seg_loss_obj->set_segment_ids(&m_segment_ids);
00835     m_seg_loss_obj->compute_loss(m_pos.get_array(), m_seq_len);
00836 }
00837 
00838 void CDynProg::get_scores(float64_t **scores, int32_t *m) 
00839 {
00840    ASSERT(scores && m);
00841 
00842    //*scores=m_scores.get_array();
00843    *m=m_scores.get_dim1();
00844 
00845    int32_t sz = sizeof(float64_t)*(*m);
00846 
00847    *scores = (float64_t*) malloc(sz);
00848    ASSERT(*scores);
00849 
00850    memcpy(*scores,m_scores.get_array(),sz);
00851 }
00852 
00853 void CDynProg::get_states(int32_t **states, int32_t *m, int32_t *n) 
00854 {
00855    ASSERT(states && m && n);
00856 
00857     *m=m_states.get_dim1() ;
00858     *n=m_states.get_dim2() ;
00859 
00860    int32_t sz = sizeof(int32_t)*( (*m) * (*n) );
00861 
00862    *states = (int32_t*) malloc(sz);
00863    ASSERT(*states);
00864 
00865    memcpy(*states,m_states.get_array(),sz);
00866 }
00867 
00868 void CDynProg::get_positions(int32_t **positions, int32_t *m, int32_t *n)
00869 {
00870    ASSERT(positions && m && n);
00871 
00872     *m=m_positions.get_dim1() ;
00873     *n=m_positions.get_dim2() ;
00874 
00875    int32_t sz = sizeof(int32_t)*( (*m) * (*n) );
00876 
00877    *positions = (int32_t*) malloc(sz);
00878    ASSERT(*positions);
00879 
00880    memcpy(*positions,m_positions.get_array(),sz);
00881 }
00882 
00883 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len)
00884 {
00885    ASSERT(scores && seq_len);
00886 
00887    *seq_len=m_my_scores.get_dim1();
00888 
00889    int32_t sz = sizeof(float64_t)*(*seq_len);
00890 
00891    *scores = (float64_t*) malloc(sz);
00892    ASSERT(*scores);
00893 
00894    memcpy(*scores,m_my_scores.get_array(),sz);
00895 }
00896 
00897 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len)
00898 {
00899     ASSERT(losses && seq_len);
00900 
00901     *seq_len=m_my_losses.get_dim1();
00902 
00903    int32_t sz = sizeof(float64_t)*(*seq_len);
00904 
00905    *losses = (float64_t*) malloc(sz);
00906    ASSERT(*losses);
00907 
00908    memcpy(*losses,m_my_losses.get_array(),sz);
00909 }
00910 
00912 
00913 float64_t CDynProg::best_path_no_b(
00914     int32_t max_iter, int32_t &best_iter, int32_t *my_path)
00915 {
00916     CArray2<T_STATES> psi(max_iter, m_N) ;
00917    psi.set_name("psi");
00918     CArray<float64_t>* delta = new CArray<float64_t>(m_N) ;
00919     CArray<float64_t>* delta_new = new CArray<float64_t>(m_N) ;
00920     
00921     { // initialization
00922         for (int32_t i=0; i<m_N; i++)
00923         {
00924             delta->element(i) = get_p(i) ;
00925             psi.element(0, i)= 0 ;
00926         }
00927     } 
00928     
00929     float64_t best_iter_prob = CMath::ALMOST_NEG_INFTY ;
00930     best_iter = 0 ;
00931     
00932     // recursion
00933     for (int32_t t=1; t<max_iter; t++)
00934     {
00935         CArray<float64_t>* dummy;
00936         int32_t NN=m_N ;
00937         for (int32_t j=0; j<NN; j++)
00938         {
00939             float64_t maxj = delta->element(0) + m_transition_matrix_a.element(0,j);
00940             int32_t argmax=0;
00941             
00942             for (int32_t i=1; i<NN; i++)
00943             {
00944                 float64_t temp = delta->element(i) + m_transition_matrix_a.element(i,j);
00945                 
00946                 if (temp>maxj)
00947                 {
00948                     maxj=temp;
00949                     argmax=i;
00950                 }
00951             }
00952             delta_new->element(j)=maxj ;
00953             psi.element(t, j)=argmax ;
00954         }
00955         
00956         dummy=delta;
00957         delta=delta_new;
00958         delta_new=dummy;    //switch delta/delta_new
00959         
00960         { //termination
00961             float64_t maxj=delta->element(0)+get_q(0);
00962             int32_t argmax=0;
00963             
00964             for (int32_t i=1; i<m_N; i++)
00965             {
00966                 float64_t temp=delta->element(i)+get_q(i);
00967                 
00968                 if (temp>maxj)
00969                 {
00970                     maxj=temp;
00971                     argmax=i;
00972                 }
00973             }
00974             //pat_prob=maxj;
00975             
00976             if (maxj>best_iter_prob)
00977             {
00978                 my_path[t]=argmax;
00979                 best_iter=t ;
00980                 best_iter_prob = maxj ;
00981             } ;
00982         } ;
00983     }
00984 
00985     
00986     { //state sequence backtracking
00987         for (int32_t t = best_iter; t>0; t--)
00988         {
00989             my_path[t-1]=psi.element(t, my_path[t]);
00990         }
00991     }
00992 
00993     delete delta ;
00994     delete delta_new ;
00995     
00996     return best_iter_prob ;
00997 }
00998 
00999 void CDynProg::best_path_no_b_trans(
01000     int32_t max_iter, int32_t &max_best_iter, int16_t nbest,
01001     float64_t *prob_nbest, int32_t *my_paths)
01002 {
01003     //T_STATES *psi=new T_STATES[max_iter*m_N*nbest] ;
01004     CArray3<T_STATES> psi(max_iter, m_N, nbest) ;
01005    psi.set_name("psi");
01006     CArray3<int16_t> ktable(max_iter, m_N, nbest) ;
01007    ktable.set_name("ktable");
01008     CArray2<int16_t> ktable_ends(max_iter, nbest) ;
01009    ktable_ends.set_name("ktable_ends");
01010 
01011     CArray<float64_t> tempvv(nbest*m_N) ;
01012     CArray<int32_t> tempii(nbest*m_N) ;
01013 
01014     CArray2<T_STATES> path_ends(max_iter, nbest) ;
01015    path_ends.set_name("path_ends");
01016     CArray2<float64_t> *delta=new CArray2<float64_t>(m_N, nbest) ;
01017    delta->set_name("delta");
01018     CArray2<float64_t> *delta_new=new CArray2<float64_t>(m_N, nbest) ;
01019    delta_new->set_name("delta_new");
01020     CArray2<float64_t> delta_end(max_iter, nbest) ;
01021    delta_end.set_name("delta_end");
01022 
01023     CArray2<int32_t> paths(max_iter, nbest) ;
01024    paths.set_name("paths");
01025     paths.set_array(my_paths, max_iter, nbest, false, false) ;
01026 
01027     { // initialization
01028         for (T_STATES i=0; i<m_N; i++)
01029         {
01030             delta->element(i,0) = get_p(i) ;
01031             for (int16_t k=1; k<nbest; k++)
01032             {
01033                 delta->element(i,k)=-CMath::INFTY ;
01034                 ktable.element(0,i,k)=0 ;
01035             }
01036         }
01037     }
01038     
01039     // recursion
01040     for (int32_t t=1; t<max_iter; t++)
01041     {
01042         CArray2<float64_t>* dummy=NULL;
01043 
01044         for (T_STATES j=0; j<m_N; j++)
01045         {
01046             const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01047             const T_STATES *elem_list = trans_list_forward[j] ;
01048             const float64_t *elem_val = trans_list_forward_val[j] ;
01049             
01050             int32_t list_len=0 ;
01051             for (int16_t diff=0; diff<nbest; diff++)
01052             {
01053                 for (int32_t i=0; i<num_elem; i++)
01054                 {
01055                     T_STATES ii = elem_list[i] ;
01056                     
01057                     tempvv.element(list_len) = -(delta->element(ii,diff) + elem_val[i]) ;
01058                     tempii.element(list_len) = diff*m_N + ii ;
01059                     list_len++ ;
01060                 }
01061             }
01062             CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01063             
01064             for (int16_t k=0; k<nbest; k++)
01065             {
01066                 if (k<list_len)
01067                 {
01068                     delta_new->element(j,k)  = -tempvv[k] ;
01069                     psi.element(t,j,k)      = (tempii[k]%m_N) ;
01070                     ktable.element(t,j,k)   = (tempii[k]-(tempii[k]%m_N))/m_N ;
01071                 }
01072                 else
01073                 {
01074                     delta_new->element(j,k)  = -CMath::INFTY ;
01075                     psi.element(t,j,k)      = 0 ;
01076                     ktable.element(t,j,k)   = 0 ;
01077                 }
01078             }
01079         }
01080         
01081         dummy=delta;
01082         delta=delta_new;
01083         delta_new=dummy;    //switch delta/delta_new
01084         
01085         { //termination
01086             int32_t list_len = 0 ;
01087             for (int16_t diff=0; diff<nbest; diff++)
01088             {
01089                 for (T_STATES i=0; i<m_N; i++)
01090                 {
01091                     tempvv.element(list_len) = -(delta->element(i,diff)+get_q(i));
01092                     tempii.element(list_len) = diff*m_N + i ;
01093                     list_len++ ;
01094                 }
01095             }
01096             CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01097             
01098             for (int16_t k=0; k<nbest; k++)
01099             {
01100                 delta_end.element(t-1,k) = -tempvv[k] ;
01101                 path_ends.element(t-1,k) = (tempii[k]%m_N) ;
01102                 ktable_ends.element(t-1,k) = (tempii[k]-(tempii[k]%m_N))/m_N ;
01103             }
01104         }
01105     }
01106     
01107     { //state sequence backtracking
01108         max_best_iter=0 ;
01109         
01110         CArray<float64_t> sort_delta_end(max_iter*nbest) ;
01111         CArray<int16_t> sort_k(max_iter*nbest) ;
01112         CArray<int32_t> sort_t(max_iter*nbest) ;
01113         CArray<int32_t> sort_idx(max_iter*nbest) ;
01114         
01115         int32_t i=0 ;
01116         for (int32_t iter=0; iter<max_iter-1; iter++)
01117             for (int16_t k=0; k<nbest; k++)
01118             {
01119                 sort_delta_end[i]=-delta_end.element(iter,k) ;
01120                 sort_k[i]=k ;
01121                 sort_t[i]=iter+1 ;
01122                 sort_idx[i]=i ;
01123                 i++ ;
01124             }
01125         
01126         CMath::qsort_index(sort_delta_end.get_array(), sort_idx.get_array(), (max_iter-1)*nbest) ;
01127 
01128         for (int16_t n=0; n<nbest; n++)
01129         {
01130             int16_t k=sort_k[sort_idx[n]] ;
01131             int32_t iter=sort_t[sort_idx[n]] ;
01132             prob_nbest[n]=-sort_delta_end[n] ;
01133 
01134             if (iter>max_best_iter)
01135                 max_best_iter=iter ;
01136             
01137             ASSERT(k<nbest) ;
01138             ASSERT(iter<max_iter) ;
01139             
01140             paths.element(iter,n) = path_ends.element(iter-1, k) ;
01141             int16_t q   = ktable_ends.element(iter-1, k) ;
01142             
01143             for (int32_t t = iter; t>0; t--)
01144             {
01145                 paths.element(t-1,n)=psi.element(t, paths.element(t,n), q);
01146                 q = ktable.element(t, paths.element(t,n), q) ;
01147             }
01148         }
01149     }
01150 
01151     delete delta ;
01152     delete delta_new ;
01153 }
01154 
01155 bool CDynProg::extend_orf(
01156     int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
01157     int32_t to)
01158 {
01159 #ifdef DYNPROG_TIMING_DETAIL
01160     MyTime.start() ;
01161 #endif
01162     
01163     if (start<0) 
01164         start=0 ;
01165     if (to<0)
01166         to=0 ;
01167     
01168     int32_t orf_target = orf_to-orf_from ;
01169     if (orf_target<0) orf_target+=3 ;
01170     
01171     int32_t pos ;
01172     if (last_pos==to)
01173         pos = to-orf_to-3 ;
01174     else
01175         pos=last_pos ;
01176 
01177     if (pos<0)
01178     {
01179 #ifdef DYNPROG_TIMING_DETAIL
01180         MyTime.stop() ;
01181         orf_time += MyTime.time_diff_sec() ;
01182 #endif
01183         return true ;
01184     }
01185     
01186     for (; pos>=start; pos-=3)
01187         if (m_genestr_stop[pos])
01188         {
01189 #ifdef DYNPROG_TIMING_DETAIL
01190             MyTime.stop() ;
01191             orf_time += MyTime.time_diff_sec() ;
01192 #endif
01193             return false ;
01194         }
01195     
01196     
01197     last_pos = CMath::min(pos+3,to-orf_to-3) ;
01198 
01199 #ifdef DYNPROG_TIMING_DETAIL
01200     MyTime.stop() ;
01201     orf_time += MyTime.time_diff_sec() ;
01202 #endif
01203     return true ;
01204 }
01205 
01206 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf,
01207         int16_t nbest, bool with_loss, bool with_multiple_sequences)
01208     {
01209 
01210     //FIXME we need checks here if all the fields are of right size
01211     //SG_PRINT("m_seq_len: %i\n", m_seq_len);
01212     //SG_PRINT("m_pos[0]: %i\n", m_pos[0]);
01213     //SG_PRINT("\n");
01214 
01215     //FIXME these variables can go away when compute_nbest_paths uses them
01216     //instead of the local pointers below
01217     const float64_t* seq_array = m_observation_matrix.get_array();
01218     m_scores.resize_array(nbest) ;
01219     m_states.resize_array(nbest, m_observation_matrix.get_dim2()) ;
01220     m_positions.resize_array(nbest, m_observation_matrix.get_dim2()) ;
01221 
01222     for (int32_t i=0; i<nbest; i++)
01223     {
01224         m_scores[i]=-1;
01225         for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++)
01226         {
01227             m_states.element(i,j)=-1;
01228             m_positions.element(i,j)=-1;
01229         }
01230     }
01231     float64_t* prob_nbest=m_scores.get_array();
01232     int32_t* my_state_seq=m_states.get_array();
01233     int32_t* my_pos_seq=m_positions.get_array();
01234 
01235     CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
01236     CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
01237     //END FIXME
01238 
01239 
01240 #ifdef DYNPROG_TIMING
01241         segment_init_time = 0.0 ;
01242         segment_pos_time = 0.0 ;
01243         segment_extend_time = 0.0 ;
01244         segment_clean_time = 0.0 ;
01245         orf_time = 0.0 ;
01246         svm_init_time = 0.0 ;
01247         svm_pos_time = 0.0 ;
01248         svm_clean_time = 0.0 ;
01249         inner_loop_time = 0.0 ;
01250         content_svm_values_time = 0.0 ;
01251         content_plifs_time = 0.0 ;
01252         inner_loop_max_time = 0.0 ;
01253         long_transition_time = 0.0 ;
01254 
01255         MyTime2.start() ;
01256 #endif
01257 
01258         if (!m_svm_arrays_clean)
01259         {
01260             SG_ERROR( "SVM arrays not clean") ;
01261             return ;
01262         }
01263 
01264 #ifdef DYNPROG_DEBUG
01265         m_transition_matrix_a.set_name("transition_matrix");
01266         m_transition_matrix_a.display_array();
01267         m_mod_words.display_array() ;
01268         m_sign_words.display_array() ;
01269         m_string_words.display_array() ;
01270         //SG_PRINT("use_orf = %i\n", use_orf) ;
01271 #endif
01272 
01273         int32_t max_look_back = 1000 ;
01274         bool use_svm = false ;
01275 
01276         SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals) ;
01277 
01278         //for (int32_t i=0;i<m_N*m_seq_len*max_num_signals;i++)
01279       //   SG_PRINT("(%i)%0.2f ",i,seq_array[i]);
01280 
01281         CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ;
01282         PEN.set_name("PEN");
01283         CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ;
01284         PEN_state_signals.set_name("state_signals");
01285 
01286         CArray2<float64_t> seq(m_N, m_seq_len) ;
01287         seq.set_name("seq") ;
01288         seq.zero() ;
01289 
01290 #ifdef DYNPROG_DEBUG
01291         SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data);
01292         SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs);
01293         SG_PRINT("m_num_svms: %i\n", m_num_svms);
01294         SG_PRINT("m_num_lin_feat_plifs_cum: ");
01295         for (int i=0; i<=m_num_raw_data; i++)
01296             SG_PRINT(" %i  ",m_num_lin_feat_plifs_cum[i]);
01297         SG_PRINT("\n");
01298 #endif
01299 
01300         float64_t* svm_value = new float64_t [m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
01301         { // initialize svm_svalue
01302             for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01303                 svm_value[s]=0 ;
01304         }
01305 
01306         { // convert seq_input to seq
01307             // this is independent of the svm values 
01308 
01309             //CArray3<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ;
01310             CArray3<float64_t> *seq_input=NULL ;
01311             if (seq_array!=NULL)
01312             {
01313                 //SG_PRINT("using dense seq_array\n") ;
01314 
01315                 seq_input=new CArray3<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ;
01316                 seq_input->set_name("seq_input") ;
01317                 //seq_input.display_array() ;
01318 
01319                 ASSERT(m_seq_sparse1==NULL) ;
01320                 ASSERT(m_seq_sparse2==NULL) ;
01321             } else
01322             {
01323                 SG_PRINT("using sparse seq_array\n") ;
01324 
01325                 ASSERT(m_seq_sparse1!=NULL) ;
01326                 ASSERT(m_seq_sparse2!=NULL) ;
01327                 ASSERT(max_num_signals==2) ;
01328             }
01329 
01330             for (int32_t i=0; i<m_N; i++)
01331                 for (int32_t j=0; j<m_seq_len; j++)
01332                     seq.element(i,j) = 0 ;
01333 
01334             for (int32_t i=0; i<m_N; i++)
01335                 for (int32_t j=0; j<m_seq_len; j++)
01336                     for (int32_t k=0; k<max_num_signals; k++)
01337                     {
01338                         if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
01339                         {
01340                             // no plif
01341                             if (seq_input!=NULL)
01342                                 seq.element(i,j) = seq_input->element(i,j,k) ;
01343                             else
01344                             {
01345                                 if (k==0)
01346                                     seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ;
01347                                 if (k==1)
01348                                     seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ;
01349                             }
01350                             break ;
01351                         }
01352                         if (PEN_state_signals.element(i,k)!=NULL)
01353                         {
01354                             if (seq_input!=NULL)
01355                             {
01356                                 // just one plif
01357                                 if (CMath::is_finite(seq_input->element(i,j,k)))
01358                                     seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(seq_input->element(i,j,k), svm_value) ;
01359                                 else
01360                                     // keep infinity values
01361                                     seq.element(i,j) = seq_input->element(i, j, k) ;
01362                             }
01363                             else
01364                             {
01365                                 if (k==0)
01366                                 {
01367                                     // just one plif
01368                                     if (CMath::is_finite(m_seq_sparse1->get_feature(i,j)))
01369                                         seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ;
01370                                     else
01371                                         // keep infinity values
01372                                         seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ;
01373                                 }
01374                                 if (k==1)
01375                                 {
01376                                     // just one plif
01377                                     if (CMath::is_finite(m_seq_sparse2->get_feature(i,j)))
01378                                         seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ;
01379                                     else
01380                                         // keep infinity values
01381                                         seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ;
01382                                 }
01383                             }
01384                         } 
01385                         else
01386                             break ;
01387                     }
01388             delete seq_input;
01389             delete svm_value;
01390         }
01391 
01392         // allow longer transitions than look_back
01393         bool long_transitions = m_long_transitions ;
01394         CArray2<int32_t> long_transition_content_start_position(m_N,m_N) ;
01395         long_transition_content_start_position.set_name("long_transition_content_start_position");
01396 #ifdef DYNPROG_DEBUG
01397         CArray2<int32_t> long_transition_content_end_position(m_N,m_N) ;
01398         long_transition_content_end_position.set_name("long_transition_content_end_position");
01399 #endif
01400         CArray2<int32_t> long_transition_content_start(m_N,m_N) ;
01401         long_transition_content_start.set_name("long_transition_content_start");
01402         CArray2<float64_t> long_transition_content_scores(m_N,m_N) ;
01403         long_transition_content_scores.set_name("long_transition_content_scores");
01404 #ifdef DYNPROG_DEBUG
01405         CArray2<float64_t> long_transition_content_scores_pen(m_N,m_N) ;
01406         long_transition_content_scores_pen.set_name("long_transition_content_scores_pen");
01407         CArray2<float64_t> long_transition_content_scores_prev(m_N,m_N) ;
01408         long_transition_content_scores_prev.set_name("long_transition_content_scores_prev");
01409         CArray2<float64_t> long_transition_content_scores_elem(m_N,m_N) ;
01410         long_transition_content_scores_elem.set_name("long_transition_content_scores_elem");
01411 #endif      
01412         CArray2<float64_t> long_transition_content_scores_loss(m_N,m_N) ;
01413         long_transition_content_scores_loss.set_name("long_transition_content_scores_loss");
01414 
01415         if (nbest!=1)
01416         {
01417             SG_ERROR("Long transitions are not supported for nbest!=1") ;
01418             long_transitions = false ;
01419         }
01420         long_transition_content_scores.set_const(-CMath::INFTY);
01421 #ifdef DYNPROG_DEBUG
01422         long_transition_content_scores_pen.set_const(0) ;
01423         long_transition_content_scores_elem.set_const(0) ;
01424         long_transition_content_scores_prev.set_const(0) ;
01425 #endif
01426         if (with_loss)
01427             long_transition_content_scores_loss.set_const(0) ;
01428         long_transition_content_start.zero() ;
01429         long_transition_content_start_position.zero() ;
01430 #ifdef DYNPROG_DEBUG
01431         long_transition_content_end_position.zero() ;
01432 #endif
01433 
01434         svm_value = new float64_t [m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
01435         { // initialize svm_svalue
01436             for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01437                 svm_value[s]=0 ;
01438         }
01439 
01440         CArray2<int32_t> look_back(m_N,m_N) ;
01441         look_back.set_name("look_back");
01442         CArray2<int32_t> look_back_orig(m_N,m_N) ;
01443         look_back.set_name("look_back_orig");
01444 
01445         { // determine maximal length of look-back
01446             for (int32_t i=0; i<m_N; i++)
01447                 for (int32_t j=0; j<m_N; j++)
01448                 {
01449                     look_back.set_element(INT_MAX, i, j) ;
01450                     look_back_orig.set_element(INT_MAX, i, j) ;
01451                 }
01452 
01453             for (int32_t j=0; j<m_N; j++)
01454             {
01455                 // only consider transitions that are actually allowed
01456                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01457                 const T_STATES *elem_list = trans_list_forward[j] ;
01458 
01459                 for (int32_t i=0; i<num_elem; i++)
01460                 {
01461                     T_STATES ii = elem_list[i] ;
01462 
01463                     CPlifBase *penij=PEN.element(j, ii) ;
01464                     if (penij==NULL)
01465                     {
01466                         if (long_transitions)
01467                         {
01468                             look_back.set_element(m_long_transition_threshold, j, ii) ;
01469                             look_back_orig.set_element(m_long_transition_max, j, ii) ;
01470                         }
01471                         continue ;
01472                     }
01473 
01474                     /* if the transition is an ORF or we do computation with loss, we have to disable long transitions */
01475                     if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions))
01476                     {
01477                         look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01478                         look_back_orig.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01479                         if (CMath::ceil(penij->get_max_value()) > max_look_back)
01480                         {
01481                             SG_DEBUG( "%d %d -> value: %f\n", ii,j,penij->get_max_value());
01482                             max_look_back = (int32_t) (CMath::ceil(penij->get_max_value()));
01483                         }
01484                     }
01485                     else
01486                     {
01487                         look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ;
01488                         look_back_orig.set_element( (int32_t)CMath::ceil(penij->get_max_value()), j, ii) ;
01489                     }
01490                     
01491                     if (penij->uses_svm_values())
01492                         use_svm=true ;
01493                 }
01494             }
01495             /* make sure max_look_back is at least as long as a long transition */
01496             if (long_transitions)
01497                 max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ;
01498 
01499             /* make sure max_look_back is not longer than the whole string */
01500             max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ;
01501 
01502             int32_t num_long_transitions = 0 ;
01503             for (int32_t i=0; i<m_N; i++)
01504                 for (int32_t j=0; j<m_N; j++)
01505                 {
01506                     if (look_back.get_element(i,j)==m_long_transition_threshold)
01507                         num_long_transitions++ ;
01508                     if (look_back.get_element(i,j)==INT_MAX)
01509                     {
01510                         if (long_transitions)
01511                         {
01512                             look_back.set_element(m_long_transition_threshold, i, j) ;
01513                             look_back_orig.set_element(m_long_transition_max, i, j) ;
01514                         }
01515                         else
01516                         {
01517                             look_back.set_element(max_look_back, i, j) ;
01518                             look_back_orig.set_element(m_long_transition_max, i, j) ;
01519                         }
01520                     }
01521                 }
01522             SG_DEBUG("Using %i long transitions\n", num_long_transitions) ;
01523         }
01524 
01525         //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr.get_dim1()) ;
01526         SG_DEBUG("use_svm=%i\n", use_svm) ;
01527 
01528         SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest);
01529         const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ;
01530         SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
01531         /*const float64_t mem_use = (float64_t)(m_seq_len*m_N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
01532           look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
01533           m_seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
01534           m_genestr.get_dim1()*sizeof(bool))/(1024*1024);*/
01535 
01536         //bool is_big = (mem_use>200) || (m_seq_len>5000) ;
01537 
01538         /*if (is_big)
01539           {
01540           SG_DEBUG("calling compute_nbest_paths: m_seq_len=%i, m_N=%i, lookback=%i nbest=%i\n", 
01541           m_seq_len, m_N, max_look_back, nbest) ;
01542           SG_DEBUG("allocating %1.2fMB of memory\n", 
01543           mem_use) ;
01544           }*/
01545         ASSERT(nbest<32000) ;
01546 
01547 
01548 
01549         CArray3<float64_t> delta(m_seq_len, m_N, nbest) ;
01550         delta.set_name("delta");
01551         float64_t* delta_array = delta.get_array() ;
01552         //delta.zero() ;
01553 
01554         CArray3<T_STATES> psi(m_seq_len, m_N, nbest) ;
01555         psi.set_name("psi");
01556         //psi.zero() ;
01557 
01558         CArray3<int16_t> ktable(m_seq_len, m_N, nbest) ;
01559         ktable.set_name("ktable");
01560         //ktable.zero() ;
01561 
01562         CArray3<int32_t> ptable(m_seq_len, m_N, nbest) ;    
01563         ptable.set_name("ptable");
01564         //ptable.zero() ;
01565 
01566         CArray<float64_t> delta_end(nbest) ;
01567         delta_end.set_name("delta_end");
01568         //delta_end.zero() ;
01569 
01570         CArray<T_STATES> path_ends(nbest) ;
01571         path_ends.set_name("path_ends");
01572         //path_ends.zero() ;
01573 
01574         CArray<int16_t> ktable_end(nbest) ;
01575         ktable_end.set_name("ktable_end");
01576         //ktable_end.zero() ;
01577 
01578         float64_t * fixedtempvv=new float64_t[look_back_buflen] ;
01579         memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
01580         int32_t * fixedtempii=new int32_t[look_back_buflen] ;
01581         memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
01582 
01583         CArray<float64_t> oldtempvv(look_back_buflen) ;
01584         oldtempvv.set_name("oldtempvv");
01585         CArray<float64_t> oldtempvv2(look_back_buflen) ;
01586         oldtempvv2.set_name("oldtempvv2");
01587         //oldtempvv.zero() ;
01588         //oldtempvv.display_size() ;
01589 
01590         CArray<int32_t> oldtempii(look_back_buflen) ;
01591         oldtempii.set_name("oldtempii");
01592         CArray<int32_t> oldtempii2(look_back_buflen) ;
01593         oldtempii2.set_name("oldtempii2");
01594         //oldtempii.zero() ;
01595 
01596         CArray<T_STATES> state_seq(m_seq_len) ;
01597         state_seq.set_name("state_seq");
01598         //state_seq.zero() ;
01599 
01600         CArray<int32_t> pos_seq(m_seq_len) ;
01601         pos_seq.set_name("pos_seq");
01602         //pos_seq.zero() ;
01603 
01604 
01605         m_dict_weights.set_name("dict_weights") ;
01606         m_word_degree.set_name("word_degree") ;
01607         m_cum_num_words.set_name("cum_num_words") ;
01608         m_num_words.set_name("num_words") ;
01609         //word_used.set_name("word_used") ;
01610         //svm_values_unnormalized.set_name("svm_values_unnormalized") ;
01611         //m_svm_pos_start.set_name("svm_pos_start") ;
01612         m_num_unique_words.set_name("num_unique_words") ;
01613 
01614         PEN.set_name("PEN") ;
01615         seq.set_name("seq") ;
01616 
01617         delta.set_name("delta") ;
01618         psi.set_name("psi") ;
01619         ktable.set_name("ktable") ;
01620         ptable.set_name("ptable") ;
01621         delta_end.set_name("delta_end") ;
01622         path_ends.set_name("path_ends") ;
01623         ktable_end.set_name("ktable_end") ;
01624 
01625 #ifdef USE_TMP_ARRAYCLASS
01626         fixedtempvv.set_name("fixedtempvv") ;
01627         fixedtempii.set_name("fixedtempvv") ;
01628 #endif
01629 
01630         oldtempvv.set_name("oldtempvv") ;
01631         oldtempvv2.set_name("oldtempvv2") ;
01632         oldtempii.set_name("oldtempii") ;
01633         oldtempii2.set_name("oldtempii2") ;
01634 
01635 
01637 
01638 #ifdef DYNPROG_DEBUG
01639         state_seq.display_size() ;
01640         pos_seq.display_size() ;
01641 
01642         m_dict_weights.display_size() ;
01643         m_word_degree.display_array() ;
01644         m_cum_num_words.display_array() ;
01645         m_num_words.display_array() ;
01646         //word_used.display_size() ;
01647         //svm_values_unnormalized.display_size() ;
01648         //m_svm_pos_start.display_array() ;
01649         m_num_unique_words.display_array() ;
01650 
01651         PEN.display_size() ;
01652         PEN_state_signals.display_size() ;
01653         seq.display_size() ;
01654         m_orf_info.display_size() ;
01655 
01656         //m_genestr_stop.display_size() ;
01657         delta.display_size() ;
01658         psi.display_size() ;
01659         ktable.display_size() ;
01660         ptable.display_size() ;
01661         delta_end.display_size() ;
01662         path_ends.display_size() ;
01663         ktable_end.display_size() ;
01664 
01665 #ifdef USE_TMP_ARRAYCLASS
01666         fixedtempvv.display_size() ;
01667         fixedtempii.display_size() ;
01668 #endif
01669 
01670         //oldtempvv.display_size() ;
01671         //oldtempii.display_size() ;
01672 
01673         state_seq.display_size() ;
01674         pos_seq.display_size() ;
01675 
01676         //seq.zero() ;
01677 
01678 #endif //DYNPROG_DEBUG
01679 
01681 
01682 
01683 
01684         {
01685             for (int32_t s=0; s<m_num_svms; s++)
01686                 ASSERT(m_string_words_array[s]<1)  ;
01687         }
01688 
01689 
01690         //CArray2<int32_t*> trans_matrix_svms(m_N,m_N);
01691         //CArray2<int32_t> trans_matrix_num_svms(m_N,m_N);
01692 
01693         { // initialization
01694 
01695             for (T_STATES i=0; i<m_N; i++)
01696             {
01697                 //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
01698                 delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
01699                 psi.element(0,i,0)   = 0 ;
01700                 if (nbest>1)
01701                     ktable.element(0,i,0)  = 0 ;
01702                 ptable.element(0,i,0)  = 0 ;
01703                 for (int16_t k=1; k<nbest; k++)
01704                 {
01705                     int32_t dim1, dim2, dim3 ;
01706                     delta.get_array_size(dim1, dim2, dim3) ;
01707                     //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3) ;
01708                     //delta.element(0, i, k)    = -CMath::INFTY ;
01709                     delta.element(delta_array, 0, i, k, m_seq_len, m_N)    = -CMath::INFTY ;
01710                     psi.element(0,i,0)      = 0 ;                  // <--- what's this for?
01711                     if (nbest>1)
01712                         ktable.element(0,i,k)     = 0 ;
01713                     ptable.element(0,i,k)     = 0 ;
01714                 }
01715                 /*
01716                    for (T_STATES j=0; j<m_N; j++)
01717                    {
01718                    CPlifBase * penalty = PEN.element(i,j) ;
01719                    int32_t num_current_svms=0;
01720                    int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
01721                    if (penalty)
01722                    {
01723                    SG_PRINT("trans %i -> %i \n",i,j);
01724                    penalty->get_used_svms(&num_current_svms, svm_ids);
01725                    trans_matrix_svms.set_element(svm_ids,i,j);
01726                    for (int32_t l=0;l<num_current_svms;l++)
01727                    SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l]);
01728                    trans_matrix_num_svms.set_element(num_current_svms,i,j);
01729                    }
01730                    }
01731                    */
01732 
01733             }
01734         }
01735 
01736 
01737         SG_DEBUG("START_RECURSION \n\n");
01738 
01739         // recursion
01740         for (int32_t t=1; t<m_seq_len; t++)
01741         {
01742             //if (is_big && t%(1+(m_seq_len/1000))==1)
01743             //  SG_PROGRESS(t, 0, m_seq_len);
01744             //SG_PRINT("%i\n", t) ;
01745 
01746 
01747             for (T_STATES j=0; j<m_N; j++)
01748             {
01749                 if (seq.element(j,t)<=-1e20)
01750                 { // if we cannot observe the symbol here, then we can omit the rest
01751                     for (int16_t k=0; k<nbest; k++)
01752                     {
01753                         delta.element(delta_array, t, j, k, m_seq_len, m_N)    = seq.element(j,t) ;
01754                         psi.element(t,j,k)         = 0 ;
01755                         if (nbest>1)
01756                             ktable.element(t,j,k)  = 0 ;
01757                         ptable.element(t,j,k)      = 0 ;
01758                     }
01759                 }
01760                 else
01761                 {
01762                     const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01763                     const T_STATES *elem_list = trans_list_forward[j] ;
01764                     const float64_t *elem_val      = trans_list_forward_val[j] ;
01765                     const int32_t *elem_id      = trans_list_forward_id[j] ;
01766 
01767                     int32_t fixed_list_len = 0 ;
01768                     float64_t fixedtempvv_ = CMath::INFTY ;
01769                     int32_t fixedtempii_ = 0 ;
01770                     bool fixedtemplong = false ;
01771 
01772                     for (int32_t i=0; i<num_elem; i++)
01773                     {
01774                         T_STATES ii = elem_list[i] ;
01775 
01776                         const CPlifBase * penalty = PEN.element(j,ii) ;
01777 
01778                         /*int32_t look_back = max_look_back ;
01779                           if (0)
01780                           { // find lookback length
01781                           CPlifBase *pen = (CPlifBase*) penalty ;
01782                           if (pen!=NULL)
01783                           look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
01784                           if (look_back>=1e6)
01785                           SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ;
01786                           ASSERT(look_back<1e6);
01787                           } */
01788 
01789                         int32_t look_back_ = look_back.element(j, ii) ;
01790 
01791                         int32_t orf_from = m_orf_info.element(ii,0) ;
01792                         int32_t orf_to   = m_orf_info.element(j,1) ;
01793                         if((orf_from!=-1)!=(orf_to!=-1))
01794                             SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01795                         ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01796 
01797                         int32_t orf_target = -1 ;
01798                         if (orf_from!=-1)
01799                         {
01800                             orf_target=orf_to-orf_from ;
01801                             if (orf_target<0) 
01802                                 orf_target+=3 ;
01803                             ASSERT(orf_target>=0 && orf_target<3) ;
01804                         }
01805 
01806                         int32_t orf_last_pos = m_pos[t] ;
01807                         //int32_t loss_last_pos = t ;
01808                         //float64_t last_loss = 0.0 ;
01809 
01810 #ifdef DYNPROG_TIMING
01811                         MyTime3.start() ;
01812 #endif              
01813                         int32_t num_ok_pos = 0 ;
01814                         float64_t last_mval=0 ;
01815                         int32_t last_ts = 0 ;
01816 
01817                         for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--)
01818                         {
01819                             bool ok ;
01820                             //int32_t plen=t-ts;
01821 
01822                             /*for (int32_t s=0; s<m_num_svms; s++)
01823                               if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) ||
01824                               (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6))
01825                               {
01826                               SG_DEBUG( "s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen]);
01827                               }*/
01828 
01829                             if (orf_target==-1)
01830                                 ok=true ;
01831                             else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target)
01832                                 ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ;
01833                             else
01834                                 ok=false ;
01835 
01836                             if (ok)
01837                             {
01838 
01839                                 float64_t segment_loss = 0.0 ;
01840                                 if (with_loss)
01841                                 {
01842                                     segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]);
01843                                     //if (segment_loss!=segment_loss2)
01844                                         //SG_PRINT("segment_loss:%f segment_loss2:%f\n", segment_loss, segment_loss2);
01845                                 }
01847                                 // BEST_PATH_TRANS
01849 
01850                                 int32_t frame = orf_from;//m_orf_info.element(ii,0);
01851                                 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
01852 
01853                                 float64_t pen_val = 0.0 ;
01854                                 if (penalty)
01855                                 {
01856 #ifdef DYNPROG_TIMING_DETAIL
01857                                     MyTime.start() ;
01858 #endif                              
01859                                     pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01860 
01861 #ifdef DYNPROG_TIMING_DETAIL
01862                                     MyTime.stop() ;
01863                                     content_plifs_time += MyTime.time_diff_sec() ;
01864 #endif
01865                                 }
01866 
01867 #ifdef DYNPROG_TIMING_DETAIL
01868                                 MyTime.start() ;
01869 #endif                              
01870                                 num_ok_pos++ ;
01871 
01872                                 if (nbest==1)
01873                                 {
01874                                     float64_t  val        = elem_val[i] + pen_val ;
01875                                     if (with_loss)
01876                                         val              += segment_loss ;
01877 
01878                                     float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ;
01879 
01880                                     if (mval<fixedtempvv_)
01881                                     {
01882                                         fixedtempvv_ = mval ;
01883                                         fixedtempii_ = ii + ts*m_N;
01884                                         fixed_list_len = 1 ;
01885                                         fixedtemplong = false ;
01886                                     }
01887                                     last_mval = mval ;
01888                                     last_ts = ts ;
01889                                 }
01890                                 else
01891                                 {
01892                                     for (int16_t diff=0; diff<nbest; diff++)
01893                                     {
01894                                         float64_t  val        = elem_val[i]  ;
01895                                         val                  += pen_val ;
01896                                         if (with_loss)
01897                                             val              += segment_loss ;
01898 
01899                                         float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ;
01900 
01901                                         /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */
01902                                         /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/
01903                                         /* fixed_list_len has the number of elements in fixedtempvv */
01904 
01905                                         if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
01906                                         {
01907                                             if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
01908                                             {
01909                                                 fixedtempvv[fixed_list_len] = mval ;
01910                                                 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest;
01911                                                 fixed_list_len++ ;
01912                                             }
01913                                             else  // must have mval < fixedtempvv[fixed_list_len-1]
01914                                             {
01915                                                 int32_t addhere = fixed_list_len;
01916                                                 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
01917                                                     addhere--;
01918 
01919                                                 // move everything from addhere+1 one forward 
01920                                                 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
01921                                                 {
01922                                                     fixedtempvv[jj] = fixedtempvv[jj-1];
01923                                                     fixedtempii[jj] = fixedtempii[jj-1];
01924                                                 }
01925 
01926                                                 fixedtempvv[addhere] = mval;
01927                                                 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest;
01928 
01929                                                 if (fixed_list_len < nbest)
01930                                                     fixed_list_len++;
01931                                             }
01932                                         }
01933                                     }
01934                                 }
01935 #ifdef DYNPROG_TIMING_DETAIL
01936                                 MyTime.stop() ;
01937                                 inner_loop_max_time += MyTime.time_diff_sec() ;
01938 #endif
01939                             }
01940                         }
01941 #ifdef DYNPROG_TIMING
01942                         MyTime3.stop() ;
01943                         inner_loop_time += MyTime3.time_diff_sec() ;
01944 #endif
01945                     }
01946                     for (int32_t i=0; i<num_elem; i++)
01947                     {
01948                         T_STATES ii = elem_list[i] ;
01949 
01950                         const CPlifBase * penalty = PEN.element(j,ii) ;
01951 
01952                         /*int32_t look_back = max_look_back ;
01953                           if (0)
01954                           { // find lookback length
01955                           CPlifBase *pen = (CPlifBase*) penalty ;
01956                           if (pen!=NULL)
01957                           look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
01958                           if (look_back>=1e6)
01959                           SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ;
01960                           ASSERT(look_back<1e6);
01961                           } */
01962 
01963                         int32_t look_back_ = look_back.element(j, ii) ;
01964                         int32_t look_back_orig_ = look_back_orig.element(j, ii) ;
01965 
01966                         int32_t orf_from = m_orf_info.element(ii,0) ;
01967                         int32_t orf_to   = m_orf_info.element(j,1) ;
01968                         if((orf_from!=-1)!=(orf_to!=-1))
01969                             SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01970                         ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01971 
01972                         int32_t orf_target = -1 ;
01973                         if (orf_from!=-1)
01974                         {
01975                             orf_target=orf_to-orf_from ;
01976                             if (orf_target<0) 
01977                                 orf_target+=3 ;
01978                             ASSERT(orf_target>=0 && orf_target<3) ;
01979                         }
01980 
01981                         //int32_t loss_last_pos = t ;
01982                         //float64_t last_loss = 0.0 ;
01983 
01984 #ifdef DYNPROG_TIMING
01985                         MyTime3.start() ;
01986 #endif              
01987 
01988                         /* long transition stuff */
01989                         /* only do this, if 
01990                          * this feature is enabled
01991                          * this is not a transition with ORF restrictions
01992                          * the loss is switched off
01993                          * nbest=1
01994                          */ 
01995 #ifdef DYNPROG_TIMING
01996                         MyTime3.start() ;
01997 #endif
01998                         // long transitions, only when not considering ORFs
01999                         if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold )
02000                         {
02001 
02002                             int32_t start = long_transition_content_start.get_element(ii, j) ;
02003                             int32_t end_3p_part = start ;
02004                             for (int32_t start_3p_part=start; m_pos[t]-m_pos[start_3p_part] > m_long_transition_threshold ; start_3p_part++)
02005                             {
02006                                 // find end_3p_part, which is greater than start_3p_part and at least m_long_transition_threshold away
02007                                 while (end_3p_part<=t && m_pos[end_3p_part+1]-m_pos[start_3p_part]<=m_long_transition_threshold)
02008                                     end_3p_part++ ;
02009 
02010                                 ASSERT(m_pos[end_3p_part+1]-m_pos[start_3p_part] > m_long_transition_threshold || end_3p_part==t) ;
02011                                 ASSERT(m_pos[end_3p_part]-m_pos[start_3p_part] <= m_long_transition_threshold) ;
02012 
02013                                 float64_t pen_val = 0.0;
02014                                 /* recompute penalty, if necessary */
02015                                 if (penalty)
02016                                 {
02017                                     int32_t frame = m_orf_info.element(ii,0);
02018                                     lookup_content_svm_values(start_3p_part, end_3p_part, m_pos[start_3p_part], m_pos[end_3p_part], svm_value, frame); // * t -> end_3p_part 
02019                                     pen_val = penalty->lookup_penalty(m_pos[end_3p_part]-m_pos[start_3p_part], svm_value) ;
02020                                 }
02021 
02022                                 /*if (m_pos[start_3p_part]==1003)
02023                                   {
02024                                   SG_PRINT("Part1: %i - %i   vs  %i - %i\n", m_pos[t], m_pos[ts], m_pos[end_3p_part], m_pos[start_3p_part]) ;
02025                                   SG_PRINT("Part1: ts=%i  t=%i  start_3p_part=%i  m_seq_len=%i\n", m_pos[ts], m_pos[t], m_pos[start_3p_part], m_seq_len) ;
02026                                   }*/
02027 
02028                                 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_3p_part, ii, 0, m_seq_len, m_N) ) ;
02029                                 //float64_t mval_trans = -( elem_val[i] + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ; // enable this for the incomplete extra check
02030 
02031                                 float64_t segment_loss_part1=0.0 ;
02032                                 if (with_loss)
02033                                 {  // this is the loss from the start of the long segment (5' part + middle section)
02034 
02035                                     segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_3p_part /*long_transition_content_start_position.get_element(ii,j)*/, end_3p_part, elem_id[i]); // * unsure
02036 
02037                                     mval_trans -= segment_loss_part1 ;
02038                                 }
02039 
02040                                 
02041                                 if (0)//m_pos[end_3p_part] - m_pos[long_transition_content_start_position.get_element(ii, j)] > look_back_orig_/*m_long_transition_max*/) // unsure: should it be end_3p_part or t ??
02042                                 {
02043                                     // this restricts the maximal length of segments, 
02044                                     // but the current implementation is not valid since the 
02045                                     // long transition is discarded without loocking if there 
02046                                     // is a second best long transition in between
02047                                     long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ;
02048                                     long_transition_content_start_position.set_element(0, ii, j) ;
02049                                     if (with_loss)
02050                                         long_transition_content_scores_loss.set_element(0.0, ii, j) ;
02051 #ifdef DYNPROG_DEBUG
02052                                     long_transition_content_scores_pen.set_element(0.0, ii, j) ;
02053                                     long_transition_content_scores_elem.set_element(0.0, ii, j) ;
02054                                     long_transition_content_scores_prev.set_element(0.0, ii, j) ;
02055                                     long_transition_content_end_position.set_element(0, ii, j) ;
02056 #endif
02057                                 }
02058                                 if (with_loss)
02059                                 {
02060                                     float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ;
02061                                     float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_3p_part, elem_id[i]);
02062                                     float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ;
02063                                     long_transition_content_scores.set_element(score, ii, j) ;
02064                                     long_transition_content_scores_loss.set_element(new_loss, ii, j) ;
02065 #ifdef DYNPROG_DEBUG
02066                                     long_transition_content_end_position.set_element(end_3p_part, ii, j) ;
02067 #endif
02068 
02069                                 }
02070                                 if (-long_transition_content_scores.get_element(ii, j) > mval_trans )
02071                                 {
02072                                     /* then the old long transition is either too far away or worse than the current one */
02073                                     long_transition_content_scores.set_element(-mval_trans, ii, j) ;
02074                                     long_transition_content_start_position.set_element(start_3p_part, ii, j) ;
02075                                     if (with_loss)
02076                                         long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ;
02077 #ifdef DYNPROG_DEBUG
02078                                     long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ;
02079                                     long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ;
02080                                     long_transition_content_scores_prev.set_element(delta.element(delta_array, start_3p_part, ii, 0, m_seq_len, m_N), ii, j) ;
02081                                     /*ASSERT(fabs(long_transition_content_scores.get_element(ii, j)-(long_transition_content_scores_pen.get_element(ii, j) +
02082                                       long_transition_content_scores_elem.get_element(ii, j) + 
02083                                       long_transition_content_scores_prev.get_element(ii, j)))<1e-6) ;*/
02084                                     long_transition_content_end_position.set_element(end_3p_part, ii, j) ;
02085 #endif
02086                                 }
02087 
02088                                 long_transition_content_start.set_element(start_3p_part, ii, j) ;
02089                             }
02090 
02091                             // consider the 3' part at the end of the long segment:
02092                             // * with length = m_long_transition_threshold
02093                             // * content prediction and loss only for this part
02094 
02095                             // find ts > 0 with distance from m_pos[t] greater m_long_transition_threshold 
02096                             // precompute: only depends on t
02097                             int ts = t;
02098                             while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold)
02099                                 ts-- ;
02100 
02101                             if (ts>0)
02102                             {
02103                                 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) ;
02104 
02105 
02106                                 /* only consider this transition, if the right position was found */
02107                                 float pen_val = 0.0 ;
02108                                 if (penalty)
02109                                 {
02110                                     int32_t frame = orf_from ; //m_orf_info.element(ii, 0);
02111                                     lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame); 
02112                                     pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
02113                                 }
02114 
02115                                 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val*0.5) ;
02116                                 
02117                                 {
02118 #ifdef DYNPROG_DEBUG
02119                                     float64_t segment_loss_part2=0.0 ;
02120                                     float64_t segment_loss_part1=0.0 ;
02121 #endif
02122                                     float64_t segment_loss_total=0.0 ;
02123                                     
02124                                     if (with_loss)
02125                                     {   // this is the loss for the 3' end fragment of the segment
02126                                         // (the 5' end and the middle section loss is already contained in mval)
02127                                         
02128 #ifdef DYNPROG_DEBUG
02129                                         // this is an alternative, which should be identical, if the loss is additive
02130                                         segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]);
02131                                         //mval -= segment_loss_part2 ;
02132                                         segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]);
02133 #endif
02134                                         segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]);
02135                                         mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ;
02136                                     }
02137                                     
02138 #ifdef DYNPROG_DEBUG
02139                                     if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561)
02140                                     {
02141                                         SG_PRINT("Part2: %i,%i,%i: val=%1.6f  pen_val*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i); scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i,  loss=%1.1f/%1.1f (%i,%i)\n", 
02142                                                  m_pos[t], j, ii, -mval, 0.5*pen_val, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1], 
02143                                                  long_transition_content_scores.get_element(ii, j), 
02144                                                  long_transition_content_scores_pen.get_element(ii, j), 
02145                                                  long_transition_content_scores_prev.get_element(ii, j), 
02146                                                  long_transition_content_scores_elem.get_element(ii, j), 
02147                                                  long_transition_content_scores_loss.get_element(ii, j), 
02148                                                  m_pos[long_transition_content_start_position.get_element(ii,j)], 
02149                                                  m_pos[long_transition_content_end_position.get_element(ii,j)], 
02150                                                  m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ;
02151                                         SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] );
02152                                     }
02153                                     
02154                                     if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3)
02155                                     {
02156                                         SG_ERROR("LOSS: total=%1.1f (%i-%i)  part1=%1.1f/%1.1f (%i-%i)  part2=%1.1f (%i-%i)  sum=%1.1f  diff=%1.1f\n", 
02157                                                  segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t], 
02158                                                  long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)],
02159                                                  segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t], 
02160                                                  segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j), 
02161                                                  segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ;
02162                                     }
02163 #endif
02164                                 }
02165 
02166 
02167                                 if ((mval < fixedtempvv_) &&
02168                                     (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_ /*m_long_transition_max*/)
02169                                 {
02170                                     /* then the long transition is better than the short one => replace it */ 
02171                                     int32_t fromtjk =  fixedtempii_ ;
02172                                     /*SG_PRINT("%i,%i: Long transition (%1.5f=-(%1.5f+%1.5f+%1.5f+%1.5f), %i) to m_pos %i better than short transition (%1.5f,%i) to m_pos %i \n", 
02173                                       m_pos[t], j, 
02174                                       mval, pen_val*0.5, long_transition_content_scores_pen.get_element(ii, j), long_transition_content_scores_elem.get_element(ii, j), long_transition_content_scores_prev.get_element(ii, j), ii, 
02175                                       m_pos[long_transition_content_position.get_element(ii, j)], 
02176                                       fixedtempvv_, (fromtjk%m_N), m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]) ;*/
02177                                     ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) ;
02178 
02179                                     fixedtempvv_ = mval ;
02180                                     fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ;
02181                                     fixed_list_len = 1 ;
02182                                     fixedtemplong = true ;
02183                                 }
02184 
02185                                 /* // extra check
02186                                    float64_t mval_trans2 = -( elem_val[i] + pen_val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ;
02187                                    if (last_ts==ts && fabs(last_mval-mval_trans2)>1e-5)
02188                                    SG_PRINT("last_mval=%1.2f at m_pos %i vs. mval_trans2=%1.2f at m_pos %i (diff=%f)\n", last_mval, m_pos[last_ts], mval_trans2, m_pos[ts], last_mval-mval_trans2) ;
02189                                    */
02190                             }
02191                         }
02192                     }
02193 #ifdef DYNPROG_TIMING
02194                     MyTime3.stop() ;
02195                     long_transition_time += MyTime3.time_diff_sec() ;
02196 #endif
02197 
02198 
02199                     int32_t numEnt = fixed_list_len;
02200 
02201                     float64_t minusscore;
02202                     int64_t fromtjk;
02203 
02204                     for (int16_t k=0; k<nbest; k++)
02205                     {
02206                         if (k<numEnt)
02207                         {
02208                             if (nbest==1)
02209                             {
02210                                 minusscore = fixedtempvv_ ;
02211                                 fromtjk = fixedtempii_ ;
02212                             }
02213                             else
02214                             {
02215                                 minusscore = fixedtempvv[k];
02216                                 fromtjk = fixedtempii[k];
02217                             }
02218 
02219                             delta.element(delta_array, t, j, k, m_seq_len, m_N)    = -minusscore + seq.element(j,t);
02220                             psi.element(t,j,k)      = (fromtjk%m_N) ;
02221                             if (nbest>1)
02222                                 ktable.element(t,j,k)   = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ;
02223                             ptable.element(t,j,k)   = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ;
02224                         }
02225                         else
02226                         {
02227                             delta.element(delta_array, t, j, k, m_seq_len, m_N)    = -CMath::INFTY ;
02228                             psi.element(t,j,k)      = 0 ;
02229                             if (nbest>1)
02230                                 ktable.element(t,j,k)     = 0 ;
02231                             ptable.element(t,j,k)     = 0 ;
02232                         }
02233                     }
02234                 }
02235             }
02236         }
02237 
02238         { //termination
02239             int32_t list_len = 0 ;
02240             for (int16_t diff=0; diff<nbest; diff++)
02241             {
02242                 for (T_STATES i=0; i<m_N; i++)
02243                 {
02244                     oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ;
02245                     oldtempii[list_len] = i + diff*m_N ;
02246                     list_len++ ;
02247                 }
02248             }
02249 
02250             CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
02251 
02252 #ifdef nonsense
02253             int t = m_seq_len-1;
02254             for (T_STATES j=0; j<m_N; j++)
02255             {
02256                 if (seq.element(j,t)<=-1e20)
02257                 { // if we cannot observe the symbol here, then we can omit the rest
02258                     for (int16_t k=0; k<nbest; k++)
02259                     {
02260                         delta.element(delta_array, t, j, k, m_seq_len, m_N)    = seq.element(j,t) ;
02261                         psi.element(t,j,k)         = 0 ;
02262                         if (nbest>1)
02263                             ktable.element(t,j,k)  = 0 ;
02264                         ptable.element(t,j,k)      = 0 ;
02265                     }
02266                 }
02267                 else
02268                 {
02269                     const T_STATES num_elem   = trans_list_forward_cnt[j] ;
02270                     const T_STATES *elem_list = trans_list_forward[j] ;
02271                     const float64_t *elem_val      = trans_list_forward_val[j] ;
02272                     const int32_t *elem_id      = trans_list_forward_id[j] ;
02273 
02274                     int32_t fixed_list_len = 0 ;
02275                     float64_t fixedtempvv_ = CMath::INFTY ;
02276                     int32_t fixedtempii_ = 0 ;
02277                     bool fixedtemplong = false ;
02278 
02279                     for (int32_t i=0; i<num_elem; i++)
02280                     {
02281                         T_STATES ii = elem_list[i] ;
02282 
02283                         const CPlifBase * penalty = PEN.element(j,ii) ;
02284 
02285                         /*int32_t look_back = max_look_back ;
02286                           if (0)
02287                           { // find lookback length
02288                           CPlifBase *pen = (CPlifBase*) penalty ;
02289                           if (pen!=NULL)
02290                           look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
02291                           if (look_back>=1e6)
02292                           SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ;
02293                           ASSERT(look_back<1e6);
02294                           } */
02295 
02296                         int32_t look_back_ = look_back.element(j, ii) ;
02297                         int32_t look_back_orig_ = look_back_orig.element(j, ii) ;
02298 
02299                         int32_t orf_from = m_orf_info.element(ii,0) ;
02300                         int32_t orf_to   = m_orf_info.element(j,1) ;
02301                         if((orf_from!=-1)!=(orf_to!=-1))
02302                             SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
02303                         ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
02304 
02305                         int32_t orf_target = -1 ;
02306                         if (orf_from!=-1)
02307                         {
02308                             orf_target=orf_to-orf_from ;
02309                             if (orf_target<0) 
02310                                 orf_target+=3 ;
02311                             ASSERT(orf_target>=0 && orf_target<3) ;
02312                         }
02313 
02314                         int32_t orf_last_pos = m_pos[t] ;
02315 
02316                         int ts = t;
02317                         while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold)
02318                             ts-- ;
02319 
02320                         if (ts>0)
02321                         {
02322                             ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) ;
02323 
02324                             /* only consider this transition, if the right position was found */
02325                             float pen_val = 0.0 ;
02326                             if (penalty)
02327                             {
02328                                 int32_t frame = orf_from ; //m_orf_info.element(ii, 0);
02329                                 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame); 
02330                                 pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
02331                             }
02332 
02333                             float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val*0.5) ;
02334 
02335                             {
02336 #ifdef DYNPROG_DEBUG
02337                                 float64_t segment_loss_part2=0.0 ;
02338                                 float64_t segment_loss_part1=0.0 ;
02339 #endif
02340                                 float64_t segment_loss_total=0.0 ;
02341 
02342                                 if (with_loss)
02343                                 {   // this is the loss for the 3' end fragment of the segment
02344                                     // (the 5' end and the middle section loss is already contained in mval)
02345 
02346 #ifdef DYNPROG_DEBUG
02347                                     // this is an alternative, which should be identical, if the loss is additive
02348                                     segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]);
02349                                     //mval -= segment_loss_part2 ;
02350                                     segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]);
02351 #endif
02352                                     segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]);
02353                                     mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ;
02354                                 }
02355 
02356 #ifdef DYNPROG_DEBUG
02357                                 if (1)//(m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561)
02358                                 {
02359                                     SG_PRINT("Part2: %i,%i,%i: val=%1.6f  pen_val*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i); scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i,  loss=%1.1f/%1.1f (%i,%i)\n", 
02360                                             m_pos[t], j, ii, -mval, 0.5*pen_val, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1], 
02361                                             long_transition_content_scores.get_element(ii, j), 
02362                                             long_transition_content_scores_pen.get_element(ii, j), 
02363                                             long_transition_content_scores_prev.get_element(ii, j), 
02364                                             long_transition_content_scores_elem.get_element(ii, j), 
02365                                             long_transition_content_scores_loss.get_element(ii, j), 
02366                                             m_pos[long_transition_content_start_position.get_element(ii,j)], 
02367                                             m_pos[long_transition_content_end_position.get_element(ii,j)], 
02368                                             m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ;
02369                                     SG_PRINT("oldtempvv: %1.6f, from_state:%i from_pos:%i\n ",-oldtempvv[0], (oldtempii[0]%m_N), m_pos[(oldtempii[0]-(oldtempii[0]%(m_N*nbest)))/(m_N*nbest)] );
02370                                 }
02371 
02372                                 if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3)
02373                                 {
02374                                     SG_ERROR("LOSS: total=%1.1f (%i-%i)  part1=%1.1f/%1.1f (%i-%i)  part2=%1.1f (%i-%i)  sum=%1.1f  diff=%1.1f\n", 
02375                                             segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t], 
02376                                             long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)],
02377                                             segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t], 
02378                                             segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j), 
02379                                             segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ;
02380                                 }
02381 #endif
02382                             }
02383 
02384 
02385                             if ((mval < oldtempvv[0]) &&
02386                                     (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_ /*m_long_transition_max*/)
02387                             {
02388                                 /* then the long transition is better than the short one => replace it */ 
02389                                 int32_t fromtjk =  fixedtempii_ ;
02390                                 /*SG_PRINT("%i,%i: Long transition (%1.5f=-(%1.5f+%1.5f+%1.5f+%1.5f), %i) to m_pos %i better than short transition (%1.5f,%i) to m_pos %i \n", 
02391                                   m_pos[t], j, 
02392                                   mval, pen_val*0.5, long_transition_content_scores_pen.get_element(ii, j), long_transition_content_scores_elem.get_element(ii, j), long_transition_content_scores_prev.get_element(ii, j), ii, 
02393                                   m_pos[long_transition_content_position.get_element(ii, j)], 
02394                                   fixedtempvv_, (fromtjk%m_N), m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]) ;*/
02395                                 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) ;
02396 
02397                                 oldtempvv[0] = mval ;
02398                                 oldtempii[0] = ii ;//+ m_N*long_transition_content_start_position.get_element(ii, j) ;
02399                             }
02400                         }
02401 
02402                     }
02403                 }
02404             }
02405 #endif
02406             for (int16_t k=0; k<nbest; k++)
02407             {
02408                 delta_end.element(k) = -oldtempvv[k] ;
02409                 path_ends.element(k) = (oldtempii[k]%m_N) ;
02410                 if (nbest>1)
02411                     ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ;
02412             }
02413 
02414 
02415         }
02416 
02417         { //state sequence backtracking     
02418             for (int16_t k=0; k<nbest; k++)
02419             {
02420                 prob_nbest[k]= delta_end.element(k) ;
02421 
02422                 int32_t i         = 0 ;
02423                 state_seq[i]  = path_ends.element(k) ;
02424                 int16_t q   = 0 ;
02425                 if (nbest>1)
02426                     q=ktable_end.element(k) ;
02427                 pos_seq[i]    = m_seq_len-1 ;
02428 
02429                 while (pos_seq[i]>0)
02430                 {
02431                     ASSERT(i+1<m_seq_len);
02432                     //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
02433                     state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
02434                     pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
02435                     if (nbest>1)
02436                         q              = ktable.element(pos_seq[i], state_seq[i], q) ;
02437                     i++ ;
02438                 }
02439                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
02440                 int32_t num_states = i+1 ;
02441                 for (i=0; i<num_states;i++)
02442                 {
02443                     my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ;
02444                     my_pos_seq[i+k*m_seq_len]   = pos_seq[num_states-i-1] ;
02445                 }
02446                 if (num_states<m_seq_len)
02447                 {
02448                     my_state_seq[num_states+k*m_seq_len]=-1 ;
02449                     my_pos_seq[num_states+k*m_seq_len]=-1 ;
02450                 }
02451             }
02452         }
02453 
02454         //if (is_big)
02455         //  SG_PRINT( "DONE.     \n") ;
02456 
02457 
02458 #ifdef DYNPROG_TIMING
02459         MyTime2.stop() ;
02460 
02461         //if (is_big)
02462         SG_PRINT("Timing:  orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s  Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s  svm_pos=%1.2f  svm_clean=%1.2f\n  content_svm_values_time=%1.2f  content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec()) ;
02463 #endif
02464 
02465         delete[] fixedtempvv ;
02466         delete[] fixedtempii ;
02467     }
02468 
02469 void CDynProg::best_path_trans_deriv(
02470     int32_t *my_state_seq, int32_t *my_pos_seq,
02471     int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
02472 {   
02473     m_initial_state_distribution_p_deriv.resize_array(m_N) ;
02474     m_end_state_distribution_q_deriv.resize_array(m_N) ;
02475     m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
02476     //m_my_scores.resize_array(m_my_state_seq.get_array_size()) ;
02477     //m_my_losses.resize_array(m_my_state_seq.get_array_size()) ;
02478     m_my_scores.resize_array(my_seq_len);
02479     m_my_losses.resize_array(my_seq_len);
02480     float64_t* my_scores=m_my_scores.get_array();
02481     float64_t* my_losses=m_my_losses.get_array();
02482     CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
02483     CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
02484 
02485     if (!m_svm_arrays_clean)
02486     {
02487         SG_ERROR( "SVM arrays not clean") ;
02488         return ;
02489     } ;
02490     //SG_PRINT( "genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num) ;
02491     //m_mod_words.display() ;
02492     //m_sign_words.display() ;
02493     //m_string_words.display() ;
02494 
02495     bool use_svm = false ;
02496 
02497     CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ;
02498    PEN.set_name("PEN");
02499     CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ;
02500    PEN_state_signals.set_name("PEN_state_signals");
02501     CArray3<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ;
02502    seq_input.set_name("seq_input");
02503 
02504     { // determine whether to use svm outputs and clear derivatives
02505         for (int32_t i=0; i<m_N; i++)
02506             for (int32_t j=0; j<m_N; j++)
02507             {
02508                 CPlifBase *penij=PEN.element(i,j) ;
02509                 if (penij==NULL)
02510                     continue ;
02511 
02512                 if (penij->uses_svm_values())
02513                     use_svm=true ;
02514                 penij->penalty_clear_derivative() ;
02515             }
02516         for (int32_t i=0; i<m_N; i++)
02517             for (int32_t j=0; j<max_num_signals; j++)
02518             {
02519                 CPlifBase *penij=PEN_state_signals.element(i,j) ;
02520                 if (penij==NULL)
02521                     continue ;
02522                 if (penij->uses_svm_values())
02523                     use_svm=true ;
02524                 penij->penalty_clear_derivative() ;
02525             }
02526     }
02527 
02528     { // set derivatives of p, q and a to zero
02529 
02530         for (int32_t i=0; i<m_N; i++)
02531         {
02532             m_initial_state_distribution_p_deriv.element(i)=0 ;
02533             m_end_state_distribution_q_deriv.element(i)=0 ;
02534             for (int32_t j=0; j<m_N; j++)
02535                 m_transition_matrix_a_deriv.element(i,j)=0 ;
02536         }
02537     }
02538 
02539     { // clear score vector
02540         for (int32_t i=0; i<my_seq_len; i++)
02541         {
02542             my_scores[i]=0.0 ;
02543             my_losses[i]=0.0 ;
02544         }
02545     }
02546 
02547     //int32_t total_len = 0 ;
02548 
02549     //m_transition_matrix_a.display_array() ;
02550     //m_transition_matrix_a_id.display_array() ;
02551 
02552     // compute derivatives for given path
02553     float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
02554     float64_t* svm_value_part1 = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
02555     float64_t* svm_value_part2 = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
02556     for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02557     {
02558         svm_value[s]=0 ;
02559         svm_value_part1[s]=0 ;
02560         svm_value_part2[s]=0 ;
02561     }
02562 
02563     //#ifdef DYNPROG_DEBUG
02564     float64_t total_score = 0.0 ;
02565     float64_t total_loss = 0.0 ;
02566     //#endif        
02567 
02568     ASSERT(my_state_seq[0]>=0) ;
02569     m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
02570     my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ;
02571 
02572     ASSERT(my_state_seq[my_seq_len-1]>=0) ;
02573     m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
02574     my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
02575 
02576     //#ifdef DYNPROG_DEBUG
02577     total_score += my_scores[0] + my_scores[my_seq_len-1] ;
02578     //#endif        
02579 
02580     SG_DEBUG( "m_seq_len=%i\n", my_seq_len) ;
02581     for (int32_t i=0; i<my_seq_len-1; i++)
02582     {
02583         if (my_state_seq[i+1]==-1)
02584             break ;
02585         int32_t from_state = my_state_seq[i] ;
02586         int32_t to_state   = my_state_seq[i+1] ;
02587         int32_t from_pos   = my_pos_seq[i] ;
02588         int32_t to_pos     = my_pos_seq[i+1] ;
02589 
02590         //int32_t loss_last_pos = to_pos ;
02591         //float64_t last_loss = 0.0 ;
02592         int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ;
02593         my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id);
02594 
02595 #ifdef DYNPROG_DEBUG
02596 
02597 
02598         if (i>0)// test if segment loss is additive
02599         {
02600             float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id);
02601             float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id);
02602             float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id);
02603             SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3);
02604             if (CMath::abs(loss1+loss2-loss3)>0)
02605             {
02606                 SG_PRINT( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02607             }
02608         }
02609         io->set_loglevel(M_DEBUG) ;
02610         SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02611 #endif
02612         // increase usage of this transition
02613         m_transition_matrix_a_deriv.element(from_state, to_state)++ ;
02614         my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ;
02615         //SG_PRINT("m_transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, m_transition_matrix_a.element(from_state, to_state));
02616 #ifdef DYNPROG_DEBUG
02617         SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02618 #endif
02619 
02620         /*int32_t last_svm_pos[m_num_degrees] ;
02621           for (int32_t qq=0; qq<m_num_degrees; qq++)
02622           last_svm_pos[qq]=-1 ;*/
02623 
02624         bool is_long_transition = false ;
02625         if (m_long_transitions)
02626         {
02627             if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold)
02628                 is_long_transition = true ;
02629             if (m_orf_info.element(from_state,0)!=-1)
02630                 is_long_transition = false ;
02631         }
02632         
02633         int32_t from_pos_thresh = from_pos ;
02634         int32_t to_pos_thresh = to_pos ;
02635 
02636         if (use_svm)
02637         {
02638             if (is_long_transition)
02639             {
02640                 while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold) // *
02641                     from_pos_thresh++ ;
02642                 ASSERT(from_pos_thresh<to_pos) ;
02643                 ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold); // *
02644                 ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold);// *
02645 
02646                 int32_t frame = m_orf_info.element(from_state,0);
02647                 lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame);
02648 
02649 #ifdef DYNPROG_DEBUG
02650                 SG_PRINT("part1: pos1: %i  pos2: %i   pos3: %i  \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1]) ;
02651                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02652                     SG_PRINT("%1.4f  ", svm_value_part1[s]);
02653                 SG_PRINT("\n");
02654 #endif
02655 
02656                 while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold) // *
02657                     to_pos_thresh-- ;
02658                 ASSERT(to_pos_thresh>0) ;
02659                 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold) ; // *
02660                 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold) ; // *
02661 
02662                 lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame);
02663 
02664 #ifdef DYNPROG_DEBUG
02665                 SG_PRINT("part2: pos1: %i  pos2: %i   pos3: %i  \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1]) ;
02666                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02667                     SG_PRINT("%1.4f  ", svm_value_part2[s]);
02668                 SG_PRINT("\n");
02669 #endif
02670             }
02671             else
02672             {
02673                 /* normal case */
02674 
02675                 //SG_PRINT("from_pos: %i; to_pos: %i; m_pos[to_pos]-m_pos[from_pos]: %i \n",from_pos, to_pos, m_pos[to_pos]-m_pos[from_pos]); 
02676                 int32_t frame = m_orf_info.element(from_state,0);
02677                 if (false)//(frame>=0)
02678                 {
02679                     int32_t num_current_svms=0;
02680                     int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02681                     SG_PRINT("penalties(%i, %i), frame:%i  ", from_state, to_state, frame);
02682                     PEN.element(to_state, from_state)->get_used_svms(&num_current_svms, svm_ids);
02683                     SG_PRINT("\n");
02684                 }
02685 
02686                 lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame);
02687 #ifdef DYNPROG_DEBUG
02688                 SG_PRINT("part2: pos1: %i  pos2: %i   \nsvm_values: ", m_pos[from_pos], m_pos[to_pos]) ;
02689                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02690                     SG_PRINT("%1.4f  ", svm_value[s]);
02691                 SG_PRINT("\n");
02692 #endif
02693             }
02694         }
02695 
02696         if (PEN.element(to_state, from_state)!=NULL)
02697         {
02698             float64_t nscore = 0 ;
02699             if (is_long_transition)
02700             {
02701                 float64_t pen_value_part1 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ;
02702                 float64_t pen_value_part2 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ;
02703                 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ;
02704                 //fprintf(stdout, "pen_value_part1=%f  pen_value_part2=%f  nscore=%f\n", pen_value_part1, pen_value_part2, nscore) ;
02705             }
02706             else
02707                 nscore = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ;
02708             my_scores[i] += nscore ;
02709 
02710             for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/
02711             {
02712                 svm_value[s]=-CMath::INFTY;
02713                 svm_value_part1[s]=-CMath::INFTY;
02714                 svm_value_part2[s]=-CMath::INFTY;
02715             }
02716 
02717 #ifdef DYNPROG_DEBUG
02718             //SG_DEBUG( "%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, m_pos[from_pos], to_pos, m_pos[to_pos], m_pos[to_pos]-m_pos[from_pos]) ;
02719 #endif
02720             if (is_long_transition)
02721             {
02722                 float64_t sum_score = 0.0 ;
02723 
02724                 for (int kk=0; kk<i; kk++)
02725                     sum_score += my_scores[i] ;
02726 
02727 #ifdef DYNPROG_DEBUG
02728                 SG_PRINT("is_long_transition=%i  (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i)  2: %1.6f (%i-%i) \n", 
02729                         is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, 
02730                         nscore, sum_score, 
02731                         PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh], 
02732                         PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ;
02733 #endif
02734             }
02735 
02736             if (is_long_transition)
02737             {
02738                 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ;
02739                 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ;
02740             }
02741             else
02742                 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ;
02743 
02744             //SG_PRINT("m_num_raw_data = %i \n", m_num_raw_data) ;
02745 
02746             // for tiling array and rna-seq data every single measurement must be added to the derivative 
02747             // in contrast to the content svm predictions where we have a single value per transition;
02748             // content svm predictions have already been added to the derivative, thus we start with d=1 
02749             // instead of d=0
02750             if (is_long_transition)
02751             {
02752                 for (int32_t d=1; d<=m_num_raw_data; d++) 
02753                 {
02754                     for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02755                         svm_value[s]=-CMath::INFTY;
02756                     float64_t* intensities = new float64_t[m_num_probes_cum[d]];
02757                     int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d);
02758                     for (int32_t k=0;k<num_intensities;k++)
02759                     {
02760                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02761                             svm_value[j]=intensities[k];
02762 
02763                         PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;  
02764 
02765                     }
02766                     num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d);
02767                     for (int32_t k=0;k<num_intensities;k++)
02768                     {
02769                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02770                             svm_value[j]=intensities[k];
02771 
02772                         PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;  
02773 
02774                     }
02775                     delete[] intensities;
02776 
02777                 }
02778             }
02779             else
02780             {
02781                 for (int32_t d=1; d<=m_num_raw_data; d++) 
02782                 {
02783                     for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02784                         svm_value[s]=-CMath::INFTY;
02785                     float64_t* intensities = new float64_t[m_num_probes_cum[d]];
02786                     int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d);
02787                     //SG_PRINT("m_pos[from_pos]:%i, m_pos[to_pos]:%i, num_intensities:%i\n",m_pos[from_pos],m_pos[to_pos], num_intensities);
02788                     for (int32_t k=0;k<num_intensities;k++)
02789                     {
02790                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02791                             svm_value[j]=intensities[k];
02792 
02793                         PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ;    
02794 
02795                     }
02796                     delete[] intensities;
02797                 }
02798             }
02799 
02800         }
02801 #ifdef DYNPROG_DEBUG
02802         SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02803 #endif
02804 
02805         //SG_DEBUG( "emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0) ;
02806         for (int32_t k=0; k<max_num_signals; k++)
02807         {
02808             if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
02809             {
02810 #ifdef DYNPROG_DEBUG
02811                 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ;
02812 #endif
02813                 my_scores[i] += seq_input.element(to_state, to_pos, k) ;
02814                 //if (seq_input.element(to_state, to_pos, k) !=0)
02815                 //  SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k));
02816                 break ;
02817             }
02818             if (PEN_state_signals.element(to_state, k)!=NULL)
02819             {
02820                 float64_t nscore = PEN_state_signals.element(to_state,k)->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ; // this should be ok for long_transitions (svm_value does not matter)
02821                 my_scores[i] += nscore ;
02822                 //break ;
02823                 //int32_t num_current_svms=0;
02824                 //int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02825                 //SG_PRINT("PEN_state_signals->id: ");
02826                 //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids);
02827                 //SG_PRINT("\n");
02828                 //if (nscore != 0)
02829                 //SG_PRINT( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
02830 #ifdef DYNPROG_DEBUG
02831                 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
02832 #endif
02833                 PEN_state_signals.element(to_state,k)->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ; // this should be ok for long_transitions (svm_value does not matter)
02834             } else
02835                 break ;
02836         }
02837 
02838         //#ifdef DYNPROG_DEBUG
02839         //SG_PRINT( "scores[%i]=%f (final) \n", i, my_scores[i]) ;
02840         //SG_PRINT( "losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss) ;
02841         total_score += my_scores[i] ;
02842         total_loss += my_losses[i] ;
02843         //#endif
02844     }
02845     //#ifdef DYNPROG_DEBUG
02846     //SG_PRINT( "total score = %f \n", total_score) ;
02847     //SG_PRINT( "total loss = %f \n", total_loss) ;
02848     //#endif
02849     delete[] svm_value;
02850     delete[] svm_value_part1 ;
02851     delete[] svm_value_part2 ;
02852 }
02853 
02854 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type)
02855 {
02856     ASSERT(from_pos<to_pos);
02857     int32_t num_intensities = 0;
02858     int32_t* p_tiling_pos  = &m_probe_pos[m_num_probes_cum[type-1]];
02859     float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]];
02860     int32_t last_pos;
02861     int32_t num = m_num_probes_cum[type-1];
02862     while (*p_tiling_pos<to_pos)
02863     {
02864         if (*p_tiling_pos>=from_pos)
02865         {
02866             intensities[num_intensities] = *p_tiling_data;
02867             num_intensities++;
02868         }
02869         num++;
02870         if (num>=m_num_probes_cum[type])
02871             break;
02872         last_pos = *p_tiling_pos;
02873         p_tiling_pos++;
02874         p_tiling_data++;
02875         ASSERT(last_pos<*p_tiling_pos);
02876     }
02877     return num_intensities;
02878 }
02879 
02880 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame)
02881 {
02882 #ifdef DYNPROG_TIMING_DETAIL
02883     MyTime.start() ;
02884 #endif
02885 //  ASSERT(from_state<to_state);
02886 //  if (!(from_pos<to_pos))
02887 //      SG_ERROR("from_pos!<to_pos, from_pos: %i to_pos: %i \n",from_pos,to_pos);
02888     for (int32_t i=0;i<m_num_svms;i++)
02889     {
02890         float64_t to_val   = m_lin_feat.get_element(i, to_state);
02891         float64_t from_val = m_lin_feat.get_element(i, from_state);
02892         svm_values[i] = (to_val-from_val)/(to_pos-from_pos);
02893     }
02894     for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++)
02895     {
02896         float64_t to_val   = m_lin_feat.get_element(i, to_state);
02897         float64_t from_val = m_lin_feat.get_element(i, from_state);
02898         svm_values[i] = to_val-from_val ;
02899     }
02900     if (m_intron_list)
02901     {
02902         int32_t* support = new int32_t[m_num_intron_plifs];
02903         m_intron_list->get_intron_support(support, from_state, to_state);
02904         int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data];
02905         int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; 
02906         int32_t cnt = 0;
02907         for (int32_t i=intron_list_start; i<intron_list_end;i++)
02908         {
02909             svm_values[i] = (float64_t) (support[cnt]);
02910             cnt++;
02911         }
02912         //if (to_pos>3990 && to_pos<4010)
02913         //  SG_PRINT("from_state:%i to_state:%i support[0]:%i support[1]:%i\n",from_state, to_state, support[0], support[1]);
02914         delete[] support;
02915     }
02916     // find the correct row with precomputed frame predictions
02917     if (frame!=-1)
02918     {
02919         svm_values[frame_plifs[0]] = 1e10;
02920         svm_values[frame_plifs[1]] = 1e10;
02921         svm_values[frame_plifs[2]] = 1e10;
02922         int32_t global_frame = from_pos%3;
02923         int32_t row = ((global_frame+frame)%3)+4;
02924         float64_t to_val   = m_lin_feat.get_element(row, to_state);
02925         float64_t from_val = m_lin_feat.get_element(row, from_state);
02926         svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos);
02927     }
02928 #ifdef DYNPROG_TIMING_DETAIL
02929     MyTime.stop() ;
02930     content_svm_values_time += MyTime.time_diff_sec() ;
02931 #endif
02932 }
02933 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs)
02934 {
02935     m_intron_list = intron_list;
02936     m_num_intron_plifs = num_plifs;
02937 }
02938 

SHOGUN Machine Learning Toolbox - Documentation