00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "distributions/HMM.h"
00012 #include "lib/Mathematics.h"
00013 #include "lib/io.h"
00014 #include "lib/config.h"
00015 #include "lib/Signal.h"
00016 #include "base/Parallel.h"
00017 #include "features/StringFeatures.h"
00018 #include "features/Alphabet.h"
00019
00020 #include <stdlib.h>
00021 #include <stdio.h>
00022 #include <time.h>
00023 #include <ctype.h>
00024
// Log of a model parameter's default value: when default_value is 0 a random
// value in [MIN_RAND, MAX_RAND] is drawn instead, then log() is applied.
#define VAL_MACRO log((default_value == 0) ? (CMath::random(MIN_RAND, MAX_RAND)) : default_value)
// Fixed capacity of the CModel learn_*/const_* parameter tables below.
// NOTE(review): 65336 looks like a typo for 65536 (2^16) — confirm intent.
#define ARRAY_SIZE 65336
00027
00028 using namespace shogun;
00029
00031
00033
// Bit flags recording which sections of a model definition file have been
// parsed successfully (dimensions N/M, ordering O, transition matrix a,
// emission matrix b, start distribution p, end distribution q).
const int32_t CHMM::GOTN= (1<<1);
const int32_t CHMM::GOTM= (1<<2);
const int32_t CHMM::GOTO= (1<<3);
const int32_t CHMM::GOTa= (1<<4);
const int32_t CHMM::GOTb= (1<<5);
const int32_t CHMM::GOTp= (1<<6);
const int32_t CHMM::GOTq= (1<<7);

// Same idea for the learn_* / const_* sections of a model definition file.
const int32_t CHMM::GOTlearn_a= (1<<1);
const int32_t CHMM::GOTlearn_b= (1<<2);
const int32_t CHMM::GOTlearn_p= (1<<3);
const int32_t CHMM::GOTlearn_q= (1<<4);
const int32_t CHMM::GOTconst_a= (1<<5);
const int32_t CHMM::GOTconst_b= (1<<6);
const int32_t CHMM::GOTconst_p= (1<<7);
const int32_t CHMM::GOTconst_q= (1<<8);
00050
/** Parser states used while reading a model definition file: one state per
 * file section (N, M, the a/b/p/q blocks and their learn_/const_ variants),
 * plus INITIAL/ARRAYs/COMMENT/END bookkeeping states. */
enum E_STATE
{
	INITIAL,
	ARRAYs,
	GET_N,
	GET_M,
	GET_a,
	GET_b,
	GET_p,
	GET_q,
	GET_learn_a,
	GET_learn_b,
	GET_learn_p,
	GET_learn_q,
	GET_const_a,
	GET_const_b,
	GET_const_p,
	GET_const_q,
	COMMENT,
	END
};
00072
00073
#ifdef FIX_POS
// Per-position state fixing: FIX_DEFAULT (-1) = unspecified,
// FIX_DISALLOWED (0) = state forbidden at that position,
// FIX_ALLOWED (1) = state permitted. Disallowed states receive
// DISALLOWED_PENALTY (~ -infinity in the log domain) during decoding.
const char CModel::FIX_DISALLOWED=0 ;
const char CModel::FIX_ALLOWED=1 ;
const char CModel::FIX_DEFAULT=-1 ;
const float64_t CModel::DISALLOWED_PENALTY=CMath::ALMOST_NEG_INFTY ;
#endif
00080
00081 CModel::CModel()
00082 {
00083 const_a=new int[ARRAY_SIZE];
00084 const_b=new int[ARRAY_SIZE];
00085 const_p=new int[ARRAY_SIZE];
00086 const_q=new int[ARRAY_SIZE];
00087 const_a_val=new float64_t[ARRAY_SIZE];
00088 const_b_val=new float64_t[ARRAY_SIZE];
00089 const_p_val=new float64_t[ARRAY_SIZE];
00090 const_q_val=new float64_t[ARRAY_SIZE];
00091
00092
00093 learn_a=new int[ARRAY_SIZE];
00094 learn_b=new int[ARRAY_SIZE];
00095 learn_p=new int[ARRAY_SIZE];
00096 learn_q=new int[ARRAY_SIZE];
00097
00098 #ifdef FIX_POS
00099 fix_pos_state = new char[ARRAY_SIZE];
00100 #endif
00101 for (int32_t i=0; i<ARRAY_SIZE; i++)
00102 {
00103 const_a[i]=-1 ;
00104 const_b[i]=-1 ;
00105 const_p[i]=-1 ;
00106 const_q[i]=-1 ;
00107 const_a_val[i]=1.0 ;
00108 const_b_val[i]=1.0 ;
00109 const_p_val[i]=1.0 ;
00110 const_q_val[i]=1.0 ;
00111 learn_a[i]=-1 ;
00112 learn_b[i]=-1 ;
00113 learn_p[i]=-1 ;
00114 learn_q[i]=-1 ;
00115 #ifdef FIX_POS
00116 fix_pos_state[i] = FIX_DEFAULT ;
00117 #endif
00118 } ;
00119 }
00120
00121 CModel::~CModel()
00122 {
00123 delete[] const_a;
00124 delete[] const_b;
00125 delete[] const_p;
00126 delete[] const_q;
00127 delete[] const_a_val;
00128 delete[] const_b_val;
00129 delete[] const_p_val;
00130 delete[] const_q_val;
00131
00132 delete[] learn_a;
00133 delete[] learn_b;
00134 delete[] learn_p;
00135 delete[] learn_q;
00136
00137 #ifdef FIX_POS
00138 delete[] fix_pos_state;
00139 #endif
00140
00141 }
00142
00143 CHMM::CHMM(CHMM* h)
00144 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00145 {
00146 SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;
00147
00148 this->N=h->get_N();
00149 this->M=h->get_M();
00150 status=initialize(NULL, h->get_pseudo());
00151 this->copy_model(h);
00152 set_observations(h->p_observations);
00153 }
00154
00155 CHMM::CHMM(int32_t p_N, int32_t p_M, CModel* p_model, float64_t p_PSEUDO)
00156 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00157 {
00158 this->N=p_N;
00159 this->M=p_M;
00160 model=NULL ;
00161
00162 SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;
00163
00164 status=initialize(p_model, p_PSEUDO);
00165 }
00166
00167 CHMM::CHMM(
00168 CStringFeatures<uint16_t>* obs, int32_t p_N, int32_t p_M,
00169 float64_t p_PSEUDO)
00170 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00171 {
00172 this->N=p_N;
00173 this->M=p_M;
00174 model=NULL ;
00175
00176 SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;
00177
00178 initialize(model, p_PSEUDO);
00179 set_observations(obs);
00180 }
00181
/** Lightweight constructor that adopts caller-provided parameter arrays:
 * start distribution p, end distribution q and a dense transition matrix a.
 * No emission model is set up (M=0, observation matrices stay NULL) and no
 * caches are allocated. */
CHMM::CHMM(int32_t p_N, float64_t* p, float64_t* q, float64_t* a)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	this->N=p_N;
	this->M=0;
	model=NULL ;

	// No sparse transition lists in this variant.
	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	// NOTE(review): self-assignment — PSEUDO is never given a real value in
	// this constructor (initialize() is not called here); confirm the
	// intended default pseudo-count.
	this->PSEUDO= PSEUDO;
	this->model= model;
	this->p_observations=NULL;
	this->reused_caches=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache=NULL;
	this->beta_cache=NULL;
#else
	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
#endif

	this->states_per_observation_psi=NULL ;
	this->path=NULL;
	arrayN1=NULL ;
	arrayN2=NULL ;

	this->loglikelihood=false;
	mem_initialized = true ;

	// Adopt the caller's arrays (not copied; caller retains ownership).
	transition_matrix_a=a ;
	observation_matrix_b=NULL ;
	initial_state_distribution_p=p ;
	end_state_distribution_q=q ;
	transition_matrix_A=NULL ;
	observation_matrix_B=NULL ;


}
00233
/** Sparse-transition constructor. a_trans is a num_trans x 3 matrix stored
 * column-wise: a_trans[i] = target state, a_trans[i+num_trans] = source
 * state, a_trans[i+2*num_trans] = log transition value. Rows must be sorted
 * by source state; per-source forward adjacency lists are built from it.
 * Start/end distributions p and q are adopted (not copied). */
CHMM::CHMM(
	int32_t p_N, float64_t* p, float64_t* q, int32_t num_trans,
	float64_t* a_trans)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	model=NULL ;

	this->N=p_N;
	this->M=0;

	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	// NOTE(review): self-assignment — PSEUDO never receives a real value in
	// this constructor; confirm the intended default pseudo-count.
	this->PSEUDO= PSEUDO;
	this->model= model;
	this->p_observations=NULL;
	this->reused_caches=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache=NULL;
	this->beta_cache=NULL;
#else
	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
#endif

	this->states_per_observation_psi=NULL ;
	this->path=NULL;
	arrayN1=NULL ;
	arrayN2=NULL ;

	this->loglikelihood=false;
	mem_initialized = true ;

	// Build per-state forward transition lists from the sorted triples.
	trans_list_forward_cnt=NULL ;
	trans_list_len = N ;
	trans_list_forward = new T_STATES*[N] ;
	trans_list_forward_val = new float64_t*[N] ;
	trans_list_forward_cnt = new T_STATES[N] ;

	// First pass: count transitions per source state j and size the lists.
	int32_t start_idx=0;
	for (int32_t j=0; j<N; j++)
	{
		int32_t old_start_idx=start_idx;

		// Advance over the run of rows whose source column equals j,
		// asserting the source column is non-decreasing (sorted input).
		while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
		{
			start_idx++;

			if (start_idx>1 && start_idx<num_trans)
				ASSERT(a_trans[start_idx+num_trans-1]<=
					a_trans[start_idx+num_trans]);
		}

		// NOTE(review): this repeats the check already performed on the
		// last loop iteration above — apparently redundant.
		if (start_idx>1 && start_idx<num_trans)
			ASSERT(a_trans[start_idx+num_trans-1]<=
				a_trans[start_idx+num_trans]);

		int32_t len=start_idx-old_start_idx;
		ASSERT(len>=0);

		// Counter is reused as a fill cursor in the second pass below.
		trans_list_forward_cnt[j] = 0 ;

		if (len>0)
		{
			trans_list_forward[j] = new T_STATES[len] ;
			trans_list_forward_val[j] = new float64_t[len] ;
		}
		else
		{
			trans_list_forward[j] = NULL;
			trans_list_forward_val[j] = NULL;
		}
	}

	// Second pass: scatter each (from -> to, value) triple into its list.
	for (int32_t i=0; i<num_trans; i++)
	{
		int32_t from = (int32_t)a_trans[i+num_trans] ;
		int32_t to = (int32_t)a_trans[i] ;
		float64_t val = a_trans[i+num_trans*2] ;

		ASSERT(from>=0 && from<N);
		ASSERT(to>=0 && to<N);

		trans_list_forward[from][trans_list_forward_cnt[from]]=to ;
		trans_list_forward_val[from][trans_list_forward_cnt[from]]=val ;
		trans_list_forward_cnt[from]++ ;

	} ;

	// Adopt caller arrays; no dense matrices or emission model here.
	transition_matrix_a=NULL ;
	observation_matrix_b=NULL ;
	initial_state_distribution_p=p ;
	end_state_distribution_q=q ;
	transition_matrix_A=NULL ;
	observation_matrix_B=NULL ;


}
00344
00345
00346 CHMM::CHMM(FILE* model_file, float64_t p_PSEUDO)
00347 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00348 {
00349 SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;
00350
00351 status=initialize(NULL, p_PSEUDO, model_file);
00352 }
00353
/** Destructor: releases the sparse transition lists, the state-dependent
 * parameter arrays, and — unless the caches are shared with another HMM
 * (reused_caches) — the alpha/beta caches and Viterbi path storage. */
CHMM::~CHMM()
{
	SG_UNREF(p_observations);

	// Sparse transition lists (built by the num_trans constructor).
	if (trans_list_forward_cnt)
		delete[] trans_list_forward_cnt ;
	if (trans_list_backward_cnt)
		delete[] trans_list_backward_cnt ;
	if (trans_list_forward)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_forward[i])
				delete[] trans_list_forward[i] ;
		delete[] trans_list_forward ;
	}
	if (trans_list_forward_val)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_forward_val[i])
				delete[] trans_list_forward_val[i] ;
		delete[] trans_list_forward_val ;
	}
	if (trans_list_backward)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_backward[i])
				delete[] trans_list_backward[i] ;
		delete[] trans_list_backward ;
	} ;

	free_state_dependend_arrays();

	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		// One alpha/beta cache pair per worker thread.
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			delete[] alpha_cache[i].table;
			delete[] beta_cache[i].table;
			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
		}

		delete[] alpha_cache;
		delete[] beta_cache;
		alpha_cache=NULL;
		beta_cache=NULL;
#else // USE_HMMPARALLEL_STRUCTURES
		delete[] alpha_cache.table;
		delete[] beta_cache.table;
		alpha_cache.table=NULL;
		beta_cache.table=NULL;
#endif // USE_HMMPARALLEL_STRUCTURES

		delete[] states_per_observation_psi;
		states_per_observation_psi=NULL;
	}

	// Scratch array used by the logarithmic_sum_array code paths.
#ifdef USE_LOGSUMARRAY
#ifdef USE_HMMPARALLEL_STRUCTURES
	{
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
			delete[] arrayS[i];
		delete[] arrayS ;
	} ;
#else //USE_HMMPARALLEL_STRUCTURES
	delete[] arrayS;
#endif //USE_HMMPARALLEL_STRUCTURES
#endif //USE_LOGSUMARRAY

	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		delete[] path_prob_updated ;
		delete[] path_prob_dimension ;
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
			delete[] path[i] ;
#endif //USE_HMMPARALLEL_STRUCTURES
		delete[] path;
	}
}
00435
00436 bool CHMM::train(CFeatures* data)
00437 {
00438 if (data)
00439 {
00440 if (data->get_feature_class() != C_STRING ||
00441 data->get_feature_type() != F_WORD)
00442 {
00443 SG_ERROR("Expected features of class string type word\n");
00444 }
00445 set_observations((CStringFeatures<uint16_t>*) data);
00446 }
00447 return baum_welch_viterbi_train(BW_NORMAL);
00448 }
00449
00450 bool CHMM::alloc_state_dependend_arrays()
00451 {
00452
00453 if (!transition_matrix_a && !observation_matrix_b &&
00454 !initial_state_distribution_p && !end_state_distribution_q)
00455 {
00456 transition_matrix_a=new float64_t[N*N];
00457 observation_matrix_b=new float64_t[N*M];
00458 initial_state_distribution_p=new float64_t[N];
00459 end_state_distribution_q=new float64_t[N];
00460 init_model_random();
00461 convert_to_log();
00462 }
00463
00464 #ifdef USE_HMMPARALLEL_STRUCTURES
00465 for (int32_t i=0; i<parallel->get_num_threads(); i++)
00466 {
00467 arrayN1[i]=new float64_t[N];
00468 arrayN2[i]=new float64_t[N];
00469 }
00470 #else //USE_HMMPARALLEL_STRUCTURES
00471 arrayN1=new float64_t[N];
00472 arrayN2=new float64_t[N];
00473 #endif //USE_HMMPARALLEL_STRUCTURES
00474
00475 #ifdef LOG_SUMARRAY
00476 #ifdef USE_HMMPARALLEL_STRUCTURES
00477 for (int32_t i=0; i<parallel->get_num_threads(); i++)
00478 arrayS[i]=new float64_t[(int32_t)(this->N/2+1)];
00479 #else //USE_HMMPARALLEL_STRUCTURES
00480 arrayS=new float64_t[(int32_t)(this->N/2+1)];
00481 #endif //USE_HMMPARALLEL_STRUCTURES
00482 #endif //LOG_SUMARRAY
00483 transition_matrix_A=new float64_t[this->N*this->N];
00484 observation_matrix_B=new float64_t[this->N*this->M];
00485
00486 if (p_observations)
00487 {
00488 #ifdef USE_HMMPARALLEL_STRUCTURES
00489 if (alpha_cache[0].table!=NULL)
00490 #else //USE_HMMPARALLEL_STRUCTURES
00491 if (alpha_cache.table!=NULL)
00492 #endif //USE_HMMPARALLEL_STRUCTURES
00493 set_observations(p_observations);
00494 else
00495 set_observation_nocache(p_observations);
00496 SG_UNREF(p_observations);
00497 }
00498
00499 this->invalidate_model();
00500
00501 return ((transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
00502 (transition_matrix_a != NULL) && (observation_matrix_b != NULL) &&
00503 (initial_state_distribution_p != NULL) &&
00504 (end_state_distribution_q != NULL));
00505 }
00506
00507 void CHMM::free_state_dependend_arrays()
00508 {
00509 #ifdef USE_HMMPARALLEL_STRUCTURES
00510 for (int32_t i=0; i<parallel->get_num_threads(); i++)
00511 {
00512 delete[] arrayN1[i];
00513 delete[] arrayN2[i];
00514
00515 arrayN1[i]=NULL;
00516 arrayN2[i]=NULL;
00517 }
00518 #else
00519 delete[] arrayN1;
00520 delete[] arrayN2;
00521 arrayN1=NULL;
00522 arrayN2=NULL;
00523 #endif
00524 if (observation_matrix_b)
00525 {
00526 delete[] transition_matrix_A;
00527 delete[] observation_matrix_B;
00528 delete[] transition_matrix_a;
00529 delete[] observation_matrix_b;
00530 delete[] initial_state_distribution_p;
00531 delete[] end_state_distribution_q;
00532 } ;
00533
00534 transition_matrix_A=NULL;
00535 observation_matrix_B=NULL;
00536 transition_matrix_a=NULL;
00537 observation_matrix_b=NULL;
00538 initial_state_distribution_p=NULL;
00539 end_state_distribution_q=NULL;
00540 }
00541
00542 bool CHMM::initialize(CModel* m, float64_t pseudo, FILE* modelfile)
00543 {
00544
00545 bool files_ok=true;
00546
00547 trans_list_forward = NULL ;
00548 trans_list_forward_cnt = NULL ;
00549 trans_list_forward_val = NULL ;
00550 trans_list_backward = NULL ;
00551 trans_list_backward_cnt = NULL ;
00552 trans_list_len = 0 ;
00553 mem_initialized = false ;
00554
00555 this->transition_matrix_a=NULL;
00556 this->observation_matrix_b=NULL;
00557 this->initial_state_distribution_p=NULL;
00558 this->end_state_distribution_q=NULL;
00559 this->PSEUDO= pseudo;
00560 this->model= m;
00561 this->p_observations=NULL;
00562 this->reused_caches=false;
00563
00564 #ifdef USE_HMMPARALLEL_STRUCTURES
00565 alpha_cache=new T_ALPHA_BETA[parallel->get_num_threads()] ;
00566 beta_cache=new T_ALPHA_BETA[parallel->get_num_threads()] ;
00567 states_per_observation_psi=new P_STATES[parallel->get_num_threads()] ;
00568
00569 for (int32_t i=0; i<parallel->get_num_threads(); i++)
00570 {
00571 this->alpha_cache[i].table=NULL;
00572 this->beta_cache[i].table=NULL;
00573 this->alpha_cache[i].dimension=0;
00574 this->beta_cache[i].dimension=0;
00575 this->states_per_observation_psi[i]=NULL ;
00576 }
00577
00578 #else // USE_HMMPARALLEL_STRUCTURES
00579 this->alpha_cache.table=NULL;
00580 this->beta_cache.table=NULL;
00581 this->alpha_cache.dimension=0;
00582 this->beta_cache.dimension=0;
00583 this->states_per_observation_psi=NULL ;
00584 #endif //USE_HMMPARALLEL_STRUCTURES
00585
00586 if (modelfile)
00587 files_ok= files_ok && load_model(modelfile);
00588
00589 #ifdef USE_HMMPARALLEL_STRUCTURES
00590 path_prob_updated=new bool[parallel->get_num_threads()];
00591 path_prob_dimension=new int[parallel->get_num_threads()];
00592
00593 path=new P_STATES[parallel->get_num_threads()];
00594
00595 for (int32_t i=0; i<parallel->get_num_threads(); i++)
00596 this->path[i]=NULL;
00597
00598 #else // USE_HMMPARALLEL_STRUCTURES
00599 this->path=NULL;
00600
00601 #endif //USE_HMMPARALLEL_STRUCTURES
00602
00603 #ifdef USE_HMMPARALLEL_STRUCTURES
00604 arrayN1=new float64_t*[parallel->get_num_threads()];
00605 arrayN2=new float64_t*[parallel->get_num_threads()];
00606 #endif //USE_HMMPARALLEL_STRUCTURES
00607
00608 #ifdef LOG_SUMARRAY
00609 #ifdef USE_HMMPARALLEL_STRUCTURES
00610 arrayS=new float64_t*[parallel->get_num_threads()] ;
00611 #endif // USE_HMMPARALLEL_STRUCTURES
00612 #endif //LOG_SUMARRAY
00613
00614 alloc_state_dependend_arrays();
00615
00616 this->loglikelihood=false;
00617 mem_initialized = true ;
00618 this->invalidate_model();
00619
00620 return ((files_ok) &&
00621 (transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
00622 (transition_matrix_a != NULL) && (observation_matrix_b != NULL) && (initial_state_distribution_p != NULL) &&
00623 (end_state_distribution_q != NULL));
00624 }
00625
00626
00627
00628
00629
00630
/** Forward algorithm (log domain) using the sparse forward transition lists.
 * Computes log alpha_t(state) for observation sequence 'dimension'; for
 * time beyond the sequence end it returns the full model log-likelihood
 * (sum over alpha_T + q). When an alpha cache table exists the whole
 * trellis is filled and cached; otherwise two N-sized scratch buffers are
 * ping-ponged. */
float64_t CHMM::forward_comp(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* alpha_new;
	T_ALPHA_BETA_TABLE* alpha;
	T_ALPHA_BETA_TABLE* dummy;
	if (time<1)
		time=0;


	int32_t wanted_time=time;

	if (ALPHA_CACHE(dimension).table)
	{
		// Cached mode: walk through the full table and compute every row.
		alpha=&ALPHA_CACHE(dimension).table[0];
		alpha_new=&ALPHA_CACHE(dimension).table[N];
		time=p_observations->get_vector_length(dimension)+1;
	}
	else
	{
		// Uncached mode: two reusable N-sized buffers.
		alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time<1)
		return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
	else
	{
		// Initialization: alpha_0(i) = p(i) + b_i(o_0).
		for (int32_t i=0; i<N; i++)
			alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;

		// Induction over t, summing only over listed predecessors of j.
		for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
		{

			for (int32_t j=0; j<N; j++)
			{
				register int32_t i, num = trans_list_forward_cnt[j] ;
				float64_t sum=-CMath::INFTY;
				for (i=0; i < num; i++)
				{
					int32_t ii = trans_list_forward[j][i] ;
					sum = CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii,j));
				} ;

				alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
			}

			if (!ALPHA_CACHE(dimension).table)
			{
				// Swap the two scratch buffers.
				dummy=alpha;
				alpha=alpha_new;
				alpha_new=dummy;
			}
			else
			{
				// Advance one row within the cache table.
				alpha=alpha_new;
				alpha_new+=N;
			}
		}

		// Query inside the sequence: one more induction step for 'state'.
		if (time<p_observations->get_vector_length(dimension))
		{
			register int32_t i, num=trans_list_forward_cnt[state];
			register float64_t sum=-CMath::INFTY;
			for (i=0; i<num; i++)
			{
				int32_t ii = trans_list_forward[state][i] ;
				sum= CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii, state));
			} ;

			return sum + get_b(state, p_observations->get_feature(dimension,time));
		}
		else
		{
			// Termination: total log-likelihood via the end distribution q.
			register int32_t i ;
			float64_t sum ;
			sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));

			if (!ALPHA_CACHE(dimension).table)
				return sum;
			else
			{
				// Record cache validity, then serve from the filled table.
				ALPHA_CACHE(dimension).dimension=dimension;
				ALPHA_CACHE(dimension).updated=true;
				ALPHA_CACHE(dimension).sum=sum;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return ALPHA_CACHE(dimension).table[wanted_time*N+state];
				else
					return ALPHA_CACHE(dimension).sum;
			}
		}
	}
}
00730
00731
00732
00733
00734
/** Dense variant of the forward algorithm (log domain): identical contract
 * to forward_comp(), but sums over all N predecessors instead of sparse
 * lists. With USE_LOGSUMARRAY it pairs terms into ARRAYS(dimension) and
 * reduces them with logarithmic_sum_array. */
float64_t CHMM::forward_comp_old(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* alpha_new;
	T_ALPHA_BETA_TABLE* alpha;
	T_ALPHA_BETA_TABLE* dummy;
	if (time<1)
		time=0;

	int32_t wanted_time=time;

	if (ALPHA_CACHE(dimension).table)
	{
		// Cached mode: fill the whole trellis table.
		alpha=&ALPHA_CACHE(dimension).table[0];
		alpha_new=&ALPHA_CACHE(dimension).table[N];
		time=p_observations->get_vector_length(dimension)+1;
	}
	else
	{
		// Uncached mode: ping-pong two N-sized scratch buffers.
		alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time<1)
		return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
	else
	{
		// Initialization: alpha_0(i) = p(i) + b_i(o_0).
		for (int32_t i=0; i<N; i++)
			alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;

		// Induction over time.
		for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
		{

			for (int32_t j=0; j<N; j++)
			{
				register int32_t i ;
#ifdef USE_LOGSUMARRAY
				// Pairwise pre-reduction into ARRAYS, odd N handled separately.
				for (i=0; i<(N>>1); i++)
					ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,j),
							alpha[(i<<1)+1] + get_a((i<<1)+1,j));
				if (N%2==1)
					alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+
						CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,j),
								CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (i=0; i<N; i++)
					sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i,j));

				alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
#endif //USE_LOGSUMARRAY
			}

			if (!ALPHA_CACHE(dimension).table)
			{
				// Swap scratch buffers.
				dummy=alpha;
				alpha=alpha_new;
				alpha_new=dummy;
			}
			else
			{
				// Advance one row within the cache table.
				alpha=alpha_new;
				alpha_new+=N;
			}
		}

		// Query inside the sequence: one more induction step for 'state'.
		if (time<p_observations->get_vector_length(dimension))
		{
			register int32_t i;
#ifdef USE_LOGSUMARRAY
			for (i=0; i<(N>>1); i++)
				ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,state),
						alpha[(i<<1)+1] + get_a((i<<1)+1,state));
			if (N%2==1)
				return get_b(state, p_observations->get_feature(dimension,time))+
					CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,state),
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				return get_b(state, p_observations->get_feature(dimension,time))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			register float64_t sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i, state));

			return sum + get_b(state, p_observations->get_feature(dimension,time));
#endif //USE_LOGSUMARRAY
		}
		else
		{
			// Termination: total log-likelihood via end distribution q.
			register int32_t i ;
			float64_t sum ;
#ifdef USE_LOGSUMARRAY
			for (i=0; i<(N>>1); i++)
				ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_q(i<<1),
						alpha[(i<<1)+1] + get_q((i<<1)+1));
			if (N%2==1)
				sum=CMath::logarithmic_sum(alpha[N-1]+get_q(N-1),
						CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));
#endif //USE_LOGSUMARRAY

			if (!ALPHA_CACHE(dimension).table)
				return sum;
			else
			{
				// Record cache validity, then serve from the filled table.
				ALPHA_CACHE(dimension).dimension=dimension;
				ALPHA_CACHE(dimension).updated=true;
				ALPHA_CACHE(dimension).sum=sum;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return ALPHA_CACHE(dimension).table[wanted_time*N+state];
				else
					return ALPHA_CACHE(dimension).sum;
			}
		}
	}
}
00862
00863
00864
00865
00866
/** Backward algorithm (log domain) using the sparse backward transition
 * lists. Computes log beta_t(state) for sequence 'dimension'; negative time
 * yields the total model log-likelihood (sum over p + b(o_0) + beta_0).
 * With a beta cache table the whole trellis is filled back-to-front and
 * cached; otherwise two N-sized scratch buffers are ping-ponged. */
float64_t CHMM::backward_comp(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* beta_new;
	T_ALPHA_BETA_TABLE* beta;
	T_ALPHA_BETA_TABLE* dummy;
	int32_t wanted_time=time;

	if (time<0)
		forward(time, state, dimension);

	if (BETA_CACHE(dimension).table)
	{
		// Cached mode: start at the last row and walk backwards.
		beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
		beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
		time=-1;
	}
	else
	{
		// Uncached mode: two reusable N-sized buffers.
		beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time>=p_observations->get_vector_length(dimension)-1)


		return get_q(state);
	else
	{
		// Initialization: beta_{T-1}(i) = q(i).
		for (register int32_t i=0; i<N; i++)
			beta[i]=get_q(i);

		// Induction backwards over t, using listed successors only.
		for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
		{
			for (register int32_t i=0; i<N; i++)
			{
				register int32_t j, num=trans_list_backward_cnt[i] ;
				float64_t sum=-CMath::INFTY;
				for (j=0; j<num; j++)
				{
					int32_t jj = trans_list_backward[i][j] ;
					sum= CMath::logarithmic_sum(sum, get_a(i, jj) + get_b(jj, p_observations->get_feature(dimension,t)) + beta[jj]);
				} ;
				beta_new[i]=sum;
			}

			if (!BETA_CACHE(dimension).table)
			{
				// Swap the two scratch buffers.
				dummy=beta;
				beta=beta_new;
				beta_new=dummy;
			}
			else
			{
				// Step one row backwards within the cache table.
				beta=beta_new;
				beta_new-=N;
			}
		}

		// Query inside the sequence: one more induction step for 'state'.
		if (time>=0)
		{
			register int32_t j, num=trans_list_backward_cnt[state] ;
			float64_t sum=-CMath::INFTY;
			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[state][j] ;
				sum= CMath::logarithmic_sum(sum, get_a(state, jj) + get_b(jj, p_observations->get_feature(dimension,time+1))+beta[jj]);
			} ;
			return sum;
		}
		else
		{
			// Termination: combine with the start distribution p.
			if (BETA_CACHE(dimension).table)
			{
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				BETA_CACHE(dimension).sum=sum;
				BETA_CACHE(dimension).dimension=dimension;
				BETA_CACHE(dimension).updated=true;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return BETA_CACHE(dimension).table[wanted_time*N+state];
				else
					return BETA_CACHE(dimension).sum;
			}
			else
			{
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				return sum;
			}
		}
	}
}
00964
00965
/** Dense variant of the backward algorithm (log domain): identical contract
 * to backward_comp(), but sums over all N successors instead of sparse
 * lists. With USE_LOGSUMARRAY it pairs terms into ARRAYS(dimension) and
 * reduces them with logarithmic_sum_array. */
float64_t CHMM::backward_comp_old(
	int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* beta_new;
	T_ALPHA_BETA_TABLE* beta;
	T_ALPHA_BETA_TABLE* dummy;
	int32_t wanted_time=time;

	if (time<0)
		forward(time, state, dimension);

	if (BETA_CACHE(dimension).table)
	{
		// Cached mode: start at the last row and walk backwards.
		beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
		beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
		time=-1;
	}
	else
	{
		// Uncached mode: ping-pong two N-sized scratch buffers.
		beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time>=p_observations->get_vector_length(dimension)-1)


		return get_q(state);
	else
	{
		// Initialization: beta_{T-1}(i) = q(i).
		for (register int32_t i=0; i<N; i++)
			beta[i]=get_q(i);

		// Induction backwards over time.
		for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
		{
			for (register int32_t i=0; i<N; i++)
			{
				register int32_t j ;
#ifdef USE_LOGSUMARRAY
				// Pairwise pre-reduction into ARRAYS, odd N handled separately.
				for (j=0; j<(N>>1); j++)
					ARRAYS(dimension)[j]=CMath::logarithmic_sum(
							get_a(i, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,t)) + beta[j<<1],
							get_a(i, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,t)) + beta[(j<<1)+1]);
				if (N%2==1)
					beta_new[i]=CMath::logarithmic_sum(get_a(i, N-1) + get_b(N-1, p_observations->get_feature(dimension,t)) + beta[N-1],
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					beta_new[i]=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_a(i, j) + get_b(j, p_observations->get_feature(dimension,t)) + beta[j]);

				beta_new[i]=sum;
#endif //USE_LOGSUMARRAY
			}

			if (!BETA_CACHE(dimension).table)
			{
				// Swap scratch buffers.
				dummy=beta;
				beta=beta_new;
				beta_new=dummy;
			}
			else
			{
				// Step one row backwards within the cache table.
				beta=beta_new;
				beta_new-=N;
			}
		}

		// Query inside the sequence: one more induction step for 'state'.
		if (time>=0)
		{
			register int32_t j ;
#ifdef USE_LOGSUMARRAY
			for (j=0; j<(N>>1); j++)
				ARRAYS(dimension)[j]=CMath::logarithmic_sum(
						get_a(state, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,time+1)) + beta[j<<1],
						get_a(state, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,time+1)) + beta[(j<<1)+1]);
			if (N%2==1)
				return CMath::logarithmic_sum(get_a(state, N-1) + get_b(N-1, p_observations->get_feature(dimension,time+1)) + beta[N-1],
						CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				return CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			float64_t sum=-CMath::INFTY;
			for (j=0; j<N; j++)
				sum= CMath::logarithmic_sum(sum, get_a(state, j) + get_b(j, p_observations->get_feature(dimension,time+1))+beta[j]);

			return sum;
#endif //USE_LOGSUMARRAY
		}
		else
		{
			// Termination: combine with the start distribution p.
			if (BETA_CACHE(dimension).table)
			{
#ifdef USE_LOGSUMARRAY//AAA
				for (int32_t j=0; j<(N>>1); j++)
					ARRAYS(dimension)[j]=CMath::logarithmic_sum(get_p(j<<1) + get_b(j<<1, p_observations->get_feature(dimension,0))+beta[j<<1],
							get_p((j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,0))+beta[(j<<1)+1]) ;
				if (N%2==1)
					BETA_CACHE(dimension).sum=CMath::logarithmic_sum(get_p(N-1) + get_b(N-1, p_observations->get_feature(dimension,0))+beta[N-1],
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					BETA_CACHE(dimension).sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				BETA_CACHE(dimension).sum=sum;
#endif //USE_LOGSUMARRAY
				BETA_CACHE(dimension).dimension=dimension;
				BETA_CACHE(dimension).updated=true;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return BETA_CACHE(dimension).table[wanted_time*N+state];
				else
					return BETA_CACHE(dimension).sum;
			}
			else
			{
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				return sum;
			}
		}
	}
}
01095
01096
01097
// Computes the Viterbi (best state path) log-probability for one observation
// vector, or the average over all vectors when dimension==-1.  As a side
// effect the best state sequence is stored in PATH(dimension) and the result
// is cached (pat_prob / all_pat_prob).  Returns -1 when no observations are
// attached, the psi table is missing, or dimension is out of range.
float64_t CHMM::best_path(int32_t dimension)
{
	if (!p_observations)
		return -1;

	if (dimension==-1)
	{
		// full likelihood: average of best_path over every vector, cached
		if (!all_path_prob_updated)
		{
			SG_INFO( "computing full viterbi likelihood\n") ;
			float64_t sum = 0 ;
			for (int32_t i=0; i<p_observations->get_num_vectors(); i++)
				sum+=best_path(i) ;
			sum /= p_observations->get_num_vectors() ;
			all_pat_prob=sum ;
			all_path_prob_updated=true ;
			return sum ;
		} else
			return all_pat_prob ;
	} ;

	if (!STATES_PER_OBSERVATION_PSI(dimension))
		return -1 ;

	if (dimension >= p_observations->get_num_vectors())
		return -1;

	// return the cached result when this dimension was computed last
	if (PATH_PROB_UPDATED(dimension) && dimension==PATH_PROB_DIMENSION(dimension))
		return pat_prob;
	else
	{
		// two scratch rows that are ping-ponged between time steps
		register float64_t* delta= ARRAYN2(dimension);
		register float64_t* delta_new= ARRAYN1(dimension);

		// initialization: delta_0(i) = p(i) + b(i, o_0)  (log domain)
		{
			for (register int32_t i=0; i<N; i++)
			{
				delta[i]=get_p(i)+get_b(i, p_observations->get_feature(dimension,0));
				set_psi(0, i, 0, dimension);
			}
		}

#ifdef USE_PATHDEBUG
		float64_t worst=-CMath::INFTY/4 ;
#endif

		// recursion: delta_t(j) = max_i(delta_{t-1}(i) + a(i,j)) + b(j, o_t)
		for (register int32_t t=1; t<p_observations->get_vector_length(dimension); t++)
		{
			register float64_t* dummy;
			register int32_t NN=N ;
			for (register int32_t j=0; j<NN; j++)
			{
				// row j of the transition table; indexing assumes the layout
				// matrix_a[i] == a(i,j) — presumably column-major storage of
				// transition_matrix_a (matches get_a elsewhere; verify in header)
				register float64_t * matrix_a=&transition_matrix_a[j*N] ;
				register float64_t maxj=delta[0] + matrix_a[0];
				register int32_t argmax=0;

				for (register int32_t i=1; i<NN; i++)
				{
					register float64_t temp = delta[i] + matrix_a[i];

					if (temp>maxj)
					{
						maxj=temp;
						argmax=i;
					}
				}
#ifdef FIX_POS
				// optionally penalize states disallowed at this position
				if ((!model) || (model->get_fix_pos_state(t,j,NN)!=CModel::FIX_DISALLOWED))
#endif
					delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t));
#ifdef FIX_POS
				else
					delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t)) + CModel::DISALLOWED_PENALTY;
#endif
				// remember the best predecessor for the backtracking pass
				set_psi(t, j, argmax, dimension);
			}

#ifdef USE_PATHDEBUG
			float64_t best=log(0) ;
			for (int32_t jj=0; jj<N; jj++)
				if (delta_new[jj]>best)
					best=delta_new[jj] ;

			if (best<-CMath::INFTY/2)
			{
				SG_DEBUG( "worst case at %i: %e:%e\n", t, best, worst) ;
				worst=best ;
			} ;
#endif

			// swap the delta rows instead of copying
			dummy=delta;
			delta=delta_new;
			delta_new=dummy;
		}

		// termination: fold in the end-state distribution q and pick the best
		{
			register float64_t maxj=delta[0]+get_q(0);
			register int32_t argmax=0;

			for (register int32_t i=1; i<N; i++)
			{
				register float64_t temp=delta[i]+get_q(i);

				if (temp>maxj)
				{
					maxj=temp;
					argmax=i;
				}
			}
			pat_prob=maxj;
			PATH(dimension)[p_observations->get_vector_length(dimension)-1]=argmax;
		} ;

		// backtracking via the psi table
		{
			for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>0; t--)
			{
				PATH(dimension)[t-1]=get_psi(t, PATH(dimension)[t], dimension);
			}
		}
		PATH_PROB_UPDATED(dimension)=true;
		PATH_PROB_DIMENSION(dimension)=dimension;
		return pat_prob ;
	}
}
01224
01225 #ifndef USE_HMMPARALLEL
01226 float64_t CHMM::model_probability_comp()
01227 {
01228
01229 mod_prob=0 ;
01230 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
01231 mod_prob+=forward(p_observations->get_vector_length(dim), 0, dim);
01232
01233 mod_prob_updated=true;
01234 return mod_prob;
01235 }
01236
01237 #else
01238
01239 float64_t CHMM::model_probability_comp()
01240 {
01241 pthread_t *threads=new pthread_t[parallel->get_num_threads()];
01242 S_BW_THREAD_PARAM *params=new S_BW_THREAD_PARAM[parallel->get_num_threads()];
01243
01244 SG_INFO( "computing full model probablity\n");
01245 mod_prob=0;
01246
01247 for (int32_t cpu=0; cpu<parallel->get_num_threads(); cpu++)
01248 {
01249 params[cpu].hmm=this ;
01250 params[cpu].dim_start= p_observations->get_num_vectors()*cpu/parallel->get_num_threads();
01251 params[cpu].dim_stop= p_observations->get_num_vectors()*(cpu+1)/parallel->get_num_threads();
01252 params[cpu].p_buf=new float64_t[N];
01253 params[cpu].q_buf=new float64_t[N];
01254 params[cpu].a_buf=new float64_t[N*N];
01255 params[cpu].b_buf=new float64_t[N*M];
01256 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (void*)¶ms[cpu]);
01257 }
01258
01259 for (int32_t cpu=0; cpu<parallel->get_num_threads(); cpu++)
01260 {
01261 pthread_join(threads[cpu], NULL);
01262 mod_prob+=params[cpu].ret;
01263 }
01264
01265 for (int32_t i=0; i<parallel->get_num_threads(); i++)
01266 {
01267 delete[] params[i].p_buf;
01268 delete[] params[i].q_buf;
01269 delete[] params[i].a_buf;
01270 delete[] params[i].b_buf;
01271 }
01272
01273 delete[] threads;
01274 delete[] params;
01275
01276 mod_prob_updated=true;
01277 return mod_prob;
01278 }
01279
01280 void* CHMM::bw_dim_prefetch(void* params)
01281 {
01282 CHMM* hmm=((S_BW_THREAD_PARAM*) params)->hmm;
01283 int32_t start=((S_BW_THREAD_PARAM*) params)->dim_start;
01284 int32_t stop=((S_BW_THREAD_PARAM*) params)->dim_stop;
01285 float64_t* p_buf=((S_BW_THREAD_PARAM*) params)->p_buf;
01286 float64_t* q_buf=((S_BW_THREAD_PARAM*) params)->q_buf;
01287 float64_t* a_buf=((S_BW_THREAD_PARAM*) params)->a_buf;
01288 float64_t* b_buf=((S_BW_THREAD_PARAM*) params)->b_buf;
01289 ((S_BW_THREAD_PARAM*)params)->ret=0;
01290
01291 for (int32_t dim=start; dim<stop; dim++)
01292 {
01293 hmm->forward_comp(hmm->p_observations->get_vector_length(dim), hmm->N-1, dim) ;
01294 hmm->backward_comp(hmm->p_observations->get_vector_length(dim), hmm->N-1, dim) ;
01295 float64_t modprob=hmm->model_probability(dim) ;
01296 hmm->ab_buf_comp(p_buf, q_buf, a_buf, b_buf, dim) ;
01297 ((S_BW_THREAD_PARAM*)params)->ret+= modprob;
01298 }
01299 return NULL ;
01300 }
01301
01302 void* CHMM::bw_single_dim_prefetch(void * params)
01303 {
01304 CHMM* hmm=((S_BW_THREAD_PARAM*)params)->hmm ;
01305 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
01306 ((S_DIM_THREAD_PARAM*)params)->prob_sum = hmm->model_probability(dim);
01307 return NULL ;
01308 }
01309
01310 void* CHMM::vit_dim_prefetch(void * params)
01311 {
01312 CHMM* hmm=((S_DIM_THREAD_PARAM*)params)->hmm ;
01313 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
01314 ((S_DIM_THREAD_PARAM*)params)->prob_sum = hmm->best_path(dim);
01315 return NULL ;
01316 }
01317
01318 #endif //USE_HMMPARALLEL
01319
01320
01321 #ifdef USE_HMMPARALLEL
01322
// Accumulates the Baum-Welch sufficient statistics for one observation vector
// into the caller-provided buffers (all in the log domain, normalized by the
// vector's model probability):
//   p_buf[i]      - start-state numerator for state i        (size N)
//   q_buf[i]      - end-state numerator for state i          (size N)
//   a_buf[N*i+j]  - transition numerator for i->j            (size N*N)
//   b_buf[M*i+j]  - emission numerator for symbol j in i     (size N*M)
// Used by the per-thread workers so each thread writes to its own buffers.
void CHMM::ab_buf_comp(
	float64_t* p_buf, float64_t* q_buf, float64_t *a_buf, float64_t* b_buf,
	int32_t dim)
{
	int32_t i,j,t ;
	float64_t a_sum;
	float64_t b_sum;

	float64_t dimmodprob=model_probability(dim);

	for (i=0; i<N; i++)
	{
		// start/end state numerators: gamma_0(i) and gamma_{T-1}(i)
		p_buf[i]=get_p(i)+get_b(i,p_observations->get_feature(dim,0))+backward(0,i,dim) - dimmodprob;
		q_buf[i]=forward(p_observations->get_vector_length(dim)-1, i, dim)+get_q(i) - dimmodprob;

		// transition numerator: log-sum over t of xi_t(i,j)
		for (j=0; j<N; j++)
		{
			a_sum=-CMath::INFTY;

			for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
			{
				a_sum= CMath::logarithmic_sum(a_sum, forward(t,i,dim)+
						get_a(i,j)+get_b(j,p_observations->get_feature(dim,t+1))+backward(t+1,j,dim));
			}
			a_buf[N*i+j]=a_sum-dimmodprob ;
		}

		// emission numerator: log-sum of gamma_t(i) over the t where o_t==j
		for (j=0; j<M; j++)
		{
			b_sum=-CMath::INFTY;

			for (t=0; t<p_observations->get_vector_length(dim); t++)
			{
				if (p_observations->get_feature(dim,t)==j)
					b_sum=CMath::logarithmic_sum(b_sum, forward(t,i,dim)+backward(t, i, dim));
			}

			b_buf[M*i+j]=b_sum-dimmodprob ;
		}
	}
}
01367
01368
01369 void CHMM::estimate_model_baum_welch(CHMM* hmm)
01370 {
01371 int32_t i,j,cpu;
01372 float64_t fullmodprob=0;
01373
01374
01375 for (i=0; i<N; i++)
01376 {
01377 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
01378 set_p(i,log(PSEUDO));
01379 else
01380 set_p(i,hmm->get_p(i));
01381 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
01382 set_q(i,log(PSEUDO));
01383 else
01384 set_q(i,hmm->get_q(i));
01385
01386 for (j=0; j<N; j++)
01387 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01388 set_a(i,j, log(PSEUDO));
01389 else
01390 set_a(i,j,hmm->get_a(i,j));
01391 for (j=0; j<M; j++)
01392 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
01393 set_b(i,j, log(PSEUDO));
01394 else
01395 set_b(i,j,hmm->get_b(i,j));
01396 }
01397 invalidate_model();
01398
01399 int32_t num_threads = parallel->get_num_threads();
01400
01401 pthread_t *threads=new pthread_t[num_threads] ;
01402 S_BW_THREAD_PARAM *params=new S_BW_THREAD_PARAM[num_threads] ;
01403
01404 if (p_observations->get_num_vectors()<num_threads)
01405 num_threads=p_observations->get_num_vectors();
01406
01407 for (cpu=0; cpu<num_threads; cpu++)
01408 {
01409 params[cpu].p_buf=new float64_t[N];
01410 params[cpu].q_buf=new float64_t[N];
01411 params[cpu].a_buf=new float64_t[N*N];
01412 params[cpu].b_buf=new float64_t[N*M];
01413
01414 params[cpu].hmm=hmm;
01415 int32_t start = p_observations->get_num_vectors()*cpu / num_threads;
01416 int32_t stop=p_observations->get_num_vectors()*(cpu+1) / num_threads;
01417
01418 if (cpu == parallel->get_num_threads()-1)
01419 stop=p_observations->get_num_vectors();
01420
01421 ASSERT(start<stop);
01422 params[cpu].dim_start=start;
01423 params[cpu].dim_stop=stop;
01424
01425 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, ¶ms[cpu]);
01426 }
01427
01428 for (cpu=0; cpu<num_threads; cpu++)
01429 {
01430 pthread_join(threads[cpu], NULL);
01431
01432 for (i=0; i<N; i++)
01433 {
01434
01435 set_p(i, CMath::logarithmic_sum(get_p(i), params[cpu].p_buf[i]));
01436 set_q(i, CMath::logarithmic_sum(get_q(i), params[cpu].q_buf[i]));
01437
01438
01439 for (j=0; j<N; j++)
01440 set_a(i,j, CMath::logarithmic_sum(get_a(i,j), params[cpu].a_buf[N*i+j]));
01441
01442
01443 for (j=0; j<M; j++)
01444 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), params[cpu].b_buf[M*i+j]));
01445 }
01446
01447 fullmodprob+=params[cpu].ret;
01448
01449 }
01450
01451 for (cpu=0; cpu<num_threads; cpu++)
01452 {
01453 delete[] params[cpu].p_buf;
01454 delete[] params[cpu].q_buf;
01455 delete[] params[cpu].a_buf;
01456 delete[] params[cpu].b_buf;
01457 }
01458
01459 delete[] threads ;
01460 delete[] params ;
01461
01462
01463 hmm->mod_prob=fullmodprob;
01464 hmm->mod_prob_updated=true ;
01465
01466
01467 normalize();
01468 invalidate_model();
01469 }
01470
01471 #else // USE_HMMPARALLEL
01472
01473
// Serial Baum-Welch re-estimation: re-estimates this model's parameters from
// the forward/backward statistics of 'estimate' over all observation
// vectors.  Uses the backward transition lists so only transitions actually
// present in the model are updated.
void CHMM::estimate_model_baum_welch(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum;	// log-domain numerators
	float64_t dimmodprob=0;	// model probability of the current vector
	float64_t fullmodprob=0;	// sum over all vectors

	// initialize with log(PSEUDO) wherever the source parameter is not
	// (almost) -infinity so log-space accumulation never starts at -inf;
	// keep -infinity entries as-is
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
				set_b(i,j, log(PSEUDO));
			else
				set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// start/end state numerators (gamma at first/last position)
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// only iterate over the transitions listed for state i
			int32_t num = trans_list_backward_cnt[i] ;

			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[i][j] ;
				a_sum=-CMath::INFTY;

				// xi_t(i,jj) summed (log domain) over all time steps
				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
				}
				set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
			}

			// emission numerators: gamma_t(i) over positions emitting j
			for (j=0; j<M; j++)
			{
				b_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// cache model probability of the observations on the source model
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// the new model's probability is unknown
	normalize();
	invalidate_model();
}
01558
01559
// Old serial Baum-Welch variant: identical to estimate_model_baum_welch
// except that the transition update iterates over ALL state pairs (i,j)
// instead of only the transitions in trans_list_backward.
void CHMM::estimate_model_baum_welch_old(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum;	// log-domain numerators
	float64_t dimmodprob=0;	// model probability of the current vector
	float64_t fullmodprob=0;	// sum over all vectors

	// initialize with log(PSEUDO) wherever the source parameter is not
	// (almost) -infinity; keep -infinity entries as-is
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
				set_b(i,j, log(PSEUDO));
			else
				set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// start/end state numerators (gamma at first/last position)
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// transition numerators over the full N x N matrix
			for (j=0; j<N; j++)
			{
				a_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
				}
				set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum-dimmodprob));
			}

			// emission numerators: gamma_t(i) over positions emitting j
			for (j=0; j<M; j++)
			{
				b_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// cache model probability of the observations on the source model
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// the new model's probability is unknown
	normalize();
	invalidate_model();
}
01641 #endif // USE_HMMPARALLEL
01642
01643
01644
// Baum-Welch variant that re-estimates only the transition-related
// parameters (p, q, a); the emission matrix b is copied unchanged from
// 'estimate'.
void CHMM::estimate_model_baum_welch_trans(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum;	// log-domain transition numerator
	float64_t dimmodprob=0;	// model probability of the current vector
	float64_t fullmodprob=0;	// sum over all vectors

	// initialize p/q/a with log(PSEUDO) wherever the source parameter is
	// not (almost) -infinity; b is taken over verbatim
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// start/end state numerators (gamma at first/last position)
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// only iterate over the transitions listed for state i
			int32_t num = trans_list_backward_cnt[i] ;

			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[i][j] ;
				a_sum=-CMath::INFTY;

				// xi_t(i,jj) summed (log domain) over all time steps
				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
				}
				set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
			}
		}
	}

	// cache model probability of the observations on the source model
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// the new model's probability is unknown
	normalize();
	invalidate_model();
}
01711
01712
01713
01714
01715 void CHMM::estimate_model_baum_welch_defined(CHMM* estimate)
01716 {
01717 int32_t i,j,old_i,k,t,dim;
01718 float64_t a_sum_num, b_sum_num;
01719 float64_t a_sum_denom, b_sum_denom;
01720 float64_t dimmodprob=-CMath::INFTY;
01721 float64_t fullmodprob=0;
01722 float64_t* A=ARRAYN1(0);
01723 float64_t* B=ARRAYN2(0);
01724
01725
01726
01727 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01728 set_p(i,log(PSEUDO));
01729
01730 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01731 set_q(i,log(PSEUDO));
01732
01733 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01734 {
01735 j=model->get_learn_a(k,1);
01736 set_a(i,j, log(PSEUDO));
01737 }
01738
01739 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01740 {
01741 j=model->get_learn_b(k,1);
01742 set_b(i,j, log(PSEUDO));
01743 }
01744
01745 for (i=0; i<N; i++)
01746 {
01747 A[i]=log(PSEUDO);
01748 B[i]=log(PSEUDO);
01749 }
01750
01751 #ifdef USE_HMMPARALLEL
01752 int32_t num_threads = parallel->get_num_threads();
01753 pthread_t *threads=new pthread_t[num_threads] ;
01754 S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;
01755
01756 if (p_observations->get_num_vectors()<num_threads)
01757 num_threads=p_observations->get_num_vectors();
01758 #endif
01759
01760
01761 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01762 {
01763 #ifdef USE_HMMPARALLEL
01764 if (dim%num_threads==0)
01765 {
01766 for (i=0; i<num_threads; i++)
01767 {
01768 if (dim+i<p_observations->get_num_vectors())
01769 {
01770 params[i].hmm=estimate ;
01771 params[i].dim=dim+i ;
01772 pthread_create(&threads[i], NULL, bw_single_dim_prefetch, (void*)¶ms[i]) ;
01773 }
01774 }
01775 for (i=0; i<num_threads; i++)
01776 {
01777 if (dim+i<p_observations->get_num_vectors())
01778 {
01779 pthread_join(threads[i], NULL);
01780 dimmodprob = params[i].prob_sum;
01781 }
01782 }
01783 }
01784 #else
01785 dimmodprob=estimate->model_probability(dim);
01786 #endif // USE_HMMPARALLEL
01787
01788
01789 fullmodprob+= dimmodprob;
01790
01791
01792 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01793 set_p(i, CMath::logarithmic_sum(get_p(i), estimate->forward(0,i,dim)+estimate->backward(0,i,dim) - dimmodprob ) );
01794
01795 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01796 set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+
01797 estimate->backward(p_observations->get_vector_length(dim)-1, i, dim) - dimmodprob ) );
01798
01799
01800 old_i=-1;
01801 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01802 {
01803
01804
01805 if (old_i!=i)
01806 {
01807 old_i=i;
01808 a_sum_denom=-CMath::INFTY;
01809
01810 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01811 a_sum_denom= CMath::logarithmic_sum(a_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
01812
01813 A[i]= CMath::logarithmic_sum(A[i], a_sum_denom-dimmodprob);
01814 }
01815
01816 j=model->get_learn_a(k,1);
01817 a_sum_num=-CMath::INFTY;
01818 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01819 {
01820 a_sum_num= CMath::logarithmic_sum(a_sum_num, estimate->forward(t,i,dim)+
01821 estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
01822 }
01823
01824 set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum_num-dimmodprob));
01825 }
01826
01827
01828 old_i=-1;
01829 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01830 {
01831
01832
01833
01834 if (old_i!=i)
01835 {
01836 old_i=i;
01837 b_sum_denom=-CMath::INFTY;
01838
01839 for (t=0; t<p_observations->get_vector_length(dim); t++)
01840 b_sum_denom= CMath::logarithmic_sum(b_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
01841
01842 B[i]= CMath::logarithmic_sum(B[i], b_sum_denom-dimmodprob);
01843 }
01844
01845 j=model->get_learn_b(k,1);
01846 b_sum_num=-CMath::INFTY;
01847 for (t=0; t<p_observations->get_vector_length(dim); t++)
01848 {
01849 if (p_observations->get_feature(dim,t)==j)
01850 b_sum_num=CMath::logarithmic_sum(b_sum_num, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01851 }
01852
01853 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum_num-dimmodprob));
01854 }
01855 }
01856 #ifdef USE_HMMPARALLEL
01857 delete[] threads ;
01858 delete[] params ;
01859 #endif
01860
01861
01862
01863 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01864 set_p(i, get_p(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
01865
01866 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01867 set_q(i, get_q(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
01868
01869 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01870 {
01871 j=model->get_learn_a(k,1);
01872 set_a(i,j, get_a(i,j) - A[i]);
01873 }
01874
01875 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01876 {
01877 j=model->get_learn_b(k,1);
01878 set_b(i,j, get_b(i,j) - B[i]);
01879 }
01880
01881
01882 estimate->mod_prob=fullmodprob;
01883 estimate->mod_prob_updated=true ;
01884
01885
01886 normalize();
01887 invalidate_model();
01888 }
01889
01890
01891 void CHMM::estimate_model_viterbi(CHMM* estimate)
01892 {
01893 int32_t i,j,t;
01894 float64_t sum;
01895 float64_t* P=ARRAYN1(0);
01896 float64_t* Q=ARRAYN2(0);
01897
01898 path_deriv_updated=false ;
01899
01900
01901 for (i=0; i<N; i++)
01902 {
01903 for (j=0; j<N; j++)
01904 set_A(i,j, PSEUDO);
01905
01906 for (j=0; j<M; j++)
01907 set_B(i,j, PSEUDO);
01908
01909 P[i]=PSEUDO;
01910 Q[i]=PSEUDO;
01911 }
01912
01913 float64_t allpatprob=0 ;
01914
01915 #ifdef USE_HMMPARALLEL
01916 int32_t num_threads = parallel->get_num_threads();
01917 pthread_t *threads=new pthread_t[num_threads] ;
01918 S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;
01919
01920 if (p_observations->get_num_vectors()<num_threads)
01921 num_threads=p_observations->get_num_vectors();
01922 #endif
01923
01924 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
01925 {
01926
01927 #ifdef USE_HMMPARALLEL
01928 if (dim%num_threads==0)
01929 {
01930 for (i=0; i<num_threads; i++)
01931 {
01932 if (dim+i<p_observations->get_num_vectors())
01933 {
01934 params[i].hmm=estimate ;
01935 params[i].dim=dim+i ;
01936 pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)¶ms[i]) ;
01937 }
01938 }
01939 for (i=0; i<num_threads; i++)
01940 {
01941 if (dim+i<p_observations->get_num_vectors())
01942 {
01943 pthread_join(threads[i], NULL);
01944 allpatprob += params[i].prob_sum;
01945 }
01946 }
01947 }
01948 #else
01949
01950 allpatprob += estimate->best_path(dim);
01951 #endif // USE_HMMPARALLEL
01952
01953
01954 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01955 {
01956 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
01957 set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
01958 }
01959
01960 set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
01961
01962 P[estimate->PATH(dim)[0]]++;
01963 Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
01964 }
01965
01966 #ifdef USE_HMMPARALLEL
01967 delete[] threads;
01968 delete[] params;
01969 #endif
01970
01971 allpatprob/=p_observations->get_num_vectors() ;
01972 estimate->all_pat_prob=allpatprob ;
01973 estimate->all_path_prob_updated=true ;
01974
01975
01976 for (i=0; i<N; i++)
01977 {
01978 sum=0;
01979 for (j=0; j<N; j++)
01980 sum+=get_A(i,j);
01981
01982 for (j=0; j<N; j++)
01983 set_a(i,j, log(get_A(i,j)/sum));
01984 }
01985
01986
01987 for (i=0; i<N; i++)
01988 {
01989 sum=0;
01990 for (j=0; j<M; j++)
01991 sum+=get_B(i,j);
01992
01993 for (j=0; j<M; j++)
01994 set_b(i,j, log(get_B(i, j)/sum));
01995 }
01996
01997
01998 sum=0;
01999 for (i=0; i<N; i++)
02000 sum+=P[i];
02001
02002 for (i=0; i<N; i++)
02003 set_p(i, log(P[i]/sum));
02004
02005
02006 sum=0;
02007 for (i=0; i<N; i++)
02008 sum+=Q[i];
02009
02010 for (i=0; i<N; i++)
02011 set_q(i, log(Q[i]/sum));
02012
02013
02014 invalidate_model();
02015 }
02016
02017
02018 void CHMM::estimate_model_viterbi_defined(CHMM* estimate)
02019 {
02020 int32_t i,j,k,t;
02021 float64_t sum;
02022 float64_t* P=ARRAYN1(0);
02023 float64_t* Q=ARRAYN2(0);
02024
02025 path_deriv_updated=false ;
02026
02027
02028 for (i=0; i<N; i++)
02029 {
02030 for (j=0; j<N; j++)
02031 set_A(i,j, PSEUDO);
02032
02033 for (j=0; j<M; j++)
02034 set_B(i,j, PSEUDO);
02035
02036 P[i]=PSEUDO;
02037 Q[i]=PSEUDO;
02038 }
02039
02040 #ifdef USE_HMMPARALLEL
02041 int32_t num_threads = parallel->get_num_threads();
02042 pthread_t *threads=new pthread_t[num_threads] ;
02043 S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;
02044 #endif
02045
02046 float64_t allpatprob=0.0 ;
02047 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
02048 {
02049
02050 #ifdef USE_HMMPARALLEL
02051 if (dim%num_threads==0)
02052 {
02053 for (i=0; i<num_threads; i++)
02054 {
02055 if (dim+i<p_observations->get_num_vectors())
02056 {
02057 params[i].hmm=estimate ;
02058 params[i].dim=dim+i ;
02059 pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)¶ms[i]) ;
02060 }
02061 }
02062 for (i=0; i<num_threads; i++)
02063 {
02064 if (dim+i<p_observations->get_num_vectors())
02065 {
02066 pthread_join(threads[i], NULL);
02067 allpatprob += params[i].prob_sum;
02068 }
02069 }
02070 }
02071 #else // USE_HMMPARALLEL
02072
02073 allpatprob += estimate->best_path(dim);
02074 #endif // USE_HMMPARALLEL
02075
02076
02077
02078 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
02079 {
02080 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
02081 set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
02082 }
02083
02084 set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
02085
02086 P[estimate->PATH(dim)[0]]++;
02087 Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
02088 }
02089
02090 #ifdef USE_HMMPARALLEL
02091 delete[] threads ;
02092 delete[] params ;
02093 #endif
02094
02095
02096
02097
02098 allpatprob/=p_observations->get_num_vectors() ;
02099 estimate->all_pat_prob=allpatprob ;
02100 estimate->all_path_prob_updated=true ;
02101
02102
02103
02104 for (i=0; i<N; i++)
02105 {
02106 for (j=0; j<N; j++)
02107 set_a(i,j, estimate->get_a(i,j));
02108
02109 for (j=0; j<M; j++)
02110 set_b(i,j, estimate->get_b(i,j));
02111 }
02112
02113
02114 i=0;
02115 sum=0;
02116 j=model->get_learn_a(i,0);
02117 k=i;
02118 while (model->get_learn_a(i,0)!=-1 || k<i)
02119 {
02120 if (j==model->get_learn_a(i,0))
02121 {
02122 sum+=get_A(model->get_learn_a(i,0), model->get_learn_a(i,1));
02123 i++;
02124 }
02125 else
02126 {
02127 while (k<i)
02128 {
02129 set_a(model->get_learn_a(k,0), model->get_learn_a(k,1), log (get_A(model->get_learn_a(k,0), model->get_learn_a(k,1)) / sum));
02130 k++;
02131 }
02132
02133 sum=0;
02134 j=model->get_learn_a(i,0);
02135 k=i;
02136 }
02137 }
02138
02139
02140 i=0;
02141 sum=0;
02142 j=model->get_learn_b(i,0);
02143 k=i;
02144 while (model->get_learn_b(i,0)!=-1 || k<i)
02145 {
02146 if (j==model->get_learn_b(i,0))
02147 {
02148 sum+=get_B(model->get_learn_b(i,0),model->get_learn_b(i,1));
02149 i++;
02150 }
02151 else
02152 {
02153 while (k<i)
02154 {
02155 set_b(model->get_learn_b(k,0),model->get_learn_b(k,1), log (get_B(model->get_learn_b(k,0), model->get_learn_b(k,1)) / sum));
02156 k++;
02157 }
02158
02159 sum=0;
02160 j=model->get_learn_b(i,0);
02161 k=i;
02162 }
02163 }
02164
02165 i=0;
02166 sum=0;
02167 while (model->get_learn_p(i)!=-1)
02168 {
02169 sum+=P[model->get_learn_p(i)] ;
02170 i++ ;
02171 } ;
02172 i=0 ;
02173 while (model->get_learn_p(i)!=-1)
02174 {
02175 set_p(model->get_learn_p(i), log(P[model->get_learn_p(i)]/sum));
02176 i++ ;
02177 } ;
02178
02179 i=0;
02180 sum=0;
02181 while (model->get_learn_q(i)!=-1)
02182 {
02183 sum+=Q[model->get_learn_q(i)] ;
02184 i++ ;
02185 } ;
02186 i=0 ;
02187 while (model->get_learn_q(i)!=-1)
02188 {
02189 set_q(model->get_learn_q(i), log(Q[model->get_learn_q(i)]/sum));
02190 i++ ;
02191 } ;
02192
02193
02194
02195 invalidate_model();
02196 }
02197
02198
02199
// Prints a summary of the model (log-likelihood, sizes) and, when verbose,
// the full transition/start/end/emission distributions.  Each distribution
// is accompanied by a log-domain checksum that should be ~0 (i.e. the
// probabilities sum to 1); deviations beyond 1e-5 are flagged.
void CHMM::output_model(bool verbose)
{
	int32_t i,j;
	float64_t checksum;

	// generic info
	SG_INFO( "log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
			(float64_t)((p_observations) ? model_probability() : -CMath::INFTY),
			N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));

	if (verbose)
	{
		// transition matrix a; checksum starts at q(i) so row i plus its
		// end-state probability is verified together
		SG_INFO( "\ntransition matrix\n");
		for (i=0; i<N; i++)
		{
			checksum= get_q(i);
			for (j=0; j<N; j++)
			{
				checksum= CMath::logarithmic_sum(checksum, get_a(i,j));

				SG_INFO( "a(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_a(i,j)));

				if (j % 4 == 3)
					SG_PRINT( "\n");
			}
			if (fabs(checksum)>1e-5)
				SG_DEBUG( " checksum % E ******* \n",checksum);
			else
				SG_DEBUG( " checksum % E\n",checksum);
		}

		// distribution of start states p
		SG_INFO( "\ndistribution of start states\n");
		checksum=-CMath::INFTY;
		for (i=0; i<N; i++)
		{
			checksum= CMath::logarithmic_sum(checksum, get_p(i));
			SG_INFO( "p(%02i)=%1.4f ",i, (float32_t) exp(get_p(i)));
			if (i % 4 == 3)
				SG_PRINT( "\n");
		}
		if (fabs(checksum)>1e-5)
			SG_DEBUG( " checksum % E ******* \n",checksum);
		else
			SG_DEBUG( " checksum=% E\n", checksum);

		// distribution of terminal states q
		SG_INFO( "\ndistribution of terminal states\n");
		checksum=-CMath::INFTY;
		for (i=0; i<N; i++)
		{
			checksum= CMath::logarithmic_sum(checksum, get_q(i));
			SG_INFO("q(%02i)=%1.4f ",i, (float32_t) exp(get_q(i)));
			if (i % 4 == 3)
				SG_INFO("\n");
		}
		if (fabs(checksum)>1e-5)
			SG_DEBUG( " checksum % E ******* \n",checksum);
		else
			SG_DEBUG( " checksum=% E\n", checksum);

		// emission matrix b; each row must sum to 1
		SG_INFO("\ndistribution of observations given the state\n");
		for (i=0; i<N; i++)
		{
			checksum=-CMath::INFTY;
			for (j=0; j<M; j++)
			{
				checksum=CMath::logarithmic_sum(checksum, get_b(i,j));
				SG_INFO("b(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_b(i,j)));
				if (j % 4 == 3)
					SG_PRINT("\n");
			}
			if (fabs(checksum)>1e-5)
				SG_DEBUG( " checksum % E ******* \n",checksum);
			else
				SG_DEBUG( " checksum % E\n",checksum);
		}
	}
	SG_PRINT("\n");
}
02282
02283
02284 void CHMM::output_model_defined(bool verbose)
02285 {
02286 int32_t i,j;
02287 if (!model)
02288 return ;
02289
02290
02291 SG_INFO("log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
02292 (float64_t)((p_observations) ? model_probability() : -CMath::INFTY),
02293 N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));
02294
02295 if (verbose)
02296 {
02297
02298 SG_INFO("\ntransition matrix\n");
02299
02300
02301 i=0;
02302 j=model->get_learn_a(i,0);
02303 while (model->get_learn_a(i,0)!=-1)
02304 {
02305 if (j!=model->get_learn_a(i,0))
02306 {
02307 j=model->get_learn_a(i,0);
02308 SG_PRINT("\n");
02309 }
02310
02311 SG_INFO("a(%02i,%02i)=%1.4f ",model->get_learn_a(i,0), model->get_learn_a(i,1), (float32_t) exp(get_a(model->get_learn_a(i,0), model->get_learn_a(i,1))));
02312 i++;
02313 }
02314
02315
02316 SG_INFO("\n\ndistribution of observations given the state\n");
02317 i=0;
02318 j=model->get_learn_b(i,0);
02319 while (model->get_learn_b(i,0)!=-1)
02320 {
02321 if (j!=model->get_learn_b(i,0))
02322 {
02323 j=model->get_learn_b(i,0);
02324 SG_PRINT("\n");
02325 }
02326
02327 SG_INFO("b(%02i,%02i)=%1.4f ",model->get_learn_b(i,0),model->get_learn_b(i,1), (float32_t) exp(get_b(model->get_learn_b(i,0),model->get_learn_b(i,1))));
02328 i++;
02329 }
02330
02331 SG_PRINT("\n");
02332 }
02333 SG_PRINT("\n");
02334 }
02335
02336
02337
02338
02339 void CHMM::convert_to_log()
02340 {
02341 int32_t i,j;
02342
02343 for (i=0; i<N; i++)
02344 {
02345 if (get_p(i)!=0)
02346 set_p(i, log(get_p(i)));
02347 else
02348 set_p(i, -CMath::INFTY);;
02349 }
02350
02351 for (i=0; i<N; i++)
02352 {
02353 if (get_q(i)!=0)
02354 set_q(i, log(get_q(i)));
02355 else
02356 set_q(i, -CMath::INFTY);;
02357 }
02358
02359 for (i=0; i<N; i++)
02360 {
02361 for (j=0; j<N; j++)
02362 {
02363 if (get_a(i,j)!=0)
02364 set_a(i,j, log(get_a(i,j)));
02365 else
02366 set_a(i,j, -CMath::INFTY);
02367 }
02368 }
02369
02370 for (i=0; i<N; i++)
02371 {
02372 for (j=0; j<M; j++)
02373 {
02374 if (get_b(i,j)!=0)
02375 set_b(i,j, log(get_b(i,j)));
02376 else
02377 set_b(i,j, -CMath::INFTY);
02378 }
02379 }
02380 loglikelihood=true;
02381
02382 invalidate_model();
02383 }
02384
02385
02386 void CHMM::init_model_random()
02387 {
02388 const float64_t MIN_RAND=23e-3;
02389
02390 float64_t sum;
02391 int32_t i,j;
02392
02393
02394 for (i=0; i<N; i++)
02395 {
02396 sum=0;
02397 for (j=0; j<N; j++)
02398 {
02399 set_a(i,j, CMath::random(MIN_RAND, 1.0));
02400
02401 sum+=get_a(i,j);
02402 }
02403
02404 for (j=0; j<N; j++)
02405 set_a(i,j, get_a(i,j)/sum);
02406 }
02407
02408
02409 sum=0;
02410 for (i=0; i<N; i++)
02411 {
02412 set_p(i, CMath::random(MIN_RAND, 1.0));
02413
02414 sum+=get_p(i);
02415 }
02416
02417 for (i=0; i<N; i++)
02418 set_p(i, get_p(i)/sum);
02419
02420
02421 sum=0;
02422 for (i=0; i<N; i++)
02423 {
02424 set_q(i, CMath::random(MIN_RAND, 1.0));
02425
02426 sum+=get_q(i);
02427 }
02428
02429 for (i=0; i<N; i++)
02430 set_q(i, get_q(i)/sum);
02431
02432
02433 for (i=0; i<N; i++)
02434 {
02435 sum=0;
02436 for (j=0; j<M; j++)
02437 {
02438 set_b(i,j, CMath::random(MIN_RAND, 1.0));
02439
02440 sum+=get_b(i,j);
02441 }
02442
02443 for (j=0; j<M; j++)
02444 set_b(i,j, get_b(i,j)/sum);
02445 }
02446
02447
02448 invalidate_model();
02449 }
02450
02451
//initializes the model parameters according to the constraints in 'model':
//learnable entries get normalized random values, const entries their fixed
//values, everything else is zero
void CHMM::init_model_defined()
{
	int32_t i,j,k,r;
	float64_t sum;
	const float64_t MIN_RAND=23e-3;	// lower bound for the random draws

	// zero out all transition probabilities
	for (i=0; i<N; i++)
		for (j=0; j<N; j++)
			set_a(i,j, 0);

	// zero out the start state distribution
	for (i=0; i<N; i++)
		set_p(i, 0);

	// zero out the end state distribution
	for (i=0; i<N; i++)
		set_q(i, 0);

	// zero out all emission probabilities
	for (i=0; i<N; i++)
		for (j=0; j<M; j++)
			set_b(i,j, 0);

	// learnable a entries: model->learn_a is a -1-terminated list of
	// (source,target) pairs grouped by source state.  For each group a set
	// of random numbers R is drawn; while scanning, 'sum' accumulates the
	// group's total and [k..i) marks the group's extent.  When the source
	// state changes, the finished group is flushed normalized by 'sum'.
	float64_t *R=new float64_t[N] ;
	for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
	i=0; sum=0; k=i;
	j=model->get_learn_a(i,0);
	// 'k<i' keeps the loop alive until the final group has been flushed
	while (model->get_learn_a(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_a(i,0))
		{
			// still inside the current group: accumulate
			sum+=R[model->get_learn_a(i,1)] ;
			i++;
		}
		else
		{
			// source state changed: write out the finished group [k..i)
			while (k<i)
			{
				set_a(model->get_learn_a(k,0), model->get_learn_a(k,1),
						R[model->get_learn_a(k,1)]/sum);
				k++;
			}
			j=model->get_learn_a(i,0);
			k=i;
			sum=0;
			// fresh random numbers for the next group
			for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
		}
	}
	delete[] R ; R=NULL ;

	// learnable b entries: identical scheme, with M random numbers per state
	R=new float64_t[M] ;
	for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
	i=0; sum=0; k=0 ;
	j=model->get_learn_b(i,0);
	while (model->get_learn_b(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_b(i,0))
		{
			sum+=R[model->get_learn_b(i,1)] ;
			i++;
		}
		else
		{
			while (k<i)
			{
				set_b(model->get_learn_b(k,0),model->get_learn_b(k,1),
						R[model->get_learn_b(k,1)]/sum);
				k++;
			}

			j=model->get_learn_b(i,0);
			k=i;
			sum=0;
			for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
		}
	}
	delete[] R ; R=NULL ;

	// fixed (const) a entries override whatever was set above
	i=0;
	while (model->get_const_a(i,0) != -1)
	{
		set_a(model->get_const_a(i,0), model->get_const_a(i,1), model->get_const_a_val(i));
		i++;
	}

	// fixed b entries
	i=0;
	while (model->get_const_b(i,0) != -1)
	{
		set_b(model->get_const_b(i,0), model->get_const_b(i,1), model->get_const_b_val(i));
		i++;
	}

	// fixed p entries
	i=0;
	while (model->get_const_p(i) != -1)
	{
		set_p(model->get_const_p(i), model->get_const_p_val(i));
		i++;
	}

	// NOTE(review): q was already zeroed above; this re-zeroing looks
	// redundant -- confirm against history before removing
	for (i=0; i<N; i++)
		set_q(i, 0.0);

	// fixed q entries
	i=0;
	while (model->get_const_q(i) != -1)
	{
		set_q(model->get_const_q(i), model->get_const_q_val(i));
		i++;
	}

	// learnable p entries: random values, normalized over the learn_p set
	i=0;
	sum=0;
	while (model->get_learn_p(i)!=-1)
	{
		set_p(model->get_learn_p(i),CMath::random(MIN_RAND,1.0)) ;
		sum+=get_p(model->get_learn_p(i)) ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_p(i)!=-1)
	{
		set_p(model->get_learn_p(i), get_p(model->get_learn_p(i))/sum);
		i++ ;
	} ;

	// learnable q entries: random values, normalized over the learn_q set
	i=0;
	sum=0;
	while (model->get_learn_q(i)!=-1)
	{
		set_q(model->get_learn_q(i),CMath::random(MIN_RAND,1.0)) ;
		sum+=get_q(model->get_learn_q(i)) ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_q(i)!=-1)
	{
		set_q(model->get_learn_q(i), get_q(model->get_learn_q(i))/sum);
		i++ ;
	} ;

	// all cached quantities are stale now
	invalidate_model();
}
02605
02606 void CHMM::clear_model()
02607 {
02608 int32_t i,j;
02609 for (i=0; i<N; i++)
02610 {
02611 set_p(i, log(PSEUDO));
02612 set_q(i, log(PSEUDO));
02613
02614 for (j=0; j<N; j++)
02615 set_a(i,j, log(PSEUDO));
02616
02617 for (j=0; j<M; j++)
02618 set_b(i,j, log(PSEUDO));
02619 }
02620 }
02621
02622 void CHMM::clear_model_defined()
02623 {
02624 int32_t i,j,k;
02625
02626 for (i=0; (j=model->get_learn_p(i))!=-1; i++)
02627 set_p(j, log(PSEUDO));
02628
02629 for (i=0; (j=model->get_learn_q(i))!=-1; i++)
02630 set_q(j, log(PSEUDO));
02631
02632 for (i=0; (j=model->get_learn_a(i,0))!=-1; i++)
02633 {
02634 k=model->get_learn_a(i,1);
02635 set_a(j,k, log(PSEUDO));
02636 }
02637
02638 for (i=0; (j=model->get_learn_b(i,0))!=-1; i++)
02639 {
02640 k=model->get_learn_b(i,1);
02641 set_b(j,k, log(PSEUDO));
02642 }
02643 }
02644
02645 void CHMM::copy_model(CHMM* l)
02646 {
02647 int32_t i,j;
02648 for (i=0; i<N; i++)
02649 {
02650 set_p(i, l->get_p(i));
02651 set_q(i, l->get_q(i));
02652
02653 for (j=0; j<N; j++)
02654 set_a(i,j, l->get_a(i,j));
02655
02656 for (j=0; j<M; j++)
02657 set_b(i,j, l->get_b(i,j));
02658 }
02659 }
02660
//invalidates all cached quantities (model probability, path probabilities,
//alpha/beta caches) and rebuilds the sparse transition lists
void CHMM::invalidate_model()
{
	// the model probability needs to be recomputed on next access
	this->mod_prob=0.0;
	this->mod_prob_updated=false;

	if (mem_initialized)
	{
		// tear down the old transition lists (sized trans_list_len, which
		// may differ from the current N)
		if (trans_list_forward_cnt)
			delete[] trans_list_forward_cnt ;
		trans_list_forward_cnt=NULL ;
		if (trans_list_backward_cnt)
			delete[] trans_list_backward_cnt ;
		trans_list_backward_cnt=NULL ;
		if (trans_list_forward)
		{
			for (int32_t i=0; i<trans_list_len; i++)
				if (trans_list_forward[i])
					delete[] trans_list_forward[i] ;
			delete[] trans_list_forward ;
			trans_list_forward=NULL ;
		}
		if (trans_list_backward)
		{
			for (int32_t i=0; i<trans_list_len; i++)
				if (trans_list_backward[i])
					delete[] trans_list_backward[i] ;
			delete[] trans_list_backward ;
			trans_list_backward = NULL ;
		} ;

		trans_list_len = N ;
		trans_list_forward = new T_STATES*[N] ;
		trans_list_forward_cnt = new T_STATES[N] ;

		// forward list: for each target state j, collect every source state
		// i whose transition a(i,j) is not (numerically) -infinity
		for (int32_t j=0; j<N; j++)
		{
			trans_list_forward_cnt[j]= 0 ;
			trans_list_forward[j]= new T_STATES[N] ;
			for (int32_t i=0; i<N; i++)
				if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				{
					trans_list_forward[j][trans_list_forward_cnt[j]]=i ;
					trans_list_forward_cnt[j]++ ;
				}
		} ;

		trans_list_backward = new T_STATES*[N] ;
		trans_list_backward_cnt = new T_STATES[N] ;

		// backward list: for each source state i, collect every reachable
		// target state j
		for (int32_t i=0; i<N; i++)
		{
			trans_list_backward_cnt[i]= 0 ;
			trans_list_backward[i]= new T_STATES[N] ;
			for (int32_t j=0; j<N; j++)
				if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				{
					trans_list_backward[i][trans_list_backward_cnt[i]]=j ;
					trans_list_backward_cnt[i]++ ;
				}
		} ;
	} ;
	// invalidate the cached path/derivative results
	this->all_pat_prob=0.0;
	this->pat_prob=0.0;
	this->path_deriv_updated=false ;
	this->path_deriv_dimension=-1 ;
	this->all_path_prob_updated=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	{
		// one alpha/beta cache and path-probability slot per worker thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			this->alpha_cache[i].updated=false;
			this->beta_cache[i].updated=false;
			path_prob_updated[i]=false ;
			path_prob_dimension[i]=-1 ;
		} ;
	}
#else // USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache.updated=false;
	this->beta_cache.updated=false;
	this->path_prob_dimension=-1;
	this->path_prob_updated=false;

#endif // USE_HMMPARALLEL_STRUCTURES
}
02747
02748 void CHMM::open_bracket(FILE* file)
02749 {
02750 int32_t value;
02751 while (((value=fgetc(file)) != EOF) && (value!='['))
02752 {
02753 if (value=='\n')
02754 line++;
02755 }
02756
02757 if (value==EOF)
02758 error(line, "expected \"[\" in input file");
02759
02760 while (((value=fgetc(file)) != EOF) && (isspace(value)))
02761 {
02762 if (value=='\n')
02763 line++;
02764 }
02765
02766 ungetc(value, file);
02767 }
02768
02769 void CHMM::close_bracket(FILE* file)
02770 {
02771 int32_t value;
02772 while (((value=fgetc(file)) != EOF) && (value!=']'))
02773 {
02774 if (value=='\n')
02775 line++;
02776 }
02777
02778 if (value==EOF)
02779 error(line, "expected \"]\" in input file");
02780 }
02781
02782 bool CHMM::comma_or_space(FILE* file)
02783 {
02784 int32_t value;
02785 while (((value=fgetc(file)) != EOF) && (value!=',') && (value!=';') && (value!=']'))
02786 {
02787 if (value=='\n')
02788 line++;
02789 }
02790 if (value==']')
02791 {
02792 ungetc(value, file);
02793 SG_ERROR( "found ']' instead of ';' or ','\n") ;
02794 return false ;
02795 } ;
02796
02797 if (value==EOF)
02798 error(line, "expected \";\" or \",\" in input file");
02799
02800 while (((value=fgetc(file)) != EOF) && (isspace(value)))
02801 {
02802 if (value=='\n')
02803 line++;
02804 }
02805 ungetc(value, file);
02806 return true ;
02807 }
02808
02809 bool CHMM::get_numbuffer(FILE* file, char* buffer, int32_t length)
02810 {
02811 signed char value;
02812
02813 while (((value=fgetc(file)) != EOF) &&
02814 !isdigit(value) && (value!='A')
02815 && (value!='C') && (value!='G') && (value!='T')
02816 && (value!='N') && (value!='n')
02817 && (value!='.') && (value!='-') && (value!='e') && (value!=']'))
02818 {
02819 if (value=='\n')
02820 line++;
02821 }
02822 if (value==']')
02823 {
02824 ungetc(value,file) ;
02825 return false ;
02826 } ;
02827 if (value!=EOF)
02828 {
02829 int32_t i=0;
02830 switch (value)
02831 {
02832 case 'A':
02833 value='0' +CAlphabet::B_A;
02834 break;
02835 case 'C':
02836 value='0' +CAlphabet::B_C;
02837 break;
02838 case 'G':
02839 value='0' +CAlphabet::B_G;
02840 break;
02841 case 'T':
02842 value='0' +CAlphabet::B_T;
02843 break;
02844 };
02845
02846 buffer[i++]=value;
02847
02848 while (((value=fgetc(file)) != EOF) &&
02849 (isdigit(value) || (value=='.') || (value=='-') || (value=='e')
02850 || (value=='A') || (value=='C') || (value=='G')|| (value=='T')
02851 || (value=='N') || (value=='n')) && (i<length))
02852 {
02853 switch (value)
02854 {
02855 case 'A':
02856 value='0' +CAlphabet::B_A;
02857 break;
02858 case 'C':
02859 value='0' +CAlphabet::B_C;
02860 break;
02861 case 'G':
02862 value='0' +CAlphabet::B_G;
02863 break;
02864 case 'T':
02865 value='0' +CAlphabet::B_T;
02866 break;
02867 case '1': case '2': case'3': case '4': case'5':
02868 case '6': case '7': case'8': case '9': case '0': break ;
02869 case '.': case 'e': case '-': break ;
02870 default:
02871 SG_ERROR( "found crap: %i %c (pos:%li)\n",i,value,ftell(file)) ;
02872 };
02873 buffer[i++]=value;
02874 }
02875 ungetc(value, file);
02876 buffer[i]='\0';
02877
02878 return (i<=length) && (i>0);
02879 }
02880 return false;
02881 }
02882
02883
02884
02885
02886
02887
02888
02889
02890
02891
02892
02893
02894
02895
02896
02897
02898
02899
02900
02901
02902
02903
02904
02905
02906
02907
02908
02909
02910
02911
02912
02913
02914
02915
02916
02917
//reads a complete HMM (dimensions N and M plus the arrays a, b, p, q) from
//an ascii model file using a small character-driven state machine
bool CHMM::load_model(FILE* file)
{
	int32_t received_params=0;	// bitmask of GOT* flags for parameters seen so far

	bool result=false;
	E_STATE state=INITIAL;
	char buffer[1024];	// scratch space for number tokens

	line=1;
	int32_t i,j;

	if (file)
	{
		while (state!=END)
		{
			int32_t value=fgetc(file);

			if (value=='\n')
				line++;
			if (value==EOF)
				state=END;

			switch (state)
			{
				case INITIAL:
					// expect the scalar definitions N=... and M=... first
					if (value=='N')
					{
						if (received_params & GOTN)
							error(line, "in model file: \"p double defined\"");
						else
							state=GET_N;
					}
					else if (value=='M')
					{
						if (received_params & GOTM)
							error(line, "in model file: \"p double defined\"");
						else
							state=GET_M;
					}
					else if (value=='%')
					{
						state=COMMENT;
					}
					// NOTE(review): no 'break' here -- INITIAL falls through
					// into ARRAYs so p/q/a/b keys are recognized too;
					// presumably intentional, confirm before changing
				case ARRAYs:
					if (value=='p')
					{
						if (received_params & GOTp)
							error(line, "in model file: \"p double defined\"");
						else
							state=GET_p;
					}
					// NOTE(review): plain 'if' (not 'else if'); harmless as
					// value cannot equal both 'p' and 'q'
					if (value=='q')
					{
						if (received_params & GOTq)
							error(line, "in model file: \"q double defined\"");
						else
							state=GET_q;
					}
					else if (value=='a')
					{
						if (received_params & GOTa)
							error(line, "in model file: \"a double defined\"");
						else
							state=GET_a;
					}
					else if (value=='b')
					{
						if (received_params & GOTb)
							error(line, "in model file: \"b double defined\"");
						else
							state=GET_b;
					}
					else if (value=='%')
					{
						state=COMMENT;
					}
					break;
				case GET_N:
					// parse "N=<int>": number of states
					if (value=='=')
					{
						if (get_numbuffer(file, buffer, 4))	// at most 4 digits
						{
							this->N= atoi(buffer);
							received_params|=GOTN;
							// NOTE(review): GOTO is never set in this
							// function, so this exact-equality test cannot
							// succeed -- verify against the full file
							state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
						}
						else
							state=END;
					}
					break;
				case GET_M:
					// parse "M=<int>": number of observation symbols
					if (value=='=')
					{
						if (get_numbuffer(file, buffer, 4))	// at most 4 digits
						{
							this->M= atoi(buffer);
							received_params|=GOTM;
							state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
						}
						else
							state=END;
					}
					break;
				case GET_a:
					// parse "a=[[...],...]": N x N transition matrix
					if (value=='=')
					{
						float64_t f;

						transition_matrix_a=new float64_t[N*N];
						open_bracket(file);
						for (i=0; i<this->N; i++)
						{
							open_bracket(file);

							for (j=0; j<this->N ; j++)
							{
								if (fscanf( file, "%le", &f ) != 1)
									error(line, "float64_t expected");
								else
									set_a(i,j, f);

								if (j<this->N-1)
									comma_or_space(file);
								else
									close_bracket(file);
							}

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTa;
					}
					// NOTE(review): exact equality here vs. masked compare in
					// GET_b below; with GOTN/GOTM already set this can never
					// be END -- confirm against the full file
					state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case GET_b:
					// parse "b=[[...],...]": N x M emission matrix
					if (value=='=')
					{
						float64_t f;

						observation_matrix_b=new float64_t[N*M];
						open_bracket(file);
						for (i=0; i<this->N; i++)
						{
							open_bracket(file);

							for (j=0; j<this->M ; j++)
							{
								if (fscanf( file, "%le", &f ) != 1)
									error(line, "float64_t expected");
								else
									set_b(i,j, f);

								if (j<this->M-1)
									comma_or_space(file);
								else
									close_bracket(file);
							}

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTb;
					}
					// once all four arrays are in, parsing is finished
					state= ((received_params & (GOTa | GOTb | GOTp | GOTq)) == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case GET_p:
					// parse "p=[...]": start state distribution (length N)
					if (value=='=')
					{
						float64_t f;

						initial_state_distribution_p=new float64_t[N];
						open_bracket(file);
						for (i=0; i<this->N ; i++)
						{
							if (fscanf( file, "%le", &f ) != 1)
								error(line, "float64_t expected");
							else
								set_p(i, f);

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTp;
					}
					state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case GET_q:
					// parse "q=[...]": end state distribution (length N)
					if (value=='=')
					{
						float64_t f;

						end_state_distribution_q=new float64_t[N];
						open_bracket(file);
						for (i=0; i<this->N ; i++)
						{
							if (fscanf( file, "%le", &f ) != 1)
								error(line, "float64_t expected");
							else
								set_q(i, f);

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTq;
					}
					state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case COMMENT:
					// '%' comments run to end of line
					if (value==EOF)
						state=END;
					else if (value=='\n')
					{
						line++;
						state=INITIAL;
					}
					break;

				default:
					break;
			}
		}
		// NOTE(review): GOTO is never set above, so 'result' appears to
		// always end up false -- verify against the full file/callers
		result= (received_params== (GOTa | GOTb | GOTp | GOTq | GOTN | GOTM | GOTO));
	}

	SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
	return result;
}
03156
03157
03158
03159
03160
03161
03162
03163
03164
03165
03166
03167
03168
03169
03170
03171
03172
03173
03174
03175
03176
03177
03178
03179
03180
03181
03182
03183
03184
03185
03186
03187
03188
03189
03190
03191
03192
03193
03194
03195
03196
03197
03198
03199
03200
03201
03202
03203
03204
03205
03206
03207
03208
03209
03210
03211
03212
03213
03214
03215
03216 bool CHMM::load_definitions(FILE* file, bool verbose, bool init)
03217 {
03218 if (model)
03219 delete model ;
03220 model=new CModel();
03221
03222 int32_t received_params=0x0000000;
03223 char buffer[1024];
03224
03225 bool result=false;
03226 E_STATE state=INITIAL;
03227
03228 {
03229 model->set_learn_a(0, -1);
03230 model->set_learn_a(1, -1);
03231 model->set_const_a(0, -1);
03232 model->set_const_a(1, -1);
03233 model->set_const_a_val(0, 1.0);
03234 model->set_learn_b(0, -1);
03235 model->set_const_b(0, -1);
03236 model->set_const_b_val(0, 1.0);
03237 model->set_learn_p(0, -1);
03238 model->set_learn_q(0, -1);
03239 model->set_const_p(0, -1);
03240 model->set_const_q(0, -1);
03241 } ;
03242
03243 line=1;
03244
03245 if (file)
03246 {
03247 while (state!=END)
03248 {
03249 int32_t value=fgetc(file);
03250
03251 if (value=='\n')
03252 line++;
03253
03254 if (value==EOF)
03255 state=END;
03256
03257 switch (state)
03258 {
03259 case INITIAL:
03260 if (value=='l')
03261 {
03262 if (fgetc(file)=='e' && fgetc(file)=='a' && fgetc(file)=='r' && fgetc(file)=='n' && fgetc(file)=='_')
03263 {
03264 switch(fgetc(file))
03265 {
03266 case 'a':
03267 state=GET_learn_a;
03268 break;
03269 case 'b':
03270 state=GET_learn_b;
03271 break;
03272 case 'p':
03273 state=GET_learn_p;
03274 break;
03275 case 'q':
03276 state=GET_learn_q;
03277 break;
03278 default:
03279 error(line, "a,b,p or q expected in train definition file");
03280 };
03281 }
03282 }
03283 else if (value=='c')
03284 {
03285 if (fgetc(file)=='o' && fgetc(file)=='n' && fgetc(file)=='s'
03286 && fgetc(file)=='t' && fgetc(file)=='_')
03287 {
03288 switch(fgetc(file))
03289 {
03290 case 'a':
03291 state=GET_const_a;
03292 break;
03293 case 'b':
03294 state=GET_const_b;
03295 break;
03296 case 'p':
03297 state=GET_const_p;
03298 break;
03299 case 'q':
03300 state=GET_const_q;
03301 break;
03302 default:
03303 error(line, "a,b,p or q expected in train definition file");
03304 };
03305 }
03306 }
03307 else if (value=='%')
03308 {
03309 state=COMMENT;
03310 }
03311 else if (value==EOF)
03312 {
03313 state=END;
03314 }
03315 break;
03316 case GET_learn_a:
03317 if (value=='=')
03318 {
03319 open_bracket(file);
03320 bool finished=false;
03321 int32_t i=0;
03322
03323 if (verbose)
03324 SG_DEBUG( "\nlearn for transition matrix: ") ;
03325 while (!finished)
03326 {
03327 open_bracket(file);
03328
03329 if (get_numbuffer(file, buffer, 4))
03330 {
03331 value=atoi(buffer);
03332 model->set_learn_a(i++, value);
03333
03334 if (value<0)
03335 {
03336 finished=true;
03337 break;
03338 }
03339 if (value>=N)
03340 SG_ERROR( "invalid value for learn_a(%i,0): %i\n",i/2,(int)value) ;
03341 }
03342 else
03343 break;
03344
03345 comma_or_space(file);
03346
03347 if (get_numbuffer(file, buffer, 4))
03348 {
03349 value=atoi(buffer);
03350 model->set_learn_a(i++, value);
03351
03352 if (value<0)
03353 {
03354 finished=true;
03355 break;
03356 }
03357 if (value>=N)
03358 SG_ERROR( "invalid value for learn_a(%i,1): %i\n",i/2-1,(int)value) ;
03359
03360 }
03361 else
03362 break;
03363 close_bracket(file);
03364 }
03365 close_bracket(file);
03366 if (verbose)
03367 SG_DEBUG( "%i Entries",(int)(i/2)) ;
03368
03369 if (finished)
03370 {
03371 received_params|=GOTlearn_a;
03372
03373 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03374 }
03375 else
03376 state=END;
03377 }
03378 break;
03379 case GET_learn_b:
03380 if (value=='=')
03381 {
03382 open_bracket(file);
03383 bool finished=false;
03384 int32_t i=0;
03385
03386 if (verbose)
03387 SG_DEBUG( "\nlearn for emission matrix: ") ;
03388
03389 while (!finished)
03390 {
03391 open_bracket(file);
03392
03393 int32_t combine=0;
03394
03395 for (int32_t j=0; j<2; j++)
03396 {
03397 if (get_numbuffer(file, buffer, 4))
03398 {
03399 value=atoi(buffer);
03400
03401 if (j==0)
03402 {
03403 model->set_learn_b(i++, value);
03404
03405 if (value<0)
03406 {
03407 finished=true;
03408 break;
03409 }
03410 if (value>=N)
03411 SG_ERROR( "invalid value for learn_b(%i,0): %i\n",i/2,(int)value) ;
03412 }
03413 else
03414 combine=value;
03415 }
03416 else
03417 break;
03418
03419 if (j<1)
03420 comma_or_space(file);
03421 else
03422 close_bracket(file);
03423 }
03424 model->set_learn_b(i++, combine);
03425 if (combine>=M)
03426
03427 SG_ERROR("invalid value for learn_b(%i,1): %i\n",i/2-1,(int)value) ;
03428 }
03429 close_bracket(file);
03430 if (verbose)
03431 SG_DEBUG( "%i Entries",(int)(i/2-1)) ;
03432
03433 if (finished)
03434 {
03435 received_params|=GOTlearn_b;
03436 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03437 }
03438 else
03439 state=END;
03440 }
03441 break;
03442 case GET_learn_p:
03443 if (value=='=')
03444 {
03445 open_bracket(file);
03446 bool finished=false;
03447 int32_t i=0;
03448
03449 if (verbose)
03450 SG_DEBUG( "\nlearn start states: ") ;
03451 while (!finished)
03452 {
03453 if (get_numbuffer(file, buffer, 4))
03454 {
03455 value=atoi(buffer);
03456
03457 model->set_learn_p(i++, value);
03458
03459 if (value<0)
03460 {
03461 finished=true;
03462 break;
03463 }
03464 if (value>=N)
03465 SG_ERROR( "invalid value for learn_p(%i): %i\n",i-1,(int)value) ;
03466 }
03467 else
03468 break;
03469
03470 comma_or_space(file);
03471 }
03472
03473 close_bracket(file);
03474 if (verbose)
03475 SG_DEBUG( "%i Entries",i-1) ;
03476
03477 if (finished)
03478 {
03479 received_params|=GOTlearn_p;
03480 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03481 }
03482 else
03483 state=END;
03484 }
03485 break;
03486 case GET_learn_q:
03487 if (value=='=')
03488 {
03489 open_bracket(file);
03490 bool finished=false;
03491 int32_t i=0;
03492
03493 if (verbose)
03494 SG_DEBUG( "\nlearn terminal states: ") ;
03495 while (!finished)
03496 {
03497 if (get_numbuffer(file, buffer, 4))
03498 {
03499 value=atoi(buffer);
03500 model->set_learn_q(i++, value);
03501
03502 if (value<0)
03503 {
03504 finished=true;
03505 break;
03506 }
03507 if (value>=N)
03508 SG_ERROR( "invalid value for learn_q(%i): %i\n",i-1,(int)value) ;
03509 }
03510 else
03511 break;
03512
03513 comma_or_space(file);
03514 }
03515
03516 close_bracket(file);
03517 if (verbose)
03518 SG_DEBUG( "%i Entries",i-1) ;
03519
03520 if (finished)
03521 {
03522 received_params|=GOTlearn_q;
03523 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03524 }
03525 else
03526 state=END;
03527 }
03528 break;
03529 case GET_const_a:
03530 if (value=='=')
03531 {
03532 open_bracket(file);
03533 bool finished=false;
03534 int32_t i=0;
03535
03536 if (verbose)
03537 #ifdef USE_HMMDEBUG
03538 SG_DEBUG( "\nconst for transition matrix: \n") ;
03539 #else
03540 SG_DEBUG( "\nconst for transition matrix: ") ;
03541 #endif
03542 while (!finished)
03543 {
03544 open_bracket(file);
03545
03546 if (get_numbuffer(file, buffer, 4))
03547 {
03548 value=atoi(buffer);
03549 model->set_const_a(i++, value);
03550
03551 if (value<0)
03552 {
03553 finished=true;
03554 model->set_const_a(i++, value);
03555 model->set_const_a_val((int32_t)i/2 - 1, value);
03556 break;
03557 }
03558 if (value>=N)
03559 SG_ERROR( "invalid value for const_a(%i,0): %i\n",i/2,(int)value) ;
03560 }
03561 else
03562 break;
03563
03564 comma_or_space(file);
03565
03566 if (get_numbuffer(file, buffer, 4))
03567 {
03568 value=atoi(buffer);
03569 model->set_const_a(i++, value);
03570
03571 if (value<0)
03572 {
03573 finished=true;
03574 model->set_const_a_val((int32_t)i/2 - 1, value);
03575 break;
03576 }
03577 if (value>=N)
03578 SG_ERROR( "invalid value for const_a(%i,1): %i\n",i/2-1,(int)value) ;
03579 }
03580 else
03581 break;
03582
03583 if (!comma_or_space(file))
03584 model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03585 else
03586 if (get_numbuffer(file, buffer, 10))
03587 {
03588 float64_t dvalue=atof(buffer);
03589 model->set_const_a_val((int32_t)i/2 - 1, dvalue);
03590 if (dvalue<0)
03591 {
03592 finished=true;
03593 break;
03594 }
03595 if ((dvalue>1.0) || (dvalue<0.0))
03596 SG_ERROR( "invalid value for const_a_val(%i): %e\n",(int)i/2-1,dvalue) ;
03597 }
03598 else
03599 model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03600
03601 #ifdef USE_HMMDEBUG
03602 if (verbose)
03603 SG_ERROR("const_a(%i,%i)=%e\n", model->get_const_a((int32_t)i/2-1,0),model->get_const_a((int32_t)i/2-1,1),model->get_const_a_val((int32_t)i/2-1)) ;
03604 #endif
03605 close_bracket(file);
03606 }
03607 close_bracket(file);
03608 if (verbose)
03609 SG_DEBUG( "%i Entries",(int)i/2-1) ;
03610
03611 if (finished)
03612 {
03613 received_params|=GOTconst_a;
03614 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03615 }
03616 else
03617 state=END;
03618 }
03619 break;
03620
03621 case GET_const_b:
03622 if (value=='=')
03623 {
03624 open_bracket(file);
03625 bool finished=false;
03626 int32_t i=0;
03627
03628 if (verbose)
03629 #ifdef USE_HMMDEBUG
03630 SG_DEBUG( "\nconst for emission matrix: \n") ;
03631 #else
03632 SG_DEBUG( "\nconst for emission matrix: ") ;
03633 #endif
03634 while (!finished)
03635 {
03636 open_bracket(file);
03637 int32_t combine=0;
03638 for (int32_t j=0; j<3; j++)
03639 {
03640 if (get_numbuffer(file, buffer, 10))
03641 {
03642 if (j==0)
03643 {
03644 value=atoi(buffer);
03645
03646 model->set_const_b(i++, value);
03647
03648 if (value<0)
03649 {
03650 finished=true;
03651
03652 break;
03653 }
03654 if (value>=N)
03655 SG_ERROR( "invalid value for const_b(%i,0): %i\n",i/2-1,(int)value) ;
03656 }
03657 else if (j==2)
03658 {
03659 float64_t dvalue=atof(buffer);
03660 model->set_const_b_val((int32_t)(i-1)/2, dvalue);
03661 if (dvalue<0)
03662 {
03663 finished=true;
03664 break;
03665 } ;
03666 if ((dvalue>1.0) || (dvalue<0.0))
03667 SG_ERROR( "invalid value for const_b_val(%i,1): %e\n",i/2-1,dvalue) ;
03668 }
03669 else
03670 {
03671 value=atoi(buffer);
03672 combine= value;
03673 } ;
03674 }
03675 else
03676 {
03677 if (j==2)
03678 model->set_const_b_val((int32_t)(i-1)/2, 1.0);
03679 break;
03680 } ;
03681 if (j<2)
03682 if ((!comma_or_space(file)) && (j==1))
03683 {
03684 model->set_const_b_val((int32_t)(i-1)/2, 1.0) ;
03685 break ;
03686 } ;
03687 }
03688 close_bracket(file);
03689 model->set_const_b(i++, combine);
03690 if (combine>=M)
03691 SG_ERROR("invalid value for const_b(%i,1): %i\n",i/2-1, combine) ;
03692 #ifdef USE_HMMDEBUG
03693 if (verbose && !finished)
03694 SG_ERROR("const_b(%i,%i)=%e\n", model->get_const_b((int32_t)i/2-1,0),model->get_const_b((int32_t)i/2-1,1),model->get_const_b_val((int32_t)i/2-1)) ;
03695 #endif
03696 }
03697 close_bracket(file);
03698 if (verbose)
03699 SG_ERROR( "%i Entries",(int)i/2-1) ;
03700
03701 if (finished)
03702 {
03703 received_params|=GOTconst_b;
03704 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03705 }
03706 else
03707 state=END;
03708 }
03709 break;
03710 case GET_const_p:
03711 if (value=='=')
03712 {
03713 open_bracket(file);
03714 bool finished=false;
03715 int32_t i=0;
03716
03717 if (verbose)
03718 #ifdef USE_HMMDEBUG
03719 SG_DEBUG( "\nconst for start states: \n") ;
03720 #else
03721 SG_DEBUG( "\nconst for start states: ") ;
03722 #endif
03723 while (!finished)
03724 {
03725 open_bracket(file);
03726
03727 if (get_numbuffer(file, buffer, 4))
03728 {
03729 value=atoi(buffer);
03730 model->set_const_p(i, value);
03731
03732 if (value<0)
03733 {
03734 finished=true;
03735 model->set_const_p_val(i++, value);
03736 break;
03737 }
03738 if (value>=N)
03739 SG_ERROR( "invalid value for const_p(%i): %i\n",i,(int)value) ;
03740
03741 }
03742 else
03743 break;
03744
03745 if (!comma_or_space(file))
03746 model->set_const_p_val(i++, 1.0);
03747 else
03748 if (get_numbuffer(file, buffer, 10))
03749 {
03750 float64_t dvalue=atof(buffer);
03751 model->set_const_p_val(i++, dvalue);
03752 if (dvalue<0)
03753 {
03754 finished=true;
03755 break;
03756 }
03757 if ((dvalue>1) || (dvalue<0))
03758 SG_ERROR( "invalid value for const_p_val(%i): %e\n",i,dvalue) ;
03759 }
03760 else
03761 model->set_const_p_val(i++, 1.0);
03762
03763 close_bracket(file);
03764
03765 #ifdef USE_HMMDEBUG
03766 if (verbose)
03767 SG_DEBUG("const_p(%i)=%e\n", model->get_const_p(i-1),model->get_const_p_val(i-1)) ;
03768 #endif
03769 }
03770 if (verbose)
03771 SG_DEBUG( "%i Entries",i-1) ;
03772
03773 close_bracket(file);
03774
03775 if (finished)
03776 {
03777 received_params|=GOTconst_p;
03778 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03779 }
03780 else
03781 state=END;
03782 }
03783 break;
03784 case GET_const_q:
03785 if (value=='=')
03786 {
03787 open_bracket(file);
03788 bool finished=false;
03789 if (verbose)
03790 #ifdef USE_HMMDEBUG
03791 SG_DEBUG( "\nconst for terminal states: \n") ;
03792 #else
03793 SG_DEBUG( "\nconst for terminal states: ") ;
03794 #endif
03795 int32_t i=0;
03796
03797 while (!finished)
03798 {
03799 open_bracket(file) ;
03800 if (get_numbuffer(file, buffer, 4))
03801 {
03802 value=atoi(buffer);
03803 model->set_const_q(i, value);
03804 if (value<0)
03805 {
03806 finished=true;
03807 model->set_const_q_val(i++, value);
03808 break;
03809 }
03810 if (value>=N)
03811 SG_ERROR( "invalid value for const_q(%i): %i\n",i,(int)value) ;
03812 }
03813 else
03814 break;
03815
03816 if (!comma_or_space(file))
03817 model->set_const_q_val(i++, 1.0);
03818 else
03819 if (get_numbuffer(file, buffer, 10))
03820 {
03821 float64_t dvalue=atof(buffer);
03822 model->set_const_q_val(i++, dvalue);
03823 if (dvalue<0)
03824 {
03825 finished=true;
03826 break;
03827 }
03828 if ((dvalue>1) || (dvalue<0))
03829 SG_ERROR("invalid value for const_q_val(%i): %e\n",i,(double) dvalue) ;
03830 }
03831 else
03832 model->set_const_q_val(i++, 1.0);
03833
03834 close_bracket(file);
03835 #ifdef USE_HMMDEBUG
03836 if (verbose)
03837 SG_DEBUG("const_q(%i)=%e\n", model->get_const_q(i-1),model->get_const_q_val(i-1)) ;
03838 #endif
03839 }
03840 if (verbose)
03841 SG_DEBUG( "%i Entries",i-1) ;
03842
03843 close_bracket(file);
03844
03845 if (finished)
03846 {
03847 received_params|=GOTconst_q;
03848 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03849 }
03850 else
03851 state=END;
03852 }
03853 break;
03854 case COMMENT:
03855 if (value==EOF)
03856 state=END;
03857 else if (value=='\n')
03858 state=INITIAL;
03859 break;
03860
03861 default:
03862 break;
03863 }
03864 }
03865 }
03866
03867
03868
03869
03870
03871 result=1 ;
03872 if (result)
03873 {
03874 model->sort_learn_a() ;
03875 model->sort_learn_b() ;
03876 if (init)
03877 {
03878 init_model_defined(); ;
03879 convert_to_log();
03880 } ;
03881 }
03882 if (verbose)
03883 SG_DEBUG( "\n") ;
03884 return result;
03885 }
03886
03887
03888
03889
03890
03891
03892
03893
03894
03895
03896
03897
03898
03899
03900
03901
03902
03903
03904
03905
03906
03907
03908
03909
03910
03911
03912
03913
03914
03915
03916
03917
03918
03919
03920
03921 bool CHMM::save_model(FILE* file)
03922 {
03923 bool result=false;
03924 int32_t i,j;
03925 const float32_t NAN_REPLACEMENT = (float32_t) CMath::ALMOST_NEG_INFTY ;
03926
03927 if (file)
03928 {
03929 fprintf(file,"%s","% HMM - specification\n% N - number of states\n% M - number of observation_tokens\n% a is state_transition_matrix\n% size(a)= [N,N]\n%\n% b is observation_per_state_matrix\n% size(b)= [N,M]\n%\n% p is initial distribution\n% size(p)= [1, N]\n\n% q is distribution of end states\n% size(q)= [1, N]\n");
03930 fprintf(file,"N=%d;\n",N);
03931 fprintf(file,"M=%d;\n",M);
03932
03933 fprintf(file,"p=[");
03934 for (i=0; i<N; i++)
03935 {
03936 if (i<N-1) {
03937 if (CMath::is_finite(get_p(i)))
03938 fprintf(file, "%e,", (double)get_p(i));
03939 else
03940 fprintf(file, "%f,", NAN_REPLACEMENT);
03941 }
03942 else {
03943 if (CMath::is_finite(get_p(i)))
03944 fprintf(file, "%e", (double)get_p(i));
03945 else
03946 fprintf(file, "%f", NAN_REPLACEMENT);
03947 }
03948 }
03949
03950 fprintf(file,"];\n\nq=[");
03951 for (i=0; i<N; i++)
03952 {
03953 if (i<N-1) {
03954 if (CMath::is_finite(get_q(i)))
03955 fprintf(file, "%e,", (double)get_q(i));
03956 else
03957 fprintf(file, "%f,", NAN_REPLACEMENT);
03958 }
03959 else {
03960 if (CMath::is_finite(get_q(i)))
03961 fprintf(file, "%e", (double)get_q(i));
03962 else
03963 fprintf(file, "%f", NAN_REPLACEMENT);
03964 }
03965 }
03966 fprintf(file,"];\n\na=[");
03967
03968 for (i=0; i<N; i++)
03969 {
03970 fprintf(file, "\t[");
03971
03972 for (j=0; j<N; j++)
03973 {
03974 if (j<N-1) {
03975 if (CMath::is_finite(get_a(i,j)))
03976 fprintf(file, "%e,", (double)get_a(i,j));
03977 else
03978 fprintf(file, "%f,", NAN_REPLACEMENT);
03979 }
03980 else {
03981 if (CMath::is_finite(get_a(i,j)))
03982 fprintf(file, "%e];\n", (double)get_a(i,j));
03983 else
03984 fprintf(file, "%f];\n", NAN_REPLACEMENT);
03985 }
03986 }
03987 }
03988
03989 fprintf(file," ];\n\nb=[");
03990
03991 for (i=0; i<N; i++)
03992 {
03993 fprintf(file, "\t[");
03994
03995 for (j=0; j<M; j++)
03996 {
03997 if (j<M-1) {
03998 if (CMath::is_finite(get_b(i,j)))
03999 fprintf(file, "%e,", (double)get_b(i,j));
04000 else
04001 fprintf(file, "%f,", NAN_REPLACEMENT);
04002 }
04003 else {
04004 if (CMath::is_finite(get_b(i,j)))
04005 fprintf(file, "%e];\n", (double)get_b(i,j));
04006 else
04007 fprintf(file, "%f];\n", NAN_REPLACEMENT);
04008 }
04009 }
04010 }
04011 result= (fprintf(file," ];\n") == 5);
04012 }
04013
04014 return result;
04015 }
04016
04017 T_STATES* CHMM::get_path(int32_t dim, float64_t& prob)
04018 {
04019 T_STATES* result = NULL;
04020
04021 prob = best_path(dim);
04022 result = new T_STATES[p_observations->get_vector_length(dim)];
04023
04024 for (int32_t i=0; i<p_observations->get_vector_length(dim); i++)
04025 result[i]=PATH(dim)[i];
04026
04027 return result;
04028 }
04029
04030 bool CHMM::save_path(FILE* file)
04031 {
04032 bool result=false;
04033
04034 if (file)
04035 {
04036 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04037 {
04038 if (dim%100==0)
04039 SG_PRINT( "%i..", dim) ;
04040 float64_t prob = best_path(dim);
04041 fprintf(file,"%i. path probability:%e\nstate sequence:\n", dim, prob);
04042 for (int32_t i=0; i<p_observations->get_vector_length(dim)-1; i++)
04043 fprintf(file,"%d ", PATH(dim)[i]);
04044 fprintf(file,"%d", PATH(dim)[p_observations->get_vector_length(dim)-1]);
04045 fprintf(file,"\n\n") ;
04046 }
04047 SG_DONE();
04048 result=true;
04049 }
04050
04051 return result;
04052 }
04053
04054 bool CHMM::save_likelihood_bin(FILE* file)
04055 {
04056 bool result=false;
04057
04058 if (file)
04059 {
04060 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04061 {
04062 float32_t prob= (float32_t) model_probability(dim);
04063 fwrite(&prob, sizeof(float32_t), 1, file);
04064 }
04065 result=true;
04066 }
04067
04068 return result;
04069 }
04070
04071 bool CHMM::save_likelihood(FILE* file)
04072 {
04073 bool result=false;
04074
04075 if (file)
04076 {
04077 fprintf(file, "%% likelihood of model per observation\n%% P[O|model]=[ P[O|model]_1 P[O|model]_2 ... P[O|model]_dim ]\n%%\n");
04078
04079 fprintf(file, "P=[");
04080 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04081 fprintf(file, "%e ", (double) model_probability(dim));
04082
04083 fprintf(file,"];");
04084 result=true;
04085 }
04086
04087 return result;
04088 }
04089
/* Append 'value' to 'file' as one raw 32bit float and count it in the local
 * variable num_floats, which every function using this macro declares. */
#define FLOATWRITE(file, value) { float32_t rrr=float32_t(value); fwrite(&rrr, sizeof(float32_t), 1, file); num_floats++;}
04091
/** Dump the HMM parameters to 'file' as a flat stream of 32bit floats (via
 * FLOATWRITE). Two layouts exist:
 * - no model definition loaded (model==NULL): the full p, q, a, b tables
 *   are written; section header is the sentinel INFTY followed by tag 1;
 * - model definition loaded: only the learnable parameters listed in
 *   'model' are written, each a/b value preceded by its (i,j) coordinates;
 *   section header is INFTY followed by tag 2.
 * Both layouts end with a trailer (INFTY, tag 3) holding the section sizes
 * and the dimensions N, M.
 * @param file output stream; nothing is written if NULL
 * @return always true */
bool CHMM::save_model_bin(FILE* file)
{
	int32_t i,j,q, num_floats=0 ;
	if (!model)
	{
		// full parameter dump
		if (file)
		{
			// header: sentinel followed by format tag 1 (full model)
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 1);

			// initial state distribution p
			for (i=0; i<N; i++)
				FLOATWRITE(file, get_p(i));
			SG_INFO( "wrote %i parameters for p\n",N) ;

			// end state distribution q
			for (i=0; i<N; i++)
				FLOATWRITE(file, get_q(i)) ;
			SG_INFO( "wrote %i parameters for q\n",N) ;

			// transition matrix a
			for (i=0; i<N; i++)
				for (j=0; j<N; j++)
					FLOATWRITE(file, get_a(i,j));
			SG_INFO( "wrote %i parameters for a\n",N*N) ;

			// emission matrix b
			for (i=0; i<N; i++)
				for (j=0; j<M; j++)
					FLOATWRITE(file, get_b(i,j));
			SG_INFO( "wrote %i parameters for b\n",N*M) ;

			// trailer: sentinel, tag 3, then section sizes and dimensions
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 3);

			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) N*N);
			FLOATWRITE(file, (float32_t) N*M);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) M);
		} ;
	}
	else
	{
		// dump only the learnable parameters named in the model definition
		if (file)
		{
			int32_t num_p, num_q, num_a, num_b ;

			// header: sentinel followed by format tag 2 (partial model)
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 2);

			// learnable entries of p (index lists are -1 terminated)
			for (i=0; model->get_learn_p(i)>=0; i++)
				FLOATWRITE(file, get_p(model->get_learn_p(i)));
			num_p=i ;
			SG_INFO( "wrote %i parameters for p\n",num_p) ;

			for (i=0; model->get_learn_q(i)>=0; i++)
				FLOATWRITE(file, get_q(model->get_learn_q(i)));
			num_q=i ;
			SG_INFO( "wrote %i parameters for q\n",num_q) ;

			// transitions: (i,j) coordinates followed by the value.
			// NOTE(review): this loop terminates on column 1 while the
			// b-loop below terminates on column 0 -- presumably both
			// columns end together; confirm against the definitions format.
			for (q=0; model->get_learn_a(q,1)>=0; q++)
			{
				i=model->get_learn_a(q,0) ;
				j=model->get_learn_a(q,1) ;
				FLOATWRITE(file, (float32_t)i);
				FLOATWRITE(file, (float32_t)j);
				FLOATWRITE(file, get_a(i,j));
			} ;
			num_a=q ;
			SG_INFO( "wrote %i parameters for a\n",num_a) ;

			// emissions: (i,j) coordinates followed by the value
			for (q=0; model->get_learn_b(q,0)>=0; q++)
			{
				i=model->get_learn_b(q,0) ;
				j=model->get_learn_b(q,1) ;
				FLOATWRITE(file, (float32_t)i);
				FLOATWRITE(file, (float32_t)j);
				FLOATWRITE(file, get_b(i,j));
			} ;
			num_b=q ;
			SG_INFO( "wrote %i parameters for b\n",num_b) ;

			// trailer: sentinel, tag 3, section sizes and dimensions
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 3);

			FLOATWRITE(file, (float32_t) num_p);
			FLOATWRITE(file, (float32_t) num_q);
			FLOATWRITE(file, (float32_t) num_a);
			FLOATWRITE(file, (float32_t) num_b);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) M);
		} ;
	} ;
	return true ;
}
04193
/** Write the derivatives of the best-path (viterbi) probability w.r.t. all
 * model parameters (p, q, a, b) to 'logfile' as a matlab-style matrix: one
 * row per observation vector, columns ordered dp, dq, da (row-wise), db
 * (row-wise).
 * @param logfile output stream
 * @return false if logfile is NULL, true otherwise */
bool CHMM::save_path_derivatives(FILE* logfile)
{
	int32_t dim,i,j;
	float64_t prob;

	if (logfile)
	{
		// describe the file layout in matlab comments
		fprintf(logfile,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%% \n%% calculating derivatives of P[O,Q|lambda]=p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
		fprintf(logfile,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(logfile,"%% ............................. \n");
		fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
		fprintf(logfile,"%% ];\n%%\n\ndPr(log()) = [\n");
	}
	else
		return false ;

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// viterbi: fills PATH(dim) as a side effect
		prob=best_path(dim);

		fprintf(logfile, "[ ");

		// derivatives w.r.t. start and end distributions
		for (i=0; i<N; i++)
			fprintf(logfile,"%e, ", (double) path_derivative_p(i,dim) );

		for (i=0; i<N; i++)
			fprintf(logfile,"%e, ", (double) path_derivative_q(i,dim) );

		// derivatives w.r.t. transition matrix, row-wise
		for (i=0; i<N; i++)
			for (j=0; j<N; j++)
				fprintf(logfile, "%e,", (double) path_derivative_a(i,j,dim) );

		// derivatives w.r.t. emission matrix, row-wise
		for (i=0; i<N; i++)
			for (j=0; j<M; j++)
				fprintf(logfile, "%e,", (double) path_derivative_b(i,j,dim) );

		// step back one byte to overwrite the trailing comma with " ];"
		fseek(logfile,ftell(logfile)-1,SEEK_SET);
		fprintf(logfile, " ];\n");
	}

	fprintf(logfile, "];");

	return true ;
}
04241
/** Write the viterbi-path derivatives for every observation vector to
 * 'logfile' as raw 32bit floats (via FLOATWRITE). Per vector: the path
 * probability, then either all parameter derivatives (no model definition
 * loaded) or only the learnable ones listed in 'model'. The model itself
 * is appended via save_model_bin() at the end.
 * @param logfile output stream; per-vector data is skipped if NULL
 * @return always true */
bool CHMM::save_path_derivatives_bin(FILE* logfile)
{
	bool result=false;
	int32_t dim,i,j,q;
	float64_t prob=0 ;
	int32_t num_floats=0 ;   // required by FLOATWRITE

	float64_t sum_prob=0.0 ;
	if (!model)
		SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
	else
		SG_INFO( "writing derivatives of changed weights only\n") ;

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// progress indicator
		if (dim%100==0)
		{
			SG_PRINT( ".") ;
		} ;

		// viterbi: fills PATH(dim); accumulate overall path probability
		prob=best_path(dim);
		sum_prob+=prob ;

		if (!model)
		{
			// no definitions: derivatives of every parameter
			if (logfile)
			{
				FLOATWRITE(logfile, prob);

				for (i=0; i<N; i++)
					FLOATWRITE(logfile, path_derivative_p(i,dim));

				for (i=0; i<N; i++)
					FLOATWRITE(logfile, path_derivative_q(i,dim));

				for (i=0; i<N; i++)
					for (j=0; j<N; j++)
						FLOATWRITE(logfile, path_derivative_a(i,j,dim));

				for (i=0; i<N; i++)
					for (j=0; j<M; j++)
						FLOATWRITE(logfile, path_derivative_b(i,j,dim));

			}
		}
		else
		{
			// definitions loaded: only the learnable parameters
			if (logfile)
			{
				FLOATWRITE(logfile, prob);

				for (i=0; model->get_learn_p(i)>=0; i++)
					FLOATWRITE(logfile, path_derivative_p(model->get_learn_p(i),dim));

				for (i=0; model->get_learn_q(i)>=0; i++)
					FLOATWRITE(logfile, path_derivative_q(model->get_learn_q(i),dim));

				for (q=0; model->get_learn_a(q,0)>=0; q++)
				{
					i=model->get_learn_a(q,0) ;
					j=model->get_learn_a(q,1) ;
					FLOATWRITE(logfile, path_derivative_a(i,j,dim));
				} ;

				for (q=0; model->get_learn_b(q,0)>=0; q++)
				{
					i=model->get_learn_b(q,0) ;
					j=model->get_learn_b(q,1) ;
					FLOATWRITE(logfile, path_derivative_b(i,j,dim));
				} ;
			}
		} ;
	}
	// append the model parameters themselves
	save_model_bin(logfile) ;

	result=true;
	SG_PRINT( "\n") ;
	return result;
}
04324
/** Write the derivatives of the model probability P[O|lambda] for every
 * observation vector to 'file' as raw 32bit floats (via FLOATWRITE). Per
 * vector: the probability, then either all parameter derivatives (no model
 * definition loaded) or only the learnable ones. When USE_HMMPARALLEL is
 * enabled, model probabilities are prefetched in batches of num_threads
 * observations using worker threads. save_model_bin() is appended at the
 * end.
 * @param file output stream; per-vector data is skipped if NULL
 * @return always true */
bool CHMM::save_model_derivatives_bin(FILE* file)
{
	bool result=false;
	int32_t dim,i,j,q ;
	int32_t num_floats=0 ;   // required by FLOATWRITE

	if (!model)
		SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
	else
		SG_INFO( "writing derivatives of changed weights only\n") ;

#ifdef USE_HMMPARALLEL
	// worker threads that prefetch model_probability for upcoming vectors
	int32_t num_threads = parallel->get_num_threads();
	pthread_t *threads=new pthread_t[num_threads] ;
	S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;

	if (p_observations->get_num_vectors()<num_threads)
		num_threads=p_observations->get_num_vectors();
#endif

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// progress indicator
		if (dim%20==0)
		{
			SG_PRINT( ".") ;
		} ;

#ifdef USE_HMMPARALLEL
		// at the start of each batch, spawn one prefetch thread per vector
		if (dim%num_threads==0)
		{
			for (i=0; i<num_threads; i++)
			{
				if (dim+i<p_observations->get_num_vectors())
				{
					params[i].hmm=this ;
					params[i].dim=dim+i ;
					pthread_create(&threads[i], NULL, bw_dim_prefetch, (void*)&params[i]) ;
				}
			}

			for (i=0; i<num_threads; i++)
			{
				if (dim+i<p_observations->get_num_vectors())
					pthread_join(threads[i], NULL);
			}
		}
#endif

		float64_t prob=model_probability(dim) ;
		if (!model)
		{
			// no definitions: derivatives of every parameter
			if (file)
			{
				FLOATWRITE(file, prob);

				for (i=0; i<N; i++)
					FLOATWRITE(file, model_derivative_p(i,dim));

				for (i=0; i<N; i++)
					FLOATWRITE(file, model_derivative_q(i,dim));

				for (i=0; i<N; i++)
					for (j=0; j<N; j++)
						FLOATWRITE(file, model_derivative_a(i,j,dim));

				for (i=0; i<N; i++)
					for (j=0; j<M; j++)
						FLOATWRITE(file, model_derivative_b(i,j,dim));

				if (dim==0)
					SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
			} ;
		}
		else
		{
			// definitions loaded: only the learnable parameters
			if (file)
			{
				FLOATWRITE(file, prob);

				for (i=0; model->get_learn_p(i)>=0; i++)
					FLOATWRITE(file, model_derivative_p(model->get_learn_p(i),dim));

				for (i=0; model->get_learn_q(i)>=0; i++)
					FLOATWRITE(file, model_derivative_q(model->get_learn_q(i),dim));

				// NOTE(review): terminates on column 1 here but column 0 in
				// the b-loop below -- presumably equivalent; confirm.
				for (q=0; model->get_learn_a(q,1)>=0; q++)
				{
					i=model->get_learn_a(q,0) ;
					j=model->get_learn_a(q,1) ;
					FLOATWRITE(file, model_derivative_a(i,j,dim));
				} ;

				for (q=0; model->get_learn_b(q,0)>=0; q++)
				{
					i=model->get_learn_b(q,0) ;
					j=model->get_learn_b(q,1) ;
					FLOATWRITE(file, model_derivative_b(i,j,dim));
				} ;
				if (dim==0)
					SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
			} ;
		} ;
	}
	// append the model parameters themselves
	save_model_bin(file) ;

#ifdef USE_HMMPARALLEL
	delete[] threads ;
	delete[] params ;
#endif

	result=true;
	SG_PRINT( "\n") ;
	return result;
}
04445
/** Write the derivatives of the model probability P[O|lambda] w.r.t. all
 * parameters (p, q, a, b) to 'file' as a matlab-style matrix, one row per
 * observation vector.
 * @param file output stream
 * @return true on success, false if file is NULL */
bool CHMM::save_model_derivatives(FILE* file)
{
	bool result=false;
	int32_t dim,i,j;

	if (file)
	{
		// describe the file layout in matlab comments
		fprintf(file,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%%\n%% calculating derivatives of P[O|lambda]=sum_{all Q}p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
		fprintf(file,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(file,"%% ............................. \n");
		fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
		fprintf(file,"%% ];\n%%\n\nlog(dPr) = [\n");

		for (dim=0; dim<p_observations->get_num_vectors(); dim++)
		{
			fprintf(file, "[ ");

			// derivatives w.r.t. start and end distributions
			for (i=0; i<N; i++)
				fprintf(file,"%e, ", (double) model_derivative_p(i, dim) );

			for (i=0; i<N; i++)
				fprintf(file,"%e, ", (double) model_derivative_q(i, dim) );

			// derivatives w.r.t. transition matrix, row-wise
			for (i=0; i<N; i++)
				for (j=0; j<N; j++)
					fprintf(file, "%e,", (double) model_derivative_a(i,j,dim) );

			// derivatives w.r.t. emission matrix, row-wise
			for (i=0; i<N; i++)
				for (j=0; j<M; j++)
					fprintf(file, "%e,", (double) model_derivative_b(i,j,dim) );

			// step back one byte to overwrite the trailing comma
			fseek(file,ftell(file)-1,SEEK_SET);
			fprintf(file, " ];\n");
		}

		fprintf(file, "];");

		result=true;
	}
	return result;
}
04493
/** Numerically verify the analytic derivative of the combined model
 * probability (model_probability(-1) * number of vectors) w.r.t. the
 * emission parameters b, using a symmetric finite difference with step
 * 'delta' applied in the linear (non-log) domain. Each parameter is
 * restored after probing. Results are only printed, not returned.
 * NOTE(review): only state i=0 is checked; checks for a/p/q appear to have
 * been removed.
 * @return always true */
bool CHMM::check_model_derivatives_combined()
{
	const float64_t delta=5e-4 ;

	int32_t i ;

	i=0;
	{
		for (int32_t j=0; j<M; j++)
		{
			float64_t old_b=get_b(i,j) ;

			// f(b - delta): perturb in linear domain, store back in log domain
			set_b(i,j, log(exp(old_b)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=(model_probability(-1)*p_observations->get_num_vectors()) ;

			// f(b + delta)
			set_b(i,j, log(exp(old_b)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=(model_probability(-1)*p_observations->get_num_vectors());

			// central-difference approximation of the derivative
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			// restore the original parameter
			set_b(i,j, old_b) ;
			invalidate_model() ;

			// analytic derivative, summed over all observation vectors
			float64_t deriv_calc=0 ;
			for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
			{
				deriv_calc+=exp(model_derivative_b(i, j, dim)-model_probability(dim)) ;
				if (j==1)
					SG_INFO("deriv_calc[%i]=%e\n",dim,deriv_calc) ;
			} ;

			// NOTE(review): SG_ERROR is used for plain reporting here --
			// presumably SG_DEBUG/SG_INFO was intended; confirm before use.
			SG_ERROR( "b(%i,%i)=%e db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j,exp(old_b),i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
	} ;
	return true ;
}
04563
/** Numerically verify the analytic derivatives of the per-vector model
 * probability w.r.t. the a and b parameters (and, when TEST is defined,
 * p and q) using a symmetric finite difference with step 'delta' in the
 * linear domain. Each parameter is restored after probing; results are
 * printed via SG_DEBUG.
 * NOTE(review): 'result' is never set to true, so this always returns
 * false -- callers apparently ignore the return value; confirm.
 * @return always false */
bool CHMM::check_model_derivatives()
{
	bool result=false;
	const float64_t delta=3e-4 ;

	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		int32_t i ;

		// check transition-matrix derivatives da(i,j)
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<N; j++)
			{
				float64_t old_a=get_a(i,j) ;

				// f(a - delta)
				set_a(i,j, log(exp(old_a)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=exp(model_probability(dim)) ;

				// f(a + delta)
				set_a(i,j, log(exp(old_a)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=exp(model_probability(dim));

				// central difference
				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				// restore and compare against the analytic derivative
				set_a(i,j, old_a) ;
				invalidate_model() ;
				float64_t deriv_calc=exp(model_derivative_a(i, j, dim)) ;

				SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
				invalidate_model() ;
			} ;
		} ;
		// check emission-matrix derivatives db(i,j)
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<M; j++)
			{
				float64_t old_b=get_b(i,j) ;

				set_b(i,j, log(exp(old_b)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=exp(model_probability(dim)) ;

				set_b(i,j, log(exp(old_b)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=exp(model_probability(dim));

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				set_b(i,j, old_b) ;
				invalidate_model() ;
				float64_t deriv_calc=exp(model_derivative_b(i, j, dim));

				SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
			} ;
		} ;

#ifdef TEST
		// check start-distribution derivatives dp(i)
		for (i=0; i<N; i++)
		{
			float64_t old_p=get_p(i) ;

			set_p(i, log(exp(old_p)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=exp(model_probability(dim)) ;

			set_p(i, log(exp(old_p)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=exp(model_probability(dim));
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_p(i, old_p) ;
			invalidate_model() ;
			float64_t deriv_calc=exp(model_derivative_p(i, dim));

			SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
		// check end-distribution derivatives dq(i)
		for (i=0; i<N; i++)
		{
			float64_t old_q=get_q(i) ;

			set_q(i, log(exp(old_q)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=exp(model_probability(dim)) ;

			set_q(i, log(exp(old_q)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=exp(model_probability(dim));

			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_q(i, old_q) ;
			invalidate_model() ;
			float64_t deriv_calc=exp(model_derivative_q(i, dim));

			SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
#endif
	}
	return result;
}
04667
#ifdef USE_HMMDEBUG
/** Numerically verify the analytic derivatives of the best-path (viterbi)
 * probability w.r.t. all parameters (a, b, p, q) using a symmetric finite
 * difference with step 'delta' in the linear domain. Each parameter is
 * restored after probing; results are printed via SG_DEBUG.
 * NOTE(review): 'result' is never set to true, so this always returns
 * false -- callers apparently ignore the return value; confirm.
 * @return always false */
bool CHMM::check_path_derivatives()
{
	bool result=false;
	const float64_t delta=1e-4 ;

	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		int32_t i ;

		// check transition-matrix derivatives da(i,j)
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<N; j++)
			{
				float64_t old_a=get_a(i,j) ;

				// f(a - delta)
				set_a(i,j, log(exp(old_a)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=best_path(dim) ;

				// f(a + delta)
				set_a(i,j, log(exp(old_a)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=best_path(dim);

				// central difference
				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				// restore and compare against the analytic derivative
				set_a(i,j, old_a) ;
				invalidate_model() ;
				float64_t deriv_calc=path_derivative_a(i, j, dim) ;

				SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
			} ;
		} ;
		// check emission-matrix derivatives db(i,j)
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<M; j++)
			{
				float64_t old_b=get_b(i,j) ;

				set_b(i,j, log(exp(old_b)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=best_path(dim) ;

				set_b(i,j, log(exp(old_b)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=best_path(dim);

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				set_b(i,j, old_b) ;
				invalidate_model() ;
				float64_t deriv_calc=path_derivative_b(i, j, dim);

				SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
			} ;
		} ;

		// check start-distribution derivatives dp(i)
		for (i=0; i<N; i++)
		{
			float64_t old_p=get_p(i) ;

			set_p(i, log(exp(old_p)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=best_path(dim) ;

			set_p(i, log(exp(old_p)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=best_path(dim);
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_p(i, old_p) ;
			invalidate_model() ;
			float64_t deriv_calc=path_derivative_p(i, dim);

			SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
		// check end-distribution derivatives dq(i)
		for (i=0; i<N; i++)
		{
			float64_t old_q=get_q(i) ;

			set_q(i, log(exp(old_q)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=best_path(dim) ;

			set_q(i, log(exp(old_q)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=best_path(dim);

			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_q(i, old_q) ;
			invalidate_model() ;
			float64_t deriv_calc=path_derivative_q(i, dim);

			SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
	}
	return result;
}
#endif // USE_HMMDEBUG
04770
04771
04772 void CHMM::normalize(bool keep_dead_states)
04773 {
04774 int32_t i,j;
04775 const float64_t INF=-1e10;
04776 float64_t sum_p =INF;
04777
04778 for (i=0; i<N; i++)
04779 {
04780 sum_p=CMath::logarithmic_sum(sum_p, get_p(i));
04781
04782 float64_t sum_b =INF;
04783 float64_t sum_a =get_q(i);
04784
04785 for (j=0; j<N; j++)
04786 sum_a=CMath::logarithmic_sum(sum_a, get_a(i,j));
04787
04788 if (sum_a>CMath::ALMOST_NEG_INFTY/N || (!keep_dead_states) )
04789 {
04790 for (j=0; j<N; j++)
04791 set_a(i,j, get_a(i,j)-sum_a);
04792 set_q(i, get_q(i)-sum_a);
04793 }
04794
04795 for (j=0; j<M; j++)
04796 sum_b=CMath::logarithmic_sum(sum_b, get_b(i,j));
04797 for (j=0; j<M; j++)
04798 set_b(i,j, get_b(i,j)-sum_b);
04799 }
04800
04801 for (i=0; i<N; i++)
04802 set_p(i, get_p(i)-sum_p);
04803
04804 invalidate_model();
04805 }
04806
/** Append app_model's states to this HMM, chaining the two models: this
 * model's end states are connected to app_model's start states, this
 * model's p becomes the combined start distribution and app_model's q the
 * combined end distribution. Requires matching emission alphabets (M).
 * The resulting HMM is NOT normalized (see warning below).
 * NOTE(review): 'result' is never set to true, so this always returns
 * false -- callers apparently ignore the return value; confirm.
 * @param app_model model to append (must have the same M)
 * @return always false */
bool CHMM::append_model(CHMM* app_model)
{
	bool result=false;
	const int32_t num_states=app_model->get_N();
	int32_t i,j;

	SG_DEBUG( "cur N:%d M:%d\n", N, M);
	SG_DEBUG( "old N:%d M:%d\n", app_model->get_N(), app_model->get_M());
	if (app_model->get_M() == get_M())
	{
		// allocate enlarged parameter tables (log domain)
		float64_t* n_p=new float64_t[N+num_states];
		float64_t* n_q=new float64_t[N+num_states];
		float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];

		float64_t* n_b=new float64_t[(N+num_states)*M];

		// initialize everything to log(0)
		for (i=0; i<N+num_states; i++)
		{
			n_p[i]=-CMath::INFTY;
			n_q[i]=-CMath::INFTY;

			for (j=0; j<N+num_states; j++)
				n_a[(N+num_states)*i+j]=-CMath::INFTY;

			for (j=0; j<M; j++)
				n_b[M*i+j]=-CMath::INFTY;
		}

		// copy this model into states 0..N-1; only its p survives as the
		// combined start distribution (its q is consumed by the chaining
		// below). NOTE: a-values are written at index (N+num_states)*j+i --
		// presumably the layout expected by get_a/set_a; confirm vs. HMM.h.
		for (i=0; i<N; i++)
		{
			n_p[i]=get_p(i);

			for (j=0; j<N; j++)
				n_a[(N+num_states)*j+i]=get_a(i,j);

			for (j=0; j<M; j++)
			{
				n_b[M*i+j]=get_b(i,j);
			}
		}

		// copy the appended model into states N..N+num_states-1; its q
		// becomes the combined end distribution
		for (i=0; i<app_model->get_N(); i++)
		{
			n_q[i+N]=app_model->get_q(i);

			for (j=0; j<app_model->get_N(); j++)
				n_a[(N+num_states)*(j+N)+(i+N)]=app_model->get_a(i,j);
			for (j=0; j<app_model->get_M(); j++)
				n_b[M*(i+N)+j]=app_model->get_b(i,j);
		}

		// chain the two models: old end state i reaches appended start
		// state j-N with log probability q_old(i)+p_app(j-N)
		for (i=0; i<N; i++)
		{
			for (j=N; j<N+num_states; j++)
				n_a[(N+num_states)*j + i]=CMath::logarithmic_sum(get_q(i)+app_model->get_p(j-N), n_a[(N+num_states)*j + i]);
		}

		// grow the state-dependent bookkeeping to the new N
		free_state_dependend_arrays();
		N+=num_states;

		alloc_state_dependend_arrays();

		// swap in the merged tables (discarding the freshly allocated ones)
		delete[] initial_state_distribution_p;
		delete[] end_state_distribution_q;
		delete[] transition_matrix_a;
		delete[] observation_matrix_b;

		transition_matrix_a=n_a;
		observation_matrix_b=n_b;
		initial_state_distribution_p=n_p;
		end_state_distribution_q=n_q;

		SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
		invalidate_model();
	}
	else
		SG_ERROR( "number of observations is different for append model, doing nothing!\n");

	return result;
}
04898
/** Append app_model's states to this HMM with two extra connector states
 * in between: state N (emitting cur_out) collects all of this model's end
 * states, state N+1 (emitting app_out) is reached deterministically from
 * state N and feeds the appended model's start states. Requires matching
 * emission alphabets (M). The resulting HMM is NOT normalized.
 * NOTE(review): 'result' is never set to true, so this always returns
 * false -- callers apparently ignore the return value; confirm.
 * @param app_model model to append (must have the same M)
 * @param cur_out log-domain emission row (length M) for connector state N
 * @param app_out log-domain emission row (length M) for connector state N+1
 * @return always false */
bool CHMM::append_model(CHMM* app_model, float64_t* cur_out, float64_t* app_out)
{
	bool result=false;
	// two additional connector states besides the appended model's states
	const int32_t num_states=app_model->get_N()+2;
	int32_t i,j;

	if (app_model->get_M() == get_M())
	{
		// allocate enlarged parameter tables (log domain)
		float64_t* n_p=new float64_t[N+num_states];
		float64_t* n_q=new float64_t[N+num_states];
		float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];

		float64_t* n_b=new float64_t[(N+num_states)*M];

		// initialize everything to log(0)
		for (i=0; i<N+num_states; i++)
		{
			n_p[i]=-CMath::INFTY;
			n_q[i]=-CMath::INFTY;

			for (j=0; j<N+num_states; j++)
				n_a[(N+num_states)*j+i]=-CMath::INFTY;

			for (j=0; j<M; j++)
				n_b[M*i+j]=-CMath::INFTY;
		}

		// copy this model into states 0..N-1 (its p survives as the
		// combined start distribution)
		for (i=0; i<N; i++)
		{
			n_p[i]=get_p(i);

			for (j=0; j<N; j++)
				n_a[(N+num_states)*j+i]=get_a(i,j);

			for (j=0; j<M; j++)
			{
				n_b[M*i+j]=get_b(i,j);
			}
		}

		// copy the appended model into states N+2.. ; its q becomes the
		// combined end distribution
		for (i=0; i<app_model->get_N(); i++)
		{
			n_q[i+N+2]=app_model->get_q(i);

			for (j=0; j<app_model->get_N(); j++)
				n_a[(N+num_states)*(j+N+2)+(i+N+2)]=app_model->get_a(i,j);
			for (j=0; j<app_model->get_M(); j++)
				n_b[M*(i+N+2)+j]=app_model->get_b(i,j);
		}

		// emissions of the two connector states
		for (i=0; i<M; i++)
		{
			n_b[M*N+i]=cur_out[i];
			n_b[M*(N+1)+i]=app_out[i];
		}

		// wire up the connector states
		for (i=0; i<N+num_states; i++)
		{
			// state N -> state N+1 with log probability 0 (certainty)
			if (i==N+1)
				n_a[(N+num_states)*i + N]=0;

			// every old end state feeds connector state N
			if (i<N)
				n_a[(N+num_states)*N+i]=get_q(i);

			// connector state N+1 feeds the appended model's start states
			if (i>=N+2)
				n_a[(N+num_states)*i+(N+1)]=app_model->get_p(i-(N+2));
		}

		// grow the state-dependent bookkeeping to the new N
		free_state_dependend_arrays();
		N+=num_states;

		alloc_state_dependend_arrays();

		// swap in the merged tables (discarding the freshly allocated ones)
		delete[] initial_state_distribution_p;
		delete[] end_state_distribution_q;
		delete[] transition_matrix_a;
		delete[] observation_matrix_b;

		transition_matrix_a=n_a;
		observation_matrix_b=n_b;
		initial_state_distribution_p=n_p;
		end_state_distribution_q=n_q;

		SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
		invalidate_model();
	}

	return result;
}
05005
05006
/** Grow the HMM by num_states additional states. Existing parameters are
 * preserved; every parameter touching a new state is set via VAL_MACRO:
 * log(default_value) if default_value!=0, otherwise the log of a random
 * value drawn uniformly from [MIN_RAND, MAX_RAND]. The model is
 * re-normalized afterwards.
 * @param num_states number of states to add
 * @param default_value linear-domain init value for new parameters, or 0
 *        to initialize them randomly */
void CHMM::add_states(int32_t num_states, float64_t default_value)
{
	int32_t i,j;
	// random-init range read by VAL_MACRO when default_value==0
	const float64_t MIN_RAND=1e-2;
	const float64_t MAX_RAND=2e-1;

	// allocate enlarged parameter tables (log domain)
	float64_t* n_p=new float64_t[N+num_states];
	float64_t* n_q=new float64_t[N+num_states];
	float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];

	float64_t* n_b=new float64_t[(N+num_states)*M];

	// copy the existing parameters into the first N slots
	// (a is written at index (N+num_states)*j+i -- presumably the layout
	// expected by get_a/set_a; confirm vs. HMM.h)
	for (i=0; i<N; i++)
	{
		n_p[i]=get_p(i);
		n_q[i]=get_q(i);

		for (j=0; j<N; j++)
			n_a[(N+num_states)*j+i]=get_a(i,j);

		for (j=0; j<M; j++)
			n_b[M*i+j]=get_b(i,j);
	}

	// initialize all parameters involving a new state; each VAL_MACRO
	// evaluation may draw a fresh random value
	for (i=N; i<N+num_states; i++)
	{
		n_p[i]=VAL_MACRO;
		n_q[i]=VAL_MACRO;

		for (j=0; j<N; j++)
			n_a[(N+num_states)*i+j]=VAL_MACRO;

		for (j=0; j<N+num_states; j++)
			n_a[(N+num_states)*j+i]=VAL_MACRO;

		for (j=0; j<M; j++)
			n_b[M*i+j]=VAL_MACRO;
	}
	// grow the state-dependent bookkeeping to the new N
	free_state_dependend_arrays();
	N+=num_states;

	alloc_state_dependend_arrays();

	// swap in the enlarged tables (discarding the freshly allocated ones)
	delete[] initial_state_distribution_p;
	delete[] end_state_distribution_q;
	delete[] transition_matrix_a;
	delete[] observation_matrix_b;

	transition_matrix_a=n_a;
	observation_matrix_b=n_b;
	initial_state_distribution_p=n_p;
	end_state_distribution_q=n_q;

	invalidate_model();
	normalize();
}
05066
05067 void CHMM::chop(float64_t value)
05068 {
05069 for (int32_t i=0; i<N; i++)
05070 {
05071 int32_t j;
05072
05073 if (exp(get_p(i)) < value)
05074 set_p(i, CMath::ALMOST_NEG_INFTY);
05075
05076 if (exp(get_q(i)) < value)
05077 set_q(i, CMath::ALMOST_NEG_INFTY);
05078
05079 for (j=0; j<N; j++)
05080 {
05081 if (exp(get_a(i,j)) < value)
05082 set_a(i,j, CMath::ALMOST_NEG_INFTY);
05083 }
05084
05085 for (j=0; j<M; j++)
05086 {
05087 if (exp(get_b(i,j)) < value)
05088 set_b(i,j, CMath::ALMOST_NEG_INFTY);
05089 }
05090 }
05091 normalize();
05092 invalidate_model();
05093 }
05094
05095 bool CHMM::linear_train(bool right_align)
05096 {
05097 if (p_observations)
05098 {
05099 int32_t histsize=(get_M()*get_N());
05100 int32_t* hist=new int32_t[histsize];
05101 int32_t* startendhist=new int32_t[get_N()];
05102 int32_t i,dim;
05103
05104 ASSERT(p_observations->get_max_vector_length()<=get_N());
05105
05106 for (i=0; i<histsize; i++)
05107 hist[i]=0;
05108
05109 for (i=0; i<get_N(); i++)
05110 startendhist[i]=0;
05111
05112 if (right_align)
05113 {
05114 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05115 {
05116 int32_t len=0;
05117 bool free_vec;
05118 uint16_t* obs=p_observations->get_feature_vector(dim, len, free_vec);
05119
05120 ASSERT(len<=get_N());
05121 startendhist[(get_N()-len)]++;
05122
05123 for (i=0;i<len;i++)
05124 hist[(get_N()-len+i)*get_M() + *obs++]++;
05125
05126 p_observations->free_feature_vector(obs, dim, free_vec);
05127 }
05128
05129 set_q(get_N()-1, 1);
05130 for (i=0; i<get_N()-1; i++)
05131 set_q(i, 0);
05132
05133 for (i=0; i<get_N(); i++)
05134 set_p(i, startendhist[i]+PSEUDO);
05135
05136 for (i=0;i<get_N();i++)
05137 {
05138 for (int32_t j=0; j<get_N(); j++)
05139 {
05140 if (i==j-1)
05141 set_a(i,j, 1);
05142 else
05143 set_a(i,j, 0);
05144 }
05145 }
05146 }
05147 else
05148 {
05149 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05150 {
05151 int32_t len=0;
05152 bool free_vec;
05153 uint16_t* obs=p_observations->get_feature_vector(dim, len, free_vec);
05154
05155 ASSERT(len<=get_N());
05156 for (i=0;i<len;i++)
05157 hist[i*get_M() + *obs++]++;
05158
05159 startendhist[len-1]++;
05160
05161 p_observations->free_feature_vector(obs, dim, free_vec);
05162 }
05163
05164 set_p(0, 1);
05165 for (i=1; i<get_N(); i++)
05166 set_p(i, 0);
05167
05168 for (i=0; i<get_N(); i++)
05169 set_q(i, startendhist[i]+PSEUDO);
05170
05171 int32_t total=p_observations->get_num_vectors();
05172
05173 for (i=0;i<get_N();i++)
05174 {
05175 total-= startendhist[i] ;
05176
05177 for (int32_t j=0; j<get_N(); j++)
05178 {
05179 if (i==j-1)
05180 set_a(i,j, total+PSEUDO);
05181 else
05182 set_a(i,j, 0);
05183 }
05184 }
05185 ASSERT(total==0);
05186 }
05187
05188 for (i=0;i<get_N();i++)
05189 {
05190 for (int32_t j=0; j<get_M(); j++)
05191 {
05192 float64_t sum=0;
05193 int32_t offs=i*get_M()+ p_observations->get_masked_symbols((uint16_t) j, (uint8_t) 254);
05194
05195 for (int32_t k=0; k<p_observations->get_original_num_symbols(); k++)
05196 sum+=hist[offs+k];
05197
05198 set_b(i,j, (PSEUDO+hist[i*get_M()+j])/(sum+PSEUDO*p_observations->get_original_num_symbols()));
05199 }
05200 }
05201
05202 delete[] hist;
05203 delete[] startendhist;
05204 convert_to_log();
05205 invalidate_model();
05206 return true;
05207 }
05208 else
05209 return false;
05210 }
05211
/** Set the observation sequences WITHOUT allocating alpha/beta caches or
 * path tables (in contrast to set_observations()).  Existing caches are
 * freed (unless they are shared/reused) and left NULL.
 *
 * @param obs string features over uint16_t symbols; their alphabet size
 *            must not exceed M
 */
void CHMM::set_observation_nocache(CStringFeatures<uint16_t>* obs)
{
	ASSERT(obs);
	p_observations=obs;
	SG_REF(obs);
	// NOTE(review): a previously set p_observations is overwritten here
	// without SG_UNREF — presumably the caller manages that reference;
	// verify against the rest of the class.

	// the `if (obs)` is redundant after ASSERT(obs) unless ASSERT is
	// compiled out in release builds
	if (obs)
		if (obs->get_num_symbols() > M)
			SG_ERROR( "number of symbols in observation (%d) larger than M (%d)\n", obs->get_num_symbols(), M);

	// only free the caches if we own them (they may be shared with
	// another HMM via set_observations(obs, lambda))
	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		// one cache/path set per worker thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			delete[] alpha_cache[i].table;
			delete[] beta_cache[i].table;
			delete[] states_per_observation_psi[i];
			delete[] path[i];

			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
			states_per_observation_psi[i]=NULL;
			path[i]=NULL;
		} ;
#else
		delete[] alpha_cache.table;
		delete[] beta_cache.table;
		delete[] states_per_observation_psi;
		delete[] path;

		alpha_cache.table=NULL;
		beta_cache.table=NULL;
		states_per_observation_psi=NULL;
		path=NULL;

#endif //USE_HMMPARALLEL_STRUCTURES
	}

	invalidate_model();
}
05253
/** Set the observation sequences and (re)allocate the alpha/beta caches
 * and Viterbi path tables sized for the longest observation vector.
 *
 * @param obs    string features over uint16_t symbols; alphabet size
 *               must not exceed M
 * @param lambda if non-NULL, share (reuse) lambda's caches and path
 *               tables instead of allocating our own; reused_caches is
 *               set so they are not freed by this object
 */
void CHMM::set_observations(CStringFeatures<uint16_t>* obs, CHMM* lambda)
{
	ASSERT(obs);
	p_observations=obs;
	SG_REF(obs);
	// NOTE(review): previous p_observations/features are overwritten
	// without SG_UNREF — verify reference handling against callers.

	// also expose the observations through the CDistribution interface
	features=obs;

	SG_DEBUG("num symbols alphabet: %ld\n", obs->get_alphabet()->get_num_symbols());
	SG_DEBUG("num symbols: %ld\n", obs->get_num_symbols());
	SG_DEBUG("M: %d\n", M);

	// `if (obs)` is redundant after ASSERT(obs) unless ASSERT compiles out
	if (obs)
	{
		if (obs->get_num_symbols() > M)
		{
			SG_ERROR( "number of symbols in observation (%d) larger than M (%d)\n", obs->get_num_symbols(), M);
		}
	}

	// free the old caches only if this object owns them
	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		// one cache/path set per worker thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			delete[] alpha_cache[i].table;
			delete[] beta_cache[i].table;
			delete[] states_per_observation_psi[i];
			delete[] path[i];

			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
			states_per_observation_psi[i]=NULL;
			path[i]=NULL;
		} ;
#else
		delete[] alpha_cache.table;
		delete[] beta_cache.table;
		delete[] states_per_observation_psi;
		delete[] path;

		alpha_cache.table=NULL;
		beta_cache.table=NULL;
		states_per_observation_psi=NULL;
		path=NULL;

#endif //USE_HMMPARALLEL_STRUCTURES
	}

	if (obs!=NULL)
	{
		// tables are sized for the longest observation sequence
		int32_t max_T=obs->get_max_vector_length();

		if (lambda)
		{
			// share lambda's caches instead of allocating our own
#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				this->alpha_cache[i].table= lambda->alpha_cache[i].table;
				this->beta_cache[i].table= lambda->beta_cache[i].table;
				this->states_per_observation_psi[i]=lambda->states_per_observation_psi[i] ;
				this->path[i]=lambda->path[i];
			} ;
#else
			this->alpha_cache.table= lambda->alpha_cache.table;
			this->beta_cache.table= lambda->beta_cache.table;
			this->states_per_observation_psi= lambda->states_per_observation_psi;
			this->path=lambda->path;
#endif //USE_HMMPARALLEL_STRUCTURES

			// mark the caches as borrowed so we never delete them
			this->reused_caches=true;
		}
		else
		{
			this->reused_caches=false;
			// allocate Viterbi path tables (max_T x N per thread/object)
#ifdef USE_HMMPARALLEL_STRUCTURES
			SG_INFO( "allocating mem for path-table of size %.2f Megabytes (%d*%d) each:\n", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				if ((states_per_observation_psi[i]=new T_STATES[max_T*N])!=NULL)
					SG_DEBUG( "path_table[%i] successfully allocated\n",i) ;
				else
					SG_ERROR( "failed allocating memory for path_table[%i].\n",i) ;
				path[i]=new T_STATES[max_T];
			}
#else // no USE_HMMPARALLEL_STRUCTURES
			SG_INFO( "allocating mem of size %.2f Megabytes (%d*%d) for path-table ....", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
			if ((states_per_observation_psi=new T_STATES[max_T*N]) != NULL)
				SG_DONE();
			else
				SG_ERROR( "failed.\n") ;

			path=new T_STATES[max_T];
#endif // USE_HMMPARALLEL_STRUCTURES
			// allocate forward/backward (alpha/beta) caches when enabled
#ifdef USE_HMMCACHE
			SG_INFO( "allocating mem for caches each of size %.2f Megabytes (%d*%d) ....\n", ((float32_t)max_T)*N*sizeof(T_ALPHA_BETA_TABLE)/(1024*1024), max_T, N);

#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				if ((alpha_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N])!=NULL)
					SG_DEBUG( "alpha_cache[%i].table successfully allocated\n",i) ;
				else
					SG_ERROR("allocation of alpha_cache[%i].table failed\n",i) ;

				if ((beta_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
					SG_DEBUG("beta_cache[%i].table successfully allocated\n",i) ;
				else
					SG_ERROR("allocation of beta_cache[%i].table failed\n",i) ;
			} ;
#else // USE_HMMPARALLEL_STRUCTURES
			if ((alpha_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
				SG_DEBUG( "alpha_cache.table successfully allocated\n") ;
			else
				SG_ERROR( "allocation of alpha_cache.table failed\n") ;

			if ((beta_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
				SG_DEBUG( "beta_cache.table successfully allocated\n") ;
			else
				SG_ERROR( "allocation of beta_cache.table failed\n") ;

#endif // USE_HMMPARALLEL_STRUCTURES
#else // USE_HMMCACHE
			// caching disabled: alpha/beta are computed on the fly
#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				alpha_cache[i].table=NULL ;
				beta_cache[i].table=NULL ;
			} ;
#else //USE_HMMPARALLEL_STRUCTURES
			alpha_cache.table=NULL ;
			beta_cache.table=NULL ;
#endif //USE_HMMPARALLEL_STRUCTURES
#endif //USE_HMMCACHE
		}
	}

	// cached probabilities refer to the old observations; recompute lazily
	invalidate_model();
}
05396
05397 bool CHMM::permutation_entropy(int32_t window_width, int32_t sequence_number)
05398 {
05399 if (p_observations && window_width>0 &&
05400 ( sequence_number<0 || sequence_number < p_observations->get_num_vectors()))
05401 {
05402 int32_t min_sequence=sequence_number;
05403 int32_t max_sequence=sequence_number;
05404
05405 if (sequence_number<0)
05406 {
05407 min_sequence=0;
05408 max_sequence=p_observations->get_num_vectors();
05409 SG_INFO( "numseq: %d\n", max_sequence);
05410 }
05411
05412 SG_INFO( "min_sequence: %d max_sequence: %d\n", min_sequence, max_sequence);
05413 for (sequence_number=min_sequence; sequence_number<max_sequence; sequence_number++)
05414 {
05415 int32_t sequence_length=0;
05416 bool free_vec;
05417 uint16_t* obs=p_observations->get_feature_vector(sequence_number, sequence_length, free_vec);
05418
05419 int32_t histsize=get_M();
05420 int64_t* hist=new int64_t[histsize];
05421 int32_t i,j;
05422
05423 for (i=0; i<sequence_length-window_width; i++)
05424 {
05425 for (j=0; j<histsize; j++)
05426 hist[j]=0;
05427
05428 uint16_t* ptr=&obs[i];
05429 for (j=0; j<window_width; j++)
05430 {
05431 hist[*ptr++]++;
05432 }
05433
05434 float64_t perm_entropy=0;
05435 for (j=0; j<get_M(); j++)
05436 {
05437 float64_t p=
05438 (((float64_t) hist[j])+PSEUDO)/
05439 (window_width+get_M()*PSEUDO);
05440 perm_entropy+=p*log(p);
05441 }
05442
05443 SG_PRINT( "%f\n", perm_entropy);
05444 }
05445 p_observations->free_feature_vector(obs, sequence_number, free_vec);
05446
05447 delete[] hist;
05448 }
05449 return true;
05450 }
05451 else
05452 return false;
05453 }
05454
05455 float64_t CHMM::get_log_derivative(int32_t num_param, int32_t num_example)
05456 {
05457 if (num_param<N)
05458 return model_derivative_p(num_param, num_example);
05459 else if (num_param<2*N)
05460 return model_derivative_q(num_param-N, num_example);
05461 else if (num_param<N*(N+2))
05462 {
05463 int32_t k=num_param-2*N;
05464 int32_t i=k/N;
05465 int32_t j=k%N;
05466 return model_derivative_a(i,j, num_example);
05467 }
05468 else if (num_param<N*(N+2+M))
05469 {
05470 int32_t k=num_param-N*(N+2);
05471 int32_t i=k/M;
05472 int32_t j=k%M;
05473 return model_derivative_b(i,j, num_example);
05474 }
05475
05476 ASSERT(false);
05477 return -1;
05478 }
05479
05480 float64_t CHMM::get_log_model_parameter(int32_t num_param)
05481 {
05482 if (num_param<N)
05483 return get_p(num_param);
05484 else if (num_param<2*N)
05485 return get_q(num_param-N);
05486 else if (num_param<N*(N+2))
05487 return transition_matrix_a[num_param-2*N];
05488 else if (num_param<N*(N+2+M))
05489 return observation_matrix_b[num_param-N*(N+2)];
05490
05491 ASSERT(false);
05492 return -1;
05493 }
05494
05495
05496
05497 bool CHMM::converged(float64_t x, float64_t y)
05498 {
05499 float64_t diff=y-x;
05500 float64_t absdiff=fabs(diff);
05501
05502 SG_INFO( "\n #%03d\tbest result so far: %G (eps: %f)", iteration_count, y, diff);
05503
05504 if (iteration_count--==0 || (absdiff<epsilon && conv_it<=0))
05505 {
05506 iteration_count=iterations;
05507 SG_INFO( "...finished\n");
05508 conv_it=5;
05509 return true;
05510 }
05511 else
05512 {
05513 if (absdiff<epsilon)
05514 conv_it--;
05515 else
05516 conv_it=5;
05517
05518 return false;
05519 }
05520 }
05521
/** Train this HMM with the selected Baum-Welch/Viterbi variant until
 * converged() signals completion (or the computation is cancelled).
 *
 * Training ping-pongs between two models: `working` (the source of the
 * current estimation step) and `estimate` (its target), swapped each
 * iteration.  One of the two is always `this`; the other is a temporary
 * copy that is cleaned up at the end.
 *
 * @param type which estimation step to run per iteration (Baum-Welch
 *             full/trans/defined, or Viterbi full/defined)
 * @return always true
 */
bool CHMM::baum_welch_viterbi_train(BaumWelchViterbiType type)
{
	CHMM* estimate=new CHMM(this);
	CHMM* working=this;
	float64_t prob_max=-CMath::INFTY;
	float64_t prob=-CMath::INFTY;
	float64_t prob_train=CMath::ALMOST_NEG_INFTY;
	iteration_count=iterations;

	while (!converged(prob, prob_train) && (!CSignal::cancel_computations()))
	{
		// last round's target becomes this round's source
		CMath::swap(working, estimate);
		prob=prob_train;

		switch (type) {
			case BW_NORMAL:
				working->estimate_model_baum_welch(estimate); break;
			case BW_TRANS:
				working->estimate_model_baum_welch_trans(estimate); break;
			case BW_DEFINED:
				working->estimate_model_baum_welch_defined(estimate); break;
			case VIT_NORMAL:
				working->estimate_model_viterbi(estimate); break;
			case VIT_DEFINED:
				working->estimate_model_viterbi_defined(estimate); break;
		}
		prob_train=estimate->model_probability();

		if (prob_max<prob_train)
			prob_max=prob_train;
	}

	// after an odd number of swaps `estimate` is `this`: copy the final
	// parameters from the temporary before deleting it
	if (estimate == this)
	{
		estimate->copy_model(working);
		delete working;
	}
	else
		delete estimate;

	return true;
}