SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2009 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _TRIE_H___ 00013 #define _TRIE_H___ 00014 00015 #include <string.h> 00016 #include "lib/common.h" 00017 #include "lib/io.h" 00018 #include "base/DynArray.h" 00019 #include "lib/Mathematics.h" 00020 #include "base/SGObject.h" 00021 00022 namespace shogun 00023 { 00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00025 00026 // sentinel is 0xFFFFFFFC or float -2 00027 #define NO_CHILD ((int32_t)-1073741824) 00028 00029 #define WEIGHTS_IN_TRIE 00030 //#define TRIE_CHECK_EVERYTHING 00031 00032 #ifdef TRIE_CHECK_EVERYTHING 00033 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x) 00034 #else 00035 #define TRIE_ASSERT_EVERYTHING(x) 00036 #endif 00037 00038 //#define TRIE_ASSERT(x) ASSERT(x) 00039 #define TRIE_ASSERT(x) 00040 00041 #define TRIE_TERMINAL_CHARACTER 7 00042 00044 struct ConsensusEntry 00045 { 00047 uint64_t string; 00049 float32_t score; 00051 int32_t bt; 00052 }; 00053 00055 struct POIMTrie 00056 { 00058 float64_t weight; 00059 #ifdef TRIE_CHECK_EVERYTHING 00060 00061 bool has_seq; 00063 bool has_floats; 00064 #endif 00065 union 00066 { 00068 float32_t child_weights[4]; 00070 int32_t children[4]; 00072 uint8_t seq[16] ; 00073 }; 00074 00076 float64_t S; 00078 float64_t L; 00080 float64_t R; 00081 }; 00082 00084 struct DNATrie 00085 { 00087 float64_t weight; 00088 #ifdef TRIE_CHECK_EVERYTHING 00089 00090 bool has_seq; 00092 bool has_floats; 00093 #endif 00094 union 00095 { 00097 float32_t child_weights[4]; 00099 int32_t children[4]; 00101 uint8_t seq[16] ; 00102 }; 00103 }; 00104 00106 struct TreeParseInfo { 00108 int32_t num_sym; 00110 int32_t num_feat; 00112 int32_t p; 00114 int32_t k; 00116 int32_t* nofsKmers; 00118 float64_t* margFactors; 00120 int32_t* x; 00122 int32_t* substrs; 00124 int32_t y0; 00126 float64_t* C_k; 00128 float64_t* L_k; 00130 float64_t* R_k; 00131 }; 00132 00133 #endif // DOXYGEN_SHOULD_SKIP_THIS 00134 00135 template <class Trie> class CTrie; 00136 00155 #define IGNORE_IN_CLASSLIST 00156 IGNORE_IN_CLASSLIST template <class Trie> class CTrie : public CSGObject 00157 { 00158 public: 00160 CTrie(); 00161 00168 CTrie(int32_t d, bool p_use_compact_terminal_nodes=true); 00169 00171 CTrie(const CTrie & to_copy); 00172 virtual ~CTrie(); 00173 00175 const CTrie & operator=(const CTrie & to_copy); 00176 00184 bool compare_traverse( 00185 int32_t node, const CTrie & other, int32_t other_node); 00186 00192 bool compare(const CTrie & other); 00193 00200 bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const; 00201 00208 int32_t find_deepest_node( 00209 int32_t start_node, int32_t &deepest_node) const; 00210 00215 void display_node(int32_t node) const; 00216 00218 void destroy(); 00219 00224 void set_degree(int32_t d); 00225 00232 void create(int32_t len, bool p_use_compact_terminal_nodes=true); 00233 00239 void delete_trees(bool p_use_compact_terminal_nodes=true); 00240 00251 void add_to_trie( 00252 int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha, 00253 float64_t *weights, bool degree_times_position_weights); 00254 00261 float64_t compute_abs_weights_tree(int32_t tree, int32_t depth); 00262 00268 float64_t* compute_abs_weights(int32_t &len); 00269 00282 float64_t compute_by_tree_helper( 00283 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 00284 int32_t weight_pos, float64_t * weights, 00285 bool degree_times_position_weights) ; 00286 00301 void compute_by_tree_helper( 00302 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 00303 int32_t weight_pos, float64_t* LevelContrib, float64_t factor, 00304 int32_t mkl_stepsize, float64_t * weights, 00305 bool degree_times_position_weights); 00306 00321 void compute_scoring_helper( 00322 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d, 00323 int32_t max_degree, int32_t num_feat, int32_t num_sym, 00324 int32_t sym_offset, int32_t offs, float64_t* result); 00325 00338 void add_example_to_tree_mismatch_recursion( 00339 int32_t tree, int32_t i, float64_t alpha, int32_t *vec, 00340 int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec, 00341 int32_t max_mismatch, float64_t * weights); 00342 00352 void traverse( 00353 int32_t tree, const int32_t p, struct TreeParseInfo info, 00354 const int32_t depth, int32_t* const x, const int32_t k); 00355 00365 void count( 00366 const float64_t w, const int32_t depth, 00367 const struct TreeParseInfo info, const int32_t p, int32_t* x, 00368 const int32_t k); 00369 00376 int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights); 00377 00386 float64_t get_cumulative_score( 00387 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights); 00388 00398 void fill_backtracking_table_recursion( 00399 Trie* tree, int32_t depth, uint64_t seq, float64_t value, 00400 DynArray<ConsensusEntry>* table, float64_t* weights); 00401 00410 void fill_backtracking_table( 00411 int32_t pos, DynArray<ConsensusEntry>* prev, 00412 DynArray<ConsensusEntry>* cur, bool cumulative, 00413 float64_t* weights); 00414 00420 void POIMs_extract_W(float64_t* const* const W, const int32_t K); 00421 00426 void POIMs_precalc_SLR(const float64_t* const distrib); 00427 00437 void POIMs_get_SLR( 00438 const int32_t parentIdx, const int32_t sym, const int32_t depth, 00439 float64_t* S, float64_t* L, float64_t* R); 00440 00447 void POIMs_add_SLR( 00448 float64_t* const* const poims, const int32_t K, 00449 const int32_t debug); 00450 00455 inline bool get_use_compact_terminal_nodes() 00456 { 00457 return use_compact_terminal_nodes ; 00458 } 00459 00465 inline void set_use_compact_terminal_nodes( 00466 bool p_use_compact_terminal_nodes) 00467 { 00468 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 00469 } 00470 00475 inline int32_t get_num_used_nodes() 00476 { 00477 return TreeMemPtr; 00478 } 00479 00484 inline void set_position_weights(const float64_t * p_position_weights) 00485 { 00486 position_weights=p_position_weights; 00487 } 00488 00493 inline int32_t get_node(bool last_node=false) 00494 { 00495 int32_t ret = TreeMemPtr++; 00496 check_treemem() ; 00497 00498 if (last_node) 00499 { 00500 for (int32_t q=0; q<4; q++) 00501 TreeMem[ret].child_weights[q]=0.0; 00502 } 00503 else 00504 { 00505 for (int32_t q=0; q<4; q++) 00506 TreeMem[ret].children[q]=NO_CHILD; 00507 } 00508 #ifdef TRIE_CHECK_EVERYTHING 00509 TreeMem[ret].has_seq=false ; 00510 TreeMem[ret].has_floats=false ; 00511 #endif 00512 TreeMem[ret].weight=0.0; 00513 return ret ; 00514 } 00515 00517 inline void check_treemem() 00518 { 00519 if (TreeMemPtr+10 < TreeMemPtrMax) 00520 return; 00521 SG_DEBUG( "Extending TreeMem from %i to %i elements\n", 00522 TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2)); 00523 TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2); 00524 TreeMem = (Trie*) realloc(TreeMem, 00525 TreeMemPtrMax*sizeof(Trie)); 00526 if (!TreeMem) 00527 SG_ERROR( "out of memory\n"); 00528 } 00529 00534 inline void set_weights_in_tree(bool weights_in_tree_) 00535 { 00536 weights_in_tree = weights_in_tree_; 00537 } 00538 00543 inline bool get_weights_in_tree() 00544 { 00545 return weights_in_tree; 00546 } 00547 00557 void POIMs_extract_W_helper( 00558 const int32_t nodeIdx, const int32_t depth, const int32_t offset, 00559 const int32_t y0, float64_t* const* const W, const int32_t K); 00560 00573 void POIMs_calc_SLR_helper1( 00574 const float64_t* const distrib, const int32_t i, 00575 const int32_t nodeIdx, int32_t left_tries_idx[4], 00576 const int32_t depth, int32_t const lastSym, float64_t* S, 00577 float64_t* L, float64_t* R); 00578 00579 00590 void POIMs_calc_SLR_helper2( 00591 const float64_t* const distrib, const int32_t i, 00592 const int32_t nodeIdx, int32_t left_tries_idx[4], 00593 const int32_t depth, float64_t* S, float64_t* L, float64_t* R); 00594 00605 void POIMs_add_SLR_helper1( 00606 const int32_t nodeIdx, const int32_t depth,const int32_t i, 00607 const int32_t y0, float64_t* const* const poims, const int32_t K, 00608 const int32_t debug); 00609 00623 void POIMs_add_SLR_helper2( 00624 float64_t* const* const poims, const int32_t K, const int32_t k, 00625 const int32_t i, const int32_t y, const float64_t valW, 00626 const float64_t valS, const float64_t valL, const float64_t valR, 00627 const int32_t debug); 00628 00630 inline virtual const char* get_name() const { return "Trie"; } 00631 00632 public: 00634 int32_t NUM_SYMS; 00635 00636 protected: 00638 int32_t length; 00640 int32_t * trees; 00641 00643 int32_t degree; 00645 float64_t const * position_weights; 00646 00648 Trie* TreeMem; 00650 int32_t TreeMemPtr; 00652 int32_t TreeMemPtrMax; 00654 bool use_compact_terminal_nodes; 00655 00657 bool weights_in_tree; 00658 00660 int32_t* nofsKmers; 00661 }; 00662 template <class Trie> 00663 CTrie<Trie>::CTrie() 00664 : CSGObject(), degree(0), position_weights(NULL), 00665 use_compact_terminal_nodes(false), 00666 weights_in_tree(true) 00667 { 00668 00669 TreeMemPtrMax=0; 00670 TreeMemPtr=0; 00671 TreeMem=NULL; 00672 00673 length=0; 00674 trees=NULL; 00675 00676 NUM_SYMS=4; 00677 } 00678 00679 template <class Trie> 00680 CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes) 00681 : CSGObject(), degree(d), position_weights(NULL), 00682 use_compact_terminal_nodes(p_use_compact_terminal_nodes), 00683 weights_in_tree(true) 00684 { 00685 TreeMemPtrMax=1024*1024/sizeof(Trie); 00686 TreeMemPtr=0; 00687 TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie)); 00688 00689 length=0; 00690 trees=NULL; 00691 00692 NUM_SYMS=4; 00693 } 00694 00695 template <class Trie> 00696 CTrie<Trie>::CTrie(const CTrie & to_copy) 00697 : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL), 00698 use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes) 00699 { 00700 if (to_copy.position_weights!=NULL) 00701 { 00702 position_weights = to_copy.position_weights; 00703 /*new float64_t[to_copy.length]; 00704 for (int32_t i=0; i<to_copy.length; i++) 00705 position_weights[i]=to_copy.position_weights[i]; */ 00706 } 00707 else 00708 position_weights=NULL; 00709 00710 TreeMemPtrMax=to_copy.TreeMemPtrMax; 00711 TreeMemPtr=to_copy.TreeMemPtr; 00712 TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie)); 00713 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)); 00714 00715 length=to_copy.length; 00716 trees=new int32_t[length]; 00717 for (int32_t i=0; i<length; i++) 00718 trees[i]=to_copy.trees[i]; 00719 00720 NUM_SYMS=4; 00721 } 00722 00723 template <class Trie> 00724 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy) 00725 { 00726 degree=to_copy.degree ; 00727 use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ; 00728 00729 delete[] position_weights ; 00730 position_weights=NULL ; 00731 if (to_copy.position_weights!=NULL) 00732 { 00733 position_weights=to_copy.position_weights ; 00734 /*position_weights = new float64_t[to_copy.length] ; 00735 for (int32_t i=0; i<to_copy.length; i++) 00736 position_weights[i]=to_copy.position_weights[i] ;*/ 00737 } 00738 else 00739 position_weights=NULL ; 00740 00741 TreeMemPtrMax=to_copy.TreeMemPtrMax ; 00742 TreeMemPtr=to_copy.TreeMemPtr ; 00743 free(TreeMem) ; 00744 TreeMem = (Trie*)malloc(TreeMemPtrMax*sizeof(Trie)) ; 00745 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ; 00746 00747 length = to_copy.length ; 00748 if (trees) 00749 delete[] trees ; 00750 trees=new int32_t[length] ; 00751 for (int32_t i=0; i<length; i++) 00752 trees[i]=to_copy.trees[i] ; 00753 00754 return *this ; 00755 } 00756 00757 template <class Trie> 00758 int32_t CTrie<Trie>::find_deepest_node( 00759 int32_t start_node, int32_t& deepest_node) const 00760 { 00761 #ifdef TRIE_CHECK_EVERYTHING 00762 int32_t ret=0 ; 00763 SG_DEBUG("start_node=%i\n", start_node) ; 00764 00765 if (start_node==NO_CHILD) 00766 { 00767 for (int32_t i=0; i<length; i++) 00768 { 00769 int32_t my_deepest_node ; 00770 int32_t depth=find_deepest_node(i, my_deepest_node) ; 00771 SG_DEBUG("start_node %i depth=%i\n", i, depth) ; 00772 if (depth>ret) 00773 { 00774 deepest_node=my_deepest_node ; 00775 ret=depth ; 00776 } 00777 } 00778 return ret ; 00779 } 00780 00781 if (TreeMem[start_node].has_seq) 00782 { 00783 for (int32_t q=0; q<16; q++) 00784 if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER) 00785 ret++ ; 00786 deepest_node=start_node ; 00787 return ret ; 00788 } 00789 if (TreeMem[start_node].has_floats) 00790 { 00791 deepest_node=start_node ; 00792 return 1 ; 00793 } 00794 00795 for (int32_t q=0; q<4; q++) 00796 { 00797 int32_t my_deepest_node ; 00798 if (TreeMem[start_node].children[q]==NO_CHILD) 00799 continue ; 00800 int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ; 00801 if (depth>ret) 00802 { 00803 deepest_node=my_deepest_node ; 00804 ret=depth ; 00805 } 00806 } 00807 return ret ; 00808 #else 00809 SG_ERROR( "not implemented\n") ; 00810 return 0 ; 00811 #endif 00812 } 00813 00814 template <class Trie> 00815 int32_t CTrie<Trie>::compact_nodes( 00816 int32_t start_node, int32_t depth, float64_t * weights) 00817 { 00818 SG_ERROR( "code buggy\n") ; 00819 00820 int32_t ret=0 ; 00821 00822 if (start_node==NO_CHILD) 00823 { 00824 for (int32_t i=0; i<length; i++) 00825 compact_nodes(i,1, weights) ; 00826 return 0 ; 00827 } 00828 if (start_node<0) 00829 return -1 ; 00830 00831 if (depth==degree-1) 00832 { 00833 TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ; 00834 int32_t num_used=0 ; 00835 for (int32_t q=0; q<4; q++) 00836 if (TreeMem[start_node].child_weights[q]!=0.0) 00837 num_used++ ; 00838 if (num_used>1) 00839 return -1 ; 00840 return 1 ; 00841 } 00842 TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ; 00843 00844 int32_t num_used = 0 ; 00845 int32_t q_used=-1 ; 00846 00847 for (int32_t q=0; q<4; q++) 00848 { 00849 if (TreeMem[start_node].children[q]==NO_CHILD) 00850 continue ; 00851 num_used++ ; 00852 q_used=q ; 00853 } 00854 if (num_used>1) 00855 { 00856 if (depth>=degree-2) 00857 return -1 ; 00858 for (int32_t q=0; q<4; q++) 00859 { 00860 if (TreeMem[start_node].children[q]==NO_CHILD) 00861 continue ; 00862 int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ; 00863 if (num<=2) 00864 continue ; 00865 int32_t node=get_node() ; 00866 00867 int32_t last_node=TreeMem[start_node].children[q] ; 00868 if (weights_in_tree) 00869 { 00870 ASSERT(weights[depth]!=0.0) ; 00871 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ; 00872 } 00873 else 00874 TreeMem[node].weight=TreeMem[last_node].weight ; 00875 00876 #ifdef TRIE_CHECK_EVERYTHING 00877 TreeMem[node].has_seq=true ; 00878 #endif 00879 memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ; 00880 for (int32_t n=0; n<num; n++) 00881 { 00882 ASSERT(depth+n+1<=degree-1) ; 00883 ASSERT(last_node!=NO_CHILD) ; 00884 if (depth+n+1==degree-1) 00885 { 00886 TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ; 00887 int32_t k ; 00888 for (k=0; k<4; k++) 00889 if (TreeMem[last_node].child_weights[k]!=0.0) 00890 break ; 00891 if (k==4) 00892 break ; 00893 TreeMem[node].seq[n]=k ; 00894 break ; 00895 } 00896 else 00897 { 00898 TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ; 00899 int32_t k ; 00900 for (k=0; k<4; k++) 00901 if (TreeMem[last_node].children[k]!=NO_CHILD) 00902 break ; 00903 if (k==4) 00904 break ; 00905 TreeMem[node].seq[n]=k ; 00906 last_node=TreeMem[last_node].children[k] ; 00907 } 00908 } 00909 TreeMem[start_node].children[q]=-node ; 00910 } 00911 return -1 ; 00912 } 00913 if (num_used==0) 00914 return 0 ; 00915 00916 ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ; 00917 if (ret<0) 00918 return ret ; 00919 return ret+1 ; 00920 } 00921 00922 00923 template <class Trie> 00924 bool CTrie<Trie>::compare_traverse( 00925 int32_t node, const CTrie<Trie> & other, int32_t other_node) 00926 { 00927 SG_DEBUG("checking nodes %i and %i\n", node, other_node) ; 00928 if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5) 00929 { 00930 SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ; 00931 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00932 display_node(node) ; 00933 SG_DEBUG( "============================================================\n") ; 00934 other.display_node(other_node) ; 00935 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00936 return false ; 00937 } 00938 00939 #ifdef TRIE_CHECK_EVERYTHING 00940 if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq) 00941 { 00942 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ; 00943 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00944 display_node(node) ; 00945 SG_DEBUG( "============================================================\n") ; 00946 other.display_node(other_node) ; 00947 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00948 return false ; 00949 } 00950 if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats) 00951 { 00952 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ; 00953 return false ; 00954 } 00955 if (other.TreeMem[other_node].has_floats) 00956 { 00957 for (int32_t q=0; q<4; q++) 00958 if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5) 00959 { 00960 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ; 00961 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00962 display_node(node) ; 00963 SG_DEBUG( "============================================================\n") ; 00964 other.display_node(other_node) ; 00965 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00966 return false ; 00967 } 00968 } 00969 if (other.TreeMem[other_node].has_seq) 00970 { 00971 for (int32_t q=0; q<16; q++) 00972 if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4))) 00973 { 00974 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ; 00975 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00976 display_node(node) ; 00977 SG_DEBUG( "============================================================\n") ; 00978 other.display_node(other_node) ; 00979 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00980 return false ; 00981 } 00982 } 00983 if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats) 00984 { 00985 for (int32_t q=0; q<4; q++) 00986 { 00987 if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD)) 00988 continue ; 00989 if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD)) 00990 { 00991 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ; 00992 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00993 display_node(node) ; 00994 SG_DEBUG( "============================================================\n") ; 00995 other.display_node(other_node) ; 00996 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00997 return false ; 00998 } 00999 if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q]))) 01000 return false ; 01001 } 01002 } 01003 #else 01004 SG_ERROR( "not implemented\n") ; 01005 #endif 01006 01007 return true ; 01008 } 01009 01010 template <class Trie> 01011 bool CTrie<Trie>::compare(const CTrie<Trie> & other) 01012 { 01013 bool ret=true ; 01014 for (int32_t i=0; i<length; i++) 01015 if (!compare_traverse(trees[i], other, other.trees[i])) 01016 return false ; 01017 else 01018 SG_DEBUG("two tries at %i identical\n", i) ; 01019 01020 return ret ; 01021 } 01022 01023 template <class Trie> 01024 bool CTrie<Trie>::find_node( 01025 int32_t node, int32_t * trace, int32_t& trace_len) const 01026 { 01027 #ifdef TRIE_CHECK_EVERYTHING 01028 ASSERT(trace_len-1>=0) ; 01029 ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax)) 01030 if (TreeMem[trace[trace_len-1]].has_seq) 01031 return false ; 01032 if (TreeMem[trace[trace_len-1]].has_floats) 01033 return false ; 01034 01035 for (int32_t q=0; q<4; q++) 01036 { 01037 if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD) 01038 continue ; 01039 int32_t tl=trace_len+1 ; 01040 if (TreeMem[trace[trace_len-1]].children[q]>=0) 01041 trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ; 01042 else 01043 trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ; 01044 01045 if (trace[trace_len]==node) 01046 { 01047 trace_len=tl ; 01048 return true ; 01049 } 01050 if (find_node(node, trace, tl)) 01051 { 01052 trace_len=tl ; 01053 return true ; 01054 } 01055 } 01056 trace_len=0 ; 01057 return false ; 01058 #else 01059 SG_ERROR( "not implemented\n") ; 01060 return false ; 01061 #endif 01062 } 01063 01064 template <class Trie> 01065 void CTrie<Trie>::display_node(int32_t node) const 01066 { 01067 #ifdef TRIE_CHECK_EVERYTHING 01068 int32_t * trace=new int32_t[2*degree] ; 01069 int32_t trace_len=-1 ; 01070 bool found = false ; 01071 int32_t tree=-1 ; 01072 for (tree=0; tree<length; tree++) 01073 { 01074 trace[0]=trees[tree] ; 01075 trace_len=1 ; 01076 found=find_node(node, trace, trace_len) ; 01077 if (found) 01078 break ; 01079 } 01080 ASSERT(found) ; 01081 SG_PRINT( "position %i trace: ", tree) ; 01082 01083 for (int32_t i=0; i<trace_len-1; i++) 01084 { 01085 int32_t branch=-1 ; 01086 for (int32_t q=0; q<4; q++) 01087 if (abs(TreeMem[trace[i]].children[q])==trace[i+1]) 01088 { 01089 branch=q; 01090 break ; 01091 } 01092 ASSERT(branch!=-1) ; 01093 char acgt[5]="ACGT" ; 01094 SG_PRINT( "%c", acgt[branch]) ; 01095 } 01096 SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ; 01097 if (TreeMem[node].has_floats) 01098 { 01099 for (int32_t q=0; q<4; q++) 01100 SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ; 01101 } 01102 if (TreeMem[node].has_seq) 01103 { 01104 for (int32_t q=0; q<16; q++) 01105 SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ; 01106 } 01107 if (!TreeMem[node].has_seq && !TreeMem[node].has_floats) 01108 { 01109 for (int32_t q=0; q<4; q++) 01110 { 01111 if (TreeMem[node].children[q]!=NO_CHILD) 01112 { 01113 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ; 01114 display_node(abs(TreeMem[node].children[q])) ; 01115 } 01116 else 01117 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ; 01118 } 01119 01120 } 01121 01122 delete[] trace ; 01123 #else 01124 SG_ERROR( "not implemented\n") ; 01125 #endif 01126 } 01127 01128 01129 template <class Trie> CTrie<Trie>::~CTrie() 01130 { 01131 destroy() ; 01132 01133 free(TreeMem) ; 01134 } 01135 01136 template <class Trie> void CTrie<Trie>::destroy() 01137 { 01138 if (trees!=NULL) 01139 { 01140 delete_trees(); 01141 for (int32_t i=0; i<length; i++) 01142 trees[i] = NO_CHILD; 01143 delete[] trees; 01144 01145 TreeMemPtr=0; 01146 length=0; 01147 trees=NULL; 01148 } 01149 } 01150 01151 template <class Trie> void CTrie<Trie>::set_degree(int32_t d) 01152 { 01153 delete_trees(get_use_compact_terminal_nodes()); 01154 degree=d; 01155 } 01156 01157 template <class Trie> void CTrie<Trie>::create( 01158 int32_t len, bool p_use_compact_terminal_nodes) 01159 { 01160 destroy(); 01161 01162 trees=new int32_t[len] ; 01163 TreeMemPtr=0 ; 01164 for (int32_t i=0; i<len; i++) 01165 trees[i]=get_node(degree==1); 01166 length = len ; 01167 01168 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 01169 } 01170 01171 01172 template <class Trie> void CTrie<Trie>::delete_trees( 01173 bool p_use_compact_terminal_nodes) 01174 { 01175 if (trees==NULL) 01176 return; 01177 01178 TreeMemPtr=0 ; 01179 for (int32_t i=0; i<length; i++) 01180 trees[i]=get_node(degree==1); 01181 01182 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 01183 } 01184 01185 template <class Trie> 01186 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth) 01187 { 01188 float64_t ret=0 ; 01189 01190 if (tree==NO_CHILD) 01191 return 0 ; 01192 TRIE_ASSERT(tree>=0) ; 01193 01194 if (depth==degree-2) 01195 { 01196 ret+=(TreeMem[tree].weight) ; 01197 01198 for (int32_t k=0; k<4; k++) 01199 ret+=(TreeMem[tree].child_weights[k]) ; 01200 01201 return ret ; 01202 } 01203 01204 ret+=(TreeMem[tree].weight) ; 01205 01206 for (int32_t i=0; i<4; i++) 01207 if (TreeMem[tree].children[i]!=NO_CHILD) 01208 ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1) ; 01209 01210 return ret ; 01211 } 01212 01213 01214 template <class Trie> 01215 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len) 01216 { 01217 float64_t * sum=new float64_t[length*4] ; 01218 for (int32_t i=0; i<length*4; i++) 01219 sum[i]=0 ; 01220 len=length ; 01221 01222 for (int32_t i=0; i<length; i++) 01223 { 01224 TRIE_ASSERT(trees[i]!=NO_CHILD) ; 01225 for (int32_t k=0; k<4; k++) 01226 { 01227 sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ; 01228 } 01229 } 01230 01231 return sum ; 01232 } 01233 01234 template <class Trie> 01235 void CTrie<Trie>::add_example_to_tree_mismatch_recursion( 01236 int32_t tree, int32_t i, float64_t alpha, 01237 int32_t *vec, int32_t len_rem, 01238 int32_t degree_rec, int32_t mismatch_rec, 01239 int32_t max_mismatch, float64_t * weights) 01240 { 01241 if (tree==NO_CHILD) 01242 tree=trees[i] ; 01243 TRIE_ASSERT(tree!=NO_CHILD) ; 01244 01245 if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree)) 01246 return ; 01247 const int32_t other[4][3] = { {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ; 01248 01249 int32_t subtree = NO_CHILD ; 01250 01251 if (degree_rec==degree-1) 01252 { 01253 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ; 01254 if (weights_in_tree) 01255 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]; 01256 else 01257 if (weights[degree_rec]!=0.0) 01258 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec]; 01259 if (mismatch_rec+1<=max_mismatch) 01260 for (int32_t o=0; o<3; o++) 01261 { 01262 if (weights_in_tree) 01263 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]; 01264 else 01265 if (weights[degree_rec]!=0.0) 01266 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec]; 01267 } 01268 return ; 01269 } 01270 else 01271 { 01272 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01273 if (TreeMem[tree].children[vec[0]]!=NO_CHILD) 01274 { 01275 subtree=TreeMem[tree].children[vec[0]] ; 01276 if (weights_in_tree) 01277 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]; 01278 else 01279 if (weights[degree_rec]!=0.0) 01280 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec]; 01281 } 01282 else 01283 { 01284 int32_t tmp = get_node(degree_rec==degree-2); 01285 ASSERT(tmp>=0) ; 01286 TreeMem[tree].children[vec[0]]=tmp ; 01287 subtree=tmp ; 01288 #ifdef TRIE_CHECK_EVERYTHING 01289 if (degree_rec==degree-2) 01290 TreeMem[subtree].has_floats=true ; 01291 #endif 01292 if (weights_in_tree) 01293 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ; 01294 else 01295 if (weights[degree_rec]!=0.0) 01296 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ; 01297 else 01298 TreeMem[subtree].weight = 0.0 ; 01299 } 01300 add_example_to_tree_mismatch_recursion(subtree, i, alpha, 01301 &vec[1], len_rem-1, 01302 degree_rec+1, mismatch_rec, max_mismatch, weights) ; 01303 01304 if (mismatch_rec+1<=max_mismatch) 01305 { 01306 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01307 for (int32_t o=0; o<3; o++) 01308 { 01309 int32_t ot = other[vec[0]][o] ; 01310 if (TreeMem[tree].children[ot]!=NO_CHILD) 01311 { 01312 subtree=TreeMem[tree].children[ot] ; 01313 if (weights_in_tree) 01314 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]; 01315 else 01316 if (weights[degree_rec]!=0.0) 01317 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec]; 01318 } 01319 else 01320 { 01321 int32_t tmp = get_node(degree_rec==degree-2); 01322 ASSERT(tmp>=0) ; 01323 TreeMem[tree].children[ot]=tmp ; 01324 subtree=tmp ; 01325 #ifdef TRIE_CHECK_EVERYTHING 01326 if (degree_rec==degree-2) 01327 TreeMem[subtree].has_floats=true ; 01328 #endif 01329 01330 if (weights_in_tree) 01331 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ; 01332 else 01333 if (weights[degree_rec]!=0.0) 01334 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ; 01335 else 01336 TreeMem[subtree].weight = 0.0 ; 01337 } 01338 01339 add_example_to_tree_mismatch_recursion(subtree, i, alpha, 01340 &vec[1], len_rem-1, 01341 degree_rec+1, mismatch_rec+1, max_mismatch, weights) ; 01342 } 01343 } 01344 } 01345 } 01346 01347 template <class Trie> 01348 void CTrie<Trie>::compute_scoring_helper( 01349 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d, 01350 int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset, 01351 int32_t offs, float64_t* result) 01352 { 01353 if (i+j<num_feat) 01354 { 01355 float64_t decay=1.0; //no decay by default 01356 //if (j>d) 01357 // decay=pow(0.5,j); //marginalize out lower order matches 01358 01359 if (j<degree-1) 01360 { 01361 for (int32_t k=0; k<num_sym; k++) 01362 { 01363 if (TreeMem[tree].children[k]!=NO_CHILD) 01364 { 01365 int32_t child=TreeMem[tree].children[k]; 01366 //continue recursion if not yet at max_degree, else add to result 01367 if (d<max_degree-1) 01368 compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result); 01369 else 01370 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight; 01371 01373 if (d==0) 01374 compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result); 01375 } 01376 } 01377 } 01378 else if (j==degree-1) 01379 { 01380 for (int32_t k=0; k<num_sym; k++) 01381 { 01382 //continue recursion if not yet at max_degree, else add to result 01383 if (d<max_degree-1 && i<num_feat-1) 01384 compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result); 01385 else 01386 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k]; 01387 } 01388 } 01389 } 01390 } 01391 01392 template <class Trie> 01393 void CTrie<Trie>::traverse( 01394 int32_t tree, const int32_t p, struct TreeParseInfo info, 01395 const int32_t depth, int32_t* const x, const int32_t k) 01396 { 01397 const int32_t num_sym = info.num_sym; 01398 const int32_t y0 = info.y0; 01399 const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] ); 01400 //const int32_t temp = info.substrs[depth]*num_sym - ( (depth<=k) ? 0 : info.nofsKmers[k] * x[depth-k-1] ); 01401 //if( !( info.y0 == temp ) ) { 01402 // printf( "\n temp=%d y0=%d k=%d depth=%d \n", temp, info.y0, k, depth ); 01403 //} 01404 //ASSERT( info.y0 == temp ); 01405 int32_t sym; 01406 ASSERT( depth < degree ); 01407 //ASSERT( 0 <= info.substrs[depth] && info.substrs[depth] < info.nofsKmers[k] ); 01408 if (depth<degree-1) 01409 { 01410 for( sym=0; sym<num_sym; ++sym ) { 01411 const int32_t childNum = TreeMem[tree].children[ sym ]; 01412 if( childNum != NO_CHILD ) { 01413 int32_t child = childNum ; 01414 x[depth] = sym; 01415 info.substrs[depth+1] = y0 + sym; 01416 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym; 01417 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) ); 01418 count( TreeMem[child].weight, depth, info, p, x, k ); 01419 traverse( child, p, info, depth+1, x, k ); 01420 x[depth] = -1; 01421 } 01422 } 01423 } 01424 else if( depth == degree-1 ) 01425 { 01426 for( sym=0; sym<num_sym; ++sym ) { 01427 const float64_t w = TreeMem[tree].child_weights[ sym ]; 01428 if( w != 0.0 ) { 01429 x[depth] = sym; 01430 info.substrs[depth+1] = y0 + sym; 01431 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym; 01432 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) ); 01433 count( w, depth, info, p, x, k ); 01434 x[depth] = -1; 01435 } 01436 } 01437 } 01438 //info.substrs[depth+1] = -1; 01439 //info.y0 = temp; 01440 } 01441 01442 template <class Trie> 01443 void CTrie<Trie>::count( 01444 const float64_t w, const int32_t depth, const struct TreeParseInfo info, 01445 const int32_t p, int32_t* x, const int32_t k) 01446 { 01447 ASSERT( fabs(w) < 1e10 ); 01448 ASSERT( x[depth] >= 0 ); 01449 ASSERT( x[depth+1] < 0 ); 01450 if ( depth < k ) { 01451 return; 01452 } 01453 //ASSERT( info.margFactors[ depth-k ] == pow( 0.25, depth-k ) ); 01454 const int32_t nofKmers = info.nofsKmers[k]; 01455 const float64_t margWeight = w * info.margFactors[ depth-k ]; 01456 const int32_t m_a = depth - k + 1; 01457 const int32_t m_b = info.num_feat - p; 01458 const int32_t m = ( m_a < m_b ) ? m_a : m_b; 01459 // all proper k-substrings 01460 const int32_t offset0 = nofKmers * p; 01461 register int32_t i; 01462 register int32_t offset; 01463 offset = offset0; 01464 for( i = 0; i < m; ++i ) { 01465 const int32_t y = info.substrs[i+k+1]; 01466 info.C_k[ y + offset ] += margWeight; 01467 offset += nofKmers; 01468 } 01469 if( depth > k ) { 01470 // k-prefix 01471 const int32_t offsR = info.substrs[k+1] + offset0; 01472 info.R_k[offsR] += margWeight; 01473 // k-suffix 01474 if( p+depth-k < info.num_feat ) { 01475 const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k); 01476 info.L_k[offsL] += margWeight; 01477 } 01478 } 01479 // # N.x = substring represented by N 01480 // # N.d = length of N.x 01481 // # N.s = starting position of N.x 01482 // # N.w = weight for feature represented by N 01483 // if( N.d >= k ) 01484 // margContrib = w / 4^(N.d-k) 01485 // for i = 1 to (N.d-k+1) 01486 // y = N.x[i:(i+k-1)] # overlapped k-mer 01487 // C_k[ N.s+i-1, y ] += margContrib 01488 // end; 01489 // if( N.d > k ) 01490 // L_k[ N.s+N.d-k, N.x[N.d-k+(1:k)] ] += margContrib # j-suffix of N.x 01491 // R_k[ N.s, N.x[1:k] ] += margContrib # j-prefix of N.x 01492 // end; 01493 // end; 01494 } 01495 01496 template <class Trie> 01497 void CTrie<Trie>::add_to_trie( 01498 int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha, 01499 float64_t *weights, bool degree_times_position_weights) 01500 { 01501 int32_t tree = trees[i] ; 01502 //ASSERT(seq_offset==0) ; 01503 01504 int32_t max_depth = 0 ; 01505 float64_t* weights_column ; 01506 if (degree_times_position_weights) 01507 weights_column = &weights[(i+seq_offset)*degree] ; 01508 else 01509 weights_column = weights ; 01510 01511 if (weights_in_tree) 01512 { 01513 for (int32_t j=0; (j<degree) && (i+j<length); j++) 01514 if (CMath::abs(weights_column[j]*alpha)>0) 01515 max_depth = j+1 ; 01516 } 01517 else 01518 // don't use the weights 01519 max_depth=degree ; 01520 01521 for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++) 01522 { 01523 TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ; 01524 if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD)) 01525 { 01526 if (TreeMem[tree].children[vec[i+j+seq_offset]]<0) 01527 { 01528 // special treatment of the next nodes 01529 TRIE_ASSERT(j >= degree-16) ; 01530 // get the right element 01531 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01532 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01533 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ; 01534 01535 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ; 01536 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ; 01537 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ; 01538 01539 // check whether the same string is stored 01540 int32_t mismatch_pos = -1 ; 01541 { 01542 int32_t k ; 01543 for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++) 01544 { 01545 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ; 01546 // ### 01547 if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER)) 01548 fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ; 01549 TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ; 01550 TRIE_ASSERT(k<16) ; 01551 if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k]) 01552 { 01553 mismatch_pos=k ; 01554 break ; 01555 } 01556 } 01557 } 01558 01559 // what happens when the .seq sequence is longer than vec? should we branch??? 01560 01561 if (mismatch_pos==-1) 01562 // if so, then just increase the weight by alpha and stop 01563 TreeMem[node].weight+=alpha ; 01564 else 01565 // otherwise 01566 // 1. replace current node with new node 01567 // 2. create new nodes until mismatching positon 01568 // 2. add a branch with old string (old node) and the new string (new node) 01569 { 01570 // replace old node 01571 int32_t last_node=tree ; 01572 01573 // create new nodes until mismatch 01574 int32_t k ; 01575 for (k=0; k<mismatch_pos; k++) 01576 { 01577 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ; 01578 TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ; 01579 01580 int32_t tmp=get_node(); 01581 TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ; 01582 last_node=tmp ; 01583 if (weights_in_tree) 01584 TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ; 01585 else 01586 TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ; 01587 TRIE_ASSERT(j+k!=degree-1) ; 01588 } 01589 if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER)) 01590 fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ; 01591 ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ; 01592 TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ; 01593 01594 if (j+k==degree-1) 01595 { 01596 // init child weights with zero if after dropping out 01597 // of the k<mismatch_pos loop we are one level below degree 01598 // (keep this even after get_node() change!) 01599 for (int32_t q=0; q<4; q++) 01600 TreeMem[last_node].child_weights[q]=0.0 ; 01601 if (weights_in_tree) 01602 { 01603 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01604 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ; 01605 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ; 01606 } 01607 else 01608 { 01609 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01610 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ; 01611 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ; 01612 } 01613 01614 #ifdef TRIE_CHECK_EVERYTHING 01615 TreeMem[last_node].has_floats=true ; 01616 #endif 01617 } 01618 else 01619 { 01620 // the branch for the existing string 01621 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01622 { 01623 TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ; 01624 01625 // move string by mismatch_pos positions 01626 for (int32_t q=0; q<16; q++) 01627 { 01628 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length)) 01629 TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ; 01630 else 01631 TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ; 01632 } 01633 #ifdef TRIE_CHECK_EVERYTHING 01634 TreeMem[node].has_seq=true ; 01635 #endif 01636 } 01637 01638 // the new branch 01639 TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ; 01640 int32_t tmp = get_node() ; 01641 TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ; 01642 last_node=tmp ; 01643 01644 TreeMem[last_node].weight = alpha ; 01645 #ifdef TRIE_CHECK_EVERYTHING 01646 TreeMem[last_node].has_seq = true ; 01647 #endif 01648 memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ; 01649 for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++) 01650 TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ; 01651 } 01652 } 01653 break ; 01654 } 01655 else 01656 { 01657 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ; 01658 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ; 01659 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01660 if (weights_in_tree) 01661 TreeMem[tree].weight += alpha*weights_column[j]; 01662 else 01663 TreeMem[tree].weight += alpha ; 01664 } 01665 } 01666 else if (j==degree-1) 01667 { 01668 // special treatment of the last node 01669 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01670 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ; 01671 if (weights_in_tree) 01672 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ; 01673 else 01674 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha; 01675 01676 break; 01677 } 01678 else 01679 { 01680 bool use_seq = use_compact_terminal_nodes && (j>degree-16) ; 01681 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01682 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01683 01684 int32_t tmp = get_node((j==degree-2) && (!use_seq)); 01685 if (use_seq) 01686 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ; 01687 else 01688 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ; 01689 tree=tmp ; 01690 01691 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ; 01692 #ifdef TRIE_CHECK_EVERYTHING 01693 TreeMem[tree].has_seq = use_seq ; 01694 #endif 01695 if (use_seq) 01696 { 01697 TreeMem[tree].weight = alpha ; 01698 // important to have the terminal characters (see ###) 01699 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ; 01700 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++) 01701 { 01702 TRIE_ASSERT(q<16) ; 01703 TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ; 01704 } 01705 break ; 01706 } 01707 else 01708 { 01709 if (weights_in_tree) 01710 TreeMem[tree].weight = alpha*weights_column[j] ; 01711 else 01712 TreeMem[tree].weight = alpha ; 01713 #ifdef TRIE_CHECK_EVERYTHING 01714 if (j==degree-2) 01715 TreeMem[tree].has_floats = true ; 01716 #endif 01717 } 01718 } 01719 } 01720 } 01721 01722 template <class Trie> 01723 float64_t CTrie<Trie>::compute_by_tree_helper( 01724 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 01725 int32_t weight_pos, float64_t* weights, 01726 bool degree_times_position_weights) 01727 { 01728 int32_t tree = trees[tree_pos] ; 01729 01730 if ((position_weights!=NULL) && (position_weights[weight_pos]==0)) 01731 return 0.0; 01732 01733 float64_t *weights_column=NULL ; 01734 if (degree_times_position_weights) 01735 weights_column=&weights[weight_pos*degree] ; 01736 else // weights is a vector (1 x degree) 01737 weights_column=weights ; 01738 01739 float64_t sum=0 ; 01740 for (int32_t j=0; seq_pos+j < len; j++) 01741 { 01742 TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ; 01743 01744 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01745 { 01746 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01747 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01748 { 01749 tree = - TreeMem[tree].children[vec[seq_pos+j]]; 01750 TRIE_ASSERT(tree>=0) ; 01751 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01752 float64_t this_weight=0.0 ; 01753 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01754 { 01755 TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ; 01756 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01757 break ; 01758 this_weight += weights_column[j+k] ; 01759 } 01760 sum += TreeMem[tree].weight * this_weight ; 01761 break ; 01762 } 01763 else 01764 { 01765 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01766 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01767 if (weights_in_tree) 01768 sum += TreeMem[tree].weight ; 01769 else 01770 sum += TreeMem[tree].weight * weights_column[j] ; 01771 } ; 01772 } 01773 else 01774 { 01775 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01776 if (j==degree-1) 01777 { 01778 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ; 01779 if (weights_in_tree) 01780 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01781 else 01782 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ; 01783 } 01784 else 01785 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01786 01787 break; 01788 } 01789 } 01790 01791 if (position_weights!=NULL) 01792 return sum*position_weights[weight_pos] ; 01793 else 01794 return sum ; 01795 } 01796 01797 template <class Trie> 01798 void CTrie<Trie>::compute_by_tree_helper( 01799 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 01800 int32_t weight_pos, float64_t* LevelContrib, float64_t factor, 01801 int32_t mkl_stepsize, float64_t * weights, 01802 bool degree_times_position_weights) 01803 { 01804 int32_t tree = trees[tree_pos] ; 01805 if (factor==0) 01806 return ; 01807 01808 if (position_weights!=NULL) 01809 { 01810 factor *= position_weights[weight_pos] ; 01811 if (factor==0) 01812 return ; 01813 if (!degree_times_position_weights) // with position_weigths, weights is a vector (1 x degree) 01814 { 01815 for (int32_t j=0; seq_pos+j<len; j++) 01816 { 01817 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01818 { 01819 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01820 { 01821 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01822 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01823 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01824 { 01825 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01826 break ; 01827 if (weights_in_tree) 01828 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01829 else 01830 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ; 01831 } 01832 break ; 01833 } 01834 else 01835 { 01836 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01837 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01838 if (weights_in_tree) 01839 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01840 else 01841 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ; 01842 } 01843 } 01844 else 01845 { 01846 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01847 if (j==degree-1) 01848 { 01849 if (weights_in_tree) 01850 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01851 else 01852 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ; 01853 } 01854 } 01855 } 01856 } 01857 else // with position_weigths, weights is a matrix (len x degree) 01858 { 01859 for (int32_t j=0; seq_pos+j<len; j++) 01860 { 01861 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01862 { 01863 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01864 { 01865 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01866 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01867 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01868 { 01869 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01870 break ; 01871 if (weights_in_tree) 01872 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01873 else 01874 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ; 01875 } 01876 break ; 01877 } 01878 else 01879 { 01880 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01881 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01882 if (weights_in_tree) 01883 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01884 else 01885 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ; 01886 } 01887 } 01888 else 01889 { 01890 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01891 if (j==degree-1) 01892 { 01893 if (weights_in_tree) 01894 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01895 else 01896 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ; 01897 } 01898 break ; 01899 } 01900 } 01901 } 01902 } 01903 else if (!degree_times_position_weights) // no position_weigths, weights is a vector (1 x degree) 01904 { 01905 for (int32_t j=0; seq_pos+j<len; j++) 01906 { 01907 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01908 { 01909 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01910 { 01911 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01912 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01913 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01914 { 01915 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01916 break ; 01917 if (weights_in_tree) 01918 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ; 01919 else 01920 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ; 01921 } 01922 break ; 01923 } 01924 else 01925 { 01926 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01927 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01928 if (weights_in_tree) 01929 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ; 01930 else 01931 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ; 01932 } 01933 } 01934 else 01935 { 01936 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01937 if (j==degree-1) 01938 { 01939 if (weights_in_tree) 01940 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01941 else 01942 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ; 01943 } 01944 break ; 01945 } 01946 } 01947 } 01948 else // no position_weigths, weights is a matrix (len x degree) 01949 { 01950 /*if (!position_mask) 01951 { 01952 position_mask = new bool[len] ; 01953 for (int32_t i=0; i<len; i++) 01954 { 01955 position_mask[i]=false ; 01956 01957 for (int32_t j=0; j<degree; j++) 01958 if (weights[i*degree+j]!=0.0) 01959 { 01960 position_mask[i]=true ; 01961 break ; 01962 } 01963 } 01964 } 01965 if (position_mask[weight_pos]==0) 01966 return ;*/ 01967 01968 for (int32_t j=0; seq_pos+j<len; j++) 01969 { 01970 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01971 { 01972 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01973 { 01974 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01975 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01976 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01977 { 01978 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01979 break ; 01980 if (weights_in_tree) 01981 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ; 01982 else 01983 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ; 01984 } 01985 break ; 01986 } 01987 else 01988 { 01989 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01990 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01991 if (weights_in_tree) 01992 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ; 01993 else 01994 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ; 01995 } 01996 } 01997 else 01998 { 01999 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 02000 if (j==degree-1) 02001 { 02002 if (weights_in_tree) 02003 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ; 02004 else 02005 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ; 02006 } 02007 break ; 02008 } 02009 } 02010 } 02011 } 02012 02013 template <class Trie> 02014 void CTrie<Trie>::fill_backtracking_table_recursion( 02015 Trie* tree, int32_t depth, uint64_t seq, float64_t value, 02016 DynArray<ConsensusEntry>* table, float64_t* weights) 02017 { 02018 float64_t w=1.0; 02019 02020 if (weights_in_tree || depth==0) 02021 value+=tree->weight; 02022 else 02023 { 02024 w=weights[degree-1]; 02025 value+=weights[depth-1]*tree->weight; 02026 } 02027 02028 if (degree-1==depth) 02029 { 02030 for (int32_t sym=0; sym<4; sym++) 02031 { 02032 float64_t v=w*tree->child_weights[sym]; 02033 if (v!=0.0) 02034 { 02035 ConsensusEntry entry; 02036 entry.bt=-1; 02037 entry.score=value+v; 02038 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1)); 02039 02040 table->append_element(entry); 02041 } 02042 } 02043 } 02044 else 02045 { 02046 for (int32_t sym=0; sym<4; sym++) 02047 { 02048 uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1)); 02049 if (tree->children[sym] != NO_CHILD) 02050 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights); 02051 } 02052 } 02053 } 02054 02055 template <class Trie> 02056 float64_t CTrie<Trie>::get_cumulative_score( 02057 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights) 02058 { 02059 float64_t result=0.0; 02060 02061 //SG_PRINT("pos:%i length:%i deg:%i seq:0x%0llx...\n", pos, length, deg, seq); 02062 02063 for (int32_t i=pos; i<pos+deg && i<length; i++) 02064 { 02065 //SG_PRINT("loop %d\n", i); 02066 Trie* tree = &TreeMem[trees[i]]; 02067 02068 for (int32_t d=0; d<deg-i+pos; d++) 02069 { 02070 //SG_PRINT("loop degree %d shit: %d\n", d, (2*(deg-1-d-i+pos))); 02071 ASSERT(d-1<degree); 02072 int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3); 02073 02074 float64_t w=1.0; 02075 if (!weights_in_tree) 02076 w=weights[d]; 02077 02078 ASSERT(tree->children[sym] != NO_CHILD); 02079 tree=&TreeMem[tree->children[sym]]; 02080 result+=w*tree->weight; 02081 } 02082 } 02083 //SG_PRINT("cum: %f\n", result); 02084 return result; 02085 } 02086 02087 template <class Trie> 02088 void CTrie<Trie>::fill_backtracking_table( 02089 int32_t pos, DynArray<ConsensusEntry>* prev, 02090 DynArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights) 02091 { 02092 ASSERT(pos>=0 && pos<length); 02093 ASSERT(!use_compact_terminal_nodes); 02094 02095 Trie* t = &TreeMem[trees[pos]]; 02096 02097 fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights); 02098 02099 02100 if (cumulative) 02101 { 02102 int32_t num_cur=cur->get_num_elements(); 02103 for (int32_t i=0; i<num_cur; i++) 02104 { 02105 ConsensusEntry entry=cur->get_element(i); 02106 entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights); 02107 cur->set_element(entry,i); 02108 //SG_PRINT("cum: str:0%0llx sc:%f bt:%d\n",entry.string,entry.score,entry.bt); 02109 } 02110 } 02111 02112 //if previous tree exists find maximum scoring path 02113 //for each element in cur and update bt table 02114 if (prev) 02115 { 02116 int32_t num_cur=cur->get_num_elements(); 02117 int32_t num_prev=prev->get_num_elements(); 02118 02119 for (int32_t i=0; i<num_cur; i++) 02120 { 02121 //uint64_t str_cur_old= cur->get_element(i).string; 02122 uint64_t str_cur= cur->get_element(i).string >> 2; 02123 //SG_PRINT("...cur:0x%0llx cur_noprfx:0x%0llx...\n", str_cur_old, str_cur); 02124 02125 int32_t bt=-1; 02126 float64_t max_score=0.0; 02127 02128 for (int32_t j=0; j<num_prev; j++) 02129 { 02130 //uint64_t str_prev_old= prev->get_element(j).string; 02131 uint64_t mask= 02132 ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1)))); 02133 uint64_t str_prev= mask & prev->get_element(j).string; 02134 //SG_PRINT("...prev:0x%0llx prev_nosfx:0x%0llx mask:%0llx...\n", str_prev_old, str_prev,mask); 02135 02136 if (str_cur == str_prev) 02137 { 02138 float64_t sc=prev->get_element(j).score+cur->get_element(i).score; 02139 if (bt==-1 || sc>max_score) 02140 { 02141 bt=j; 02142 max_score=sc; 02143 02144 //SG_PRINT("new max[%i,%i] = %f\n", j,i, max_score); 02145 } 02146 } 02147 } 02148 02149 ASSERT(bt!=-1); 02150 ConsensusEntry entry; 02151 entry.bt=bt; 02152 entry.score=max_score; 02153 entry.string=cur->get_element(i).string; 02154 cur->set_element(entry, i); 02155 //SG_PRINT("entry[%d]: str:0%0llx sc:%f bt:%d\n",i, entry.string,entry.score,entry.bt); 02156 } 02157 } 02158 } 02159 } 02160 #endif // _TRIE_H___