00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _TRIE_H___
00013 #define _TRIE_H___
00014
00015 #include <string.h>
00016 #include "lib/common.h"
00017 #include "lib/io.h"
00018 #include "lib/DynamicArray.h"
00019 #include "lib/Mathematics.h"
00020 #include "base/SGObject.h"
00021
00022 namespace shogun
00023 {
00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00025
00026
00027 #define NO_CHILD ((int32_t)-1073741824)
00028
00029 #define WEIGHTS_IN_TRIE
00030
00031
00032 #ifdef TRIE_CHECK_EVERYTHING
00033 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
00034 #else
00035 #define TRIE_ASSERT_EVERYTHING(x)
00036 #endif
00037
00038
00039 #define TRIE_ASSERT(x)
00040
00041 #define TRIE_TERMINAL_CHARACTER 7
00042
00044 struct ConsensusEntry
00045 {
00047 uint64_t string;
00049 float32_t score;
00051 int32_t bt;
00052 };
00053
00055 struct POIMTrie
00056 {
00058 float64_t weight;
00059 #ifdef TRIE_CHECK_EVERYTHING
00060
00061 bool has_seq;
00063 bool has_floats;
00064 #endif
00065 union
00066 {
00068 float32_t child_weights[4];
00070 int32_t children[4];
00072 uint8_t seq[16] ;
00073 };
00074
00076 float64_t S;
00078 float64_t L;
00080 float64_t R;
00081 };
00082
00084 struct DNATrie
00085 {
00087 float64_t weight;
00088 #ifdef TRIE_CHECK_EVERYTHING
00089
00090 bool has_seq;
00092 bool has_floats;
00093 #endif
00094 union
00095 {
00097 float32_t child_weights[4];
00099 int32_t children[4];
00101 uint8_t seq[16] ;
00102 };
00103 };
00104
00106 struct TreeParseInfo {
00108 int32_t num_sym;
00110 int32_t num_feat;
00112 int32_t p;
00114 int32_t k;
00116 int32_t* nofsKmers;
00118 float64_t* margFactors;
00120 int32_t* x;
00122 int32_t* substrs;
00124 int32_t y0;
00126 float64_t* C_k;
00128 float64_t* L_k;
00130 float64_t* R_k;
00131 };
00132
00133 #endif // DOXYGEN_SHOULD_SKIP_THIS
00134
00135 template <class Trie> class CTrie;
00136
00155 template <class Trie> class CTrie : public CSGObject
00156 {
00157 public:
00164 CTrie(int32_t d, bool p_use_compact_terminal_nodes=true);
00165
00167 CTrie(const CTrie & to_copy);
00168 virtual ~CTrie();
00169
00171 const CTrie & operator=(const CTrie & to_copy);
00172
00180 bool compare_traverse(
00181 int32_t node, const CTrie & other, int32_t other_node);
00182
00188 bool compare(const CTrie & other);
00189
00196 bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const;
00197
00204 int32_t find_deepest_node(
00205 int32_t start_node, int32_t &deepest_node) const;
00206
00211 void display_node(int32_t node) const;
00212
00214 void destroy();
00215
00220 void set_degree(int32_t d);
00221
00228 void create(int32_t len, bool p_use_compact_terminal_nodes=true);
00229
00235 void delete_trees(bool p_use_compact_terminal_nodes=true);
00236
00247 void add_to_trie(
00248 int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha,
00249 float64_t *weights, bool degree_times_position_weights);
00250
00257 float64_t compute_abs_weights_tree(int32_t tree, int32_t depth);
00258
00264 float64_t* compute_abs_weights(int32_t &len);
00265
00278 float64_t compute_by_tree_helper(
00279 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00280 int32_t weight_pos, float64_t * weights,
00281 bool degree_times_position_weights) ;
00282
00297 void compute_by_tree_helper(
00298 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00299 int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
00300 int32_t mkl_stepsize, float64_t * weights,
00301 bool degree_times_position_weights);
00302
00317 void compute_scoring_helper(
00318 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
00319 int32_t max_degree, int32_t num_feat, int32_t num_sym,
00320 int32_t sym_offset, int32_t offs, float64_t* result);
00321
00334 void add_example_to_tree_mismatch_recursion(
00335 int32_t tree, int32_t i, float64_t alpha, int32_t *vec,
00336 int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
00337 int32_t max_mismatch, float64_t * weights);
00338
00348 void traverse(
00349 int32_t tree, const int32_t p, struct TreeParseInfo info,
00350 const int32_t depth, int32_t* const x, const int32_t k);
00351
00361 void count(
00362 const float64_t w, const int32_t depth,
00363 const struct TreeParseInfo info, const int32_t p, int32_t* x,
00364 const int32_t k);
00365
00372 int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights);
00373
00382 float64_t get_cumulative_score(
00383 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights);
00384
00394 void fill_backtracking_table_recursion(
00395 Trie* tree, int32_t depth, uint64_t seq, float64_t value,
00396 CDynamicArray<ConsensusEntry>* table, float64_t* weights);
00397
00406 void fill_backtracking_table(
00407 int32_t pos, CDynamicArray<ConsensusEntry>* prev,
00408 CDynamicArray<ConsensusEntry>* cur, bool cumulative,
00409 float64_t* weights);
00410
00416 void POIMs_extract_W(float64_t* const* const W, const int32_t K);
00417
00422 void POIMs_precalc_SLR(const float64_t* const distrib);
00423
00433 void POIMs_get_SLR(
00434 const int32_t parentIdx, const int32_t sym, const int32_t depth,
00435 float64_t* S, float64_t* L, float64_t* R);
00436
00443 void POIMs_add_SLR(
00444 float64_t* const* const poims, const int32_t K,
00445 const int32_t debug);
00446
00451 inline bool get_use_compact_terminal_nodes()
00452 {
00453 return use_compact_terminal_nodes ;
00454 }
00455
00461 inline void set_use_compact_terminal_nodes(
00462 bool p_use_compact_terminal_nodes)
00463 {
00464 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
00465 }
00466
00471 inline int32_t get_num_used_nodes()
00472 {
00473 return TreeMemPtr;
00474 }
00475
00480 inline void set_position_weights(const float64_t * p_position_weights)
00481 {
00482 position_weights=p_position_weights;
00483 }
00484
00489 inline int32_t get_node(bool last_node=false)
00490 {
00491 int32_t ret = TreeMemPtr++;
00492 check_treemem() ;
00493
00494 if (last_node)
00495 {
00496 for (int32_t q=0; q<4; q++)
00497 TreeMem[ret].child_weights[q]=0.0;
00498 }
00499 else
00500 {
00501 for (int32_t q=0; q<4; q++)
00502 TreeMem[ret].children[q]=NO_CHILD;
00503 }
00504 #ifdef TRIE_CHECK_EVERYTHING
00505 TreeMem[ret].has_seq=false ;
00506 TreeMem[ret].has_floats=false ;
00507 #endif
00508 TreeMem[ret].weight=0.0;
00509 return ret ;
00510 }
00511
00513 inline void check_treemem()
00514 {
00515 if (TreeMemPtr+10 < TreeMemPtrMax)
00516 return;
00517 SG_DEBUG( "Extending TreeMem from %i to %i elements\n",
00518 TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2));
00519 TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2);
00520 TreeMem = (Trie*) realloc(TreeMem,
00521 TreeMemPtrMax*sizeof(Trie));
00522 if (!TreeMem)
00523 SG_ERROR( "out of memory\n");
00524 }
00525
00530 inline void set_weights_in_tree(bool weights_in_tree_)
00531 {
00532 weights_in_tree = weights_in_tree_;
00533 }
00534
00539 inline bool get_weights_in_tree()
00540 {
00541 return weights_in_tree;
00542 }
00543
00553 void POIMs_extract_W_helper(
00554 const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00555 const int32_t y0, float64_t* const* const W, const int32_t K);
00556
00569 void POIMs_calc_SLR_helper1(
00570 const float64_t* const distrib, const int32_t i,
00571 const int32_t nodeIdx, int32_t left_tries_idx[4],
00572 const int32_t depth, int32_t const lastSym, float64_t* S,
00573 float64_t* L, float64_t* R);
00574
00575
00586 void POIMs_calc_SLR_helper2(
00587 const float64_t* const distrib, const int32_t i,
00588 const int32_t nodeIdx, int32_t left_tries_idx[4],
00589 const int32_t depth, float64_t* S, float64_t* L, float64_t* R);
00590
00601 void POIMs_add_SLR_helper1(
00602 const int32_t nodeIdx, const int32_t depth,const int32_t i,
00603 const int32_t y0, float64_t* const* const poims, const int32_t K,
00604 const int32_t debug);
00605
00619 void POIMs_add_SLR_helper2(
00620 float64_t* const* const poims, const int32_t K, const int32_t k,
00621 const int32_t i, const int32_t y, const float64_t valW,
00622 const float64_t valS, const float64_t valL, const float64_t valR,
00623 const int32_t debug);
00624
00626 inline virtual const char* get_name() const { return "Trie"; }
00627
00628 public:
00630 int32_t NUM_SYMS;
00631
00632 protected:
00634 int32_t length;
00636 int32_t * trees;
00637
00639 int32_t degree;
00641 float64_t const * position_weights;
00642
00644 Trie* TreeMem;
00646 int32_t TreeMemPtr;
00648 int32_t TreeMemPtrMax;
00650 bool use_compact_terminal_nodes;
00651
00653 bool weights_in_tree;
00654
00656 int32_t* nofsKmers;
00657 };
00658
00659 template <class Trie>
00660 CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes)
00661 : CSGObject(), degree(d), position_weights(NULL),
00662 use_compact_terminal_nodes(p_use_compact_terminal_nodes),
00663 weights_in_tree(true)
00664 {
00665 TreeMemPtrMax=1024*1024/sizeof(Trie);
00666 TreeMemPtr=0;
00667 TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00668
00669 length=0;
00670 trees=NULL;
00671
00672 NUM_SYMS=4;
00673 }
00674
00675 template <class Trie>
00676 CTrie<Trie>::CTrie(const CTrie & to_copy)
00677 : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
00678 use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
00679 {
00680 if (to_copy.position_weights!=NULL)
00681 {
00682 position_weights = to_copy.position_weights;
00683
00684
00685
00686 }
00687 else
00688 position_weights=NULL;
00689
00690 TreeMemPtrMax=to_copy.TreeMemPtrMax;
00691 TreeMemPtr=to_copy.TreeMemPtr;
00692 TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00693 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie));
00694
00695 length=to_copy.length;
00696 trees=new int32_t[length];
00697 for (int32_t i=0; i<length; i++)
00698 trees[i]=to_copy.trees[i];
00699
00700 NUM_SYMS=4;
00701 }
00702
00703 template <class Trie>
00704 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy)
00705 {
00706 degree=to_copy.degree ;
00707 use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ;
00708
00709 delete[] position_weights ;
00710 position_weights=NULL ;
00711 if (to_copy.position_weights!=NULL)
00712 {
00713 position_weights=to_copy.position_weights ;
00714
00715
00716
00717 }
00718 else
00719 position_weights=NULL ;
00720
00721 TreeMemPtrMax=to_copy.TreeMemPtrMax ;
00722 TreeMemPtr=to_copy.TreeMemPtr ;
00723 free(TreeMem) ;
00724 TreeMem = (Trie*)malloc(TreeMemPtrMax*sizeof(Trie)) ;
00725 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ;
00726
00727 length = to_copy.length ;
00728 if (trees)
00729 delete[] trees ;
00730 trees=new int32_t[length] ;
00731 for (int32_t i=0; i<length; i++)
00732 trees[i]=to_copy.trees[i] ;
00733
00734 return *this ;
00735 }
00736
00737 template <class Trie>
00738 int32_t CTrie<Trie>::find_deepest_node(
00739 int32_t start_node, int32_t& deepest_node) const
00740 {
00741 #ifdef TRIE_CHECK_EVERYTHING
00742 int32_t ret=0 ;
00743 SG_DEBUG("start_node=%i\n", start_node) ;
00744
00745 if (start_node==NO_CHILD)
00746 {
00747 for (int32_t i=0; i<length; i++)
00748 {
00749 int32_t my_deepest_node ;
00750 int32_t depth=find_deepest_node(i, my_deepest_node) ;
00751 SG_DEBUG("start_node %i depth=%i\n", i, depth) ;
00752 if (depth>ret)
00753 {
00754 deepest_node=my_deepest_node ;
00755 ret=depth ;
00756 }
00757 }
00758 return ret ;
00759 }
00760
00761 if (TreeMem[start_node].has_seq)
00762 {
00763 for (int32_t q=0; q<16; q++)
00764 if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
00765 ret++ ;
00766 deepest_node=start_node ;
00767 return ret ;
00768 }
00769 if (TreeMem[start_node].has_floats)
00770 {
00771 deepest_node=start_node ;
00772 return 1 ;
00773 }
00774
00775 for (int32_t q=0; q<4; q++)
00776 {
00777 int32_t my_deepest_node ;
00778 if (TreeMem[start_node].children[q]==NO_CHILD)
00779 continue ;
00780 int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
00781 if (depth>ret)
00782 {
00783 deepest_node=my_deepest_node ;
00784 ret=depth ;
00785 }
00786 }
00787 return ret ;
00788 #else
00789 SG_ERROR( "not implemented\n") ;
00790 return 0 ;
00791 #endif
00792 }
00793
00794 template <class Trie>
00795 int32_t CTrie<Trie>::compact_nodes(
00796 int32_t start_node, int32_t depth, float64_t * weights)
00797 {
00798 SG_ERROR( "code buggy\n") ;
00799
00800 int32_t ret=0 ;
00801
00802 if (start_node==NO_CHILD)
00803 {
00804 for (int32_t i=0; i<length; i++)
00805 compact_nodes(i,1, weights) ;
00806 return 0 ;
00807 }
00808 if (start_node<0)
00809 return -1 ;
00810
00811 if (depth==degree-1)
00812 {
00813 TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ;
00814 int32_t num_used=0 ;
00815 for (int32_t q=0; q<4; q++)
00816 if (TreeMem[start_node].child_weights[q]!=0.0)
00817 num_used++ ;
00818 if (num_used>1)
00819 return -1 ;
00820 return 1 ;
00821 }
00822 TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ;
00823
00824 int32_t num_used = 0 ;
00825 int32_t q_used=-1 ;
00826
00827 for (int32_t q=0; q<4; q++)
00828 {
00829 if (TreeMem[start_node].children[q]==NO_CHILD)
00830 continue ;
00831 num_used++ ;
00832 q_used=q ;
00833 }
00834 if (num_used>1)
00835 {
00836 if (depth>=degree-2)
00837 return -1 ;
00838 for (int32_t q=0; q<4; q++)
00839 {
00840 if (TreeMem[start_node].children[q]==NO_CHILD)
00841 continue ;
00842 int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
00843 if (num<=2)
00844 continue ;
00845 int32_t node=get_node() ;
00846
00847 int32_t last_node=TreeMem[start_node].children[q] ;
00848 if (weights_in_tree)
00849 {
00850 ASSERT(weights[depth]!=0.0) ;
00851 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
00852 }
00853 else
00854 TreeMem[node].weight=TreeMem[last_node].weight ;
00855
00856 #ifdef TRIE_CHECK_EVERYTHING
00857 TreeMem[node].has_seq=true ;
00858 #endif
00859 memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
00860 for (int32_t n=0; n<num; n++)
00861 {
00862 ASSERT(depth+n+1<=degree-1) ;
00863 ASSERT(last_node!=NO_CHILD) ;
00864 if (depth+n+1==degree-1)
00865 {
00866 TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ;
00867 int32_t k ;
00868 for (k=0; k<4; k++)
00869 if (TreeMem[last_node].child_weights[k]!=0.0)
00870 break ;
00871 if (k==4)
00872 break ;
00873 TreeMem[node].seq[n]=k ;
00874 break ;
00875 }
00876 else
00877 {
00878 TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ;
00879 int32_t k ;
00880 for (k=0; k<4; k++)
00881 if (TreeMem[last_node].children[k]!=NO_CHILD)
00882 break ;
00883 if (k==4)
00884 break ;
00885 TreeMem[node].seq[n]=k ;
00886 last_node=TreeMem[last_node].children[k] ;
00887 }
00888 }
00889 TreeMem[start_node].children[q]=-node ;
00890 }
00891 return -1 ;
00892 }
00893 if (num_used==0)
00894 return 0 ;
00895
00896 ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
00897 if (ret<0)
00898 return ret ;
00899 return ret+1 ;
00900 }
00901
00902
00903 template <class Trie>
00904 bool CTrie<Trie>::compare_traverse(
00905 int32_t node, const CTrie<Trie> & other, int32_t other_node)
00906 {
00907 SG_DEBUG("checking nodes %i and %i\n", node, other_node) ;
00908 if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5)
00909 {
00910 SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ;
00911 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00912 display_node(node) ;
00913 SG_DEBUG( "============================================================\n") ;
00914 other.display_node(other_node) ;
00915 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00916 return false ;
00917 }
00918
00919 #ifdef TRIE_CHECK_EVERYTHING
00920 if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq)
00921 {
00922 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ;
00923 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00924 display_node(node) ;
00925 SG_DEBUG( "============================================================\n") ;
00926 other.display_node(other_node) ;
00927 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00928 return false ;
00929 }
00930 if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats)
00931 {
00932 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ;
00933 return false ;
00934 }
00935 if (other.TreeMem[other_node].has_floats)
00936 {
00937 for (int32_t q=0; q<4; q++)
00938 if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5)
00939 {
00940 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ;
00941 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00942 display_node(node) ;
00943 SG_DEBUG( "============================================================\n") ;
00944 other.display_node(other_node) ;
00945 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00946 return false ;
00947 }
00948 }
00949 if (other.TreeMem[other_node].has_seq)
00950 {
00951 for (int32_t q=0; q<16; q++)
00952 if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4)))
00953 {
00954 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ;
00955 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00956 display_node(node) ;
00957 SG_DEBUG( "============================================================\n") ;
00958 other.display_node(other_node) ;
00959 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00960 return false ;
00961 }
00962 }
00963 if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats)
00964 {
00965 for (int32_t q=0; q<4; q++)
00966 {
00967 if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD))
00968 continue ;
00969 if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD))
00970 {
00971 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ;
00972 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00973 display_node(node) ;
00974 SG_DEBUG( "============================================================\n") ;
00975 other.display_node(other_node) ;
00976 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00977 return false ;
00978 }
00979 if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q])))
00980 return false ;
00981 }
00982 }
00983 #else
00984 SG_ERROR( "not implemented\n") ;
00985 #endif
00986
00987 return true ;
00988 }
00989
00990 template <class Trie>
00991 bool CTrie<Trie>::compare(const CTrie<Trie> & other)
00992 {
00993 bool ret=true ;
00994 for (int32_t i=0; i<length; i++)
00995 if (!compare_traverse(trees[i], other, other.trees[i]))
00996 return false ;
00997 else
00998 SG_DEBUG("two tries at %i identical\n", i) ;
00999
01000 return ret ;
01001 }
01002
01003 template <class Trie>
01004 bool CTrie<Trie>::find_node(
01005 int32_t node, int32_t * trace, int32_t& trace_len) const
01006 {
01007 #ifdef TRIE_CHECK_EVERYTHING
01008 ASSERT(trace_len-1>=0) ;
01009 ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
01010 if (TreeMem[trace[trace_len-1]].has_seq)
01011 return false ;
01012 if (TreeMem[trace[trace_len-1]].has_floats)
01013 return false ;
01014
01015 for (int32_t q=0; q<4; q++)
01016 {
01017 if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
01018 continue ;
01019 int32_t tl=trace_len+1 ;
01020 if (TreeMem[trace[trace_len-1]].children[q]>=0)
01021 trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
01022 else
01023 trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
01024
01025 if (trace[trace_len]==node)
01026 {
01027 trace_len=tl ;
01028 return true ;
01029 }
01030 if (find_node(node, trace, tl))
01031 {
01032 trace_len=tl ;
01033 return true ;
01034 }
01035 }
01036 trace_len=0 ;
01037 return false ;
01038 #else
01039 SG_ERROR( "not implemented\n") ;
01040 return false ;
01041 #endif
01042 }
01043
01044 template <class Trie>
01045 void CTrie<Trie>::display_node(int32_t node) const
01046 {
01047 #ifdef TRIE_CHECK_EVERYTHING
01048 int32_t * trace=new int32_t[2*degree] ;
01049 int32_t trace_len=-1 ;
01050 bool found = false ;
01051 int32_t tree=-1 ;
01052 for (tree=0; tree<length; tree++)
01053 {
01054 trace[0]=trees[tree] ;
01055 trace_len=1 ;
01056 found=find_node(node, trace, trace_len) ;
01057 if (found)
01058 break ;
01059 }
01060 ASSERT(found) ;
01061 SG_PRINT( "position %i trace: ", tree) ;
01062
01063 for (int32_t i=0; i<trace_len-1; i++)
01064 {
01065 int32_t branch=-1 ;
01066 for (int32_t q=0; q<4; q++)
01067 if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
01068 {
01069 branch=q;
01070 break ;
01071 }
01072 ASSERT(branch!=-1) ;
01073 char acgt[5]="ACGT" ;
01074 SG_PRINT( "%c", acgt[branch]) ;
01075 }
01076 SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ;
01077 if (TreeMem[node].has_floats)
01078 {
01079 for (int32_t q=0; q<4; q++)
01080 SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ;
01081 }
01082 if (TreeMem[node].has_seq)
01083 {
01084 for (int32_t q=0; q<16; q++)
01085 SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ;
01086 }
01087 if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
01088 {
01089 for (int32_t q=0; q<4; q++)
01090 {
01091 if (TreeMem[node].children[q]!=NO_CHILD)
01092 {
01093 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ;
01094 display_node(abs(TreeMem[node].children[q])) ;
01095 }
01096 else
01097 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ;
01098 }
01099
01100 }
01101
01102 delete[] trace ;
01103 #else
01104 SG_ERROR( "not implemented\n") ;
01105 #endif
01106 }
01107
01108
01109 template <class Trie> CTrie<Trie>::~CTrie()
01110 {
01111 destroy() ;
01112
01113 free(TreeMem) ;
01114 }
01115
01116 template <class Trie> void CTrie<Trie>::destroy()
01117 {
01118 if (trees!=NULL)
01119 {
01120 delete_trees();
01121 for (int32_t i=0; i<length; i++)
01122 trees[i] = NO_CHILD;
01123 delete[] trees;
01124
01125 TreeMemPtr=0;
01126 length=0;
01127 trees=NULL;
01128 }
01129 }
01130
01131 template <class Trie> void CTrie<Trie>::set_degree(int32_t d)
01132 {
01133 delete_trees(get_use_compact_terminal_nodes());
01134 degree=d;
01135 }
01136
01137 template <class Trie> void CTrie<Trie>::create(
01138 int32_t len, bool p_use_compact_terminal_nodes)
01139 {
01140 destroy();
01141
01142 trees=new int32_t[len] ;
01143 TreeMemPtr=0 ;
01144 for (int32_t i=0; i<len; i++)
01145 trees[i]=get_node(degree==1);
01146 length = len ;
01147
01148 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01149 }
01150
01151
01152 template <class Trie> void CTrie<Trie>::delete_trees(
01153 bool p_use_compact_terminal_nodes)
01154 {
01155 if (trees==NULL)
01156 return;
01157
01158 TreeMemPtr=0 ;
01159 for (int32_t i=0; i<length; i++)
01160 trees[i]=get_node(degree==1);
01161
01162 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01163 }
01164
01165 template <class Trie>
01166 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth)
01167 {
01168 float64_t ret=0 ;
01169
01170 if (tree==NO_CHILD)
01171 return 0 ;
01172 TRIE_ASSERT(tree>=0) ;
01173
01174 if (depth==degree-2)
01175 {
01176 ret+=(TreeMem[tree].weight) ;
01177
01178 for (int32_t k=0; k<4; k++)
01179 ret+=(TreeMem[tree].child_weights[k]) ;
01180
01181 return ret ;
01182 }
01183
01184 ret+=(TreeMem[tree].weight) ;
01185
01186 for (int32_t i=0; i<4; i++)
01187 if (TreeMem[tree].children[i]!=NO_CHILD)
01188 ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1) ;
01189
01190 return ret ;
01191 }
01192
01193
01194 template <class Trie>
01195 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len)
01196 {
01197 float64_t * sum=new float64_t[length*4] ;
01198 for (int32_t i=0; i<length*4; i++)
01199 sum[i]=0 ;
01200 len=length ;
01201
01202 for (int32_t i=0; i<length; i++)
01203 {
01204 TRIE_ASSERT(trees[i]!=NO_CHILD) ;
01205 for (int32_t k=0; k<4; k++)
01206 {
01207 sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
01208 }
01209 }
01210
01211 return sum ;
01212 }
01213
01214 template <class Trie>
01215 void CTrie<Trie>::add_example_to_tree_mismatch_recursion(
01216 int32_t tree, int32_t i, float64_t alpha,
01217 int32_t *vec, int32_t len_rem,
01218 int32_t degree_rec, int32_t mismatch_rec,
01219 int32_t max_mismatch, float64_t * weights)
01220 {
01221 if (tree==NO_CHILD)
01222 tree=trees[i] ;
01223 TRIE_ASSERT(tree!=NO_CHILD) ;
01224
01225 if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
01226 return ;
01227 const int32_t other[4][3] = { {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
01228
01229 int32_t subtree = NO_CHILD ;
01230
01231 if (degree_rec==degree-1)
01232 {
01233 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01234 if (weights_in_tree)
01235 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
01236 else
01237 if (weights[degree_rec]!=0.0)
01238 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01239 if (mismatch_rec+1<=max_mismatch)
01240 for (int32_t o=0; o<3; o++)
01241 {
01242 if (weights_in_tree)
01243 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01244 else
01245 if (weights[degree_rec]!=0.0)
01246 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01247 }
01248 return ;
01249 }
01250 else
01251 {
01252 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01253 if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
01254 {
01255 subtree=TreeMem[tree].children[vec[0]] ;
01256 if (weights_in_tree)
01257 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
01258 else
01259 if (weights[degree_rec]!=0.0)
01260 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01261 }
01262 else
01263 {
01264 int32_t tmp = get_node(degree_rec==degree-2);
01265 ASSERT(tmp>=0) ;
01266 TreeMem[tree].children[vec[0]]=tmp ;
01267 subtree=tmp ;
01268 #ifdef TRIE_CHECK_EVERYTHING
01269 if (degree_rec==degree-2)
01270 TreeMem[subtree].has_floats=true ;
01271 #endif
01272 if (weights_in_tree)
01273 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
01274 else
01275 if (weights[degree_rec]!=0.0)
01276 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
01277 else
01278 TreeMem[subtree].weight = 0.0 ;
01279 }
01280 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
01281 &vec[1], len_rem-1,
01282 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
01283
01284 if (mismatch_rec+1<=max_mismatch)
01285 {
01286 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01287 for (int32_t o=0; o<3; o++)
01288 {
01289 int32_t ot = other[vec[0]][o] ;
01290 if (TreeMem[tree].children[ot]!=NO_CHILD)
01291 {
01292 subtree=TreeMem[tree].children[ot] ;
01293 if (weights_in_tree)
01294 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01295 else
01296 if (weights[degree_rec]!=0.0)
01297 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01298 }
01299 else
01300 {
01301 int32_t tmp = get_node(degree_rec==degree-2);
01302 ASSERT(tmp>=0) ;
01303 TreeMem[tree].children[ot]=tmp ;
01304 subtree=tmp ;
01305 #ifdef TRIE_CHECK_EVERYTHING
01306 if (degree_rec==degree-2)
01307 TreeMem[subtree].has_floats=true ;
01308 #endif
01309
01310 if (weights_in_tree)
01311 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
01312 else
01313 if (weights[degree_rec]!=0.0)
01314 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
01315 else
01316 TreeMem[subtree].weight = 0.0 ;
01317 }
01318
01319 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
01320 &vec[1], len_rem-1,
01321 degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
01322 }
01323 }
01324 }
01325 }
01326
01327 template <class Trie>
01328 void CTrie<Trie>::compute_scoring_helper(
01329 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
01330 int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
01331 int32_t offs, float64_t* result)
01332 {
01333 if (i+j<num_feat)
01334 {
01335 float64_t decay=1.0;
01336
01337
01338
01339 if (j<degree-1)
01340 {
01341 for (int32_t k=0; k<num_sym; k++)
01342 {
01343 if (TreeMem[tree].children[k]!=NO_CHILD)
01344 {
01345 int32_t child=TreeMem[tree].children[k];
01346
01347 if (d<max_degree-1)
01348 compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01349 else
01350 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
01351
01353 if (d==0)
01354 compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
01355 }
01356 }
01357 }
01358 else if (j==degree-1)
01359 {
01360 for (int32_t k=0; k<num_sym; k++)
01361 {
01362
01363 if (d<max_degree-1 && i<num_feat-1)
01364 compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01365 else
01366 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
01367 }
01368 }
01369 }
01370 }
01371
01372 template <class Trie>
01373 void CTrie<Trie>::traverse(
01374 int32_t tree, const int32_t p, struct TreeParseInfo info,
01375 const int32_t depth, int32_t* const x, const int32_t k)
01376 {
01377 const int32_t num_sym = info.num_sym;
01378 const int32_t y0 = info.y0;
01379 const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
01380
01381
01382
01383
01384
01385 int32_t sym;
01386 ASSERT( depth < degree );
01387
01388 if (depth<degree-1)
01389 {
01390 for( sym=0; sym<num_sym; ++sym ) {
01391 const int32_t childNum = TreeMem[tree].children[ sym ];
01392 if( childNum != NO_CHILD ) {
01393 int32_t child = childNum ;
01394 x[depth] = sym;
01395 info.substrs[depth+1] = y0 + sym;
01396 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01397
01398 count( TreeMem[child].weight, depth, info, p, x, k );
01399 traverse( child, p, info, depth+1, x, k );
01400 x[depth] = -1;
01401 }
01402 }
01403 }
01404 else if( depth == degree-1 )
01405 {
01406 for( sym=0; sym<num_sym; ++sym ) {
01407 const float64_t w = TreeMem[tree].child_weights[ sym ];
01408 if( w != 0.0 ) {
01409 x[depth] = sym;
01410 info.substrs[depth+1] = y0 + sym;
01411 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01412
01413 count( w, depth, info, p, x, k );
01414 x[depth] = -1;
01415 }
01416 }
01417 }
01418
01419
01420 }
01421
01422 template <class Trie>
01423 void CTrie<Trie>::count(
01424 const float64_t w, const int32_t depth, const struct TreeParseInfo info,
01425 const int32_t p, int32_t* x, const int32_t k)
01426 {
01427 ASSERT( fabs(w) < 1e10 );
01428 ASSERT( x[depth] >= 0 );
01429 ASSERT( x[depth+1] < 0 );
01430 if ( depth < k ) {
01431 return;
01432 }
01433
01434 const int32_t nofKmers = info.nofsKmers[k];
01435 const float64_t margWeight = w * info.margFactors[ depth-k ];
01436 const int32_t m_a = depth - k + 1;
01437 const int32_t m_b = info.num_feat - p;
01438 const int32_t m = ( m_a < m_b ) ? m_a : m_b;
01439
01440 const int32_t offset0 = nofKmers * p;
01441 register int32_t i;
01442 register int32_t offset;
01443 offset = offset0;
01444 for( i = 0; i < m; ++i ) {
01445 const int32_t y = info.substrs[i+k+1];
01446 info.C_k[ y + offset ] += margWeight;
01447 offset += nofKmers;
01448 }
01449 if( depth > k ) {
01450
01451 const int32_t offsR = info.substrs[k+1] + offset0;
01452 info.R_k[offsR] += margWeight;
01453
01454 if( p+depth-k < info.num_feat ) {
01455 const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
01456 info.L_k[offsL] += margWeight;
01457 }
01458 }
01459
01460
01461
01462
01463
01464
01465
01466
01467
01468
01469
01470
01471
01472
01473
01474 }
01475
01476 template <class Trie>
01477 void CTrie<Trie>::add_to_trie(
01478 int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha,
01479 float64_t *weights, bool degree_times_position_weights)
01480 {
01481 int32_t tree = trees[i] ;
01482
01483
01484 int32_t max_depth = 0 ;
01485 float64_t* weights_column ;
01486 if (degree_times_position_weights)
01487 weights_column = &weights[(i+seq_offset)*degree] ;
01488 else
01489 weights_column = weights ;
01490
01491 if (weights_in_tree)
01492 {
01493 for (int32_t j=0; (j<degree) && (i+j<length); j++)
01494 if (CMath::abs(weights_column[j]*alpha)>0)
01495 max_depth = j+1 ;
01496 }
01497 else
01498
01499 max_depth=degree ;
01500
01501 for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
01502 {
01503 TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ;
01504 if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
01505 {
01506 if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
01507 {
01508
01509 TRIE_ASSERT(j >= degree-16) ;
01510
01511 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01512 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01513 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ;
01514
01515 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ;
01516 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ;
01517 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ;
01518
01519
01520 int32_t mismatch_pos = -1 ;
01521 {
01522 int32_t k ;
01523 for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
01524 {
01525 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01526
01527 if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
01528 fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
01529 TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ;
01530 TRIE_ASSERT(k<16) ;
01531 if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
01532 {
01533 mismatch_pos=k ;
01534 break ;
01535 }
01536 }
01537 }
01538
01539
01540
01541 if (mismatch_pos==-1)
01542
01543 TreeMem[node].weight+=alpha ;
01544 else
01545
01546
01547
01548
01549 {
01550
01551 int32_t last_node=tree ;
01552
01553
01554 int32_t k ;
01555 for (k=0; k<mismatch_pos; k++)
01556 {
01557 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01558 TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ;
01559
01560 int32_t tmp=get_node();
01561 TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
01562 last_node=tmp ;
01563 if (weights_in_tree)
01564 TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
01565 else
01566 TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
01567 TRIE_ASSERT(j+k!=degree-1) ;
01568 }
01569 if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
01570 fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
01571 ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ;
01572 TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ;
01573
01574 if (j+k==degree-1)
01575 {
01576
01577
01578
01579 for (int32_t q=0; q<4; q++)
01580 TreeMem[last_node].child_weights[q]=0.0 ;
01581 if (weights_in_tree)
01582 {
01583 if (TreeMem[node].seq[mismatch_pos]<4)
01584 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
01585 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
01586 }
01587 else
01588 {
01589 if (TreeMem[node].seq[mismatch_pos]<4)
01590 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
01591 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
01592 }
01593
01594 #ifdef TRIE_CHECK_EVERYTHING
01595 TreeMem[last_node].has_floats=true ;
01596 #endif
01597 }
01598 else
01599 {
01600
01601 if (TreeMem[node].seq[mismatch_pos]<4)
01602 {
01603 TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
01604
01605
01606 for (int32_t q=0; q<16; q++)
01607 {
01608 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
01609 TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
01610 else
01611 TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
01612 }
01613 #ifdef TRIE_CHECK_EVERYTHING
01614 TreeMem[node].has_seq=true ;
01615 #endif
01616 }
01617
01618
01619 TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ;
01620 int32_t tmp = get_node() ;
01621 TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
01622 last_node=tmp ;
01623
01624 TreeMem[last_node].weight = alpha ;
01625 #ifdef TRIE_CHECK_EVERYTHING
01626 TreeMem[last_node].has_seq = true ;
01627 #endif
01628 memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01629 for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
01630 TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
01631 }
01632 }
01633 break ;
01634 }
01635 else
01636 {
01637 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
01638 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01639 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01640 if (weights_in_tree)
01641 TreeMem[tree].weight += alpha*weights_column[j];
01642 else
01643 TreeMem[tree].weight += alpha ;
01644 }
01645 }
01646 else if (j==degree-1)
01647 {
01648
01649 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01650 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01651 if (weights_in_tree)
01652 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
01653 else
01654 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
01655
01656 break;
01657 }
01658 else
01659 {
01660 bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
01661 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01662 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01663
01664 int32_t tmp = get_node((j==degree-2) && (!use_seq));
01665 if (use_seq)
01666 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
01667 else
01668 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
01669 tree=tmp ;
01670
01671 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01672 #ifdef TRIE_CHECK_EVERYTHING
01673 TreeMem[tree].has_seq = use_seq ;
01674 #endif
01675 if (use_seq)
01676 {
01677 TreeMem[tree].weight = alpha ;
01678
01679 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01680 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
01681 {
01682 TRIE_ASSERT(q<16) ;
01683 TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
01684 }
01685 break ;
01686 }
01687 else
01688 {
01689 if (weights_in_tree)
01690 TreeMem[tree].weight = alpha*weights_column[j] ;
01691 else
01692 TreeMem[tree].weight = alpha ;
01693 #ifdef TRIE_CHECK_EVERYTHING
01694 if (j==degree-2)
01695 TreeMem[tree].has_floats = true ;
01696 #endif
01697 }
01698 }
01699 }
01700 }
01701
01702 template <class Trie>
01703 float64_t CTrie<Trie>::compute_by_tree_helper(
01704 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01705 int32_t weight_pos, float64_t* weights,
01706 bool degree_times_position_weights)
01707 {
01708 int32_t tree = trees[tree_pos] ;
01709
01710 if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
01711 return 0.0;
01712
01713 float64_t *weights_column=NULL ;
01714 if (degree_times_position_weights)
01715 weights_column=&weights[weight_pos*degree] ;
01716 else
01717 weights_column=weights ;
01718
01719 float64_t sum=0 ;
01720 for (int32_t j=0; seq_pos+j < len; j++)
01721 {
01722 TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ;
01723
01724 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01725 {
01726 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01727 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01728 {
01729 tree = - TreeMem[tree].children[vec[seq_pos+j]];
01730 TRIE_ASSERT(tree>=0) ;
01731 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01732 float64_t this_weight=0.0 ;
01733 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01734 {
01735 TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ;
01736 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01737 break ;
01738 this_weight += weights_column[j+k] ;
01739 }
01740 sum += TreeMem[tree].weight * this_weight ;
01741 break ;
01742 }
01743 else
01744 {
01745 tree=TreeMem[tree].children[vec[seq_pos+j]];
01746 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01747 if (weights_in_tree)
01748 sum += TreeMem[tree].weight ;
01749 else
01750 sum += TreeMem[tree].weight * weights_column[j] ;
01751 } ;
01752 }
01753 else
01754 {
01755 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01756 if (j==degree-1)
01757 {
01758 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01759 if (weights_in_tree)
01760 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01761 else
01762 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
01763 }
01764 else
01765 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01766
01767 break;
01768 }
01769 }
01770
01771 if (position_weights!=NULL)
01772 return sum*position_weights[weight_pos] ;
01773 else
01774 return sum ;
01775 }
01776
01777 template <class Trie>
01778 void CTrie<Trie>::compute_by_tree_helper(
01779 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01780 int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
01781 int32_t mkl_stepsize, float64_t * weights,
01782 bool degree_times_position_weights)
01783 {
01784 int32_t tree = trees[tree_pos] ;
01785 if (factor==0)
01786 return ;
01787
01788 if (position_weights!=NULL)
01789 {
01790 factor *= position_weights[weight_pos] ;
01791 if (factor==0)
01792 return ;
01793 if (!degree_times_position_weights)
01794 {
01795 for (int32_t j=0; seq_pos+j<len; j++)
01796 {
01797 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01798 {
01799 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01800 {
01801 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01802 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01803 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01804 {
01805 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01806 break ;
01807 if (weights_in_tree)
01808 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01809 else
01810 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01811 }
01812 break ;
01813 }
01814 else
01815 {
01816 tree=TreeMem[tree].children[vec[seq_pos+j]];
01817 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01818 if (weights_in_tree)
01819 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01820 else
01821 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01822 }
01823 }
01824 else
01825 {
01826 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01827 if (j==degree-1)
01828 {
01829 if (weights_in_tree)
01830 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01831 else
01832 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01833 }
01834 }
01835 }
01836 }
01837 else
01838 {
01839 for (int32_t j=0; seq_pos+j<len; j++)
01840 {
01841 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01842 {
01843 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01844 {
01845 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01846 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01847 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01848 {
01849 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01850 break ;
01851 if (weights_in_tree)
01852 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01853 else
01854 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01855 }
01856 break ;
01857 }
01858 else
01859 {
01860 tree=TreeMem[tree].children[vec[seq_pos+j]];
01861 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01862 if (weights_in_tree)
01863 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01864 else
01865 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
01866 }
01867 }
01868 else
01869 {
01870 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01871 if (j==degree-1)
01872 {
01873 if (weights_in_tree)
01874 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01875 else
01876 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
01877 }
01878 break ;
01879 }
01880 }
01881 }
01882 }
01883 else if (!degree_times_position_weights)
01884 {
01885 for (int32_t j=0; seq_pos+j<len; j++)
01886 {
01887 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01888 {
01889 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01890 {
01891 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01892 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01893 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01894 {
01895 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01896 break ;
01897 if (weights_in_tree)
01898 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01899 else
01900 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01901 }
01902 break ;
01903 }
01904 else
01905 {
01906 tree=TreeMem[tree].children[vec[seq_pos+j]];
01907 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01908 if (weights_in_tree)
01909 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
01910 else
01911 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01912 }
01913 }
01914 else
01915 {
01916 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01917 if (j==degree-1)
01918 {
01919 if (weights_in_tree)
01920 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01921 else
01922 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01923 }
01924 break ;
01925 }
01926 }
01927 }
01928 else
01929 {
01930
01931
01932
01933
01934
01935
01936
01937
01938
01939
01940
01941
01942
01943
01944
01945
01946
01947
01948 for (int32_t j=0; seq_pos+j<len; j++)
01949 {
01950 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01951 {
01952 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01953 {
01954 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01955 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01956 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01957 {
01958 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01959 break ;
01960 if (weights_in_tree)
01961 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01962 else
01963 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01964 }
01965 break ;
01966 }
01967 else
01968 {
01969 tree=TreeMem[tree].children[vec[seq_pos+j]];
01970 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01971 if (weights_in_tree)
01972 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
01973 else
01974 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
01975 }
01976 }
01977 else
01978 {
01979 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01980 if (j==degree-1)
01981 {
01982 if (weights_in_tree)
01983 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01984 else
01985 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
01986 }
01987 break ;
01988 }
01989 }
01990 }
01991 }
01992
01993 template <class Trie>
01994 void CTrie<Trie>::fill_backtracking_table_recursion(
01995 Trie* tree, int32_t depth, uint64_t seq, float64_t value,
01996 CDynamicArray<ConsensusEntry>* table, float64_t* weights)
01997 {
01998 float64_t w=1.0;
01999
02000 if (weights_in_tree || depth==0)
02001 value+=tree->weight;
02002 else
02003 {
02004 w=weights[degree-1];
02005 value+=weights[depth-1]*tree->weight;
02006 }
02007
02008 if (degree-1==depth)
02009 {
02010 for (int32_t sym=0; sym<4; sym++)
02011 {
02012 float64_t v=w*tree->child_weights[sym];
02013 if (v!=0.0)
02014 {
02015 ConsensusEntry entry;
02016 entry.bt=-1;
02017 entry.score=value+v;
02018 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02019
02020 table->append_element(entry);
02021 }
02022 }
02023 }
02024 else
02025 {
02026 for (int32_t sym=0; sym<4; sym++)
02027 {
02028 uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02029 if (tree->children[sym] != NO_CHILD)
02030 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
02031 }
02032 }
02033 }
02034
02035 template <class Trie>
02036 float64_t CTrie<Trie>::get_cumulative_score(
02037 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights)
02038 {
02039 float64_t result=0.0;
02040
02041
02042
02043 for (int32_t i=pos; i<pos+deg && i<length; i++)
02044 {
02045
02046 Trie* tree = &TreeMem[trees[i]];
02047
02048 for (int32_t d=0; d<deg-i+pos; d++)
02049 {
02050
02051 ASSERT(d-1<degree);
02052 int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
02053
02054 float64_t w=1.0;
02055 if (!weights_in_tree)
02056 w=weights[d];
02057
02058 ASSERT(tree->children[sym] != NO_CHILD);
02059 tree=&TreeMem[tree->children[sym]];
02060 result+=w*tree->weight;
02061 }
02062 }
02063
02064 return result;
02065 }
02066
02067 template <class Trie>
02068 void CTrie<Trie>::fill_backtracking_table(
02069 int32_t pos, CDynamicArray<ConsensusEntry>* prev,
02070 CDynamicArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights)
02071 {
02072 ASSERT(pos>=0 && pos<length);
02073 ASSERT(!use_compact_terminal_nodes);
02074
02075 Trie* t = &TreeMem[trees[pos]];
02076
02077 fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights);
02078
02079
02080 if (cumulative)
02081 {
02082 int32_t num_cur=cur->get_num_elements();
02083 for (int32_t i=0; i<num_cur; i++)
02084 {
02085 ConsensusEntry entry=cur->get_element(i);
02086 entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
02087 cur->set_element(entry,i);
02088
02089 }
02090 }
02091
02092
02093
02094 if (prev)
02095 {
02096 int32_t num_cur=cur->get_num_elements();
02097 int32_t num_prev=prev->get_num_elements();
02098
02099 for (int32_t i=0; i<num_cur; i++)
02100 {
02101
02102 uint64_t str_cur= cur->get_element(i).string >> 2;
02103
02104
02105 int32_t bt=-1;
02106 float64_t max_score=0.0;
02107
02108 for (int32_t j=0; j<num_prev; j++)
02109 {
02110
02111 uint64_t mask=
02112 ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1))));
02113 uint64_t str_prev= mask & prev->get_element(j).string;
02114
02115
02116 if (str_cur == str_prev)
02117 {
02118 float64_t sc=prev->get_element(j).score+cur->get_element(i).score;
02119 if (bt==-1 || sc>max_score)
02120 {
02121 bt=j;
02122 max_score=sc;
02123
02124
02125 }
02126 }
02127 }
02128
02129 ASSERT(bt!=-1);
02130 ConsensusEntry entry;
02131 entry.bt=bt;
02132 entry.score=max_score;
02133 entry.string=cur->get_element(i).string;
02134 cur->set_element(entry, i);
02135
02136 }
02137 }
02138 }
02139 }
02140 #endif // _TRIE_H___