Trie.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2009 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _TRIE_H___
00013 #define _TRIE_H___
00014 
00015 #include <string.h>
00016 #include "lib/common.h"
00017 #include "lib/io.h"
00018 #include "lib/DynamicArray.h"
00019 #include "lib/Mathematics.h"
00020 #include "base/SGObject.h"
00021 
00022 namespace shogun
00023 {
00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00025 
00026 // sentinel is 0xFFFFFFFC or float -2
00027 #define NO_CHILD ((int32_t)-1073741824)
00028 
00029 #define WEIGHTS_IN_TRIE 
00030 //#define TRIE_CHECK_EVERYTHING
00031 
00032 #ifdef TRIE_CHECK_EVERYTHING
00033 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
00034 #else
00035 #define TRIE_ASSERT_EVERYTHING(x) 
00036 #endif
00037 
00038 //#define TRIE_ASSERT(x) ASSERT(x)
00039 #define TRIE_ASSERT(x) 
00040 
00041 #define TRIE_TERMINAL_CHARACTER  7
00042 
00044 struct ConsensusEntry
00045 {
00047     uint64_t string;
00049     float32_t score;
00051     int32_t bt;
00052 };
00053 
00055 struct POIMTrie
00056 {
00058     float64_t weight;
00059 #ifdef TRIE_CHECK_EVERYTHING
00060 
00061     bool has_seq;
00063     bool has_floats;
00064 #endif      
00065     union
00066     {
00068         float32_t child_weights[4];
00070         int32_t children[4];
00072         uint8_t seq[16] ;
00073     }; 
00074 
00076     float64_t S;
00078     float64_t L;
00080     float64_t R;
00081 };
00082 
00084 struct DNATrie
00085 {
00087     float64_t weight;
00088 #ifdef TRIE_CHECK_EVERYTHING
00089 
00090     bool has_seq;
00092     bool has_floats;
00093 #endif
00094     union
00095     {
00097         float32_t child_weights[4];
00099         int32_t children[4];
00101         uint8_t seq[16] ;
00102     }; 
00103 };
00104 
00106 struct TreeParseInfo {
00108     int32_t num_sym;
00110     int32_t num_feat;
00112     int32_t p;
00114     int32_t k;
00116     int32_t* nofsKmers;
00118     float64_t* margFactors;
00120     int32_t* x;
00122     int32_t* substrs;
00124     int32_t y0;
00126     float64_t* C_k;
00128     float64_t* L_k;
00130     float64_t* R_k;
00131 };
00132 
00133 #endif // DOXYGEN_SHOULD_SKIP_THIS
00134 
00135 template <class Trie> class CTrie;
00136 
00155 template <class Trie> class CTrie : public CSGObject
00156 {
00157     public:
00164         CTrie(int32_t d, bool p_use_compact_terminal_nodes=true);
00165 
00167         CTrie(const CTrie & to_copy);
00168         virtual ~CTrie();
00169 
00171         const CTrie & operator=(const CTrie & to_copy);
00172 
00180         bool compare_traverse(
00181             int32_t node, const CTrie & other, int32_t other_node);
00182 
00188         bool compare(const CTrie & other);
00189 
00196         bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const;
00197 
00204         int32_t find_deepest_node(
00205             int32_t start_node, int32_t &deepest_node) const;
00206 
00211         void display_node(int32_t node) const;
00212 
00214         void destroy();
00215 
00220         void set_degree(int32_t d);
00221 
00228         void create(int32_t len, bool p_use_compact_terminal_nodes=true);
00229 
00235         void delete_trees(bool p_use_compact_terminal_nodes=true);
00236 
00247         void add_to_trie(
00248             int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha,
00249             float64_t *weights, bool degree_times_position_weights);
00250 
00257         float64_t compute_abs_weights_tree(int32_t tree, int32_t depth);
00258 
00264         float64_t* compute_abs_weights(int32_t &len);
00265 
00278         float64_t compute_by_tree_helper(
00279             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00280             int32_t weight_pos, float64_t * weights,
00281             bool degree_times_position_weights) ;
00282 
00297         void compute_by_tree_helper(
00298             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00299             int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
00300             int32_t mkl_stepsize, float64_t * weights,
00301             bool degree_times_position_weights);
00302 
00317         void compute_scoring_helper(
00318             int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
00319             int32_t max_degree, int32_t num_feat, int32_t num_sym,
00320             int32_t sym_offset, int32_t offs, float64_t* result);
00321 
00334         void add_example_to_tree_mismatch_recursion(
00335             int32_t tree,  int32_t i, float64_t alpha, int32_t *vec,
00336             int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
00337             int32_t max_mismatch, float64_t * weights);
00338 
00348         void traverse(
00349             int32_t tree, const int32_t p, struct TreeParseInfo info,
00350             const int32_t depth, int32_t* const x, const int32_t k);
00351 
00361         void count(
00362             const float64_t w, const int32_t depth,
00363             const struct TreeParseInfo info, const int32_t p, int32_t* x,
00364             const int32_t k);
00365 
00372         int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights);
00373 
00382         float64_t get_cumulative_score(
00383             int32_t pos, uint64_t seq, int32_t deg, float64_t* weights);
00384 
00394         void fill_backtracking_table_recursion(
00395             Trie* tree, int32_t depth, uint64_t seq, float64_t value,
00396             CDynamicArray<ConsensusEntry>* table, float64_t* weights);
00397 
00406         void fill_backtracking_table(
00407             int32_t pos, CDynamicArray<ConsensusEntry>* prev,
00408             CDynamicArray<ConsensusEntry>* cur, bool cumulative,
00409             float64_t* weights);
00410 
00416         void POIMs_extract_W(float64_t* const* const W, const int32_t K);
00417 
00422         void POIMs_precalc_SLR(const float64_t* const distrib);
00423 
00433         void POIMs_get_SLR(
00434             const int32_t parentIdx, const int32_t sym, const int32_t depth,
00435             float64_t* S, float64_t* L, float64_t* R);
00436 
00443         void POIMs_add_SLR(
00444             float64_t* const* const poims, const int32_t K,
00445             const int32_t debug);
00446 
00451         inline bool get_use_compact_terminal_nodes()
00452         {
00453             return use_compact_terminal_nodes ;
00454         }
00455 
00461         inline void set_use_compact_terminal_nodes(
00462             bool p_use_compact_terminal_nodes)
00463         {
00464             use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
00465         }
00466 
00471         inline int32_t get_num_used_nodes()
00472         {
00473             return TreeMemPtr;
00474         }
00475 
00480         inline void set_position_weights(const float64_t * p_position_weights)
00481         {
00482             position_weights=p_position_weights;
00483         }
00484 
00489         inline int32_t get_node(bool last_node=false)
00490         {
00491             int32_t ret = TreeMemPtr++;
00492             check_treemem() ;
00493 
00494             if (last_node)
00495             {
00496                 for (int32_t q=0; q<4; q++)
00497                     TreeMem[ret].child_weights[q]=0.0;
00498             }
00499             else
00500             {
00501                 for (int32_t q=0; q<4; q++)
00502                     TreeMem[ret].children[q]=NO_CHILD;
00503             }
00504 #ifdef TRIE_CHECK_EVERYTHING
00505             TreeMem[ret].has_seq=false ;
00506             TreeMem[ret].has_floats=false ;
00507 #endif
00508             TreeMem[ret].weight=0.0;
00509             return ret ;
00510         }
00511 
00513         inline void check_treemem()
00514         {
00515             if (TreeMemPtr+10 < TreeMemPtrMax)
00516                 return;
00517             SG_DEBUG( "Extending TreeMem from %i to %i elements\n",
00518                     TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2));
00519             TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2);
00520             TreeMem = (Trie*) realloc(TreeMem,
00521                     TreeMemPtrMax*sizeof(Trie));
00522             if (!TreeMem)
00523                 SG_ERROR( "out of memory\n");
00524         }
00525 
00530         inline void set_weights_in_tree(bool weights_in_tree_)
00531         {
00532             weights_in_tree = weights_in_tree_;
00533         }
00534 
00539         inline bool get_weights_in_tree()
00540         {
00541             return weights_in_tree;
00542         }
00543 
00553         void POIMs_extract_W_helper(
00554             const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00555             const int32_t y0, float64_t* const* const W, const int32_t K);
00556 
00569         void POIMs_calc_SLR_helper1(
00570             const float64_t* const distrib, const int32_t i,
00571             const int32_t nodeIdx, int32_t left_tries_idx[4],
00572             const int32_t depth, int32_t const lastSym, float64_t* S,
00573             float64_t* L, float64_t* R);
00574 
00575 
00586         void POIMs_calc_SLR_helper2(
00587             const float64_t* const distrib, const int32_t i,
00588             const int32_t nodeIdx, int32_t left_tries_idx[4],
00589             const int32_t depth, float64_t* S, float64_t* L, float64_t* R);
00590 
00601         void POIMs_add_SLR_helper1(
00602             const int32_t nodeIdx, const int32_t depth,const int32_t i,
00603             const int32_t y0, float64_t* const* const poims, const int32_t K,
00604             const int32_t debug);
00605 
00619         void POIMs_add_SLR_helper2(
00620             float64_t* const* const poims, const int32_t K, const int32_t k,
00621             const int32_t i, const int32_t y, const float64_t valW,
00622             const float64_t valS, const float64_t valL, const float64_t valR,
00623             const int32_t debug);
00624 
00626         inline virtual const char* get_name() const { return "Trie"; }
00627 
00628     public:
00630         int32_t NUM_SYMS;
00631 
00632     protected:
00634         int32_t length;
00636         int32_t * trees;
00637 
00639         int32_t degree;
00641         float64_t const *  position_weights;
00642 
00644         Trie* TreeMem;
00646         int32_t TreeMemPtr;
00648         int32_t TreeMemPtrMax;
00650         bool use_compact_terminal_nodes;
00651 
00653         bool weights_in_tree;
00654 
00656         int32_t* nofsKmers;
00657 };
00658 
00659     template <class Trie>
00660     CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes)
00661     : CSGObject(), degree(d), position_weights(NULL),
00662         use_compact_terminal_nodes(p_use_compact_terminal_nodes),
00663         weights_in_tree(true)
00664     {
00665         TreeMemPtrMax=1024*1024/sizeof(Trie);
00666         TreeMemPtr=0;
00667         TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00668 
00669         length=0;
00670         trees=NULL;
00671 
00672         NUM_SYMS=4;
00673     }
00674 
00675     template <class Trie>
00676     CTrie<Trie>::CTrie(const CTrie & to_copy)
00677     : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
00678         use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
00679     {
00680         if (to_copy.position_weights!=NULL)
00681         {
00682             position_weights = to_copy.position_weights;
00683             /*new float64_t[to_copy.length];
00684               for (int32_t i=0; i<to_copy.length; i++)
00685               position_weights[i]=to_copy.position_weights[i]; */
00686         }
00687         else
00688             position_weights=NULL;
00689 
00690         TreeMemPtrMax=to_copy.TreeMemPtrMax;
00691         TreeMemPtr=to_copy.TreeMemPtr;
00692         TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00693         memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie));
00694 
00695         length=to_copy.length;
00696         trees=new int32_t[length];
00697         for (int32_t i=0; i<length; i++)
00698             trees[i]=to_copy.trees[i];
00699 
00700         NUM_SYMS=4;
00701     }
00702 
00703     template <class Trie>
00704 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy)
00705 {
00706     degree=to_copy.degree ;
00707     use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ;
00708 
00709     delete[] position_weights ;
00710     position_weights=NULL ;
00711     if (to_copy.position_weights!=NULL)
00712     {
00713         position_weights=to_copy.position_weights ;
00714         /*position_weights = new float64_t[to_copy.length] ;
00715           for (int32_t i=0; i<to_copy.length; i++)
00716           position_weights[i]=to_copy.position_weights[i] ;*/
00717     }
00718     else
00719         position_weights=NULL ;
00720 
00721     TreeMemPtrMax=to_copy.TreeMemPtrMax ;
00722     TreeMemPtr=to_copy.TreeMemPtr ;
00723     free(TreeMem) ;
00724     TreeMem = (Trie*)malloc(TreeMemPtrMax*sizeof(Trie)) ;
00725     memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ;
00726 
00727     length = to_copy.length ;
00728     if (trees)
00729         delete[] trees ;
00730     trees=new int32_t[length] ;     
00731     for (int32_t i=0; i<length; i++)
00732         trees[i]=to_copy.trees[i] ;
00733 
00734     return *this ;
00735 }
00736 
00737 template <class Trie>
00738 int32_t CTrie<Trie>::find_deepest_node(
00739     int32_t start_node, int32_t& deepest_node) const
00740 {
00741 #ifdef TRIE_CHECK_EVERYTHING
00742     int32_t ret=0 ;
00743     SG_DEBUG("start_node=%i\n", start_node) ;
00744 
00745     if (start_node==NO_CHILD) 
00746     {
00747         for (int32_t i=0; i<length; i++)
00748         {
00749             int32_t my_deepest_node ;
00750             int32_t depth=find_deepest_node(i, my_deepest_node) ;
00751             SG_DEBUG("start_node %i depth=%i\n", i, depth) ;
00752             if (depth>ret)
00753             {
00754                 deepest_node=my_deepest_node ;
00755                 ret=depth ;
00756             }
00757         }
00758         return ret ;
00759     }
00760 
00761     if (TreeMem[start_node].has_seq)
00762     {
00763         for (int32_t q=0; q<16; q++)
00764             if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
00765                 ret++ ;
00766         deepest_node=start_node ;
00767         return ret ;
00768     }
00769     if (TreeMem[start_node].has_floats)
00770     {
00771         deepest_node=start_node ;
00772         return 1 ;
00773     }
00774 
00775     for (int32_t q=0; q<4; q++)
00776     {
00777         int32_t my_deepest_node ;
00778         if (TreeMem[start_node].children[q]==NO_CHILD)
00779             continue ;
00780         int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
00781         if (depth>ret)
00782         {
00783             deepest_node=my_deepest_node ;
00784             ret=depth ;
00785         }
00786     }
00787     return ret ;
00788 #else
00789     SG_ERROR( "not implemented\n") ;
00790     return 0 ;
00791 #endif
00792 }
00793 
00794     template <class Trie>
00795 int32_t CTrie<Trie>::compact_nodes(
00796     int32_t start_node, int32_t depth, float64_t * weights) 
00797 {
00798     SG_ERROR( "code buggy\n") ;
00799 
00800     int32_t ret=0 ;
00801 
00802     if (start_node==NO_CHILD) 
00803     {
00804         for (int32_t i=0; i<length; i++)
00805             compact_nodes(i,1, weights) ;
00806         return 0 ;
00807     }
00808     if (start_node<0)
00809         return -1 ;
00810 
00811     if (depth==degree-1)
00812     {
00813         TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ;
00814         int32_t num_used=0 ;
00815         for (int32_t q=0; q<4; q++)
00816             if (TreeMem[start_node].child_weights[q]!=0.0)
00817                 num_used++ ;
00818         if (num_used>1)
00819             return -1 ;
00820         return 1 ;
00821     }
00822     TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ;
00823 
00824     int32_t num_used = 0 ;
00825     int32_t q_used=-1 ;
00826 
00827     for (int32_t q=0; q<4; q++)
00828     {
00829         if (TreeMem[start_node].children[q]==NO_CHILD)
00830             continue ;
00831         num_used++ ;
00832         q_used=q ;
00833     }
00834     if (num_used>1)
00835     {
00836         if (depth>=degree-2)
00837             return -1 ;
00838         for (int32_t q=0; q<4; q++)
00839         {
00840             if (TreeMem[start_node].children[q]==NO_CHILD)
00841                 continue ;
00842             int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
00843             if (num<=2)
00844                 continue ;
00845             int32_t node=get_node() ;
00846 
00847             int32_t last_node=TreeMem[start_node].children[q] ;
00848             if (weights_in_tree)
00849             {
00850                 ASSERT(weights[depth]!=0.0) ;
00851                 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
00852             }
00853             else
00854                 TreeMem[node].weight=TreeMem[last_node].weight ;
00855 
00856 #ifdef TRIE_CHECK_EVERYTHING
00857             TreeMem[node].has_seq=true ;
00858 #endif
00859             memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
00860             for (int32_t n=0; n<num; n++)
00861             {
00862                 ASSERT(depth+n+1<=degree-1) ;
00863                 ASSERT(last_node!=NO_CHILD) ;
00864                 if (depth+n+1==degree-1)
00865                 {
00866                     TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ;
00867                     int32_t  k ;
00868                     for (k=0; k<4; k++)
00869                         if (TreeMem[last_node].child_weights[k]!=0.0)
00870                             break ;
00871                     if (k==4)
00872                         break ;
00873                     TreeMem[node].seq[n]=k ;
00874                     break ;
00875                 }
00876                 else
00877                 {
00878                     TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ;
00879                     int32_t k ;
00880                     for (k=0; k<4; k++)
00881                         if (TreeMem[last_node].children[k]!=NO_CHILD)
00882                             break ;
00883                     if (k==4)
00884                         break ;
00885                     TreeMem[node].seq[n]=k ;
00886                     last_node=TreeMem[last_node].children[k] ;
00887                 }
00888             }
00889             TreeMem[start_node].children[q]=-node ;
00890         }
00891         return -1 ;
00892     }
00893     if (num_used==0)
00894         return 0 ;
00895 
00896     ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
00897     if (ret<0)
00898         return ret ;
00899     return ret+1 ;
00900 }
00901 
00902 
00903     template <class Trie>
00904 bool CTrie<Trie>::compare_traverse(
00905     int32_t node, const CTrie<Trie> & other, int32_t other_node)
00906 {
00907     SG_DEBUG("checking nodes %i and %i\n", node, other_node) ;
00908     if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5)
00909     {
00910         SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ;
00911         SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00912         display_node(node) ;
00913         SG_DEBUG( "============================================================\n") ;           
00914         other.display_node(other_node) ;
00915         SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00916         return false ;
00917     }
00918 
00919 #ifdef TRIE_CHECK_EVERYTHING
00920     if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq)
00921     {
00922         SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ;
00923         SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00924         display_node(node) ;
00925         SG_DEBUG( "============================================================\n") ;           
00926         other.display_node(other_node) ;
00927         SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00928         return false ;
00929     }
00930     if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats)
00931     {
00932         SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ;
00933         return false ;
00934     }
00935     if (other.TreeMem[other_node].has_floats)
00936     {
00937         for (int32_t q=0; q<4; q++)
00938             if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5)
00939             {
00940                 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ;
00941                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00942                 display_node(node) ;
00943                 SG_DEBUG( "============================================================\n") ;           
00944                 other.display_node(other_node) ;
00945                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00946                 return false ;
00947             }
00948     }
00949     if (other.TreeMem[other_node].has_seq)
00950     {
00951         for (int32_t q=0; q<16; q++)
00952             if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4)))
00953             {
00954                 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ;
00955                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00956                 display_node(node) ;
00957                 SG_DEBUG( "============================================================\n") ;           
00958                 other.display_node(other_node) ;
00959                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00960                 return false ;
00961             }
00962     }
00963     if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats)
00964     {
00965         for (int32_t q=0; q<4; q++)
00966         {
00967             if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD))
00968                 continue ;
00969             if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD))
00970             {
00971                 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ;
00972                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00973                 display_node(node) ;
00974                 SG_DEBUG( "============================================================\n") ;           
00975                 other.display_node(other_node) ;
00976                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00977                 return false ;
00978             }
00979             if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q])))
00980                 return false ;
00981         }
00982     }
00983 #else
00984     SG_ERROR( "not implemented\n") ;
00985 #endif
00986 
00987     return true ;
00988 }
00989 
00990     template <class Trie>
00991 bool CTrie<Trie>::compare(const CTrie<Trie> & other)
00992 {
00993     bool ret=true ;
00994     for (int32_t i=0; i<length; i++)
00995         if (!compare_traverse(trees[i], other, other.trees[i]))
00996             return false ;
00997         else
00998             SG_DEBUG("two tries at %i identical\n", i) ;
00999 
01000     return ret ;
01001 }
01002 
01003 template <class Trie>
01004 bool CTrie<Trie>::find_node(
01005     int32_t node, int32_t * trace, int32_t& trace_len) const 
01006 {
01007 #ifdef TRIE_CHECK_EVERYTHING
01008     ASSERT(trace_len-1>=0) ;
01009     ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
01010         if (TreeMem[trace[trace_len-1]].has_seq)
01011             return false ;
01012     if (TreeMem[trace[trace_len-1]].has_floats)
01013         return false ;
01014 
01015     for (int32_t q=0; q<4; q++)
01016     {
01017         if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
01018             continue ;
01019         int32_t tl=trace_len+1 ;
01020         if (TreeMem[trace[trace_len-1]].children[q]>=0)
01021             trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
01022         else
01023             trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
01024 
01025         if (trace[trace_len]==node)
01026         {
01027             trace_len=tl ;
01028             return true ;
01029         }
01030         if (find_node(node, trace, tl))
01031         {
01032             trace_len=tl ;
01033             return true ;
01034         }
01035     }
01036     trace_len=0 ;
01037     return false ;
01038 #else
01039     SG_ERROR( "not implemented\n") ;
01040     return false ;
01041 #endif
01042 }
01043 
01044 template <class Trie>
01045 void CTrie<Trie>::display_node(int32_t node) const
01046 {
01047 #ifdef TRIE_CHECK_EVERYTHING
01048     int32_t * trace=new int32_t[2*degree] ;
01049     int32_t trace_len=-1 ;
01050     bool found = false ;
01051     int32_t tree=-1 ;
01052     for (tree=0; tree<length; tree++)
01053     {
01054         trace[0]=trees[tree] ;
01055         trace_len=1 ;
01056         found=find_node(node, trace, trace_len) ;
01057         if (found)
01058             break ;
01059     }
01060     ASSERT(found) ;
01061     SG_PRINT( "position %i  trace: ", tree) ;
01062 
01063     for (int32_t i=0; i<trace_len-1; i++)
01064     {
01065         int32_t branch=-1 ;
01066         for (int32_t q=0; q<4; q++)
01067             if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
01068             {
01069                 branch=q;
01070                 break ;
01071             }
01072         ASSERT(branch!=-1) ;
01073         char acgt[5]="ACGT" ;
01074         SG_PRINT( "%c", acgt[branch]) ;
01075     }
01076     SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ;
01077     if (TreeMem[node].has_floats)
01078     {
01079         for (int32_t q=0; q<4; q++)
01080             SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ;
01081     }
01082     if (TreeMem[node].has_seq)
01083     {
01084         for (int32_t q=0; q<16; q++)
01085             SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ;
01086     }
01087     if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
01088     {
01089         for (int32_t q=0; q<4; q++)
01090         {
01091             if (TreeMem[node].children[q]!=NO_CHILD)
01092             {
01093                 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ;
01094                 display_node(abs(TreeMem[node].children[q])) ;
01095             }
01096             else
01097                 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ;
01098         }
01099 
01100     }
01101 
01102     delete[] trace ;
01103 #else
01104     SG_ERROR( "not implemented\n") ;
01105 #endif
01106 }
01107 
01108 
01109 template <class Trie> CTrie<Trie>::~CTrie()
01110 {
01111     destroy() ;
01112 
01113     free(TreeMem) ;
01114 }
01115 
01116 template <class Trie> void CTrie<Trie>::destroy()
01117 {
01118     if (trees!=NULL)
01119     {
01120         delete_trees();
01121         for (int32_t i=0; i<length; i++)
01122             trees[i] = NO_CHILD;
01123         delete[] trees;
01124 
01125         TreeMemPtr=0;
01126         length=0;
01127         trees=NULL;
01128     }
01129 }
01130 
01131 template <class Trie> void CTrie<Trie>::set_degree(int32_t d)
01132 {
01133     delete_trees(get_use_compact_terminal_nodes());
01134     degree=d;
01135 }
01136 
01137 template <class Trie> void CTrie<Trie>::create(
01138     int32_t len, bool p_use_compact_terminal_nodes)
01139 {
01140     destroy();
01141 
01142     trees=new int32_t[len] ;        
01143     TreeMemPtr=0 ;
01144     for (int32_t i=0; i<len; i++)
01145         trees[i]=get_node(degree==1);
01146     length = len ;
01147 
01148     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01149 }
01150 
01151 
01152 template <class Trie> void CTrie<Trie>::delete_trees(
01153     bool p_use_compact_terminal_nodes)
01154 {
01155     if (trees==NULL)
01156         return;
01157 
01158     TreeMemPtr=0 ;
01159     for (int32_t i=0; i<length; i++)
01160         trees[i]=get_node(degree==1);
01161 
01162     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01163 } 
01164 
01165     template <class Trie>
01166 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth)
01167 {
01168     float64_t ret=0 ;
01169 
01170     if (tree==NO_CHILD)
01171         return 0 ;
01172     TRIE_ASSERT(tree>=0) ;
01173 
01174     if (depth==degree-2)
01175     {
01176         ret+=(TreeMem[tree].weight) ;
01177 
01178         for (int32_t k=0; k<4; k++)
01179             ret+=(TreeMem[tree].child_weights[k]) ;
01180 
01181         return ret ;
01182     }
01183 
01184     ret+=(TreeMem[tree].weight) ;
01185 
01186     for (int32_t i=0; i<4; i++)
01187         if (TreeMem[tree].children[i]!=NO_CHILD)
01188             ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1)  ;
01189 
01190     return ret ;
01191 }
01192 
01193 
01194     template <class Trie>
01195 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len)
01196 {
01197     float64_t * sum=new float64_t[length*4] ;
01198     for (int32_t i=0; i<length*4; i++)
01199         sum[i]=0 ;
01200     len=length ;
01201 
01202     for (int32_t i=0; i<length; i++)
01203     {
01204         TRIE_ASSERT(trees[i]!=NO_CHILD) ;
01205         for (int32_t k=0; k<4; k++)
01206         {
01207             sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
01208         }
01209     }
01210 
01211     return sum ;
01212 }
01213 
01214     template <class Trie>
01215 void CTrie<Trie>::add_example_to_tree_mismatch_recursion(
01216     int32_t tree,  int32_t i, float64_t alpha,
01217         int32_t *vec, int32_t len_rem, 
01218         int32_t degree_rec, int32_t mismatch_rec, 
01219         int32_t max_mismatch, float64_t * weights) 
01220 {
01221     if (tree==NO_CHILD)
01222         tree=trees[i] ;
01223     TRIE_ASSERT(tree!=NO_CHILD) ;
01224 
01225     if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
01226         return ;
01227     const int32_t other[4][3] = {   {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
01228 
01229     int32_t subtree = NO_CHILD ;
01230 
01231     if (degree_rec==degree-1)
01232     {
01233         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01234         if (weights_in_tree)
01235             TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
01236         else
01237             if (weights[degree_rec]!=0.0)
01238                 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01239         if (mismatch_rec+1<=max_mismatch)
01240             for (int32_t o=0; o<3; o++)
01241             {
01242                 if (weights_in_tree)
01243                     TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01244                 else
01245                     if (weights[degree_rec]!=0.0)
01246                         TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01247             }
01248         return ;
01249     }
01250     else
01251     {
01252         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01253         if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
01254         {
01255             subtree=TreeMem[tree].children[vec[0]] ;
01256             if (weights_in_tree)
01257                 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
01258             else
01259                 if (weights[degree_rec]!=0.0)
01260                     TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01261         }
01262         else 
01263         {
01264             int32_t tmp = get_node(degree_rec==degree-2);
01265             ASSERT(tmp>=0) ;
01266             TreeMem[tree].children[vec[0]]=tmp ;
01267             subtree=tmp ;
01268 #ifdef TRIE_CHECK_EVERYTHING
01269             if (degree_rec==degree-2)
01270                 TreeMem[subtree].has_floats=true ;
01271 #endif
01272             if (weights_in_tree)
01273                 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
01274             else
01275                 if (weights[degree_rec]!=0.0)
01276                     TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
01277                 else
01278                     TreeMem[subtree].weight = 0.0 ;
01279         }
01280         add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01281                 &vec[1], len_rem-1, 
01282                 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
01283 
01284         if (mismatch_rec+1<=max_mismatch)
01285         {
01286             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01287             for (int32_t o=0; o<3; o++)
01288             {
01289                 int32_t ot = other[vec[0]][o] ;
01290                 if (TreeMem[tree].children[ot]!=NO_CHILD)
01291                 {
01292                     subtree=TreeMem[tree].children[ot] ;
01293                     if (weights_in_tree)
01294                         TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01295                     else
01296                         if (weights[degree_rec]!=0.0)
01297                             TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01298                 }
01299                 else 
01300                 {
01301                     int32_t tmp = get_node(degree_rec==degree-2);
01302                     ASSERT(tmp>=0) ;
01303                     TreeMem[tree].children[ot]=tmp ;
01304                     subtree=tmp ;
01305 #ifdef TRIE_CHECK_EVERYTHING
01306                     if (degree_rec==degree-2)
01307                         TreeMem[subtree].has_floats=true ;
01308 #endif
01309 
01310                     if (weights_in_tree)
01311                         TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
01312                     else
01313                         if (weights[degree_rec]!=0.0)
01314                             TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
01315                         else
01316                             TreeMem[subtree].weight = 0.0 ;
01317                 }
01318 
01319                 add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01320                         &vec[1], len_rem-1, 
01321                         degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
01322             }
01323         }
01324     }
01325 }
01326 
01327     template <class Trie>
01328 void CTrie<Trie>::compute_scoring_helper(
01329     int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
01330     int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
01331     int32_t offs, float64_t* result)
01332 {
01333     if (i+j<num_feat)
01334     {
01335         float64_t decay=1.0; //no decay by default
01336         //if (j>d)
01337         //  decay=pow(0.5,j); //marginalize out lower order matches
01338 
01339         if (j<degree-1)
01340         {
01341             for (int32_t k=0; k<num_sym; k++)
01342             {
01343                 if (TreeMem[tree].children[k]!=NO_CHILD)
01344                 {
01345                     int32_t child=TreeMem[tree].children[k];
01346                     //continue recursion if not yet at max_degree, else add to result
01347                     if (d<max_degree-1)
01348                         compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01349                     else
01350                         result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
01351 
01353                     if (d==0)
01354                         compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
01355                 }
01356             }
01357         }
01358         else if (j==degree-1)
01359         {
01360             for (int32_t k=0; k<num_sym; k++)
01361             {
01362                 //continue recursion if not yet at max_degree, else add to result
01363                 if (d<max_degree-1 && i<num_feat-1)
01364                     compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01365                 else
01366                     result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
01367             }
01368         }
01369     }
01370 }
01371 
01372     template <class Trie>
01373 void CTrie<Trie>::traverse(
01374     int32_t tree, const int32_t p, struct TreeParseInfo info,
01375     const int32_t depth, int32_t* const x, const int32_t k)
01376 {
01377     const int32_t num_sym = info.num_sym;
01378     const int32_t y0 = info.y0;
01379     const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
01380     //const int32_t temp = info.substrs[depth]*num_sym - ( (depth<=k) ? 0 : info.nofsKmers[k] * x[depth-k-1] );
01381     //if( !( info.y0 == temp ) ) {
01382     //  printf( "\n temp=%d y0=%d k=%d depth=%d \n", temp, info.y0, k, depth );
01383     //}
01384     //ASSERT( info.y0 == temp );
01385     int32_t sym;
01386     ASSERT( depth < degree );
01387     //ASSERT( 0 <= info.substrs[depth] && info.substrs[depth] < info.nofsKmers[k] );
01388     if (depth<degree-1)
01389     {
01390         for( sym=0; sym<num_sym; ++sym ) {
01391             const int32_t childNum = TreeMem[tree].children[ sym ];
01392             if( childNum != NO_CHILD ) {
01393                 int32_t child = childNum ;
01394                 x[depth] = sym;
01395                 info.substrs[depth+1] = y0 + sym;
01396                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01397                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) );
01398                 count( TreeMem[child].weight, depth, info, p, x, k );
01399                 traverse( child, p, info, depth+1, x, k );
01400                 x[depth] = -1;
01401             }
01402         }
01403     }
01404     else if( depth == degree-1 )
01405     {
01406         for( sym=0; sym<num_sym; ++sym ) {
01407             const float64_t w = TreeMem[tree].child_weights[ sym ];
01408             if( w != 0.0 ) {
01409                 x[depth] = sym;
01410                 info.substrs[depth+1] = y0 + sym;
01411                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01412                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) );
01413                 count( w, depth, info, p, x, k );
01414                 x[depth] = -1;
01415             }
01416         }
01417     }
01418     //info.substrs[depth+1] = -1;
01419     //info.y0 = temp;
01420 }
01421 
01422     template <class Trie>
01423 void CTrie<Trie>::count(
01424     const float64_t w, const int32_t depth, const struct TreeParseInfo info,
01425     const int32_t p, int32_t* x, const int32_t k)
01426 {
01427     ASSERT( fabs(w) < 1e10 );
01428     ASSERT( x[depth] >= 0 );
01429     ASSERT( x[depth+1] < 0 );
01430     if ( depth < k ) {
01431         return;
01432     }
01433     //ASSERT( info.margFactors[ depth-k ] == pow( 0.25, depth-k ) );
01434     const int32_t nofKmers = info.nofsKmers[k];
01435     const float64_t margWeight =  w * info.margFactors[ depth-k ];
01436     const int32_t m_a = depth - k + 1;
01437     const int32_t m_b = info.num_feat - p;
01438     const int32_t m = ( m_a < m_b ) ? m_a : m_b;
01439     // all proper k-substrings
01440     const int32_t offset0 = nofKmers * p;
01441     register int32_t i;
01442     register int32_t offset;
01443     offset = offset0;
01444     for( i = 0; i < m; ++i ) {
01445         const int32_t y = info.substrs[i+k+1];
01446         info.C_k[ y + offset ] += margWeight;
01447         offset += nofKmers;
01448     }
01449     if( depth > k ) {
01450         // k-prefix
01451         const int32_t offsR = info.substrs[k+1] + offset0;
01452         info.R_k[offsR] += margWeight;
01453         // k-suffix
01454         if( p+depth-k < info.num_feat ) {
01455             const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
01456             info.L_k[offsL] += margWeight; 
01457         }
01458     }
01459     //    # N.x = substring represented by N
01460     //    # N.d = length of N.x
01461     //    # N.s = starting position of N.x
01462     //    # N.w = weight for feature represented by N
01463     //    if( N.d >= k )
01464     //      margContrib = w / 4^(N.d-k)
01465     //      for i = 1 to (N.d-k+1)
01466     //        y = N.x[i:(i+k-1)]  # overlapped k-mer
01467     //        C_k[ N.s+i-1, y ] += margContrib
01468     //      end;
01469     //      if( N.d > k )
01470     //        L_k[ N.s+N.d-k, N.x[N.d-k+(1:k)] ] += margContrib  # j-suffix of N.x
01471     //        R_k[ N.s,       N.x[1:k]         ] += margContrib  # j-prefix of N.x
01472     //      end;
01473     //    end;
01474 }
01475 
01476     template <class Trie>
01477 void CTrie<Trie>::add_to_trie(
01478     int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha,
01479     float64_t *weights, bool degree_times_position_weights)
01480 {
01481     int32_t tree = trees[i] ;
01482     //ASSERT(seq_offset==0) ;
01483 
01484     int32_t max_depth = 0 ;
01485     float64_t* weights_column ;
01486     if (degree_times_position_weights)
01487         weights_column = &weights[(i+seq_offset)*degree] ;
01488     else
01489         weights_column = weights ;
01490 
01491     if (weights_in_tree)
01492     {
01493         for (int32_t j=0; (j<degree) && (i+j<length); j++)
01494             if (CMath::abs(weights_column[j]*alpha)>0)
01495                 max_depth = j+1 ;
01496     }
01497     else
01498         // don't use the weights
01499         max_depth=degree ;
01500 
01501     for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
01502     {
01503         TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ;
01504         if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
01505         {
01506             if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
01507             {
01508                 // special treatment of the next nodes
01509                 TRIE_ASSERT(j >= degree-16) ;
01510                 // get the right element
01511                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01512                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01513                 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ;
01514 
01515                 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ;
01516                 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ;
01517                 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ;
01518 
01519                 // check whether the same string is stored
01520                 int32_t mismatch_pos = -1 ;
01521                 {
01522                     int32_t k ;
01523                     for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
01524                     {
01525                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01526                         // ###
01527                         if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
01528                             fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
01529                         TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ;
01530                         TRIE_ASSERT(k<16) ;
01531                         if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
01532                         {
01533                             mismatch_pos=k ;
01534                             break ;
01535                         }
01536                     }
01537                 }
01538 
01539                 // what happens when the .seq sequence is longer than vec? should we branch???
01540 
01541                 if (mismatch_pos==-1)
01542                     // if so, then just increase the weight by alpha and stop
01543                     TreeMem[node].weight+=alpha ;
01544                 else
01545                     // otherwise
01546                     // 1. replace current node with new node
01547                     // 2. create new nodes until mismatching positon
01548                     // 2. add a branch with old string (old node) and the new string (new node)
01549                 {
01550                     // replace old node
01551                     int32_t last_node=tree ;
01552 
01553                     // create new nodes until mismatch
01554                     int32_t k ;
01555                     for (k=0; k<mismatch_pos; k++)
01556                     {
01557                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01558                         TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ;
01559 
01560                         int32_t tmp=get_node();
01561                         TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
01562                         last_node=tmp ;
01563                         if (weights_in_tree)
01564                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
01565                         else
01566                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
01567                         TRIE_ASSERT(j+k!=degree-1) ;
01568                     }
01569                     if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
01570                         fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
01571                     ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ;
01572                     TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ;
01573 
01574                     if (j+k==degree-1)
01575                     {
01576                         // init child weights with zero if after dropping out
01577                         // of the k<mismatch_pos loop we are one level below degree
01578                         // (keep this even after get_node() change!)
01579                         for (int32_t q=0; q<4; q++)
01580                             TreeMem[last_node].child_weights[q]=0.0 ;
01581                         if (weights_in_tree)
01582                         {
01583                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01584                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
01585                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
01586                         }
01587                         else
01588                         {
01589                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01590                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
01591                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
01592                         }
01593 
01594 #ifdef TRIE_CHECK_EVERYTHING
01595                         TreeMem[last_node].has_floats=true ;
01596 #endif
01597                     }
01598                     else
01599                     {
01600                         // the branch for the existing string
01601                         if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01602                         {
01603                             TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
01604 
01605                             // move string by mismatch_pos positions
01606                             for (int32_t q=0; q<16; q++)
01607                             {
01608                                 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
01609                                     TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
01610                                 else
01611                                     TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
01612                             }
01613 #ifdef TRIE_CHECK_EVERYTHING
01614                             TreeMem[node].has_seq=true ;
01615 #endif
01616                         }
01617 
01618                         // the new branch
01619                         TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ;
01620                         int32_t tmp = get_node() ;
01621                         TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
01622                         last_node=tmp ;
01623 
01624                         TreeMem[last_node].weight = alpha ;
01625 #ifdef TRIE_CHECK_EVERYTHING
01626                         TreeMem[last_node].has_seq = true ;
01627 #endif
01628                         memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01629                         for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
01630                             TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
01631                     }
01632                 }
01633                 break ;
01634             } 
01635             else
01636             {
01637                 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
01638                 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01639                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01640                 if (weights_in_tree)
01641                     TreeMem[tree].weight += alpha*weights_column[j];
01642                 else
01643                     TreeMem[tree].weight += alpha ;
01644             }
01645         }
01646         else if (j==degree-1)
01647         {
01648             // special treatment of the last node
01649             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01650             TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01651             if (weights_in_tree)
01652                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
01653             else
01654                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
01655 
01656             break;
01657         }
01658         else
01659         {
01660             bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
01661             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01662             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01663 
01664             int32_t tmp = get_node((j==degree-2) && (!use_seq));
01665             if (use_seq)
01666                 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
01667             else
01668                 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
01669             tree=tmp ;
01670 
01671             TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01672 #ifdef TRIE_CHECK_EVERYTHING
01673             TreeMem[tree].has_seq = use_seq ;
01674 #endif
01675             if (use_seq)
01676             {
01677                 TreeMem[tree].weight = alpha ;
01678                 // important to have the terminal characters (see ###)
01679                 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01680                 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
01681                 {
01682                     TRIE_ASSERT(q<16) ;
01683                     TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
01684                 }
01685                 break ;
01686             }
01687             else
01688             {
01689                 if (weights_in_tree)
01690                     TreeMem[tree].weight = alpha*weights_column[j] ;
01691                 else
01692                     TreeMem[tree].weight = alpha ;
01693 #ifdef TRIE_CHECK_EVERYTHING
01694                 if (j==degree-2)
01695                     TreeMem[tree].has_floats = true ;
01696 #endif
01697             }
01698         }
01699     }
01700 }
01701 
01702     template <class Trie>
01703 float64_t CTrie<Trie>::compute_by_tree_helper(
01704     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01705         int32_t weight_pos, float64_t* weights, 
01706         bool degree_times_position_weights)
01707 {
01708     int32_t tree = trees[tree_pos] ;
01709 
01710     if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
01711         return 0.0;
01712 
01713     float64_t *weights_column=NULL ;
01714     if (degree_times_position_weights)
01715         weights_column=&weights[weight_pos*degree] ;
01716     else // weights is a vector (1 x degree)
01717         weights_column=weights ;
01718 
01719     float64_t sum=0 ;
01720     for (int32_t j=0; seq_pos+j < len; j++)
01721     {
01722         TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ;
01723 
01724         if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01725         {
01726             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01727             if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01728             {
01729                 tree = - TreeMem[tree].children[vec[seq_pos+j]];
01730                 TRIE_ASSERT(tree>=0) ;
01731                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01732                 float64_t this_weight=0.0 ;
01733                 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01734                 {
01735                     TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ;
01736                     if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01737                         break ;
01738                     this_weight += weights_column[j+k] ;
01739                 }
01740                 sum += TreeMem[tree].weight * this_weight ;
01741                 break ;
01742             }
01743             else
01744             {
01745                 tree=TreeMem[tree].children[vec[seq_pos+j]];
01746                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01747                 if (weights_in_tree)
01748                     sum += TreeMem[tree].weight ;
01749                 else
01750                     sum += TreeMem[tree].weight * weights_column[j] ;
01751             } ;
01752         }
01753         else
01754         {
01755             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01756             if (j==degree-1)
01757             {
01758                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01759                 if (weights_in_tree)
01760                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01761                 else
01762                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
01763             }
01764             else
01765                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01766 
01767             break;
01768         }
01769     } 
01770 
01771     if (position_weights!=NULL)
01772         return sum*position_weights[weight_pos] ;
01773     else
01774         return sum ;
01775 }
01776 
01777     template <class Trie>
01778 void CTrie<Trie>::compute_by_tree_helper(
01779     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01780     int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
01781     int32_t mkl_stepsize, float64_t * weights,
01782     bool degree_times_position_weights)
01783 {
01784     int32_t tree = trees[tree_pos] ;
01785     if (factor==0)
01786         return ;
01787 
01788     if (position_weights!=NULL)
01789     {
01790         factor *= position_weights[weight_pos] ;
01791         if (factor==0)
01792             return ;
01793         if (!degree_times_position_weights) // with position_weigths, weights is a vector (1 x degree)
01794         {
01795             for (int32_t j=0; seq_pos+j<len; j++)
01796             {
01797                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01798                 {
01799                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01800                     {
01801                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01802                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01803                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01804                         {
01805                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01806                                 break ;
01807                             if (weights_in_tree)
01808                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01809                             else
01810                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01811                         }
01812                         break ;
01813                     }
01814                     else
01815                     {
01816                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01817                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01818                         if (weights_in_tree)
01819                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01820                         else
01821                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01822                     }
01823                 } 
01824                 else
01825                 {
01826                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01827                     if (j==degree-1)
01828                     {
01829                         if (weights_in_tree)
01830                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01831                         else
01832                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01833                     }
01834                 }
01835             }
01836         } 
01837         else // with position_weigths, weights is a matrix (len x degree)
01838         {
01839             for (int32_t j=0; seq_pos+j<len; j++)
01840             {
01841                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01842                 {
01843                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01844                     {
01845                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01846                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01847                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01848                         {
01849                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01850                                 break ;
01851                             if (weights_in_tree)
01852                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01853                             else
01854                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01855                         }
01856                         break ;
01857                     }
01858                     else
01859                     {
01860                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01861                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01862                         if (weights_in_tree)
01863                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01864                         else
01865                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
01866                     }
01867                 } 
01868                 else
01869                 {
01870                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01871                     if (j==degree-1)
01872                     {
01873                         if (weights_in_tree)
01874                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01875                         else
01876                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
01877                     }         
01878                     break ;
01879                 }
01880             }
01881         } 
01882     }
01883     else if (!degree_times_position_weights) // no position_weigths, weights is a vector (1 x degree)
01884     {
01885         for (int32_t j=0; seq_pos+j<len; j++)
01886         {
01887             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01888             {
01889                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01890                 {
01891                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01892                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01893                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01894                     {
01895                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01896                             break ;
01897                         if (weights_in_tree)
01898                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01899                         else
01900                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01901                     }
01902                     break ;
01903                 }
01904                 else
01905                 {
01906                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01907                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01908                     if (weights_in_tree)
01909                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
01910                     else
01911                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01912                 }
01913             }
01914             else
01915             {
01916                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01917                 if (j==degree-1)
01918                 {
01919                     if (weights_in_tree)
01920                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01921                     else
01922                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01923                 }
01924                 break ;
01925             }
01926         } 
01927     } 
01928     else // no position_weigths, weights is a matrix (len x degree)
01929     {
01930         /*if (!position_mask)
01931           {     
01932           position_mask = new bool[len] ;
01933           for (int32_t i=0; i<len; i++)
01934           {
01935           position_mask[i]=false ;
01936 
01937           for (int32_t j=0; j<degree; j++)
01938           if (weights[i*degree+j]!=0.0)
01939           {
01940           position_mask[i]=true ;
01941           break ;
01942           }
01943           }
01944           }
01945           if (position_mask[weight_pos]==0)
01946           return ;*/
01947 
01948         for (int32_t j=0; seq_pos+j<len; j++)
01949         {
01950             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01951             {
01952                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01953                 {
01954                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01955                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01956                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01957                     {
01958                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01959                             break ;
01960                         if (weights_in_tree)
01961                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01962                         else
01963                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01964                     }
01965                     break ;
01966                 }
01967                 else
01968                 {
01969                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01970                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01971                     if (weights_in_tree)
01972                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
01973                     else
01974                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
01975                 } 
01976             }
01977             else
01978             {
01979                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01980                 if (j==degree-1)
01981                 {
01982                     if (weights_in_tree)
01983                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01984                     else
01985                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
01986                 }
01987                 break ;
01988             }
01989         } 
01990     }
01991 }
01992 
01993     template <class Trie>
01994 void CTrie<Trie>::fill_backtracking_table_recursion(
01995     Trie* tree, int32_t depth, uint64_t seq, float64_t value,
01996     CDynamicArray<ConsensusEntry>* table, float64_t* weights)
01997 {
01998     float64_t w=1.0;
01999 
02000     if (weights_in_tree || depth==0)
02001         value+=tree->weight;
02002     else
02003     {
02004         w=weights[degree-1];
02005         value+=weights[depth-1]*tree->weight;
02006     }
02007 
02008     if (degree-1==depth)
02009     {
02010         for (int32_t sym=0; sym<4; sym++)
02011         {
02012             float64_t v=w*tree->child_weights[sym];
02013             if (v!=0.0)
02014             {
02015                 ConsensusEntry entry;
02016                 entry.bt=-1;
02017                 entry.score=value+v;
02018                 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02019 
02020                 table->append_element(entry);
02021             }
02022         }
02023     }
02024     else
02025     {
02026         for (int32_t sym=0; sym<4; sym++)
02027         {
02028             uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02029             if (tree->children[sym] != NO_CHILD)
02030                 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
02031         }
02032     }
02033 }
02034 
02035     template <class Trie>
02036 float64_t CTrie<Trie>::get_cumulative_score(
02037     int32_t pos, uint64_t seq, int32_t deg, float64_t* weights)
02038 {
02039     float64_t result=0.0;
02040 
02041     //SG_PRINT("pos:%i length:%i deg:%i seq:0x%0llx...\n", pos, length, deg, seq);
02042 
02043     for (int32_t i=pos; i<pos+deg && i<length; i++)
02044     {
02045         //SG_PRINT("loop %d\n", i);
02046         Trie* tree = &TreeMem[trees[i]];
02047 
02048         for (int32_t d=0; d<deg-i+pos; d++)
02049         {
02050             //SG_PRINT("loop degree %d shit: %d\n", d, (2*(deg-1-d-i+pos)));
02051             ASSERT(d-1<degree);
02052             int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
02053 
02054             float64_t w=1.0;
02055             if (!weights_in_tree)
02056                 w=weights[d];
02057 
02058             ASSERT(tree->children[sym] != NO_CHILD);
02059             tree=&TreeMem[tree->children[sym]];
02060             result+=w*tree->weight;
02061         }
02062     }
02063     //SG_PRINT("cum: %f\n", result);
02064     return result;
02065 }
02066 
02067     template <class Trie>
02068 void CTrie<Trie>::fill_backtracking_table(
02069     int32_t pos, CDynamicArray<ConsensusEntry>* prev,
02070     CDynamicArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights)
02071 {
02072     ASSERT(pos>=0 && pos<length);
02073     ASSERT(!use_compact_terminal_nodes);
02074 
02075     Trie* t = &TreeMem[trees[pos]];
02076 
02077     fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights);
02078 
02079 
02080     if (cumulative)
02081     {
02082         int32_t num_cur=cur->get_num_elements();
02083         for (int32_t i=0; i<num_cur; i++)
02084         {
02085             ConsensusEntry entry=cur->get_element(i);
02086             entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
02087             cur->set_element(entry,i);
02088             //SG_PRINT("cum: str:0%0llx sc:%f bt:%d\n",entry.string,entry.score,entry.bt);
02089         }
02090     }
02091 
02092     //if previous tree exists find maximum scoring path
02093     //for each element in cur and update bt table
02094     if (prev)
02095     {
02096         int32_t num_cur=cur->get_num_elements();
02097         int32_t num_prev=prev->get_num_elements();
02098 
02099         for (int32_t i=0; i<num_cur; i++)
02100         {
02101             //uint64_t str_cur_old= cur->get_element(i).string;
02102             uint64_t str_cur= cur->get_element(i).string >> 2;
02103             //SG_PRINT("...cur:0x%0llx cur_noprfx:0x%0llx...\n", str_cur_old, str_cur);
02104 
02105             int32_t bt=-1;
02106             float64_t max_score=0.0;
02107 
02108             for (int32_t j=0; j<num_prev; j++)
02109             {
02110                 //uint64_t str_prev_old= prev->get_element(j).string;
02111                 uint64_t mask=
02112                     ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1))));
02113                 uint64_t str_prev=  mask & prev->get_element(j).string;
02114                 //SG_PRINT("...prev:0x%0llx prev_nosfx:0x%0llx mask:%0llx...\n", str_prev_old, str_prev,mask);
02115 
02116                 if (str_cur == str_prev)
02117                 {
02118                     float64_t sc=prev->get_element(j).score+cur->get_element(i).score;
02119                     if (bt==-1 || sc>max_score)
02120                     {
02121                         bt=j;
02122                         max_score=sc;
02123 
02124                         //SG_PRINT("new max[%i,%i] = %f\n", j,i, max_score);
02125                     }
02126                 }
02127             }
02128 
02129             ASSERT(bt!=-1);
02130             ConsensusEntry entry;
02131             entry.bt=bt;
02132             entry.score=max_score;
02133             entry.string=cur->get_element(i).string;
02134             cur->set_element(entry, i);
02135             //SG_PRINT("entry[%d]: str:0%0llx sc:%f bt:%d\n",i, entry.string,entry.score,entry.bt);
02136         }
02137     }
02138 }
02139 }
02140 #endif // _TRIE_H___

SHOGUN Machine Learning Toolbox - Documentation