WeightedDegreePositionStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "lib/io.h"
00014 #include "lib/Signal.h"
00015 #include "lib/Trie.h"
00016 #include "base/Parallel.h"
00017 
00018 #include "kernel/WeightedDegreePositionStringKernel.h"
00019 #include "kernel/SqrtDiagKernelNormalizer.h"
00020 #include "features/Features.h"
00021 #include "features/StringFeatures.h"
00022 
00023 #include "classifier/svm/SVM.h"
00024 
00025 #ifndef WIN32
00026 #include <pthread.h>
00027 #endif
00028 
00029 using namespace shogun;
00030 
00031 #define TRIES(X) ((use_poim_tries) ? (poim_tries.X) : (tries.X))
00032 
00033 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00034 template <class Trie> struct S_THREAD_PARAM 
00035 {
00036     int32_t* vec;
00037     float64_t* result;
00038     float64_t* weights;
00039     CWeightedDegreePositionStringKernel* kernel;
00040     CTrie<Trie>* tries;
00041     float64_t factor;
00042     int32_t j;
00043     int32_t start;
00044     int32_t end;
00045     int32_t length;
00046     int32_t max_shift;
00047     int32_t* shift;
00048     int32_t* vec_idx;
00049 };
00050 #endif // DOXYGEN_SHOULD_SKIP_THIS
00051 
00052 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00053     int32_t size, int32_t d, int32_t mm, int32_t mkls)
00054 : CStringKernel<char>(size), weights(NULL), position_weights(NULL),
00055     position_weights_lhs(NULL), position_weights_rhs(NULL),
00056     weights_buffer(NULL), mkl_stepsize(mkls), degree(d), length(0),
00057     max_mismatch(mm), seq_length(0), shift(NULL), shift_len(0),
00058     num_block_weights_external(0), block_weights_external(NULL),
00059     block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d),
00060     tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL),
00061     m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0),
00062     alphabet(NULL)
00063 {
00064     properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00065     set_wd_weights();
00066     ASSERT(weights);
00067 
00068     set_normalizer(new CSqrtDiagKernelNormalizer());
00069 }
00070 
00071 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00072     int32_t size, float64_t* w, int32_t d, int32_t mm, int32_t* s, int32_t sl,
00073     int32_t mkls)
00074 : CStringKernel<char>(size), weights(NULL), position_weights(NULL),
00075     position_weights_lhs(NULL), position_weights_rhs(NULL),
00076     weights_buffer(NULL), mkl_stepsize(mkls), degree(d), length(0),
00077     max_mismatch(mm), seq_length(0), shift(NULL), shift_len(0),
00078     num_block_weights_external(0), block_weights_external(NULL),
00079     block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d),
00080     tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL),
00081     m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0),
00082     alphabet(NULL)
00083 {
00084     properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00085 
00086     weights=new float64_t[d*(1+max_mismatch)];
00087     for (int32_t i=0; i<d*(1+max_mismatch); i++)
00088         weights[i]=w[i];
00089 
00090     set_shifts(s, sl);
00091 
00092     set_normalizer(new CSqrtDiagKernelNormalizer());
00093 }
00094 
00095 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00096     CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d)
00097 : CStringKernel<char>(10), weights(NULL), position_weights(NULL),
00098     position_weights_lhs(NULL), position_weights_rhs(NULL),
00099     weights_buffer(NULL), mkl_stepsize(1), degree(d), length(0),
00100     max_mismatch(0), seq_length(0), shift(NULL), shift_len(0),
00101     num_block_weights_external(0), block_weights_external(NULL),
00102     block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d),
00103     tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL),
00104     m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0),
00105     alphabet(NULL)
00106 {
00107     properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00108     set_wd_weights();
00109     ASSERT(weights);
00110     set_normalizer(new CSqrtDiagKernelNormalizer());
00111 
00112     init(l, r);
00113 }
00114 
00115 
00116 CWeightedDegreePositionStringKernel::~CWeightedDegreePositionStringKernel()
00117 {
00118     cleanup();
00119     cleanup_POIM2();
00120 
00121     delete[] shift;
00122     shift=NULL;
00123 
00124     delete[] weights;
00125     weights=NULL ;
00126 
00127     delete[] block_weights;
00128     block_weights=NULL;
00129 
00130     delete[] position_weights;
00131     position_weights=NULL;
00132 
00133     delete[] position_weights_lhs;
00134     position_weights_lhs=NULL;
00135 
00136     delete[] position_weights_rhs;
00137     position_weights_rhs=NULL;
00138 
00139     delete[] weights_buffer;
00140     weights_buffer=NULL;
00141 }
00142 
00143 void CWeightedDegreePositionStringKernel::remove_lhs()
00144 {
00145     SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00146     delete_optimization();
00147 
00148     tries.destroy();
00149     poim_tries.destroy();
00150 
00151     CKernel::remove_lhs();
00152 }
00153 
00154 void CWeightedDegreePositionStringKernel::create_empty_tries()
00155 {
00156     ASSERT(lhs);
00157     seq_length = ((CStringFeatures<char>*) lhs)->get_max_vector_length();
00158 
00159     if (opt_type==SLOWBUTMEMEFFICIENT)
00160     {
00161         tries.create(seq_length, true); 
00162         poim_tries.create(seq_length, true); 
00163     }
00164     else if (opt_type==FASTBUTMEMHUNGRY)
00165     {
00166         tries.create(seq_length, false);  // still buggy
00167         poim_tries.create(seq_length, false);  // still buggy
00168     }
00169     else
00170         SG_ERROR( "unknown optimization type\n");
00171 }
00172 
00173 bool CWeightedDegreePositionStringKernel::init(CFeatures* l, CFeatures* r)
00174 {
00175     int32_t lhs_changed = (lhs!=l) ;
00176     int32_t rhs_changed = (rhs!=r) ;
00177 
00178     CStringKernel<char>::init(l,r);
00179 
00180     SG_DEBUG( "lhs_changed: %i\n", lhs_changed) ;
00181     SG_DEBUG( "rhs_changed: %i\n", rhs_changed) ;
00182 
00183     CStringFeatures<char>* sf_l=(CStringFeatures<char>*) l;
00184     CStringFeatures<char>* sf_r=(CStringFeatures<char>*) r;
00185 
00186     /* set shift */
00187     if (shift_len==0) {
00188         shift_len=sf_l->get_vector_length(0);
00189         int32_t *shifts=new int32_t[shift_len];
00190         for (int32_t i=0; i<shift_len; i++) {
00191             shifts[i]=1;
00192         }
00193         set_shifts(shifts, shift_len);
00194         delete[] shifts;
00195     }
00196 
00197 
00198     int32_t len=sf_l->get_max_vector_length();
00199     if (lhs_changed && !sf_l->have_same_length(len))
00200         SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n");
00201 
00202     if (rhs_changed && !sf_r->have_same_length(len))
00203         SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n");
00204 
00205     SG_UNREF(alphabet);
00206     alphabet= sf_l->get_alphabet();
00207     CAlphabet* ralphabet=sf_r->get_alphabet();
00208 
00209     if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA)))
00210         properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION);
00211 
00212     ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet());
00213     SG_UNREF(ralphabet);
00214 
00215     //whenever init is called also init tries and block weights
00216     create_empty_tries();
00217     init_block_weights();
00218 
00219     return init_normalizer();
00220 }
00221 
00222 void CWeightedDegreePositionStringKernel::cleanup()
00223 {
00224     SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00225     delete_optimization();
00226 
00227     delete[] block_weights;
00228     block_weights=NULL;
00229 
00230     tries.destroy();
00231     poim_tries.destroy();
00232 
00233     seq_length = 0;
00234     tree_initialized = false;
00235 
00236     SG_UNREF(alphabet);
00237     alphabet=NULL;
00238 
00239     CKernel::cleanup();
00240 }
00241 
00242 bool CWeightedDegreePositionStringKernel::init_optimization(
00243     int32_t p_count, int32_t * IDX, float64_t * alphas, int32_t tree_num,
00244     int32_t upto_tree)
00245 {
00246     ASSERT(position_weights_lhs==NULL);
00247     ASSERT(position_weights_rhs==NULL);
00248 
00249     if (upto_tree<0)
00250         upto_tree=tree_num;
00251 
00252     if (max_mismatch!=0)
00253     {
00254         SG_ERROR( "CWeightedDegreePositionStringKernel optimization not implemented for mismatch!=0\n");
00255         return false ;
00256     }
00257 
00258     if (tree_num<0)
00259         SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00260 
00261     delete_optimization();
00262 
00263     if (tree_num<0)
00264         SG_DEBUG( "initializing CWeightedDegreePositionStringKernel optimization\n") ;
00265 
00266     for (int32_t i=0; i<p_count; i++)
00267     {
00268         if (tree_num<0)
00269         {
00270             if ( (i % (p_count/10+1)) == 0)
00271                 SG_PROGRESS(i,0,p_count);
00272             add_example_to_tree(IDX[i], alphas[i]);
00273         }
00274         else
00275         {
00276             for (int32_t t=tree_num; t<=upto_tree; t++)
00277                 add_example_to_single_tree(IDX[i], alphas[i], t);
00278         }
00279     }
00280 
00281     if (tree_num<0)
00282         SG_DONE();
00283 
00284     set_is_initialized(true) ;
00285     return true ;
00286 }
00287 
00288 bool CWeightedDegreePositionStringKernel::delete_optimization() 
00289 { 
00290     if ((opt_type==FASTBUTMEMHUNGRY) && (tries.get_use_compact_terminal_nodes())) 
00291     {
00292         tries.set_use_compact_terminal_nodes(false) ;
00293         SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ;
00294     }
00295 
00296     if (get_is_initialized())
00297     {
00298         if (opt_type==SLOWBUTMEMEFFICIENT)
00299             tries.delete_trees(true); 
00300         else if (opt_type==FASTBUTMEMHUNGRY)
00301             tries.delete_trees(false);  // still buggy
00302         else {
00303             SG_ERROR( "unknown optimization type\n");
00304         }
00305         set_is_initialized(false);
00306 
00307         return true;
00308     }
00309 
00310     return false;
00311 }
00312 
00313 float64_t CWeightedDegreePositionStringKernel::compute_with_mismatch(
00314     char* avec, int32_t alen, char* bvec, int32_t blen)
00315 {
00316     float64_t* max_shift_vec= new float64_t[max_shift];
00317     float64_t sum0=0 ;
00318     for (int32_t i=0; i<max_shift; i++)
00319         max_shift_vec[i]=0 ;
00320     
00321     // no shift
00322     for (int32_t i=0; i<alen; i++)
00323     {
00324         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00325             continue ;
00326         
00327         int32_t mismatches=0;
00328         float64_t sumi = 0.0 ;
00329         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00330         {
00331             if (avec[i+j]!=bvec[i+j])
00332             {
00333                 mismatches++ ;
00334                 if (mismatches>max_mismatch)
00335                     break ;
00336             } ;
00337             sumi += weights[j+degree*mismatches];
00338         }
00339         if (position_weights!=NULL)
00340             sum0 += position_weights[i]*sumi ;
00341         else
00342             sum0 += sumi ;
00343     } ;
00344     
00345     for (int32_t i=0; i<alen; i++)
00346     {
00347         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00348         {
00349             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00350                 continue ;
00351             
00352             float64_t sumi1 = 0.0 ;
00353             // shift in sequence a
00354             int32_t mismatches=0;
00355             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00356             {
00357                 if (avec[i+j+k]!=bvec[i+j])
00358                 {
00359                     mismatches++ ;
00360                     if (mismatches>max_mismatch)
00361                         break ;
00362                 } ;
00363                 sumi1 += weights[j+degree*mismatches];
00364             }
00365             float64_t sumi2 = 0.0 ;
00366             // shift in sequence b
00367             mismatches=0;
00368             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00369             {
00370                 if (avec[i+j]!=bvec[i+j+k])
00371                 {
00372                     mismatches++ ;
00373                     if (mismatches>max_mismatch)
00374                         break ;
00375                 } ;
00376                 sumi2 += weights[j+degree*mismatches];
00377             }
00378             if (position_weights!=NULL)
00379                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00380             else
00381                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00382         } ;
00383     }
00384     
00385     float64_t result = sum0 ;
00386     for (int32_t i=0; i<max_shift; i++)
00387         result += max_shift_vec[i]/(2*(i+1)) ;
00388     
00389     delete[] max_shift_vec;
00390     return result ;
00391 }
00392 
00393 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch(
00394     char* avec, int32_t alen, char* bvec, int32_t blen) 
00395 {
00396     float64_t* max_shift_vec = new float64_t[max_shift];
00397     float64_t sum0=0 ;
00398     for (int32_t i=0; i<max_shift; i++)
00399         max_shift_vec[i]=0 ;
00400 
00401     // no shift
00402     for (int32_t i=0; i<alen; i++)
00403     {
00404         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00405             continue ;
00406 
00407         float64_t sumi = 0.0 ;
00408         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00409         {
00410             if (avec[i+j]!=bvec[i+j])
00411                 break ;
00412             sumi += weights[j];
00413         }
00414         if (position_weights!=NULL)
00415             sum0 += position_weights[i]*sumi ;
00416         else
00417             sum0 += sumi ;
00418     } ;
00419 
00420     for (int32_t i=0; i<alen; i++)
00421     {
00422         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00423         {
00424             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00425                 continue ;
00426 
00427             float64_t sumi1 = 0.0 ;
00428             // shift in sequence a
00429             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00430             {
00431                 if (avec[i+j+k]!=bvec[i+j])
00432                     break ;
00433                 sumi1 += weights[j];
00434             }
00435             float64_t sumi2 = 0.0 ;
00436             // shift in sequence b
00437             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00438             {
00439                 if (avec[i+j]!=bvec[i+j+k])
00440                     break ;
00441                 sumi2 += weights[j];
00442             }
00443             if (position_weights!=NULL)
00444                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00445             else
00446                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00447         } ;
00448     }
00449 
00450     float64_t result = sum0 ;
00451     for (int32_t i=0; i<max_shift; i++)
00452         result += max_shift_vec[i]/(2*(i+1)) ;
00453 
00454     delete[] max_shift_vec;
00455     return result ;
00456 }
00457 
00458 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_matrix(
00459     char* avec, int32_t alen, char* bvec, int32_t blen) 
00460 {
00461     float64_t* max_shift_vec = new float64_t[max_shift];
00462     float64_t sum0=0 ;
00463     for (int32_t i=0; i<max_shift; i++)
00464         max_shift_vec[i]=0 ;
00465 
00466     // no shift
00467     for (int32_t i=0; i<alen; i++)
00468     {
00469         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00470             continue ;
00471         float64_t sumi = 0.0 ;
00472         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00473         {
00474             if (avec[i+j]!=bvec[i+j])
00475                 break ;
00476             sumi += weights[i*degree+j];
00477         }
00478         if (position_weights!=NULL)
00479             sum0 += position_weights[i]*sumi ;
00480         else
00481             sum0 += sumi ;
00482     } ;
00483 
00484     for (int32_t i=0; i<alen; i++)
00485     {
00486         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00487         {
00488             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00489                 continue ;
00490 
00491             float64_t sumi1 = 0.0 ;
00492             // shift in sequence a
00493             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00494             {
00495                 if (avec[i+j+k]!=bvec[i+j])
00496                     break ;
00497                 sumi1 += weights[i*degree+j];
00498             }
00499             float64_t sumi2 = 0.0 ;
00500             // shift in sequence b
00501             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00502             {
00503                 if (avec[i+j]!=bvec[i+j+k])
00504                     break ;
00505                 sumi2 += weights[i*degree+j];
00506             }
00507             if (position_weights!=NULL)
00508                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00509             else
00510                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00511         } ;
00512     }
00513 
00514     float64_t result = sum0 ;
00515     for (int32_t i=0; i<max_shift; i++)
00516         result += max_shift_vec[i]/(2*(i+1)) ;
00517 
00518     delete[] max_shift_vec;
00519     return result ;
00520 }
00521 
00522 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_position_weights(
00523     char* avec, float64_t* pos_weights_lhs, int32_t alen, char* bvec,
00524     float64_t* pos_weights_rhs, int32_t blen)
00525 {
00526 
00527     float64_t* max_shift_vec = new float64_t[max_shift];
00528     float64_t sum0=0 ;
00529     for (int32_t i=0; i<max_shift; i++)
00530         max_shift_vec[i]=0 ;
00531 
00532     // no shift
00533     for (int32_t i=0; i<alen; i++)
00534     {
00535         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00536             continue ;
00537 
00538         float64_t sumi = 0.0 ;
00539         float64_t posweight_lhs = 0.0 ;
00540         float64_t posweight_rhs = 0.0 ;
00541         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00542         {
00543             posweight_lhs += pos_weights_lhs[i+j] ;
00544             posweight_rhs += pos_weights_rhs[i+j] ;
00545             
00546             if (avec[i+j]!=bvec[i+j])
00547                 break ;
00548             sumi += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00549         }
00550         if (position_weights!=NULL)
00551             sum0 += position_weights[i]*sumi ;
00552         else
00553             sum0 += sumi ;
00554     } ;
00555 
00556     for (int32_t i=0; i<alen; i++)
00557     {
00558         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00559         {
00560             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00561                 continue ;
00562 
00563             // shift in sequence a  
00564             float64_t sumi1 = 0.0 ;
00565             float64_t posweight_lhs = 0.0 ;
00566             float64_t posweight_rhs = 0.0 ;
00567             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00568             {
00569                 posweight_lhs += pos_weights_lhs[i+j+k] ;
00570                 posweight_rhs += pos_weights_rhs[i+j] ;
00571                 if (avec[i+j+k]!=bvec[i+j])
00572                     break ;
00573                 sumi1 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00574             }
00575             // shift in sequence b
00576             float64_t sumi2 = 0.0 ;
00577             posweight_lhs = 0.0 ;
00578             posweight_rhs = 0.0 ;
00579             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00580             {
00581                 posweight_lhs += pos_weights_lhs[i+j] ;
00582                 posweight_rhs += pos_weights_rhs[i+j+k] ;
00583                 if (avec[i+j]!=bvec[i+j+k])
00584                     break ;
00585                 sumi2 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00586             }
00587             if (position_weights!=NULL)
00588                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00589             else
00590                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00591         } ;
00592     }
00593 
00594     float64_t result = sum0 ;
00595     for (int32_t i=0; i<max_shift; i++)
00596         result += max_shift_vec[i]/(2*(i+1)) ;
00597 
00598     delete[] max_shift_vec;
00599     return result ;
00600 }
00601 
00602 
00603 float64_t CWeightedDegreePositionStringKernel::compute(
00604     int32_t idx_a, int32_t idx_b)
00605 {
00606     int32_t alen, blen;
00607     bool free_avec, free_bvec;
00608 
00609     char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00610     char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00611     // can only deal with strings of same length
00612     ASSERT(alen==blen);
00613     ASSERT(shift_len==alen);
00614 
00615     float64_t result = 0 ;
00616     if (position_weights_lhs!=NULL || position_weights_rhs!=NULL)
00617     {
00618         ASSERT(max_mismatch==0);
00619         float64_t* position_weights_rhs_ = position_weights_rhs ;
00620         if (lhs==rhs)
00621         {
00622             //fprintf(stdout, ".") ;
00623             position_weights_rhs_ = position_weights_lhs ;
00624         }
00625         else
00626         {
00627             //fprintf(stdout, "*") ;
00628         }
00629         
00630         result = compute_without_mismatch_position_weights(avec, &position_weights_lhs[idx_a*alen], alen, bvec, &position_weights_rhs_[idx_b*blen], blen) ;
00631     }
00632     else if (max_mismatch > 0)
00633         result = compute_with_mismatch(avec, alen, bvec, blen) ;
00634     else if (length==0)
00635         result = compute_without_mismatch(avec, alen, bvec, blen) ;
00636     else
00637         result = compute_without_mismatch_matrix(avec, alen, bvec, blen) ;
00638 
00639     ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00640     ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00641 
00642     return result ;
00643 }
00644 
00645 
00646 void CWeightedDegreePositionStringKernel::add_example_to_tree(
00647     int32_t idx, float64_t alpha)
00648 {
00649     ASSERT(position_weights_lhs==NULL);
00650     ASSERT(position_weights_rhs==NULL);
00651     ASSERT(alphabet);
00652     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00653 
00654     int32_t len=0;
00655     bool free_vec;
00656     char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
00657     ASSERT(max_mismatch==0);
00658     int32_t *vec = new int32_t[len] ;
00659 
00660     for (int32_t i=0; i<len; i++)
00661         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00662     ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
00663 
00664     if (opt_type==FASTBUTMEMHUNGRY)
00665     {
00666         //TRIES(set_use_compact_terminal_nodes(false)) ;
00667         ASSERT(!TRIES(get_use_compact_terminal_nodes()));
00668     }
00669 
00670     for (int32_t i=0; i<len; i++)
00671     {
00672         int32_t max_s=-1;
00673 
00674         if (opt_type==SLOWBUTMEMEFFICIENT)
00675             max_s=0;
00676         else if (opt_type==FASTBUTMEMHUNGRY)
00677             max_s=shift[i];
00678         else {
00679             SG_ERROR( "unknown optimization type\n");
00680         }
00681 
00682         for (int32_t s=max_s; s>=0; s--)
00683         {
00684             float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00685             TRIES(add_to_trie(i, s, vec, alpha_pw, weights, (length!=0))) ;
00686             //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", i, s, idx, alpha_pw) ;
00687 
00688             if ((s==0) || (i+s>=len))
00689                 continue;
00690 
00691             TRIES(add_to_trie(i+s, -s, vec, alpha_pw, weights, (length!=0))) ;
00692             //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", i+s, -s, idx, alpha_pw) ;
00693         }
00694     }
00695 
00696     delete[] vec ;
00697     tree_initialized=true ;
00698 }
00699 
00700 void CWeightedDegreePositionStringKernel::add_example_to_single_tree(
00701     int32_t idx, float64_t alpha, int32_t tree_num) 
00702 {
00703     ASSERT(position_weights_lhs==NULL);
00704     ASSERT(position_weights_rhs==NULL);
00705     ASSERT(alphabet);
00706     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00707 
00708     int32_t len=0;
00709     bool free_vec;
00710     char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
00711     ASSERT(max_mismatch==0);
00712     int32_t *vec=new int32_t[len];
00713     int32_t max_s=-1;
00714 
00715     if (opt_type==SLOWBUTMEMEFFICIENT)
00716         max_s=0;
00717     else if (opt_type==FASTBUTMEMHUNGRY)
00718     {
00719         ASSERT(!tries.get_use_compact_terminal_nodes());
00720         max_s=shift[tree_num];
00721     }
00722     else {
00723         SG_ERROR( "unknown optimization type\n");
00724     }
00725     for (int32_t i=CMath::max(0,tree_num-max_shift);
00726             i<CMath::min(len,tree_num+degree+max_shift); i++)
00727     {
00728         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00729     } 
00730     ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
00731 
00732     for (int32_t s=max_s; s>=0; s--)
00733     {
00734         float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00735         tries.add_to_trie(tree_num, s, vec, alpha_pw, weights, (length!=0)) ;
00736         //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", tree_num, s, idx, alpha_pw) ;
00737     } 
00738 
00739     if (opt_type==FASTBUTMEMHUNGRY)
00740     {
00741         for (int32_t i=CMath::max(0,tree_num-max_shift); i<CMath::min(len,tree_num+max_shift+1); i++)
00742         {
00743             int32_t s=tree_num-i;
00744             if ((i+s<len) && (s>=1) && (s<=shift[i]))
00745             {
00746                 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00747                 tries.add_to_trie(tree_num, -s, vec, alpha_pw, weights, (length!=0)) ; 
00748                 //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", tree_num, -s, idx, alpha_pw) ;
00749             }
00750         }
00751     }
00752     delete[] vec ;
00753     tree_initialized=true ;
00754 }
00755 
00756 float64_t CWeightedDegreePositionStringKernel::compute_by_tree(int32_t idx)
00757 {
00758     ASSERT(position_weights_lhs==NULL);
00759     ASSERT(position_weights_rhs==NULL);
00760     ASSERT(alphabet);
00761     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00762 
00763     float64_t sum=0;
00764     int32_t len=0;
00765     bool free_vec;
00766     char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);
00767     ASSERT(max_mismatch==0);
00768     int32_t *vec=new int32_t[len];
00769 
00770     for (int32_t i=0; i<len; i++)
00771         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00772 
00773     ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
00774 
00775     for (int32_t i=0; i<len; i++)
00776         sum += tries.compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0)) ;
00777 
00778     if (opt_type==SLOWBUTMEMEFFICIENT)
00779     {
00780         for (int32_t i=0; i<len; i++)
00781         {
00782             for (int32_t s=1; (s<=shift[i]) && (i+s<len); s++)
00783             {
00784                 sum+=tries.compute_by_tree_helper(vec, len, i, i+s, i, weights, (length!=0))/(2*s) ;
00785                 sum+=tries.compute_by_tree_helper(vec, len, i+s, i, i+s, weights, (length!=0))/(2*s) ;
00786             }
00787         }
00788     }
00789 
00790     delete[] vec ;
00791 
00792     return normalizer->normalize_rhs(sum, idx);
00793 }
00794 
00795 void CWeightedDegreePositionStringKernel::compute_by_tree(
00796     int32_t idx, float64_t* LevelContrib)
00797 {
00798     ASSERT(position_weights_lhs==NULL);
00799     ASSERT(position_weights_rhs==NULL);
00800     ASSERT(alphabet);
00801     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00802 
00803     int32_t len=0;
00804     bool free_vec;
00805     char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);
00806     ASSERT(max_mismatch==0);
00807     int32_t *vec=new int32_t[len];
00808 
00809     for (int32_t i=0; i<len; i++)
00810         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00811 
00812     ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
00813 
00814     for (int32_t i=0; i<len; i++)
00815     {
00816         tries.compute_by_tree_helper(vec, len, i, i, i, LevelContrib,
00817                 normalizer->normalize_rhs(1.0, idx), mkl_stepsize, weights,
00818                 (length!=0));
00819     }
00820 
00821     if (opt_type==SLOWBUTMEMEFFICIENT)
00822     {
00823         for (int32_t i=0; i<len; i++)
00824             for (int32_t k=1; (k<=shift[i]) && (i+k<len); k++)
00825             {
00826                 tries.compute_by_tree_helper(vec, len, i, i+k, i, LevelContrib,
00827                         normalizer->normalize_rhs(1.0/(2*k), idx), mkl_stepsize,
00828                         weights, (length!=0)) ;
00829                 tries.compute_by_tree_helper(vec, len, i+k, i, i+k,
00830                         LevelContrib, normalizer->normalize_rhs(1.0/(2*k), idx),
00831                         mkl_stepsize, weights, (length!=0)) ;
00832             }
00833     }
00834 
00835     delete[] vec ;
00836 }
00837 
00838 float64_t* CWeightedDegreePositionStringKernel::compute_abs_weights(
00839     int32_t &len)
00840 {
00841     return tries.compute_abs_weights(len);
00842 }
00843 
00844 bool CWeightedDegreePositionStringKernel::set_shifts(
00845     int32_t* shift_, int32_t shift_len_)
00846 {
00847     delete[] shift;
00848 
00849     shift_len = shift_len_ ;
00850     shift = new int32_t[shift_len] ;
00851 
00852     if (shift)
00853     {
00854         max_shift = 0 ;
00855 
00856         for (int32_t i=0; i<shift_len; i++)
00857         {
00858             shift[i] = shift_[i] ;
00859             if (shift[i]>max_shift)
00860                 max_shift = shift[i] ;
00861         }
00862 
00863         ASSERT(max_shift>=0 && max_shift<=shift_len);
00864     }
00865     
00866     return false;
00867 }
00868 
00869 bool CWeightedDegreePositionStringKernel::set_wd_weights()
00870 {
00871     ASSERT(degree>0);
00872 
00873     delete[] weights;
00874     weights=new float64_t[degree];
00875     if (weights)
00876     {
00877         int32_t i;
00878         float64_t sum=0;
00879         for (i=0; i<degree; i++)
00880         {
00881             weights[i]=degree-i;
00882             sum+=weights[i];
00883         }
00884         for (i=0; i<degree; i++)
00885             weights[i]/=sum;
00886 
00887         for (i=0; i<degree; i++)
00888         {
00889             for (int32_t j=1; j<=max_mismatch; j++)
00890             {
00891                 if (j<i+1)
00892                 {
00893                     int32_t nk=CMath::nchoosek(i+1, j);
00894                     weights[i+j*degree]=weights[i]/(nk*CMath::pow(3.0,j));
00895                 }
00896                 else
00897                     weights[i+j*degree]= 0;
00898             }
00899         }
00900 
00901         return true;
00902     }
00903     else
00904         return false;
00905 }
00906 
00907 bool CWeightedDegreePositionStringKernel::set_weights(
00908     float64_t* ws, int32_t d, int32_t len)
00909 {
00910     SG_DEBUG("degree = %i  d=%i\n", degree, d);
00911     degree=d;
00912     length=len;
00913 
00914     if (len <= 0)
00915         len=1;
00916 
00917     delete[] weights;
00918     weights=new float64_t[d*len];
00919 
00920     if (weights)
00921     {
00922         for (int32_t i=0; i<degree*len; i++)
00923             weights[i]=ws[i];
00924         return true;
00925     }
00926     else
00927         return false;
00928 }
00929 
00930 bool CWeightedDegreePositionStringKernel::set_position_weights(
00931     float64_t* pws, int32_t len)
00932 {
00933     if (seq_length==0)
00934         seq_length=len;
00935 
00936     if (seq_length!=len)
00937     {
00938         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00939         return false;
00940     }
00941     delete[] position_weights;
00942     position_weights=new float64_t[len];
00943     tries.set_position_weights(position_weights);
00944 
00945     if (position_weights)
00946     {
00947         for (int32_t i=0; i<len; i++)
00948             position_weights[i]=pws[i];
00949         return true;
00950     }
00951     else
00952         return false;
00953 }
00954 
00955 bool CWeightedDegreePositionStringKernel::set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num)
00956 {
00957     fprintf(stderr, "set_position_weights_lhs %i %i %i\n", len, num, seq_length);
00958 
00959     if (position_weights_rhs==position_weights_lhs)
00960         position_weights_rhs=NULL;
00961     else
00962         delete_position_weights_rhs();
00963 
00964     if (len==0)
00965     {
00966         return delete_position_weights_lhs();
00967     }
00968     
00969     if (seq_length!=len)
00970     {
00971         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00972         return false;
00973     }
00974     if (0)
00975     {
00976         
00977     if (!lhs)
00978     {
00979         SG_ERROR("lhs=NULL\n");
00980         return false ;
00981     }
00982     if (lhs->get_num_vectors()!=num)
00983     {
00984         SG_ERROR("lhs->get_num_vectors()=%i, num=%i\n", lhs->get_num_vectors(), num);
00985         return false;
00986     }
00987     }
00988     
00989     delete[] position_weights_lhs;
00990     position_weights_lhs=new float64_t[len*num];
00991     if (position_weights_lhs)
00992     {
00993         for (int32_t i=0; i<len*num; i++)
00994             position_weights_lhs[i]=pws[i];
00995         return true;
00996     }
00997     else
00998         return false;
00999 }
01000 
01001 bool CWeightedDegreePositionStringKernel::set_position_weights_rhs(
01002     float64_t* pws, int32_t len, int32_t num)
01003 {
01004     fprintf(stderr, "set_position_weights_rhs %i %i %i\n", len, num, seq_length);
01005     if (len==0)
01006     {
01007         if (position_weights_rhs==position_weights_lhs)
01008         {
01009             position_weights_rhs=NULL;
01010             return true;
01011         }
01012         return delete_position_weights_rhs();
01013     }
01014 
01015     if (seq_length!=len)
01016     {
01017         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
01018         return false;
01019     }
01020     if (0)
01021     {
01022         
01023     if (!rhs)
01024     {
01025         if (!lhs)
01026         {
01027             SG_ERROR("rhs=NULL and lhs=NULL\n");
01028             return false;
01029         }
01030         if (lhs->get_num_vectors()!=num)
01031         {
01032             SG_ERROR("lhs->get_num_vectors()=%i, num=%i\n", lhs->get_num_vectors(), num);
01033             return false;
01034         }
01035     } else
01036         if (rhs->get_num_vectors()!=num)
01037         {
01038             SG_ERROR("rhs->get_num_vectors()=%i, num=%i\n", rhs->get_num_vectors(), num);
01039             return false;
01040         }
01041     }
01042     
01043     delete[] position_weights_rhs;
01044     position_weights_rhs=new float64_t[len*num];
01045     if (position_weights_rhs)
01046     {
01047         for (int32_t i=0; i<len*num; i++)
01048             position_weights_rhs[i]=pws[i];
01049         return true;
01050     }
01051     else
01052         return false;
01053 }
01054 
01055 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd()
01056 {
01057     delete[] block_weights;
01058     block_weights=new float64_t[CMath::max(seq_length,degree)];
01059 
01060     if (block_weights)
01061     {
01062         int32_t k;
01063         float64_t d=degree; // use float to evade rounding errors below
01064 
01065         for (k=0; k<degree; k++)
01066             block_weights[k]=
01067                 (-CMath::pow(k, 3)+(3*d-3)*CMath::pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1));
01068         for (k=degree; k<seq_length; k++)
01069             block_weights[k]=(-d+3*k+4)/3;
01070     }
01071 
01072     return (block_weights!=NULL);
01073 }
01074 
01075 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd_external()
01076 {
01077     ASSERT(weights);
01078     delete[] block_weights;
01079     block_weights=new float64_t[CMath::max(seq_length,degree)];
01080 
01081     if (block_weights)
01082     {
01083         int32_t i=0;
01084         block_weights[0]=weights[0];
01085         for (i=1; i<CMath::max(seq_length,degree); i++)
01086             block_weights[i]=0;
01087 
01088         for (i=1; i<CMath::max(seq_length,degree); i++)
01089         {
01090             block_weights[i]=block_weights[i-1];
01091 
01092             float64_t contrib=0;
01093             for (int32_t j=0; j<CMath::min(degree,i+1); j++)
01094                 contrib+=weights[j];
01095 
01096             block_weights[i]+=contrib;
01097         }
01098     }
01099 
01100     return (block_weights!=NULL);
01101 }
01102 
01103 bool CWeightedDegreePositionStringKernel::init_block_weights_const()
01104 {
01105     delete[] block_weights;
01106     block_weights=new float64_t[seq_length];
01107 
01108     if (block_weights)
01109     {
01110         for (int32_t i=1; i<seq_length+1 ; i++)
01111             block_weights[i-1]=1.0/seq_length;
01112     }
01113 
01114     return (block_weights!=NULL);
01115 }
01116 
01117 bool CWeightedDegreePositionStringKernel::init_block_weights_linear()
01118 {
01119     delete[] block_weights;
01120     block_weights=new float64_t[seq_length];
01121 
01122     if (block_weights)
01123     {
01124         for (int32_t i=1; i<seq_length+1 ; i++)
01125             block_weights[i-1]=degree*i;
01126     }
01127 
01128     return (block_weights!=NULL);
01129 }
01130 
01131 bool CWeightedDegreePositionStringKernel::init_block_weights_sqpoly()
01132 {
01133     delete[] block_weights;
01134     block_weights=new float64_t[seq_length];
01135 
01136     if (block_weights)
01137     {
01138         for (int32_t i=1; i<degree+1 ; i++)
01139             block_weights[i-1]=((float64_t) i)*i;
01140 
01141         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01142             block_weights[i-1]=i;
01143     }
01144 
01145     return (block_weights!=NULL);
01146 }
01147 
01148 bool CWeightedDegreePositionStringKernel::init_block_weights_cubicpoly()
01149 {
01150     delete[] block_weights;
01151     block_weights=new float64_t[seq_length];
01152 
01153     if (block_weights)
01154     {
01155         for (int32_t i=1; i<degree+1 ; i++)
01156             block_weights[i-1]=((float64_t) i)*i*i;
01157 
01158         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01159             block_weights[i-1]=i;
01160     }
01161 
01162     return (block_weights!=NULL);
01163 }
01164 
01165 bool CWeightedDegreePositionStringKernel::init_block_weights_exp()
01166 {
01167     delete[] block_weights;
01168     block_weights=new float64_t[seq_length];
01169 
01170     if (block_weights)
01171     {
01172         for (int32_t i=1; i<degree+1 ; i++)
01173             block_weights[i-1]=exp(((float64_t) i/10.0));
01174 
01175         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01176             block_weights[i-1]=i;
01177     }
01178 
01179     return (block_weights!=NULL);
01180 }
01181 
01182 bool CWeightedDegreePositionStringKernel::init_block_weights_log()
01183 {
01184     delete[] block_weights;
01185     block_weights=new float64_t[seq_length];
01186 
01187     if (block_weights)
01188     {
01189         for (int32_t i=1; i<degree+1 ; i++)
01190             block_weights[i-1]=CMath::pow(CMath::log((float64_t) i),2);
01191 
01192         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01193             block_weights[i-1]=i-degree+1+CMath::pow(CMath::log(degree+1.0),2);
01194     }
01195 
01196     return (block_weights!=NULL);
01197 }
01198 
01199 bool CWeightedDegreePositionStringKernel::init_block_weights_external()
01200 {
01201     if (block_weights_external && (seq_length == num_block_weights_external) )
01202     {
01203         delete[] block_weights;
01204         block_weights=new float64_t[seq_length];
01205 
01206         if (block_weights)
01207         {
01208             for (int32_t i=0; i<seq_length; i++)
01209                 block_weights[i]=block_weights_external[i];
01210         }
01211     }
01212     else {
01213       SG_ERROR( "sequence longer then weights (seqlen:%d, wlen:%d)\n", seq_length, block_weights_external);
01214    }
01215     return (block_weights!=NULL);
01216 }
01217 
01218 bool CWeightedDegreePositionStringKernel::init_block_weights()
01219 {
01220     switch (type)
01221     {
01222         case E_WD:
01223             return init_block_weights_from_wd();
01224         case E_EXTERNAL:
01225             return init_block_weights_from_wd_external();
01226         case E_BLOCK_CONST:
01227             return init_block_weights_const();
01228         case E_BLOCK_LINEAR:
01229             return init_block_weights_linear();
01230         case E_BLOCK_SQPOLY:
01231             return init_block_weights_sqpoly();
01232         case E_BLOCK_CUBICPOLY:
01233             return init_block_weights_cubicpoly();
01234         case E_BLOCK_EXP:
01235             return init_block_weights_exp();
01236         case E_BLOCK_LOG:
01237             return init_block_weights_log();
01238         case E_BLOCK_EXTERNAL:
01239             return init_block_weights_external();
01240         default:
01241             return false;
01242     };
01243 }
01244 
01245 
01246 
01247 void* CWeightedDegreePositionStringKernel::compute_batch_helper(void* p)
01248 {
01249     S_THREAD_PARAM<DNATrie>* params = (S_THREAD_PARAM<DNATrie>*) p;
01250     int32_t j=params->j;
01251     CWeightedDegreePositionStringKernel* wd=params->kernel;
01252     CTrie<DNATrie>* tries=params->tries;
01253     float64_t* weights=params->weights;
01254     int32_t length=params->length;
01255     int32_t max_shift=params->max_shift;
01256     int32_t* vec=params->vec;
01257     float64_t* result=params->result;
01258     float64_t factor=params->factor;
01259     int32_t* shift=params->shift;
01260     int32_t* vec_idx=params->vec_idx;
01261 
01262     for (int32_t i=params->start; i<params->end; i++)
01263     {
01264         int32_t len=0;
01265         CStringFeatures<char>* rhs_feat=((CStringFeatures<char>*) wd->get_rhs());
01266         CAlphabet* alpha=wd->alphabet;
01267 
01268         bool free_vec;
01269         char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len, free_vec);
01270         for (int32_t k=CMath::max(0,j-max_shift); k<CMath::min(len,j+wd->get_degree()+max_shift); k++)
01271             vec[k]=alpha->remap_to_bin(char_vec[k]);
01272         rhs_feat->free_feature_vector(char_vec, vec_idx[i], free_vec);
01273 
01274         SG_UNREF(rhs_feat);
01275 
01276         result[i] += factor*wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]);
01277 
01278         if (wd->get_optimization_type()==SLOWBUTMEMEFFICIENT)
01279         {
01280             for (int32_t q=CMath::max(0,j-max_shift); q<CMath::min(len,j+max_shift+1); q++)
01281             {
01282                 int32_t s=j-q ;
01283                 if ((s>=1) && (s<=shift[q]) && (q+s<len))
01284                 {
01285                     result[i] +=
01286                         wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec,
01287                                 len, q, q+s, q, weights, (length!=0)),
01288                                 vec_idx[i])/(2.0*s);
01289                 }
01290             }
01291 
01292             for (int32_t s=1; (s<=shift[j]) && (j+s<len); s++)
01293             {
01294                 result[i] +=
01295                     wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec,
01296                                 len, j+s, j, j+s, weights, (length!=0)),
01297                                 vec_idx[i])/(2.0*s);
01298             }
01299         }
01300     }
01301 
01302     return NULL;
01303 }
01304 
01305 void CWeightedDegreePositionStringKernel::compute_batch(
01306     int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec,
01307     int32_t* IDX, float64_t* alphas, float64_t factor)
01308 {
01309     ASSERT(alphabet);
01310     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01311     ASSERT(position_weights_lhs==NULL);
01312     ASSERT(position_weights_rhs==NULL);
01313     ASSERT(rhs);
01314     ASSERT(num_vec<=rhs->get_num_vectors());
01315     ASSERT(num_vec>0);
01316     ASSERT(vec_idx);
01317     ASSERT(result);
01318     create_empty_tries();
01319 
01320     int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01321     ASSERT(num_feat>0);
01322     int32_t num_threads=parallel->get_num_threads();
01323     ASSERT(num_threads>0);
01324     int32_t* vec=new int32_t[num_threads*num_feat];
01325 
01326     if (num_threads < 2)
01327     {
01328 #ifdef WIN32
01329        for (int32_t j=0; j<num_feat; j++)
01330 #else
01331        CSignal::clear_cancel();
01332        for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
01333 #endif
01334             {
01335                 init_optimization(num_suppvec, IDX, alphas, j);
01336                 S_THREAD_PARAM<DNATrie> params;
01337                 params.vec=vec;
01338                 params.result=result;
01339                 params.weights=weights;
01340                 params.kernel=this;
01341                 params.tries=&tries;
01342                 params.factor=factor;
01343                 params.j=j;
01344                 params.start=0;
01345                 params.end=num_vec;
01346                 params.length=length;
01347                 params.max_shift=max_shift;
01348                 params.shift=shift;
01349                 params.vec_idx=vec_idx;
01350                 compute_batch_helper((void*) &params);
01351 
01352                 SG_PROGRESS(j,0,num_feat);
01353             }
01354     }
01355 #ifndef WIN32
01356     else
01357     {
01358 
01359         CSignal::clear_cancel();
01360         for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
01361         {
01362             init_optimization(num_suppvec, IDX, alphas, j);
01363             pthread_t* threads = new pthread_t[num_threads-1];
01364             S_THREAD_PARAM<DNATrie>* params = new S_THREAD_PARAM<DNATrie>[num_threads];
01365             int32_t step= num_vec/num_threads;
01366             int32_t t;
01367 
01368             for (t=0; t<num_threads-1; t++)
01369             {
01370                 params[t].vec=&vec[num_feat*t];
01371                 params[t].result=result;
01372                 params[t].weights=weights;
01373                 params[t].kernel=this;
01374                 params[t].tries=&tries;
01375                 params[t].factor=factor;
01376                 params[t].j=j;
01377                 params[t].start = t*step;
01378                 params[t].end = (t+1)*step;
01379                 params[t].length=length;
01380                 params[t].max_shift=max_shift;
01381                 params[t].shift=shift;
01382                 params[t].vec_idx=vec_idx;
01383                 pthread_create(&threads[t], NULL, CWeightedDegreePositionStringKernel::compute_batch_helper, (void*)&params[t]);
01384             }
01385 
01386             params[t].vec=&vec[num_feat*t];
01387             params[t].result=result;
01388             params[t].weights=weights;
01389             params[t].kernel=this;
01390             params[t].tries=&tries;
01391             params[t].factor=factor;
01392             params[t].j=j;
01393             params[t].start=t*step;
01394             params[t].end=num_vec;
01395             params[t].length=length;
01396             params[t].max_shift=max_shift;
01397             params[t].shift=shift;
01398             params[t].vec_idx=vec_idx;
01399             compute_batch_helper((void*) &params[t]);
01400 
01401             for (t=0; t<num_threads-1; t++)
01402                 pthread_join(threads[t], NULL);
01403             SG_PROGRESS(j,0,num_feat);
01404 
01405             delete[] params;
01406             delete[] threads;
01407         }
01408     }
01409 #endif
01410 
01411     delete[] vec;
01412 
01413     //really also free memory as this can be huge on testing especially when
01414     //using the combined kernel
01415     create_empty_tries();
01416 }
01417 
01418 float64_t* CWeightedDegreePositionStringKernel::compute_scoring(
01419     int32_t max_degree, int32_t& num_feat, int32_t& num_sym, float64_t* result,
01420     int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01421 {
01422     ASSERT(position_weights_lhs==NULL);
01423     ASSERT(position_weights_rhs==NULL);
01424 
01425     num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01426     ASSERT(num_feat>0);
01427     ASSERT(alphabet);
01428     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01429 
01430     num_sym=4; //for now works only w/ DNA
01431 
01432     ASSERT(max_degree>0);
01433 
01434     // === variables
01435     int32_t* nofsKmers=new int32_t[max_degree];
01436     float64_t** C=new float64_t*[max_degree];
01437     float64_t** L=new float64_t*[max_degree];
01438     float64_t** R=new float64_t*[max_degree];
01439 
01440     int32_t i;
01441     int32_t k;
01442 
01443     // --- return table
01444     int32_t bigtabSize=0;
01445     for (k=0; k<max_degree; ++k )
01446     {
01447         nofsKmers[k]=(int32_t) CMath::pow(num_sym, k+1);
01448         const int32_t tabSize=nofsKmers[k]*num_feat;
01449         bigtabSize+=tabSize;
01450     }
01451     result=new float64_t[bigtabSize];
01452 
01453     // --- auxilliary tables
01454     int32_t tabOffs=0;
01455     for( k = 0; k < max_degree; ++k )
01456     {
01457         const int32_t tabSize = nofsKmers[k] * num_feat;
01458         C[k] = &result[tabOffs];
01459         L[k] = new float64_t[ tabSize ];
01460         R[k] = new float64_t[ tabSize ];
01461         tabOffs+=tabSize;
01462         for(i = 0; i < tabSize; i++ )
01463         {
01464             C[k][i] = 0.0;
01465             L[k][i] = 0.0;
01466             R[k][i] = 0.0;
01467         }
01468     }
01469 
01470     // --- tree parsing info
01471     float64_t* margFactors=new float64_t[degree];
01472 
01473     int32_t* x = new int32_t[ degree+1 ];
01474     int32_t* substrs = new int32_t[ degree+1 ];
01475     // - fill arrays
01476     margFactors[0] = 1.0;
01477     substrs[0] = 0;
01478     for( k=1; k < degree; ++k ) {
01479         margFactors[k] = 0.25 * margFactors[k-1];
01480         substrs[k] = -1;
01481     }
01482     substrs[degree] = -1;
01483     // - fill struct
01484     struct TreeParseInfo info;
01485     info.num_sym = num_sym;
01486     info.num_feat = num_feat;
01487     info.p = -1;
01488     info.k = -1;
01489     info.nofsKmers = nofsKmers;
01490     info.margFactors = margFactors;
01491     info.x = x;
01492     info.substrs = substrs;
01493     info.y0 = 0;
01494     info.C_k = NULL;
01495     info.L_k = NULL;
01496     info.R_k = NULL;
01497 
01498     // === main loop
01499     i = 0; // total progress
01500     for( k = 0; k < max_degree; ++k )
01501     {
01502         const int32_t nofKmers = nofsKmers[ k ];
01503         info.C_k = C[k];
01504         info.L_k = L[k];
01505         info.R_k = R[k];
01506 
01507         // --- run over all trees
01508         for(int32_t p = 0; p < num_feat; ++p )
01509         {
01510             init_optimization( num_suppvec, IDX, alphas, p );
01511             int32_t tree = p ;
01512             for(int32_t j = 0; j < degree+1; j++ ) {
01513                 x[j] = -1;
01514             }
01515             tries.traverse( tree, p, info, 0, x, k );
01516             SG_PROGRESS(i++,0,num_feat*max_degree);
01517         }
01518 
01519         // --- add partial overlap scores
01520         if( k > 0 ) {
01521             const int32_t j = k - 1;
01522             const int32_t nofJmers = (int32_t) CMath::pow( num_sym, j+1 );
01523             for(int32_t p = 0; p < num_feat; ++p ) {
01524                 const int32_t offsetJ = nofJmers * p;
01525                 const int32_t offsetJ1 = nofJmers * (p+1);
01526                 const int32_t offsetK = nofKmers * p;
01527                 int32_t y;
01528                 int32_t sym;
01529                 for( y = 0; y < nofJmers; ++y ) {
01530                     for( sym = 0; sym < num_sym; ++sym ) {
01531                         const int32_t y_sym = num_sym*y + sym;
01532                         const int32_t sym_y = nofJmers*sym + y;
01533                         ASSERT(0<=y_sym && y_sym<nofKmers);
01534                         ASSERT(0<=sym_y && sym_y<nofKmers);
01535                         C[k][ y_sym + offsetK ] += L[j][ y + offsetJ ];
01536                         if( p < num_feat-1 ) {
01537                             C[k][ sym_y + offsetK ] += R[j][ y + offsetJ1 ];
01538                         }
01539                     }
01540                 }
01541             }
01542         }
01543         //   if( k > 1 )
01544         //     j = k-1
01545         //     for all positions p
01546         //       for all j-mers y
01547         //          for n in {A,C,G,T}
01548         //            C_k[ p, [y,n] ] += L_j[ p, y ]
01549         //            C_k[ p, [n,y] ] += R_j[ p+1, y ]
01550         //          end;
01551         //       end;
01552         //     end;
01553         //   end;
01554     }
01555 
01556     // === return a vector
01557     num_feat=1;
01558     num_sym = bigtabSize;
01559     // --- clean up
01560     delete[] nofsKmers;
01561     delete[] margFactors;
01562     delete[] substrs;
01563     delete[] x;
01564     delete[] C;
01565     for( k = 0; k < max_degree; ++k ) {
01566         delete[] L[k];
01567         delete[] R[k];
01568     }
01569     delete[] L;
01570     delete[] R;
01571     return result;
01572 }
01573 
01574 char* CWeightedDegreePositionStringKernel::compute_consensus(
01575     int32_t &num_feat, int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01576 {
01577     ASSERT(position_weights_lhs==NULL);
01578     ASSERT(position_weights_rhs==NULL);
01579     //only works for order <= 32
01580     ASSERT(degree<=32);
01581     ASSERT(!tries.get_use_compact_terminal_nodes());
01582     num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01583     ASSERT(num_feat>0);
01584     ASSERT(alphabet);
01585     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01586 
01587     //consensus
01588     char* result=new char[num_feat];
01589 
01590     //backtracking and scoring table
01591     int32_t num_tables=CMath::max(1,num_feat-degree+1);
01592     CDynamicArray<ConsensusEntry>** table=new CDynamicArray<ConsensusEntry>*[num_tables];
01593 
01594     for (int32_t i=0; i<num_tables; i++)
01595         table[i]=new CDynamicArray<ConsensusEntry>(num_suppvec/10);
01596 
01597     //compute consensus via dynamic programming
01598     for (int32_t i=0; i<num_tables; i++)
01599     {
01600         bool cumulative=false;
01601 
01602         if (i<num_tables-1)
01603             init_optimization(num_suppvec, IDX, alphas, i);
01604         else
01605         {
01606             init_optimization(num_suppvec, IDX, alphas, i, num_feat-1);
01607             cumulative=true;
01608         }
01609 
01610         if (i==0)
01611             tries.fill_backtracking_table(i, NULL, table[i], cumulative, weights);
01612         else
01613             tries.fill_backtracking_table(i, table[i-1], table[i], cumulative, weights);
01614 
01615         SG_PROGRESS(i,0,num_feat);
01616     }
01617 
01618 
01619     //int32_t n=table[0]->get_num_elements();
01620 
01621     //for (int32_t i=0; i<n; i++)
01622     //{
01623     //  ConsensusEntry e= table[0]->get_element(i);
01624     //  SG_PRint32_t("first: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01625     //}
01626 
01627     //n=table[num_tables-1]->get_num_elements();
01628     //for (int32_t i=0; i<n; i++)
01629     //{
01630     //  ConsensusEntry e= table[num_tables-1]->get_element(i);
01631     //  SG_PRint32_t("last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01632     //}
01633     //n=table[num_tables-2]->get_num_elements();
01634     //for (int32_t i=0; i<n; i++)
01635     //{
01636     //  ConsensusEntry e= table[num_tables-2]->get_element(i);
01637     //  SG_PRINT("second last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01638     //}
01639 
01640     const char* acgt="ACGT";
01641 
01642     //backtracking start
01643     int32_t max_idx=-1;
01644     float32_t max_score=0;
01645     int32_t num_elements=table[num_tables-1]->get_num_elements();
01646 
01647     for (int32_t i=0; i<num_elements; i++)
01648     {
01649         float64_t sc=table[num_tables-1]->get_element(i).score;
01650         if (sc>max_score || max_idx==-1)
01651         {
01652             max_idx=i;
01653             max_score=sc;
01654         }
01655     }
01656     uint64_t endstr=table[num_tables-1]->get_element(max_idx).string;
01657 
01658     SG_INFO("max_idx:%d num_el:%d num_feat:%d num_tables:%d max_score:%f\n", max_idx, num_elements, num_feat, num_tables, max_score);
01659 
01660     for (int32_t i=0; i<degree; i++)
01661         result[num_feat-1-i]=acgt[(endstr >> (2*i)) & 3];
01662 
01663     if (num_tables>1)
01664     {
01665         for (int32_t i=num_tables-1; i>=0; i--)
01666         {
01667             //SG_PRINT("max_idx: %d, i:%d\n", max_idx, i);
01668             result[i]=acgt[table[i]->get_element(max_idx).string >> (2*(degree-1)) & 3];
01669             max_idx=table[i]->get_element(max_idx).bt;
01670         }
01671     }
01672 
01673     //for (int32_t t=0; t<num_tables; t++)
01674     //{
01675     //  n=table[t]->get_num_elements();
01676     //  for (int32_t i=0; i<n; i++)
01677     //  {
01678     //      ConsensusEntry e= table[t]->get_element(i);
01679     //      SG_PRINT("table[%d,%d]: str:0%0llx sc:%+f bt:%d\n",t,i, e.string,e.score,e.bt);
01680     //  }
01681     //}
01682 
01683     for (int32_t i=0; i<num_tables; i++)
01684         delete table[i];
01685 
01686     delete[] table;
01687     return result;
01688 }
01689 
01690 
01691 float64_t* CWeightedDegreePositionStringKernel::extract_w(
01692     int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
01693     float64_t* w_result, int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01694 {
01695   delete_optimization();
01696   use_poim_tries=true;
01697   poim_tries.delete_trees(false);
01698 
01699   // === check
01700   ASSERT(position_weights_lhs==NULL);
01701   ASSERT(position_weights_rhs==NULL);
01702   num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01703   ASSERT(num_feat>0);
01704   ASSERT(alphabet->get_alphabet()==DNA);
01705   ASSERT(max_degree>0);
01706 
01707   // === general variables
01708   static const int32_t NUM_SYMS = poim_tries.NUM_SYMS;
01709   const int32_t seqLen = num_feat;
01710   float64_t** subs;
01711   int32_t i;
01712   int32_t k;
01713   //int32_t y;
01714 
01715   // === init tables "subs" for substring scores / POIMs
01716   // --- compute table sizes
01717   int32_t* offsets;
01718   int32_t offset;
01719   offsets = new int32_t[ max_degree ];
01720   offset = 0;
01721   for( k = 0; k < max_degree; ++k ) {
01722     offsets[k] = offset;
01723     const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 );
01724     const int32_t tabSize = nofsKmers * seqLen;
01725     offset += tabSize;
01726   }
01727   // --- allocate memory
01728   const int32_t bigTabSize = offset;
01729   w_result=new float64_t[bigTabSize];
01730   for (i=0; i<bigTabSize; ++i)
01731     w_result[i]=0;
01732 
01733   // --- set pointers for tables
01734   subs = new float64_t*[ max_degree ];
01735   ASSERT( subs != NULL );
01736   for( k = 0; k < max_degree; ++k ) {
01737     subs[k] = &w_result[ offsets[k] ];
01738   }
01739   delete[] offsets;
01740 
01741   // === init trees; extract "w"
01742   init_optimization( num_suppvec, IDX, alphas, -1);
01743   poim_tries.POIMs_extract_W( subs, max_degree );
01744 
01745   // === clean; return "subs" as vector
01746   delete[] subs;
01747   num_feat = 1;
01748   num_sym = bigTabSize;
01749   use_poim_tries=false;
01750   poim_tries.delete_trees(false);
01751   return w_result;
01752 }
01753 
01754 float64_t* CWeightedDegreePositionStringKernel::compute_POIM(
01755     int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
01756     float64_t* poim_result, int32_t num_suppvec, int32_t* IDX,
01757     float64_t* alphas, float64_t* distrib )
01758 {
01759   delete_optimization();
01760   use_poim_tries=true;
01761   poim_tries.delete_trees(false);
01762 
01763   // === check
01764   ASSERT(position_weights_lhs==NULL);
01765   ASSERT(position_weights_rhs==NULL);
01766   num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01767   ASSERT(num_feat>0);
01768   ASSERT(alphabet->get_alphabet()==DNA);
01769   ASSERT(max_degree!=0);
01770   ASSERT(distrib);
01771 
01772   // === general variables
01773   static const int32_t NUM_SYMS = poim_tries.NUM_SYMS;
01774   const int32_t seqLen = num_feat;
01775   float64_t** subs;
01776   int32_t i;
01777   int32_t k;
01778 
01779   // === DEBUGGING mode
01780   //
01781   // Activated if "max_degree" < 0.
01782   // Allows to output selected partial score.
01783   //
01784   // |max_degree| mod 4
01785   //   0: substring
01786   //   1: superstring
01787   //   2: left overlap
01788   //   3: right overlap
01789   //
01790   const int32_t debug = ( max_degree < 0 ) ? ( abs(max_degree) % 4 + 1 ) : 0;
01791   if( debug ) {
01792     max_degree = abs(max_degree) / 4;
01793     switch( debug ) {
01794     case 1: {
01795       printf( "POIM DEBUGGING: substring only (max order=%d)\n", max_degree );
01796       break;
01797     }
01798     case 2: {
01799       printf( "POIM DEBUGGING: superstring only (max order=%d)\n", max_degree );
01800       break;
01801     }
01802     case 3: {
01803       printf( "POIM DEBUGGING: left overlap only (max order=%d)\n", max_degree );
01804       break;
01805     }
01806     case 4: {
01807       printf( "POIM DEBUGGING: right overlap only (max order=%d)\n", max_degree );
01808       break;
01809     }
01810     default: {
01811       printf( "POIM DEBUGGING: something is wrong (max order=%d)\n", max_degree );
01812       ASSERT(0);
01813       break;
01814     }
01815     }
01816   }
01817 
01818   // --- compute table sizes
01819   int32_t* offsets;
01820   int32_t offset;
01821   offsets = new int32_t[ max_degree ];
01822   offset = 0;
01823   for( k = 0; k < max_degree; ++k ) {
01824     offsets[k] = offset;
01825     const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 );
01826     const int32_t tabSize = nofsKmers * seqLen;
01827     offset += tabSize;
01828   }
01829   // --- allocate memory
01830   const int32_t bigTabSize=offset;
01831   poim_result=new float64_t[bigTabSize];
01832   for (i=0; i<bigTabSize; ++i )
01833     poim_result[i]=0;
01834 
01835   // --- set pointers for tables
01836   subs=new float64_t*[max_degree];
01837   for (k=0; k<max_degree; ++k)
01838     subs[k]=&poim_result[offsets[k]];
01839 
01840   delete[] offsets;
01841 
01842   // === init trees; precalc S, L and R
01843   init_optimization( num_suppvec, IDX, alphas, -1);
01844   poim_tries.POIMs_precalc_SLR( distrib );
01845 
01846   // === compute substring scores
01847   if( debug==0 || debug==1 ) {
01848     poim_tries.POIMs_extract_W( subs, max_degree );
01849     for( k = 1; k < max_degree; ++k ) {
01850       const int32_t nofKmers2 = ( k > 1 ) ? (int32_t) CMath::pow(NUM_SYMS,k-1) : 0;
01851       const int32_t nofKmers1 = (int32_t) CMath::pow( NUM_SYMS, k );
01852       const int32_t nofKmers0 = nofKmers1 * NUM_SYMS;
01853       for( i = 0; i < seqLen; ++i ) {
01854     float64_t* const subs_k2i1 = ( k>1 && i<seqLen-1 ) ? &subs[k-2][(i+1)*nofKmers2] : NULL;
01855     float64_t* const subs_k1i1 = ( i < seqLen-1 ) ? &subs[k-1][(i+1)*nofKmers1] : NULL;
01856     float64_t* const subs_k1i0 = & subs[ k-1 ][ i*nofKmers1 ];
01857     float64_t* const subs_k0i  = & subs[ k-0 ][ i*nofKmers0 ];
01858     int32_t y0;
01859     for( y0 = 0; y0 < nofKmers0; ++y0 ) {
01860       const int32_t y1l = y0 / NUM_SYMS;
01861       const int32_t y1r = y0 % nofKmers1;
01862       const int32_t y2 = y1r / NUM_SYMS;
01863       subs_k0i[ y0 ] += subs_k1i0[ y1l ];
01864       if( i < seqLen-1 ) {
01865         subs_k0i[ y0 ] += subs_k1i1[ y1r ];
01866         if( k > 1 ) {
01867           subs_k0i[ y0 ] -= subs_k2i1[ y2 ];
01868         }
01869       }
01870     }
01871       }
01872     }
01873   }
01874 
01875   // === compute POIMs
01876   poim_tries.POIMs_add_SLR( subs, max_degree, debug );
01877 
01878   // === clean; return "subs" as vector
01879   delete[] subs;
01880   num_feat = 1;
01881   num_sym = bigTabSize;
01882 
01883   use_poim_tries=false;
01884   poim_tries.delete_trees(false);
01885   
01886   return poim_result;
01887 }
01888 
01889 
01890 void CWeightedDegreePositionStringKernel::prepare_POIM2(
01891     float64_t* distrib, int32_t num_sym, int32_t num_feat)
01892 {
01893     free(m_poim_distrib);
01894     m_poim_distrib=(float64_t*)malloc(num_sym*num_feat*sizeof(float64_t));
01895     ASSERT(m_poim_distrib);
01896 
01897     memcpy(m_poim_distrib, distrib, num_sym*num_feat*sizeof(float64_t));
01898     m_poim_num_sym=num_sym;
01899     m_poim_num_feat=num_feat;
01900 }
01901 
01902 void CWeightedDegreePositionStringKernel::compute_POIM2(
01903     int32_t max_degree, CSVM* svm)
01904 {
01905     ASSERT(svm);
01906     int32_t num_suppvec=svm->get_num_support_vectors();
01907     int32_t* sv_idx=new int32_t[num_suppvec];
01908     float64_t* sv_weight=new float64_t[num_suppvec];
01909 
01910     for (int32_t i=0; i<num_suppvec; i++)
01911     {
01912         sv_idx[i]=svm->get_support_vector(i);
01913         sv_weight[i]=svm->get_alpha(i);
01914     }
01915     
01916     if ((max_degree < 1) || (max_degree > 12))
01917     {
01918         //SG_WARNING( "max_degree out of range 1..12 (%d).\n", max_degree);
01919         SG_WARNING( "max_degree out of range 1..12 (%d). setting to 1.\n", max_degree);
01920         max_degree=1;
01921     }
01922     
01923     int32_t num_feat = m_poim_num_feat;
01924     int32_t num_sym = m_poim_num_sym;
01925     free(m_poim);
01926 
01927     m_poim = compute_POIM(max_degree, num_feat, num_sym, NULL,  num_suppvec, sv_idx, 
01928                           sv_weight, m_poim_distrib);
01929 
01930     ASSERT(num_feat==1);
01931     m_poim_result_len=num_sym;
01932     
01933     delete[] sv_weight;
01934     delete[] sv_idx;
01935 }
01936 
01937 void CWeightedDegreePositionStringKernel::get_POIM2(
01938     float64_t** poim, int32_t* result_len)
01939 {
01940     *poim=(float64_t*) malloc(m_poim_result_len*sizeof(float64_t));
01941     ASSERT(*poim);
01942     memcpy(*poim, m_poim, m_poim_result_len*sizeof(float64_t)) ;
01943     *result_len=m_poim_result_len ;
01944 }
01945 
01946 void CWeightedDegreePositionStringKernel::cleanup_POIM2()
01947 {
01948     free(m_poim) ;
01949     m_poim=NULL ;
01950     free(m_poim_distrib) ;
01951     m_poim_distrib=NULL ;
01952     m_poim_num_sym=0 ;
01953     m_poim_num_sym=0 ;
01954     m_poim_result_len=0 ;
01955 }
01956 

SHOGUN Machine Learning Toolbox - Documentation