SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/common.h" 00013 #include "lib/io.h" 00014 #include "lib/Signal.h" 00015 #include "lib/Trie.h" 00016 #include "base/Parallel.h" 00017 00018 #include "kernel/WeightedDegreePositionStringKernel.h" 00019 #include "kernel/SqrtDiagKernelNormalizer.h" 00020 #include "features/Features.h" 00021 #include "features/StringFeatures.h" 00022 00023 #include "classifier/svm/SVM.h" 00024 00025 #ifndef WIN32 00026 #include <pthread.h> 00027 #endif 00028 00029 using namespace shogun; 00030 00031 #define TRIES(X) ((use_poim_tries) ? (poim_tries.X) : (tries.X)) 00032 00033 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00034 template <class Trie> struct S_THREAD_PARAM 00035 { 00036 int32_t* vec; 00037 float64_t* result; 00038 float64_t* weights; 00039 CWeightedDegreePositionStringKernel* kernel; 00040 CTrie<Trie>* tries; 00041 float64_t factor; 00042 int32_t j; 00043 int32_t start; 00044 int32_t end; 00045 int32_t length; 00046 int32_t max_shift; 00047 int32_t* shift; 00048 int32_t* vec_idx; 00049 }; 00050 #endif // DOXYGEN_SHOULD_SKIP_THIS 00051 00052 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel( 00053 void) 00054 : CStringKernel<char>() 00055 { 00056 init(); 00057 } 00058 00059 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel( 00060 int32_t size, int32_t d, int32_t mm, int32_t mkls) 00061 : CStringKernel<char>(size) 00062 { 00063 init(); 00064 00065 mkl_stepsize=mkls; 00066 degree=d; 00067 max_mismatch=mm; 00068 00069 tries=CTrie<DNATrie>(d); 00070 poim_tries=CTrie<POIMTrie>(d); 00071 00072 set_wd_weights(); 00073 ASSERT(weights); 00074 } 00075 00076 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel( 00077 int32_t size, float64_t* w, int32_t d, int32_t mm, int32_t* s, int32_t sl, 00078 int32_t mkls) 00079 : CStringKernel<char>(size) 00080 { 00081 init(); 00082 00083 mkl_stepsize=mkls; 00084 degree=d; 00085 max_mismatch=mm; 00086 00087 tries=CTrie<DNATrie>(d); 00088 poim_tries=CTrie<POIMTrie>(d); 00089 00090 weights=new float64_t[d*(1+max_mismatch)]; 00091 weights_degree=degree; 00092 weights_length=(1+max_mismatch); 00093 00094 for (int32_t i=0; i<d*(1+max_mismatch); i++) 00095 weights[i]=w[i]; 00096 00097 set_shifts(s, sl); 00098 } 00099 00100 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel( 00101 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d) 00102 : CStringKernel<char>() 00103 { 00104 init(); 00105 00106 mkl_stepsize=1; 00107 degree=d; 00108 00109 tries=CTrie<DNATrie>(d); 00110 poim_tries=CTrie<POIMTrie>(d); 00111 00112 set_wd_weights(); 00113 ASSERT(weights); 00114 00115 init(l, r); 00116 } 00117 00118 00119 CWeightedDegreePositionStringKernel::~CWeightedDegreePositionStringKernel() 00120 { 00121 cleanup(); 00122 cleanup_POIM2(); 00123 00124 delete[] shift; 00125 shift=NULL; 00126 00127 delete[] weights; 00128 weights=NULL; 00129 weights_degree=0; 00130 weights_length=0; 00131 00132 delete[] block_weights; 00133 block_weights=NULL; 00134 00135 delete[] position_weights; 00136 position_weights=NULL; 00137 00138 delete[] position_weights_lhs; 00139 position_weights_lhs=NULL; 00140 00141 delete[] position_weights_rhs; 00142 position_weights_rhs=NULL; 00143 00144 delete[] weights_buffer; 00145 weights_buffer=NULL; 00146 } 00147 00148 void CWeightedDegreePositionStringKernel::remove_lhs() 00149 { 00150 SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n"); 00151 delete_optimization(); 00152 00153 tries.destroy(); 00154 poim_tries.destroy(); 00155 00156 CKernel::remove_lhs(); 00157 } 00158 00159 void CWeightedDegreePositionStringKernel::create_empty_tries() 00160 { 00161 ASSERT(lhs); 00162 seq_length = ((CStringFeatures<char>*) lhs)->get_max_vector_length(); 00163 00164 if (opt_type==SLOWBUTMEMEFFICIENT) 00165 { 00166 tries.create(seq_length, true); 00167 poim_tries.create(seq_length, true); 00168 } 00169 else if (opt_type==FASTBUTMEMHUNGRY) 00170 { 00171 tries.create(seq_length, false); // still buggy 00172 poim_tries.create(seq_length, false); // still buggy 00173 } 00174 else 00175 SG_ERROR( "unknown optimization type\n"); 00176 } 00177 00178 bool CWeightedDegreePositionStringKernel::init(CFeatures* l, CFeatures* r) 00179 { 00180 int32_t lhs_changed = (lhs!=l) ; 00181 int32_t rhs_changed = (rhs!=r) ; 00182 00183 CStringKernel<char>::init(l,r); 00184 00185 SG_DEBUG( "lhs_changed: %i\n", lhs_changed) ; 00186 SG_DEBUG( "rhs_changed: %i\n", rhs_changed) ; 00187 00188 CStringFeatures<char>* sf_l=(CStringFeatures<char>*) l; 00189 CStringFeatures<char>* sf_r=(CStringFeatures<char>*) r; 00190 00191 /* set shift */ 00192 if (shift_len==0) { 00193 shift_len=sf_l->get_vector_length(0); 00194 int32_t *shifts=new int32_t[shift_len]; 00195 for (int32_t i=0; i<shift_len; i++) { 00196 shifts[i]=1; 00197 } 00198 set_shifts(shifts, shift_len); 00199 delete[] shifts; 00200 } 00201 00202 00203 int32_t len=sf_l->get_max_vector_length(); 00204 if (lhs_changed && !sf_l->have_same_length(len)) 00205 SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n"); 00206 00207 if (rhs_changed && !sf_r->have_same_length(len)) 00208 SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n"); 00209 00210 SG_UNREF(alphabet); 00211 alphabet= sf_l->get_alphabet(); 00212 CAlphabet* ralphabet=sf_r->get_alphabet(); 00213 00214 if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA))) 00215 properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION); 00216 00217 ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet()); 00218 SG_UNREF(ralphabet); 00219 00220 //whenever init is called also init tries and block weights 00221 create_empty_tries(); 00222 init_block_weights(); 00223 00224 return init_normalizer(); 00225 } 00226 00227 void CWeightedDegreePositionStringKernel::cleanup() 00228 { 00229 SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n"); 00230 delete_optimization(); 00231 00232 delete[] block_weights; 00233 block_weights=NULL; 00234 00235 tries.destroy(); 00236 poim_tries.destroy(); 00237 00238 seq_length = 0; 00239 tree_initialized = false; 00240 00241 SG_UNREF(alphabet); 00242 alphabet=NULL; 00243 00244 CKernel::cleanup(); 00245 } 00246 00247 bool CWeightedDegreePositionStringKernel::init_optimization( 00248 int32_t p_count, int32_t * IDX, float64_t * alphas, int32_t tree_num, 00249 int32_t upto_tree) 00250 { 00251 ASSERT(position_weights_lhs==NULL); 00252 ASSERT(position_weights_rhs==NULL); 00253 00254 if (upto_tree<0) 00255 upto_tree=tree_num; 00256 00257 if (max_mismatch!=0) 00258 { 00259 SG_ERROR( "CWeightedDegreePositionStringKernel optimization not implemented for mismatch!=0\n"); 00260 return false ; 00261 } 00262 00263 if (tree_num<0) 00264 SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n"); 00265 00266 delete_optimization(); 00267 00268 if (tree_num<0) 00269 SG_DEBUG( "initializing CWeightedDegreePositionStringKernel optimization\n") ; 00270 00271 for (int32_t i=0; i<p_count; i++) 00272 { 00273 if (tree_num<0) 00274 { 00275 if ( (i % (p_count/10+1)) == 0) 00276 SG_PROGRESS(i,0,p_count); 00277 add_example_to_tree(IDX[i], alphas[i]); 00278 } 00279 else 00280 { 00281 for (int32_t t=tree_num; t<=upto_tree; t++) 00282 add_example_to_single_tree(IDX[i], alphas[i], t); 00283 } 00284 } 00285 00286 if (tree_num<0) 00287 SG_DONE(); 00288 00289 set_is_initialized(true) ; 00290 return true ; 00291 } 00292 00293 bool CWeightedDegreePositionStringKernel::delete_optimization() 00294 { 00295 if ((opt_type==FASTBUTMEMHUNGRY) && (tries.get_use_compact_terminal_nodes())) 00296 { 00297 tries.set_use_compact_terminal_nodes(false) ; 00298 SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ; 00299 } 00300 00301 if (get_is_initialized()) 00302 { 00303 if (opt_type==SLOWBUTMEMEFFICIENT) 00304 tries.delete_trees(true); 00305 else if (opt_type==FASTBUTMEMHUNGRY) 00306 tries.delete_trees(false); // still buggy 00307 else { 00308 SG_ERROR( "unknown optimization type\n"); 00309 } 00310 set_is_initialized(false); 00311 00312 return true; 00313 } 00314 00315 return false; 00316 } 00317 00318 float64_t CWeightedDegreePositionStringKernel::compute_with_mismatch( 00319 char* avec, int32_t alen, char* bvec, int32_t blen) 00320 { 00321 float64_t* max_shift_vec= new float64_t[max_shift]; 00322 float64_t sum0=0 ; 00323 for (int32_t i=0; i<max_shift; i++) 00324 max_shift_vec[i]=0 ; 00325 00326 // no shift 00327 for (int32_t i=0; i<alen; i++) 00328 { 00329 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00330 continue ; 00331 00332 int32_t mismatches=0; 00333 float64_t sumi = 0.0 ; 00334 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00335 { 00336 if (avec[i+j]!=bvec[i+j]) 00337 { 00338 mismatches++ ; 00339 if (mismatches>max_mismatch) 00340 break ; 00341 } ; 00342 sumi += weights[j+degree*mismatches]; 00343 } 00344 if (position_weights!=NULL) 00345 sum0 += position_weights[i]*sumi ; 00346 else 00347 sum0 += sumi ; 00348 } ; 00349 00350 for (int32_t i=0; i<alen; i++) 00351 { 00352 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00353 { 00354 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00355 continue ; 00356 00357 float64_t sumi1 = 0.0 ; 00358 // shift in sequence a 00359 int32_t mismatches=0; 00360 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00361 { 00362 if (avec[i+j+k]!=bvec[i+j]) 00363 { 00364 mismatches++ ; 00365 if (mismatches>max_mismatch) 00366 break ; 00367 } ; 00368 sumi1 += weights[j+degree*mismatches]; 00369 } 00370 float64_t sumi2 = 0.0 ; 00371 // shift in sequence b 00372 mismatches=0; 00373 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00374 { 00375 if (avec[i+j]!=bvec[i+j+k]) 00376 { 00377 mismatches++ ; 00378 if (mismatches>max_mismatch) 00379 break ; 00380 } ; 00381 sumi2 += weights[j+degree*mismatches]; 00382 } 00383 if (position_weights!=NULL) 00384 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00385 else 00386 max_shift_vec[k-1] += sumi1 + sumi2 ; 00387 } ; 00388 } 00389 00390 float64_t result = sum0 ; 00391 for (int32_t i=0; i<max_shift; i++) 00392 result += max_shift_vec[i]/(2*(i+1)) ; 00393 00394 delete[] max_shift_vec; 00395 return result ; 00396 } 00397 00398 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch( 00399 char* avec, int32_t alen, char* bvec, int32_t blen) 00400 { 00401 float64_t* max_shift_vec = new float64_t[max_shift]; 00402 float64_t sum0=0 ; 00403 for (int32_t i=0; i<max_shift; i++) 00404 max_shift_vec[i]=0 ; 00405 00406 // no shift 00407 for (int32_t i=0; i<alen; i++) 00408 { 00409 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00410 continue ; 00411 00412 float64_t sumi = 0.0 ; 00413 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00414 { 00415 if (avec[i+j]!=bvec[i+j]) 00416 break ; 00417 sumi += weights[j]; 00418 } 00419 if (position_weights!=NULL) 00420 sum0 += position_weights[i]*sumi ; 00421 else 00422 sum0 += sumi ; 00423 } ; 00424 00425 for (int32_t i=0; i<alen; i++) 00426 { 00427 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00428 { 00429 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00430 continue ; 00431 00432 float64_t sumi1 = 0.0 ; 00433 // shift in sequence a 00434 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00435 { 00436 if (avec[i+j+k]!=bvec[i+j]) 00437 break ; 00438 sumi1 += weights[j]; 00439 } 00440 float64_t sumi2 = 0.0 ; 00441 // shift in sequence b 00442 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00443 { 00444 if (avec[i+j]!=bvec[i+j+k]) 00445 break ; 00446 sumi2 += weights[j]; 00447 } 00448 if (position_weights!=NULL) 00449 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00450 else 00451 max_shift_vec[k-1] += sumi1 + sumi2 ; 00452 } ; 00453 } 00454 00455 float64_t result = sum0 ; 00456 for (int32_t i=0; i<max_shift; i++) 00457 result += max_shift_vec[i]/(2*(i+1)) ; 00458 00459 delete[] max_shift_vec; 00460 00461 return result ; 00462 } 00463 00464 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_matrix( 00465 char* avec, int32_t alen, char* bvec, int32_t blen) 00466 { 00467 float64_t* max_shift_vec = new float64_t[max_shift]; 00468 float64_t sum0=0 ; 00469 for (int32_t i=0; i<max_shift; i++) 00470 max_shift_vec[i]=0 ; 00471 00472 // no shift 00473 for (int32_t i=0; i<alen; i++) 00474 { 00475 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00476 continue ; 00477 float64_t sumi = 0.0 ; 00478 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00479 { 00480 if (avec[i+j]!=bvec[i+j]) 00481 break ; 00482 sumi += weights[i*degree+j]; 00483 } 00484 if (position_weights!=NULL) 00485 sum0 += position_weights[i]*sumi ; 00486 else 00487 sum0 += sumi ; 00488 } ; 00489 00490 for (int32_t i=0; i<alen; i++) 00491 { 00492 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00493 { 00494 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00495 continue ; 00496 00497 float64_t sumi1 = 0.0 ; 00498 // shift in sequence a 00499 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00500 { 00501 if (avec[i+j+k]!=bvec[i+j]) 00502 break ; 00503 sumi1 += weights[i*degree+j]; 00504 } 00505 float64_t sumi2 = 0.0 ; 00506 // shift in sequence b 00507 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00508 { 00509 if (avec[i+j]!=bvec[i+j+k]) 00510 break ; 00511 sumi2 += weights[i*degree+j]; 00512 } 00513 if (position_weights!=NULL) 00514 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00515 else 00516 max_shift_vec[k-1] += sumi1 + sumi2 ; 00517 } ; 00518 } 00519 00520 float64_t result = sum0 ; 00521 for (int32_t i=0; i<max_shift; i++) 00522 result += max_shift_vec[i]/(2*(i+1)) ; 00523 00524 delete[] max_shift_vec; 00525 return result ; 00526 } 00527 00528 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_position_weights( 00529 char* avec, float64_t* pos_weights_lhs, int32_t alen, char* bvec, 00530 float64_t* pos_weights_rhs, int32_t blen) 00531 { 00532 float64_t* max_shift_vec = new float64_t[max_shift]; 00533 float64_t sum0=0 ; 00534 for (int32_t i=0; i<max_shift; i++) 00535 max_shift_vec[i]=0 ; 00536 00537 // no shift 00538 for (int32_t i=0; i<alen; i++) 00539 { 00540 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00541 continue ; 00542 00543 float64_t sumi = 0.0 ; 00544 float64_t posweight_lhs = 0.0 ; 00545 float64_t posweight_rhs = 0.0 ; 00546 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00547 { 00548 posweight_lhs += pos_weights_lhs[i+j] ; 00549 posweight_rhs += pos_weights_rhs[i+j] ; 00550 00551 if (avec[i+j]!=bvec[i+j]) 00552 break ; 00553 sumi += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ; 00554 } 00555 if (position_weights!=NULL) 00556 sum0 += position_weights[i]*sumi ; 00557 else 00558 sum0 += sumi ; 00559 } ; 00560 00561 for (int32_t i=0; i<alen; i++) 00562 { 00563 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00564 { 00565 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00566 continue ; 00567 00568 // shift in sequence a 00569 float64_t sumi1 = 0.0 ; 00570 float64_t posweight_lhs = 0.0 ; 00571 float64_t posweight_rhs = 0.0 ; 00572 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00573 { 00574 posweight_lhs += pos_weights_lhs[i+j+k] ; 00575 posweight_rhs += pos_weights_rhs[i+j] ; 00576 if (avec[i+j+k]!=bvec[i+j]) 00577 break ; 00578 sumi1 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ; 00579 } 00580 // shift in sequence b 00581 float64_t sumi2 = 0.0 ; 00582 posweight_lhs = 0.0 ; 00583 posweight_rhs = 0.0 ; 00584 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00585 { 00586 posweight_lhs += pos_weights_lhs[i+j] ; 00587 posweight_rhs += pos_weights_rhs[i+j+k] ; 00588 if (avec[i+j]!=bvec[i+j+k]) 00589 break ; 00590 sumi2 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ; 00591 } 00592 if (position_weights!=NULL) 00593 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00594 else 00595 max_shift_vec[k-1] += sumi1 + sumi2 ; 00596 } ; 00597 } 00598 00599 float64_t result = sum0 ; 00600 for (int32_t i=0; i<max_shift; i++) 00601 result += max_shift_vec[i]/(2*(i+1)) ; 00602 00603 delete[] max_shift_vec; 00604 return result ; 00605 } 00606 00607 00608 float64_t CWeightedDegreePositionStringKernel::compute( 00609 int32_t idx_a, int32_t idx_b) 00610 { 00611 int32_t alen, blen; 00612 bool free_avec, free_bvec; 00613 00614 char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec); 00615 char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec); 00616 // can only deal with strings of same length 00617 ASSERT(alen==blen); 00618 ASSERT(shift_len==alen); 00619 00620 float64_t result = 0 ; 00621 if (position_weights_lhs!=NULL || position_weights_rhs!=NULL) 00622 { 00623 ASSERT(max_mismatch==0); 00624 float64_t* position_weights_rhs_ = position_weights_rhs ; 00625 if (lhs==rhs) 00626 position_weights_rhs_ = position_weights_lhs ; 00627 00628 result = compute_without_mismatch_position_weights(avec, &position_weights_lhs[idx_a*alen], alen, bvec, &position_weights_rhs_[idx_b*blen], blen) ; 00629 } 00630 else if (max_mismatch > 0) 00631 result = compute_with_mismatch(avec, alen, bvec, blen) ; 00632 else if (length==0) 00633 result = compute_without_mismatch(avec, alen, bvec, blen) ; 00634 else 00635 result = compute_without_mismatch_matrix(avec, alen, bvec, blen) ; 00636 00637 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec); 00638 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); 00639 00640 return result ; 00641 } 00642 00643 00644 void CWeightedDegreePositionStringKernel::add_example_to_tree( 00645 int32_t idx, float64_t alpha) 00646 { 00647 ASSERT(position_weights_lhs==NULL); 00648 ASSERT(position_weights_rhs==NULL); 00649 ASSERT(alphabet); 00650 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00651 00652 int32_t len=0; 00653 bool free_vec; 00654 char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec); 00655 ASSERT(max_mismatch==0); 00656 int32_t *vec = new int32_t[len] ; 00657 00658 for (int32_t i=0; i<len; i++) 00659 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00660 ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec); 00661 00662 if (opt_type==FASTBUTMEMHUNGRY) 00663 { 00664 //TRIES(set_use_compact_terminal_nodes(false)) ; 00665 ASSERT(!TRIES(get_use_compact_terminal_nodes())); 00666 } 00667 00668 for (int32_t i=0; i<len; i++) 00669 { 00670 int32_t max_s=-1; 00671 00672 if (opt_type==SLOWBUTMEMEFFICIENT) 00673 max_s=0; 00674 else if (opt_type==FASTBUTMEMHUNGRY) 00675 max_s=shift[i]; 00676 else { 00677 SG_ERROR( "unknown optimization type\n"); 00678 } 00679 00680 for (int32_t s=max_s; s>=0; s--) 00681 { 00682 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx); 00683 TRIES(add_to_trie(i, s, vec, alpha_pw, weights, (length!=0))) ; 00684 if ((s==0) || (i+s>=len)) 00685 continue; 00686 00687 TRIES(add_to_trie(i+s, -s, vec, alpha_pw, weights, (length!=0))) ; 00688 } 00689 } 00690 00691 delete[] vec ; 00692 tree_initialized=true ; 00693 } 00694 00695 void CWeightedDegreePositionStringKernel::add_example_to_single_tree( 00696 int32_t idx, float64_t alpha, int32_t tree_num) 00697 { 00698 ASSERT(position_weights_lhs==NULL); 00699 ASSERT(position_weights_rhs==NULL); 00700 ASSERT(alphabet); 00701 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00702 00703 int32_t len=0; 00704 bool free_vec; 00705 char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec); 00706 ASSERT(max_mismatch==0); 00707 int32_t *vec=new int32_t[len]; 00708 int32_t max_s=-1; 00709 00710 if (opt_type==SLOWBUTMEMEFFICIENT) 00711 max_s=0; 00712 else if (opt_type==FASTBUTMEMHUNGRY) 00713 { 00714 ASSERT(!tries.get_use_compact_terminal_nodes()); 00715 max_s=shift[tree_num]; 00716 } 00717 else { 00718 SG_ERROR( "unknown optimization type\n"); 00719 } 00720 for (int32_t i=CMath::max(0,tree_num-max_shift); 00721 i<CMath::min(len,tree_num+degree+max_shift); i++) 00722 { 00723 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00724 } 00725 ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec); 00726 00727 for (int32_t s=max_s; s>=0; s--) 00728 { 00729 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx); 00730 tries.add_to_trie(tree_num, s, vec, alpha_pw, weights, (length!=0)) ; 00731 } 00732 00733 if (opt_type==FASTBUTMEMHUNGRY) 00734 { 00735 for (int32_t i=CMath::max(0,tree_num-max_shift); i<CMath::min(len,tree_num+max_shift+1); i++) 00736 { 00737 int32_t s=tree_num-i; 00738 if ((i+s<len) && (s>=1) && (s<=shift[i])) 00739 { 00740 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx); 00741 tries.add_to_trie(tree_num, -s, vec, alpha_pw, weights, (length!=0)) ; 00742 } 00743 } 00744 } 00745 delete[] vec ; 00746 tree_initialized=true ; 00747 } 00748 00749 float64_t CWeightedDegreePositionStringKernel::compute_by_tree(int32_t idx) 00750 { 00751 ASSERT(position_weights_lhs==NULL); 00752 ASSERT(position_weights_rhs==NULL); 00753 ASSERT(alphabet); 00754 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00755 00756 float64_t sum=0; 00757 int32_t len=0; 00758 bool free_vec; 00759 char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec); 00760 ASSERT(max_mismatch==0); 00761 int32_t *vec=new int32_t[len]; 00762 00763 for (int32_t i=0; i<len; i++) 00764 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00765 00766 ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec); 00767 00768 for (int32_t i=0; i<len; i++) 00769 sum += tries.compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0)) ; 00770 00771 if (opt_type==SLOWBUTMEMEFFICIENT) 00772 { 00773 for (int32_t i=0; i<len; i++) 00774 { 00775 for (int32_t s=1; (s<=shift[i]) && (i+s<len); s++) 00776 { 00777 sum+=tries.compute_by_tree_helper(vec, len, i, i+s, i, weights, (length!=0))/(2*s) ; 00778 sum+=tries.compute_by_tree_helper(vec, len, i+s, i, i+s, weights, (length!=0))/(2*s) ; 00779 } 00780 } 00781 } 00782 00783 delete[] vec ; 00784 00785 return normalizer->normalize_rhs(sum, idx); 00786 } 00787 00788 void CWeightedDegreePositionStringKernel::compute_by_tree( 00789 int32_t idx, float64_t* LevelContrib) 00790 { 00791 ASSERT(position_weights_lhs==NULL); 00792 ASSERT(position_weights_rhs==NULL); 00793 ASSERT(alphabet); 00794 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00795 00796 int32_t len=0; 00797 bool free_vec; 00798 char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec); 00799 ASSERT(max_mismatch==0); 00800 int32_t *vec=new int32_t[len]; 00801 00802 for (int32_t i=0; i<len; i++) 00803 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00804 00805 ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec); 00806 00807 for (int32_t i=0; i<len; i++) 00808 { 00809 tries.compute_by_tree_helper(vec, len, i, i, i, LevelContrib, 00810 normalizer->normalize_rhs(1.0, idx), mkl_stepsize, weights, 00811 (length!=0)); 00812 } 00813 00814 if (opt_type==SLOWBUTMEMEFFICIENT) 00815 { 00816 for (int32_t i=0; i<len; i++) 00817 for (int32_t k=1; (k<=shift[i]) && (i+k<len); k++) 00818 { 00819 tries.compute_by_tree_helper(vec, len, i, i+k, i, LevelContrib, 00820 normalizer->normalize_rhs(1.0/(2*k), idx), mkl_stepsize, 00821 weights, (length!=0)) ; 00822 tries.compute_by_tree_helper(vec, len, i+k, i, i+k, 00823 LevelContrib, normalizer->normalize_rhs(1.0/(2*k), idx), 00824 mkl_stepsize, weights, (length!=0)) ; 00825 } 00826 } 00827 00828 delete[] vec ; 00829 } 00830 00831 float64_t* CWeightedDegreePositionStringKernel::compute_abs_weights( 00832 int32_t &len) 00833 { 00834 return tries.compute_abs_weights(len); 00835 } 00836 00837 bool CWeightedDegreePositionStringKernel::set_shifts( 00838 int32_t* shift_, int32_t shift_len_) 00839 { 00840 delete[] shift; 00841 00842 shift_len = shift_len_ ; 00843 shift = new int32_t[shift_len] ; 00844 00845 if (shift) 00846 { 00847 max_shift = 0 ; 00848 00849 for (int32_t i=0; i<shift_len; i++) 00850 { 00851 shift[i] = shift_[i] ; 00852 max_shift = CMath::max(shift[i], max_shift); 00853 } 00854 00855 ASSERT(max_shift>=0 && max_shift<=shift_len); 00856 } 00857 00858 return false; 00859 } 00860 00861 bool CWeightedDegreePositionStringKernel::set_wd_weights() 00862 { 00863 ASSERT(degree>0); 00864 00865 delete[] weights; 00866 weights=new float64_t[degree]; 00867 weights_degree=degree; 00868 weights_length=1; 00869 00870 if (weights) 00871 { 00872 int32_t i; 00873 float64_t sum=0; 00874 for (i=0; i<degree; i++) 00875 { 00876 weights[i]=degree-i; 00877 sum+=weights[i]; 00878 } 00879 for (i=0; i<degree; i++) 00880 weights[i]/=sum; 00881 00882 for (i=0; i<degree; i++) 00883 { 00884 for (int32_t j=1; j<=max_mismatch; j++) 00885 { 00886 if (j<i+1) 00887 { 00888 int32_t nk=CMath::nchoosek(i+1, j); 00889 weights[i+j*degree]=weights[i]/(nk*CMath::pow(3.0,j)); 00890 } 00891 else 00892 weights[i+j*degree]= 0; 00893 } 00894 } 00895 00896 return true; 00897 } 00898 else 00899 return false; 00900 } 00901 00902 bool CWeightedDegreePositionStringKernel::set_weights( 00903 float64_t* ws, int32_t d, int32_t len) 00904 { 00905 if (d!=degree || len<0) 00906 SG_ERROR("WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree); 00907 00908 degree=d; 00909 length=len; 00910 00911 if (len <= 0) 00912 len=1; 00913 00914 weights_degree=degree; 00915 weights_length=len+max_mismatch; 00916 00917 SG_DEBUG("Creating weights of size %dx%d\n", weights_degree, weights_length); 00918 int32_t num_weights=weights_degree*weights_length; 00919 delete[] weights; 00920 weights=new float64_t[num_weights]; 00921 00922 for (int32_t i=0; i<degree*len; i++) 00923 weights[i]=ws[i]; 00924 00925 return true; 00926 } 00927 00928 bool CWeightedDegreePositionStringKernel::set_position_weights( 00929 float64_t* pws, int32_t len) 00930 { 00931 if (seq_length==0) 00932 seq_length=len; 00933 00934 if (seq_length!=len) 00935 { 00936 SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len); 00937 return false; 00938 } 00939 delete[] position_weights; 00940 position_weights=new float64_t[len]; 00941 position_weights_len=len; 00942 tries.set_position_weights(position_weights); 00943 00944 if (position_weights) 00945 { 00946 for (int32_t i=0; i<len; i++) 00947 position_weights[i]=pws[i]; 00948 return true; 00949 } 00950 else 00951 return false; 00952 } 00953 00954 bool CWeightedDegreePositionStringKernel::set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num) 00955 { 00956 if (position_weights_rhs==position_weights_lhs) 00957 position_weights_rhs=NULL; 00958 else 00959 delete_position_weights_rhs(); 00960 00961 if (len==0) 00962 { 00963 return delete_position_weights_lhs(); 00964 } 00965 00966 if (seq_length!=len) 00967 { 00968 SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len); 00969 return false; 00970 } 00971 00972 delete[] position_weights_lhs; 00973 position_weights_lhs=new float64_t[len*num]; 00974 position_weights_lhs_len=len*num; 00975 00976 for (int32_t i=0; i<len*num; i++) 00977 position_weights_lhs[i]=pws[i]; 00978 00979 return true; 00980 } 00981 00982 bool CWeightedDegreePositionStringKernel::set_position_weights_rhs( 00983 float64_t* pws, int32_t len, int32_t num) 00984 { 00985 if (len==0) 00986 { 00987 if (position_weights_rhs==position_weights_lhs) 00988 { 00989 position_weights_rhs=NULL; 00990 return true; 00991 } 00992 return delete_position_weights_rhs(); 00993 } 00994 00995 if (seq_length!=len) 00996 { 00997 SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len); 00998 return false; 00999 } 01000 01001 delete[] position_weights_rhs; 01002 position_weights_rhs=new float64_t[len*num]; 01003 position_weights_rhs_len=len*num; 01004 01005 for (int32_t i=0; i<len*num; i++) 01006 position_weights_rhs[i]=pws[i]; 01007 01008 return true; 01009 } 01010 01011 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd() 01012 { 01013 delete[] block_weights; 01014 block_weights=new float64_t[CMath::max(seq_length,degree)]; 01015 01016 if (block_weights) 01017 { 01018 int32_t k; 01019 float64_t d=degree; // use float to evade rounding errors below 01020 01021 for (k=0; k<degree; k++) 01022 block_weights[k]= 01023 (-CMath::pow(k, 3)+(3*d-3)*CMath::pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1)); 01024 for (k=degree; k<seq_length; k++) 01025 block_weights[k]=(-d+3*k+4)/3; 01026 } 01027 01028 return (block_weights!=NULL); 01029 } 01030 01031 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd_external() 01032 { 01033 ASSERT(weights); 01034 delete[] block_weights; 01035 block_weights=new float64_t[CMath::max(seq_length,degree)]; 01036 01037 if (block_weights) 01038 { 01039 int32_t i=0; 01040 block_weights[0]=weights[0]; 01041 for (i=1; i<CMath::max(seq_length,degree); i++) 01042 block_weights[i]=0; 01043 01044 for (i=1; i<CMath::max(seq_length,degree); i++) 01045 { 01046 block_weights[i]=block_weights[i-1]; 01047 01048 float64_t contrib=0; 01049 for (int32_t j=0; j<CMath::min(degree,i+1); j++) 01050 contrib+=weights[j]; 01051 01052 block_weights[i]+=contrib; 01053 } 01054 } 01055 01056 return (block_weights!=NULL); 01057 } 01058 01059 bool CWeightedDegreePositionStringKernel::init_block_weights_const() 01060 { 01061 delete[] block_weights; 01062 block_weights=new float64_t[seq_length]; 01063 01064 if (block_weights) 01065 { 01066 for (int32_t i=1; i<seq_length+1 ; i++) 01067 block_weights[i-1]=1.0/seq_length; 01068 } 01069 01070 return (block_weights!=NULL); 01071 } 01072 01073 bool CWeightedDegreePositionStringKernel::init_block_weights_linear() 01074 { 01075 delete[] block_weights; 01076 block_weights=new float64_t[seq_length]; 01077 01078 if (block_weights) 01079 { 01080 for (int32_t i=1; i<seq_length+1 ; i++) 01081 block_weights[i-1]=degree*i; 01082 } 01083 01084 return (block_weights!=NULL); 01085 } 01086 01087 bool CWeightedDegreePositionStringKernel::init_block_weights_sqpoly() 01088 { 01089 delete[] block_weights; 01090 block_weights=new float64_t[seq_length]; 01091 01092 if (block_weights) 01093 { 01094 for (int32_t i=1; i<degree+1 ; i++) 01095 block_weights[i-1]=((float64_t) i)*i; 01096 01097 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01098 block_weights[i-1]=i; 01099 } 01100 01101 return (block_weights!=NULL); 01102 } 01103 01104 bool CWeightedDegreePositionStringKernel::init_block_weights_cubicpoly() 01105 { 01106 delete[] block_weights; 01107 block_weights=new float64_t[seq_length]; 01108 01109 if (block_weights) 01110 { 01111 for (int32_t i=1; i<degree+1 ; i++) 01112 block_weights[i-1]=((float64_t) i)*i*i; 01113 01114 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01115 block_weights[i-1]=i; 01116 } 01117 01118 return (block_weights!=NULL); 01119 } 01120 01121 bool CWeightedDegreePositionStringKernel::init_block_weights_exp() 01122 { 01123 delete[] block_weights; 01124 block_weights=new float64_t[seq_length]; 01125 01126 if (block_weights) 01127 { 01128 for (int32_t i=1; i<degree+1 ; i++) 01129 block_weights[i-1]=exp(((float64_t) i/10.0)); 01130 01131 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01132 block_weights[i-1]=i; 01133 } 01134 01135 return (block_weights!=NULL); 01136 } 01137 01138 bool CWeightedDegreePositionStringKernel::init_block_weights_log() 01139 { 01140 delete[] block_weights; 01141 block_weights=new float64_t[seq_length]; 01142 01143 if (block_weights) 01144 { 01145 for (int32_t i=1; i<degree+1 ; i++) 01146 block_weights[i-1]=CMath::pow(CMath::log((float64_t) i),2); 01147 01148 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01149 block_weights[i-1]=i-degree+1+CMath::pow(CMath::log(degree+1.0),2); 01150 } 01151 01152 return (block_weights!=NULL); 01153 } 01154 01155 bool CWeightedDegreePositionStringKernel::init_block_weights() 01156 { 01157 switch (type) 01158 { 01159 case E_WD: 01160 return init_block_weights_from_wd(); 01161 case E_EXTERNAL: 01162 return init_block_weights_from_wd_external(); 01163 case E_BLOCK_CONST: 01164 return init_block_weights_const(); 01165 case E_BLOCK_LINEAR: 01166 return init_block_weights_linear(); 01167 case E_BLOCK_SQPOLY: 01168 return init_block_weights_sqpoly(); 01169 case E_BLOCK_CUBICPOLY: 01170 return init_block_weights_cubicpoly(); 01171 case E_BLOCK_EXP: 01172 return init_block_weights_exp(); 01173 case E_BLOCK_LOG: 01174 return init_block_weights_log(); 01175 }; 01176 return false; 01177 } 01178 01179 01180 01181 void* CWeightedDegreePositionStringKernel::compute_batch_helper(void* p) 01182 { 01183 S_THREAD_PARAM<DNATrie>* params = (S_THREAD_PARAM<DNATrie>*) p; 01184 int32_t j=params->j; 01185 CWeightedDegreePositionStringKernel* wd=params->kernel; 01186 CTrie<DNATrie>* tries=params->tries; 01187 float64_t* weights=params->weights; 01188 int32_t length=params->length; 01189 int32_t max_shift=params->max_shift; 01190 int32_t* vec=params->vec; 01191 float64_t* result=params->result; 01192 float64_t factor=params->factor; 01193 int32_t* shift=params->shift; 01194 int32_t* vec_idx=params->vec_idx; 01195 01196 for (int32_t i=params->start; i<params->end; i++) 01197 { 01198 int32_t len=0; 01199 CStringFeatures<char>* rhs_feat=((CStringFeatures<char>*) wd->get_rhs()); 01200 CAlphabet* alpha=wd->alphabet; 01201 01202 bool free_vec; 01203 char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len, free_vec); 01204 for (int32_t k=CMath::max(0,j-max_shift); k<CMath::min(len,j+wd->get_degree()+max_shift); k++) 01205 vec[k]=alpha->remap_to_bin(char_vec[k]); 01206 rhs_feat->free_feature_vector(char_vec, vec_idx[i], free_vec); 01207 01208 SG_UNREF(rhs_feat); 01209 01210 result[i] += factor*wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]); 01211 01212 if (wd->get_optimization_type()==SLOWBUTMEMEFFICIENT) 01213 { 01214 for (int32_t q=CMath::max(0,j-max_shift); q<CMath::min(len,j+max_shift+1); q++) 01215 { 01216 int32_t s=j-q ; 01217 if ((s>=1) && (s<=shift[q]) && (q+s<len)) 01218 { 01219 result[i] += 01220 wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, 01221 len, q, q+s, q, weights, (length!=0)), 01222 vec_idx[i])/(2.0*s); 01223 } 01224 } 01225 01226 for (int32_t s=1; (s<=shift[j]) && (j+s<len); s++) 01227 { 01228 result[i] += 01229 wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, 01230 len, j+s, j, j+s, weights, (length!=0)), 01231 vec_idx[i])/(2.0*s); 01232 } 01233 } 01234 } 01235 01236 return NULL; 01237 } 01238 01239 void CWeightedDegreePositionStringKernel::compute_batch( 01240 int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec, 01241 int32_t* IDX, float64_t* alphas, float64_t factor) 01242 { 01243 ASSERT(alphabet); 01244 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 01245 ASSERT(position_weights_lhs==NULL); 01246 ASSERT(position_weights_rhs==NULL); 01247 ASSERT(rhs); 01248 ASSERT(num_vec<=rhs->get_num_vectors()); 01249 ASSERT(num_vec>0); 01250 ASSERT(vec_idx); 01251 ASSERT(result); 01252 create_empty_tries(); 01253 01254 int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01255 ASSERT(num_feat>0); 01256 int32_t num_threads=parallel->get_num_threads(); 01257 ASSERT(num_threads>0); 01258 int32_t* vec=new int32_t[num_threads*num_feat]; 01259 01260 if (num_threads < 2) 01261 { 01262 #ifdef WIN32 01263 for (int32_t j=0; j<num_feat; j++) 01264 #else 01265 CSignal::clear_cancel(); 01266 for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++) 01267 #endif 01268 { 01269 init_optimization(num_suppvec, IDX, alphas, j); 01270 S_THREAD_PARAM<DNATrie> params; 01271 params.vec=vec; 01272 params.result=result; 01273 params.weights=weights; 01274 params.kernel=this; 01275 params.tries=&tries; 01276 params.factor=factor; 01277 params.j=j; 01278 params.start=0; 01279 params.end=num_vec; 01280 params.length=length; 01281 params.max_shift=max_shift; 01282 params.shift=shift; 01283 params.vec_idx=vec_idx; 01284 compute_batch_helper((void*) ¶ms); 01285 01286 SG_PROGRESS(j,0,num_feat); 01287 } 01288 } 01289 #ifndef WIN32 01290 else 01291 { 01292 01293 CSignal::clear_cancel(); 01294 for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++) 01295 { 01296 init_optimization(num_suppvec, IDX, alphas, j); 01297 pthread_t* threads = new pthread_t[num_threads-1]; 01298 S_THREAD_PARAM<DNATrie>* params = new S_THREAD_PARAM<DNATrie>[num_threads]; 01299 int32_t step= num_vec/num_threads; 01300 int32_t t; 01301 01302 for (t=0; t<num_threads-1; t++) 01303 { 01304 params[t].vec=&vec[num_feat*t]; 01305 params[t].result=result; 01306 params[t].weights=weights; 01307 params[t].kernel=this; 01308 params[t].tries=&tries; 01309 params[t].factor=factor; 01310 params[t].j=j; 01311 params[t].start = t*step; 01312 params[t].end = (t+1)*step; 01313 params[t].length=length; 01314 params[t].max_shift=max_shift; 01315 params[t].shift=shift; 01316 params[t].vec_idx=vec_idx; 01317 pthread_create(&threads[t], NULL, CWeightedDegreePositionStringKernel::compute_batch_helper, (void*)¶ms[t]); 01318 } 01319 01320 params[t].vec=&vec[num_feat*t]; 01321 params[t].result=result; 01322 params[t].weights=weights; 01323 params[t].kernel=this; 01324 params[t].tries=&tries; 01325 params[t].factor=factor; 01326 params[t].j=j; 01327 params[t].start=t*step; 01328 params[t].end=num_vec; 01329 params[t].length=length; 01330 params[t].max_shift=max_shift; 01331 params[t].shift=shift; 01332 params[t].vec_idx=vec_idx; 01333 compute_batch_helper((void*) ¶ms[t]); 01334 01335 for (t=0; t<num_threads-1; t++) 01336 pthread_join(threads[t], NULL); 01337 SG_PROGRESS(j,0,num_feat); 01338 01339 delete[] params; 01340 delete[] threads; 01341 } 01342 } 01343 #endif 01344 01345 delete[] vec; 01346 01347 //really also free memory as this can be huge on testing especially when 01348 //using the combined kernel 01349 create_empty_tries(); 01350 } 01351 01352 float64_t* CWeightedDegreePositionStringKernel::compute_scoring( 01353 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, float64_t* result, 01354 int32_t num_suppvec, int32_t* IDX, float64_t* alphas) 01355 { 01356 ASSERT(position_weights_lhs==NULL); 01357 ASSERT(position_weights_rhs==NULL); 01358 01359 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01360 ASSERT(num_feat>0); 01361 ASSERT(alphabet); 01362 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 01363 01364 num_sym=4; //for now works only w/ DNA 01365 01366 ASSERT(max_degree>0); 01367 01368 // === variables 01369 int32_t* nofsKmers=new int32_t[max_degree]; 01370 float64_t** C=new float64_t*[max_degree]; 01371 float64_t** L=new float64_t*[max_degree]; 01372 float64_t** R=new float64_t*[max_degree]; 01373 01374 int32_t i; 01375 int32_t k; 01376 01377 // --- return table 01378 int32_t bigtabSize=0; 01379 for (k=0; k<max_degree; ++k ) 01380 { 01381 nofsKmers[k]=(int32_t) CMath::pow(num_sym, k+1); 01382 const int32_t tabSize=nofsKmers[k]*num_feat; 01383 bigtabSize+=tabSize; 01384 } 01385 result=new float64_t[bigtabSize]; 01386 01387 // --- auxilliary tables 01388 int32_t tabOffs=0; 01389 for( k = 0; k < max_degree; ++k ) 01390 { 01391 const int32_t tabSize = nofsKmers[k] * num_feat; 01392 C[k] = &result[tabOffs]; 01393 L[k] = new float64_t[ tabSize ]; 01394 R[k] = new float64_t[ tabSize ]; 01395 tabOffs+=tabSize; 01396 for(i = 0; i < tabSize; i++ ) 01397 { 01398 C[k][i] = 0.0; 01399 L[k][i] = 0.0; 01400 R[k][i] = 0.0; 01401 } 01402 } 01403 01404 // --- tree parsing info 01405 float64_t* margFactors=new float64_t[degree]; 01406 01407 int32_t* x = new int32_t[ degree+1 ]; 01408 int32_t* substrs = new int32_t[ degree+1 ]; 01409 // - fill arrays 01410 margFactors[0] = 1.0; 01411 substrs[0] = 0; 01412 for( k=1; k < degree; ++k ) { 01413 margFactors[k] = 0.25 * margFactors[k-1]; 01414 substrs[k] = -1; 01415 } 01416 substrs[degree] = -1; 01417 // - fill struct 01418 struct TreeParseInfo info; 01419 info.num_sym = num_sym; 01420 info.num_feat = num_feat; 01421 info.p = -1; 01422 info.k = -1; 01423 info.nofsKmers = nofsKmers; 01424 info.margFactors = margFactors; 01425 info.x = x; 01426 info.substrs = substrs; 01427 info.y0 = 0; 01428 info.C_k = NULL; 01429 info.L_k = NULL; 01430 info.R_k = NULL; 01431 01432 // === main loop 01433 i = 0; // total progress 01434 for( k = 0; k < max_degree; ++k ) 01435 { 01436 const int32_t nofKmers = nofsKmers[ k ]; 01437 info.C_k = C[k]; 01438 info.L_k = L[k]; 01439 info.R_k = R[k]; 01440 01441 // --- run over all trees 01442 for(int32_t p = 0; p < num_feat; ++p ) 01443 { 01444 init_optimization( num_suppvec, IDX, alphas, p ); 01445 int32_t tree = p ; 01446 for(int32_t j = 0; j < degree+1; j++ ) { 01447 x[j] = -1; 01448 } 01449 tries.traverse( tree, p, info, 0, x, k ); 01450 SG_PROGRESS(i++,0,num_feat*max_degree); 01451 } 01452 01453 // --- add partial overlap scores 01454 if( k > 0 ) { 01455 const int32_t j = k - 1; 01456 const int32_t nofJmers = (int32_t) CMath::pow( num_sym, j+1 ); 01457 for(int32_t p = 0; p < num_feat; ++p ) { 01458 const int32_t offsetJ = nofJmers * p; 01459 const int32_t offsetJ1 = nofJmers * (p+1); 01460 const int32_t offsetK = nofKmers * p; 01461 int32_t y; 01462 int32_t sym; 01463 for( y = 0; y < nofJmers; ++y ) { 01464 for( sym = 0; sym < num_sym; ++sym ) { 01465 const int32_t y_sym = num_sym*y + sym; 01466 const int32_t sym_y = nofJmers*sym + y; 01467 ASSERT(0<=y_sym && y_sym<nofKmers); 01468 ASSERT(0<=sym_y && sym_y<nofKmers); 01469 C[k][ y_sym + offsetK ] += L[j][ y + offsetJ ]; 01470 if( p < num_feat-1 ) { 01471 C[k][ sym_y + offsetK ] += R[j][ y + offsetJ1 ]; 01472 } 01473 } 01474 } 01475 } 01476 } 01477 // if( k > 1 ) 01478 // j = k-1 01479 // for all positions p 01480 // for all j-mers y 01481 // for n in {A,C,G,T} 01482 // C_k[ p, [y,n] ] += L_j[ p, y ] 01483 // C_k[ p, [n,y] ] += R_j[ p+1, y ] 01484 // end; 01485 // end; 01486 // end; 01487 // end; 01488 } 01489 01490 // === return a vector 01491 num_feat=1; 01492 num_sym = bigtabSize; 01493 // --- clean up 01494 delete[] nofsKmers; 01495 delete[] margFactors; 01496 delete[] substrs; 01497 delete[] x; 01498 delete[] C; 01499 for( k = 0; k < max_degree; ++k ) { 01500 delete[] L[k]; 01501 delete[] R[k]; 01502 } 01503 delete[] L; 01504 delete[] R; 01505 return result; 01506 } 01507 01508 char* CWeightedDegreePositionStringKernel::compute_consensus( 01509 int32_t &num_feat, int32_t num_suppvec, int32_t* IDX, float64_t* alphas) 01510 { 01511 ASSERT(position_weights_lhs==NULL); 01512 ASSERT(position_weights_rhs==NULL); 01513 //only works for order <= 32 01514 ASSERT(degree<=32); 01515 ASSERT(!tries.get_use_compact_terminal_nodes()); 01516 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01517 ASSERT(num_feat>0); 01518 ASSERT(alphabet); 01519 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 01520 01521 //consensus 01522 char* result=new char[num_feat]; 01523 01524 //backtracking and scoring table 01525 int32_t num_tables=CMath::max(1,num_feat-degree+1); 01526 DynArray<ConsensusEntry>** table=new DynArray<ConsensusEntry>*[num_tables]; 01527 01528 for (int32_t i=0; i<num_tables; i++) 01529 table[i]=new DynArray<ConsensusEntry>(num_suppvec/10); 01530 01531 //compute consensus via dynamic programming 01532 for (int32_t i=0; i<num_tables; i++) 01533 { 01534 bool cumulative=false; 01535 01536 if (i<num_tables-1) 01537 init_optimization(num_suppvec, IDX, alphas, i); 01538 else 01539 { 01540 init_optimization(num_suppvec, IDX, alphas, i, num_feat-1); 01541 cumulative=true; 01542 } 01543 01544 if (i==0) 01545 tries.fill_backtracking_table(i, NULL, table[i], cumulative, weights); 01546 else 01547 tries.fill_backtracking_table(i, table[i-1], table[i], cumulative, weights); 01548 01549 SG_PROGRESS(i,0,num_feat); 01550 } 01551 01552 01553 //int32_t n=table[0]->get_num_elements(); 01554 01555 //for (int32_t i=0; i<n; i++) 01556 //{ 01557 // ConsensusEntry e= table[0]->get_element(i); 01558 // SG_PRint32_t("first: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt); 01559 //} 01560 01561 //n=table[num_tables-1]->get_num_elements(); 01562 //for (int32_t i=0; i<n; i++) 01563 //{ 01564 // ConsensusEntry e= table[num_tables-1]->get_element(i); 01565 // SG_PRint32_t("last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt); 01566 //} 01567 //n=table[num_tables-2]->get_num_elements(); 01568 //for (int32_t i=0; i<n; i++) 01569 //{ 01570 // ConsensusEntry e= table[num_tables-2]->get_element(i); 01571 // SG_PRINT("second last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt); 01572 //} 01573 01574 const char* acgt="ACGT"; 01575 01576 //backtracking start 01577 int32_t max_idx=-1; 01578 float32_t max_score=0; 01579 int32_t num_elements=table[num_tables-1]->get_num_elements(); 01580 01581 for (int32_t i=0; i<num_elements; i++) 01582 { 01583 float64_t sc=table[num_tables-1]->get_element(i).score; 01584 if (sc>max_score || max_idx==-1) 01585 { 01586 max_idx=i; 01587 max_score=sc; 01588 } 01589 } 01590 uint64_t endstr=table[num_tables-1]->get_element(max_idx).string; 01591 01592 SG_INFO("max_idx:%d num_el:%d num_feat:%d num_tables:%d max_score:%f\n", max_idx, num_elements, num_feat, num_tables, max_score); 01593 01594 for (int32_t i=0; i<degree; i++) 01595 result[num_feat-1-i]=acgt[(endstr >> (2*i)) & 3]; 01596 01597 if (num_tables>1) 01598 { 01599 for (int32_t i=num_tables-1; i>=0; i--) 01600 { 01601 //SG_PRINT("max_idx: %d, i:%d\n", max_idx, i); 01602 result[i]=acgt[table[i]->get_element(max_idx).string >> (2*(degree-1)) & 3]; 01603 max_idx=table[i]->get_element(max_idx).bt; 01604 } 01605 } 01606 01607 //for (int32_t t=0; t<num_tables; t++) 01608 //{ 01609 // n=table[t]->get_num_elements(); 01610 // for (int32_t i=0; i<n; i++) 01611 // { 01612 // ConsensusEntry e= table[t]->get_element(i); 01613 // SG_PRINT("table[%d,%d]: str:0%0llx sc:%+f bt:%d\n",t,i, e.string,e.score,e.bt); 01614 // } 01615 //} 01616 01617 for (int32_t i=0; i<num_tables; i++) 01618 delete table[i]; 01619 01620 delete[] table; 01621 return result; 01622 } 01623 01624 01625 float64_t* CWeightedDegreePositionStringKernel::extract_w( 01626 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, 01627 float64_t* w_result, int32_t num_suppvec, int32_t* IDX, float64_t* alphas) 01628 { 01629 delete_optimization(); 01630 use_poim_tries=true; 01631 poim_tries.delete_trees(false); 01632 01633 // === check 01634 ASSERT(position_weights_lhs==NULL); 01635 ASSERT(position_weights_rhs==NULL); 01636 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01637 ASSERT(num_feat>0); 01638 ASSERT(alphabet->get_alphabet()==DNA); 01639 ASSERT(max_degree>0); 01640 01641 // === general variables 01642 static const int32_t NUM_SYMS = poim_tries.NUM_SYMS; 01643 const int32_t seqLen = num_feat; 01644 float64_t** subs; 01645 int32_t i; 01646 int32_t k; 01647 //int32_t y; 01648 01649 // === init tables "subs" for substring scores / POIMs 01650 // --- compute table sizes 01651 int32_t* offsets; 01652 int32_t offset; 01653 offsets = new int32_t[ max_degree ]; 01654 offset = 0; 01655 for( k = 0; k < max_degree; ++k ) { 01656 offsets[k] = offset; 01657 const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 ); 01658 const int32_t tabSize = nofsKmers * seqLen; 01659 offset += tabSize; 01660 } 01661 // --- allocate memory 01662 const int32_t bigTabSize = offset; 01663 w_result=new float64_t[bigTabSize]; 01664 for (i=0; i<bigTabSize; ++i) 01665 w_result[i]=0; 01666 01667 // --- set pointers for tables 01668 subs = new float64_t*[ max_degree ]; 01669 ASSERT( subs != NULL ); 01670 for( k = 0; k < max_degree; ++k ) { 01671 subs[k] = &w_result[ offsets[k] ]; 01672 } 01673 delete[] offsets; 01674 01675 // === init trees; extract "w" 01676 init_optimization( num_suppvec, IDX, alphas, -1); 01677 poim_tries.POIMs_extract_W( subs, max_degree ); 01678 01679 // === clean; return "subs" as vector 01680 delete[] subs; 01681 num_feat = 1; 01682 num_sym = bigTabSize; 01683 use_poim_tries=false; 01684 poim_tries.delete_trees(false); 01685 return w_result; 01686 } 01687 01688 float64_t* CWeightedDegreePositionStringKernel::compute_POIM( 01689 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, 01690 float64_t* poim_result, int32_t num_suppvec, int32_t* IDX, 01691 float64_t* alphas, float64_t* distrib ) 01692 { 01693 delete_optimization(); 01694 use_poim_tries=true; 01695 poim_tries.delete_trees(false); 01696 01697 // === check 01698 ASSERT(position_weights_lhs==NULL); 01699 ASSERT(position_weights_rhs==NULL); 01700 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01701 ASSERT(num_feat>0); 01702 ASSERT(alphabet->get_alphabet()==DNA); 01703 ASSERT(max_degree!=0); 01704 ASSERT(distrib); 01705 01706 // === general variables 01707 static const int32_t NUM_SYMS = poim_tries.NUM_SYMS; 01708 const int32_t seqLen = num_feat; 01709 float64_t** subs; 01710 int32_t i; 01711 int32_t k; 01712 01713 // === DEBUGGING mode 01714 // 01715 // Activated if "max_degree" < 0. 01716 // Allows to output selected partial score. 01717 // 01718 // |max_degree| mod 4 01719 // 0: substring 01720 // 1: superstring 01721 // 2: left overlap 01722 // 3: right overlap 01723 // 01724 const int32_t debug = ( max_degree < 0 ) ? ( abs(max_degree) % 4 + 1 ) : 0; 01725 if( debug ) { 01726 max_degree = abs(max_degree) / 4; 01727 switch( debug ) { 01728 case 1: { 01729 printf( "POIM DEBUGGING: substring only (max order=%d)\n", max_degree ); 01730 break; 01731 } 01732 case 2: { 01733 printf( "POIM DEBUGGING: superstring only (max order=%d)\n", max_degree ); 01734 break; 01735 } 01736 case 3: { 01737 printf( "POIM DEBUGGING: left overlap only (max order=%d)\n", max_degree ); 01738 break; 01739 } 01740 case 4: { 01741 printf( "POIM DEBUGGING: right overlap only (max order=%d)\n", max_degree ); 01742 break; 01743 } 01744 default: { 01745 printf( "POIM DEBUGGING: something is wrong (max order=%d)\n", max_degree ); 01746 ASSERT(0); 01747 break; 01748 } 01749 } 01750 } 01751 01752 // --- compute table sizes 01753 int32_t* offsets; 01754 int32_t offset; 01755 offsets = new int32_t[ max_degree ]; 01756 offset = 0; 01757 for( k = 0; k < max_degree; ++k ) { 01758 offsets[k] = offset; 01759 const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 ); 01760 const int32_t tabSize = nofsKmers * seqLen; 01761 offset += tabSize; 01762 } 01763 // --- allocate memory 01764 const int32_t bigTabSize=offset; 01765 poim_result=new float64_t[bigTabSize]; 01766 for (i=0; i<bigTabSize; ++i ) 01767 poim_result[i]=0; 01768 01769 // --- set pointers for tables 01770 subs=new float64_t*[max_degree]; 01771 for (k=0; k<max_degree; ++k) 01772 subs[k]=&poim_result[offsets[k]]; 01773 01774 delete[] offsets; 01775 01776 // === init trees; precalc S, L and R 01777 init_optimization( num_suppvec, IDX, alphas, -1); 01778 poim_tries.POIMs_precalc_SLR( distrib ); 01779 01780 // === compute substring scores 01781 if( debug==0 || debug==1 ) { 01782 poim_tries.POIMs_extract_W( subs, max_degree ); 01783 for( k = 1; k < max_degree; ++k ) { 01784 const int32_t nofKmers2 = ( k > 1 ) ? (int32_t) CMath::pow(NUM_SYMS,k-1) : 0; 01785 const int32_t nofKmers1 = (int32_t) CMath::pow( NUM_SYMS, k ); 01786 const int32_t nofKmers0 = nofKmers1 * NUM_SYMS; 01787 for( i = 0; i < seqLen; ++i ) { 01788 float64_t* const subs_k2i1 = ( k>1 && i<seqLen-1 ) ? &subs[k-2][(i+1)*nofKmers2] : NULL; 01789 float64_t* const subs_k1i1 = ( i < seqLen-1 ) ? &subs[k-1][(i+1)*nofKmers1] : NULL; 01790 float64_t* const subs_k1i0 = & subs[ k-1 ][ i*nofKmers1 ]; 01791 float64_t* const subs_k0i = & subs[ k-0 ][ i*nofKmers0 ]; 01792 int32_t y0; 01793 for( y0 = 0; y0 < nofKmers0; ++y0 ) { 01794 const int32_t y1l = y0 / NUM_SYMS; 01795 const int32_t y1r = y0 % nofKmers1; 01796 const int32_t y2 = y1r / NUM_SYMS; 01797 subs_k0i[ y0 ] += subs_k1i0[ y1l ]; 01798 if( i < seqLen-1 ) { 01799 subs_k0i[ y0 ] += subs_k1i1[ y1r ]; 01800 if( k > 1 ) { 01801 subs_k0i[ y0 ] -= subs_k2i1[ y2 ]; 01802 } 01803 } 01804 } 01805 } 01806 } 01807 } 01808 01809 // === compute POIMs 01810 poim_tries.POIMs_add_SLR( subs, max_degree, debug ); 01811 01812 // === clean; return "subs" as vector 01813 delete[] subs; 01814 num_feat = 1; 01815 num_sym = bigTabSize; 01816 01817 use_poim_tries=false; 01818 poim_tries.delete_trees(false); 01819 01820 return poim_result; 01821 } 01822 01823 01824 void CWeightedDegreePositionStringKernel::prepare_POIM2( 01825 float64_t* distrib, int32_t num_sym, int32_t num_feat) 01826 { 01827 free(m_poim_distrib); 01828 m_poim_distrib=(float64_t*)malloc(num_sym*num_feat*sizeof(float64_t)); 01829 ASSERT(m_poim_distrib); 01830 01831 memcpy(m_poim_distrib, distrib, num_sym*num_feat*sizeof(float64_t)); 01832 m_poim_num_sym=num_sym; 01833 m_poim_num_feat=num_feat; 01834 } 01835 01836 void CWeightedDegreePositionStringKernel::compute_POIM2( 01837 int32_t max_degree, CSVM* svm) 01838 { 01839 ASSERT(svm); 01840 int32_t num_suppvec=svm->get_num_support_vectors(); 01841 int32_t* sv_idx=new int32_t[num_suppvec]; 01842 float64_t* sv_weight=new float64_t[num_suppvec]; 01843 01844 for (int32_t i=0; i<num_suppvec; i++) 01845 { 01846 sv_idx[i]=svm->get_support_vector(i); 01847 sv_weight[i]=svm->get_alpha(i); 01848 } 01849 01850 if ((max_degree < 1) || (max_degree > 12)) 01851 { 01852 //SG_WARNING( "max_degree out of range 1..12 (%d).\n", max_degree); 01853 SG_WARNING( "max_degree out of range 1..12 (%d). setting to 1.\n", max_degree); 01854 max_degree=1; 01855 } 01856 01857 int32_t num_feat = m_poim_num_feat; 01858 int32_t num_sym = m_poim_num_sym; 01859 free(m_poim); 01860 01861 m_poim = compute_POIM(max_degree, num_feat, num_sym, NULL, num_suppvec, sv_idx, 01862 sv_weight, m_poim_distrib); 01863 01864 ASSERT(num_feat==1); 01865 m_poim_result_len=num_sym; 01866 01867 delete[] sv_weight; 01868 delete[] sv_idx; 01869 } 01870 01871 void CWeightedDegreePositionStringKernel::get_POIM2( 01872 float64_t** poim, int32_t* result_len) 01873 { 01874 *poim=(float64_t*) malloc(m_poim_result_len*sizeof(float64_t)); 01875 ASSERT(*poim); 01876 memcpy(*poim, m_poim, m_poim_result_len*sizeof(float64_t)) ; 01877 *result_len=m_poim_result_len ; 01878 } 01879 01880 void CWeightedDegreePositionStringKernel::cleanup_POIM2() 01881 { 01882 free(m_poim) ; 01883 m_poim=NULL ; 01884 free(m_poim_distrib) ; 01885 m_poim_distrib=NULL ; 01886 m_poim_num_sym=0 ; 01887 m_poim_num_sym=0 ; 01888 m_poim_result_len=0 ; 01889 } 01890 01891 void CWeightedDegreePositionStringKernel::load_serializable_post(void) throw (ShogunException) 01892 { 01893 CKernel::load_serializable_post(); 01894 01895 tries=CTrie<DNATrie>(degree); 01896 poim_tries=CTrie<POIMTrie>(degree); 01897 01898 if (weights) 01899 init_block_weights(); 01900 } 01901 01902 void CWeightedDegreePositionStringKernel::init() 01903 { 01904 weights=NULL; 01905 position_weights=NULL; 01906 position_weights_len=0; 01907 01908 position_weights_lhs=NULL; 01909 position_weights_lhs_len=0; 01910 position_weights_rhs=NULL; 01911 position_weights_rhs_len=0; 01912 01913 weights_buffer=NULL; 01914 mkl_stepsize=1; 01915 degree=1; 01916 length=0; 01917 01918 max_shift=0; 01919 max_mismatch=0; 01920 seq_length=0; 01921 shift=NULL; 01922 shift_len=0; 01923 01924 block_weights=NULL; 01925 block_computation=true; 01926 type=E_EXTERNAL; 01927 which_degree=-1; 01928 tries=CTrie<DNATrie>(1); 01929 poim_tries=CTrie<POIMTrie>(1); 01930 01931 tree_initialized=false; 01932 use_poim_tries=false; 01933 m_poim_distrib=NULL; 01934 01935 m_poim=NULL; 01936 m_poim_num_sym=0; 01937 m_poim_num_feat=0; 01938 m_poim_result_len=0; 01939 01940 alphabet=NULL; 01941 01942 properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION; 01943 01944 set_normalizer(new CSqrtDiagKernelNormalizer()); 01945 01946 m_parameters->add_matrix(&weights, &weights_degree, &weights_length, 01947 "weights", "WD Kernel weights."); 01948 m_parameters->add_vector(&position_weights, &position_weights_len, 01949 "position_weights", 01950 "Weights per position."); 01951 m_parameters->add_vector(&position_weights_lhs, &position_weights_lhs_len, 01952 "position_weights_lhs", 01953 "Weights per position left hand side."); 01954 m_parameters->add_vector(&position_weights_rhs, &position_weights_rhs_len, 01955 "position_weights_rhs", 01956 "Weights per position right hand side."); 01957 m_parameters->add_vector(&shift, &shift_len, 01958 "shift", 01959 "Shift Vector."); 01960 m_parameters->add(&mkl_stepsize, "mkl_stepsize", "MKL step size."); 01961 m_parameters->add(°ree, "degree", "Order of WD kernel."); 01962 m_parameters->add(&max_mismatch, "max_mismatch", 01963 "Number of allowed mismatches."); 01964 m_parameters->add(&block_computation, "block_computation", 01965 "If block computation shall be used."); 01966 m_parameters->add((machine_int_t*) &type, "type", 01967 "WeightedDegree kernel type."); 01968 m_parameters->add(&which_degree, "which_degree", 01969 "Unqueal -1 if just a single degree is selected."); 01970 m_parameters->add((CSGObject**) &alphabet, "alphabet", 01971 "Alphabet of Features."); 01972 }