00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013 #include "lib/common.h"
00014 #include "lib/io.h"
00015 #include "lib/File.h"
00016 #include "lib/Time.h"
00017 #include "lib/Signal.h"
00018
00019 #include "base/Parallel.h"
00020
00021 #include "kernel/Kernel.h"
00022 #include "kernel/IdentityKernelNormalizer.h"
00023 #include "features/Features.h"
00024
00025 #include "classifier/svm/SVM.h"
00026
00027 #include <string.h>
00028 #include <unistd.h>
00029 #include <math.h>
00030
00031 #ifndef WIN32
00032 #include <pthread.h>
00033 #endif
00034
00035 using namespace shogun;
00036
00037 CKernel::CKernel()
00038 : CSGObject(), cache_size(10), kernel_matrix(NULL), lhs(NULL),
00039 rhs(NULL), num_lhs(0), num_rhs(0), combined_kernel_weight(1),
00040 optimization_initialized(false), opt_type(FASTBUTMEMHUNGRY),
00041 properties(KP_NONE), normalizer(NULL)
00042 {
00043
00044
00045
00046 set_normalizer(new CIdentityKernelNormalizer());
00047 }
00048
00049 CKernel::CKernel(int32_t size)
00050 : CSGObject(), kernel_matrix(NULL), lhs(NULL), rhs(NULL), num_lhs(0),
00051 num_rhs(0), combined_kernel_weight(1), optimization_initialized(false),
00052 opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE), normalizer(NULL)
00053 {
00054 if (size<10)
00055 size=10;
00056
00057 cache_size=size;
00058
00059
00060 if (get_is_initialized())
00061 SG_ERROR( "COptimizableKernel still initialized on destruction");
00062
00063 set_normalizer(new CIdentityKernelNormalizer());
00064 }
00065
00066
00067 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject(),
00068 kernel_matrix(NULL), lhs(NULL), rhs(NULL), num_lhs(0), num_rhs(0),
00069 combined_kernel_weight(1), optimization_initialized(false),
00070 opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE), normalizer(NULL)
00071 {
00072 if (size<10)
00073 size=10;
00074
00075 cache_size=size;
00076
00077 if (get_is_initialized())
00078 SG_ERROR("Kernel initialized on construction.\n");
00079
00080 set_normalizer(new CIdentityKernelNormalizer());
00081 init(p_lhs, p_rhs);
00082 }
00083
00084 CKernel::~CKernel()
00085 {
00086 if (get_is_initialized())
00087 SG_ERROR("Kernel still initialized on destruction.\n");
00088
00089 remove_lhs_and_rhs();
00090 SG_UNREF(normalizer);
00091
00092 SG_INFO("Kernel deleted (%p).\n", this);
00093 }
00094
00095 void CKernel::get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n)
00096 {
00097 ASSERT(dst && m && n);
00098
00099 float64_t* result = NULL;
00100
00101 if (has_features())
00102 {
00103 int32_t num_vec1=get_num_vec_lhs();
00104 int32_t num_vec2=get_num_vec_rhs();
00105 *m=num_vec1;
00106 *n=num_vec2;
00107
00108 int64_t total_num = ((int64_t) num_vec1) * num_vec2;
00109 SG_DEBUG( "allocating memory for a kernel matrix"
00110 " of size %dx%d\n", num_vec1, num_vec2);
00111
00112 result=(float64_t*) malloc(sizeof(float64_t)*total_num);
00113 ASSERT(result);
00114 get_kernel_matrix<float64_t>(num_vec1,num_vec2, result);
00115 }
00116 else
00117 SG_ERROR( "no features assigned to kernel\n");
00118
00119 *dst=result;
00120 }
00121
00122
00123
00124 bool CKernel::init(CFeatures* l, CFeatures* r)
00125 {
00126
00127 ASSERT(l);
00128 ASSERT(r);
00129
00130
00131 ASSERT(l->get_feature_class()==r->get_feature_class());
00132 ASSERT(l->get_feature_type()==r->get_feature_type());
00133
00134
00135 remove_lhs_and_rhs();
00136
00137
00138 SG_REF(l);
00139 if (l!=r)
00140 SG_REF(r);
00141
00142 lhs=l;
00143 rhs=r;
00144
00145 ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00146 ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00147
00148 num_lhs=l->get_num_vectors();
00149 num_rhs=r->get_num_vectors();
00150
00151 return true;
00152 }
00153
00154 bool CKernel::set_normalizer(CKernelNormalizer* n)
00155 {
00156 SG_REF(n);
00157 SG_UNREF(normalizer);
00158 normalizer=n;
00159
00160 return (normalizer!=NULL);
00161 }
00162
00163 CKernelNormalizer* CKernel::get_normalizer()
00164 {
00165 SG_REF(normalizer)
00166 return normalizer;
00167 }
00168
00169 bool CKernel::init_normalizer()
00170 {
00171 return normalizer->init(this);
00172 }
00173
00174 void CKernel::cleanup()
00175 {
00176 remove_lhs_and_rhs();
00177 }
00178
00179
00180
00181 bool CKernel::load(char* fname)
00182 {
00183 return false;
00184 }
00185
00186 bool CKernel::save(char* fname)
00187 {
00188 int32_t i=0;
00189 int32_t num_left=get_num_vec_lhs();
00190 int32_t num_right=rhs->get_num_vectors();
00191 KERNELCACHE_IDX num_total=num_left*num_right;
00192
00193 CFile f(fname, 'w', F_DREAL);
00194
00195 for (int32_t l=0; l< (int32_t) num_left && f.is_ok(); l++)
00196 {
00197 for (int32_t r=0; r< (int32_t) num_right && f.is_ok(); r++)
00198 {
00199 if (!(i % (num_total/200+1)))
00200 SG_PROGRESS(i, 0, num_total-1);
00201
00202 float64_t k=kernel(l,r);
00203 f.save_real_data(&k, 1);
00204
00205 i++;
00206 }
00207 }
00208 SG_DONE();
00209
00210 if (f.is_ok())
00211 SG_INFO( "kernel matrix of size %ld x %ld written (filesize: %ld)\n", num_left, num_right, num_total*sizeof(KERNELCACHE_ELEM));
00212
00213 return (f.is_ok());
00214 }
00215
00216 void CKernel::remove_lhs_and_rhs()
00217 {
00218 if (rhs!=lhs)
00219 SG_UNREF(rhs);
00220 rhs = NULL;
00221 num_rhs=0;
00222
00223 SG_UNREF(lhs);
00224 lhs = NULL;
00225 num_lhs=0;
00226
00227
00228 }
00229
00230 void CKernel::remove_lhs()
00231 {
00232 if (rhs==lhs)
00233 rhs=NULL;
00234 SG_UNREF(lhs);
00235 lhs = NULL;
00236 num_lhs=NULL;
00237
00238
00239 }
00240
00242 void CKernel::remove_rhs()
00243 {
00244 if (rhs!=lhs)
00245 SG_UNREF(rhs);
00246 rhs = NULL;
00247 num_rhs=NULL;
00248
00249
00250 }
00251
00252
00253 void CKernel::list_kernel()
00254 {
00255 SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00256 get_combined_kernel_weight(),
00257 get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00258 "SLOWBUTMEMEFFICIENT");
00259
00260 switch (get_kernel_type())
00261 {
00262 case K_UNKNOWN:
00263 SG_INFO( "K_UNKNOWN ");
00264 break;
00265 case K_LINEAR:
00266 SG_INFO( "K_LINEAR ");
00267 break;
00268 case K_SPARSELINEAR:
00269 SG_INFO( "K_SPARSELINEAR ");
00270 break;
00271 case K_POLY:
00272 SG_INFO( "K_POLY ");
00273 break;
00274 case K_GAUSSIAN:
00275 SG_INFO( "K_GAUSSIAN ");
00276 break;
00277 case K_SPARSEGAUSSIAN:
00278 SG_INFO( "K_SPARSEGAUSSIAN ");
00279 break;
00280 case K_GAUSSIANSHIFT:
00281 SG_INFO( "K_GAUSSIANSHIFT ");
00282 break;
00283 case K_HISTOGRAM:
00284 SG_INFO( "K_HISTOGRAM ");
00285 break;
00286 case K_SALZBERG:
00287 SG_INFO( "K_SALZBERG ");
00288 break;
00289 case K_LOCALITYIMPROVED:
00290 SG_INFO( "K_LOCALITYIMPROVED ");
00291 break;
00292 case K_SIMPLELOCALITYIMPROVED:
00293 SG_INFO( "K_SIMPLELOCALITYIMPROVED ");
00294 break;
00295 case K_FIXEDDEGREE:
00296 SG_INFO( "K_FIXEDDEGREE ");
00297 break;
00298 case K_WEIGHTEDDEGREE:
00299 SG_INFO( "K_WEIGHTEDDEGREE ");
00300 break;
00301 case K_WEIGHTEDDEGREEPOS:
00302 SG_INFO( "K_WEIGHTEDDEGREEPOS ");
00303 break;
00304 case K_WEIGHTEDCOMMWORDSTRING:
00305 SG_INFO( "K_WEIGHTEDCOMMWORDSTRING ");
00306 break;
00307 case K_POLYMATCH:
00308 SG_INFO( "K_POLYMATCH ");
00309 break;
00310 case K_ALIGNMENT:
00311 SG_INFO( "K_ALIGNMENT ");
00312 break;
00313 case K_COMMWORDSTRING:
00314 SG_INFO( "K_COMMWORDSTRING ");
00315 break;
00316 case K_COMMULONGSTRING:
00317 SG_INFO( "K_COMMULONGSTRING ");
00318 break;
00319 case K_COMBINED:
00320 SG_INFO( "K_COMBINED ");
00321 break;
00322 case K_AUC:
00323 SG_INFO( "K_AUC ");
00324 break;
00325 case K_CUSTOM:
00326 SG_INFO( "K_CUSTOM ");
00327 break;
00328 case K_SIGMOID:
00329 SG_INFO( "K_SIGMOID ");
00330 break;
00331 case K_CHI2:
00332 SG_INFO( "K_CHI2 ");
00333 break;
00334 case K_DIAG:
00335 SG_INFO( "K_DIAG ");
00336 break;
00337 case K_CONST:
00338 SG_INFO( "K_CONST ");
00339 break;
00340 case K_DISTANCE:
00341 SG_INFO( "K_DISTANCE ");
00342 break;
00343 case K_LOCALALIGNMENT:
00344 SG_INFO( "K_LOCALALIGNMENT ");
00345 break;
00346 case K_TPPK:
00347 SG_INFO( "K_TPPK ");
00348 break;
00349 default:
00350 SG_ERROR( "ERROR UNKNOWN KERNEL TYPE");
00351 break;
00352 }
00353
00354 switch (get_feature_class())
00355 {
00356 case C_UNKNOWN:
00357 SG_INFO( "C_UNKNOWN ");
00358 break;
00359 case C_SIMPLE:
00360 SG_INFO( "C_SIMPLE ");
00361 break;
00362 case C_SPARSE:
00363 SG_INFO( "C_SPARSE ");
00364 break;
00365 case C_STRING:
00366 SG_INFO( "C_STRING ");
00367 break;
00368 case C_COMBINED:
00369 SG_INFO( "C_COMBINED ");
00370 break;
00371 case C_ANY:
00372 SG_INFO( "C_ANY ");
00373 break;
00374 default:
00375 SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
00376 }
00377
00378 switch (get_feature_type())
00379 {
00380 case F_UNKNOWN:
00381 SG_INFO( "F_UNKNOWN ");
00382 break;
00383 case F_DREAL:
00384 SG_INFO( "F_REAL ");
00385 break;
00386 case F_SHORT:
00387 SG_INFO( "F_SHORT ");
00388 break;
00389 case F_CHAR:
00390 SG_INFO( "F_CHAR ");
00391 break;
00392 case F_INT:
00393 SG_INFO( "F_INT ");
00394 break;
00395 case F_BYTE:
00396 SG_INFO( "F_BYTE ");
00397 break;
00398 case F_WORD:
00399 SG_INFO( "F_WORD ");
00400 break;
00401 case F_ULONG:
00402 SG_INFO( "F_ULONG ");
00403 break;
00404 case F_ANY:
00405 SG_INFO( "F_ANY ");
00406 break;
00407 default:
00408 SG_ERROR( "ERROR UNKNOWN FEATURE TYPE");
00409 break;
00410 }
00411 SG_INFO( "\n");
00412 }
00413
00414 bool CKernel::init_optimization(
00415 int32_t count, int32_t *IDX, float64_t * weights)
00416 {
00417 SG_ERROR( "kernel does not support linadd optimization\n");
00418 return false ;
00419 }
00420
00421 bool CKernel::delete_optimization()
00422 {
00423 SG_ERROR( "kernel does not support linadd optimization\n");
00424 return false;
00425 }
00426
00427 float64_t CKernel::compute_optimized(int32_t vector_idx)
00428 {
00429 SG_ERROR( "kernel does not support linadd optimization\n");
00430 return 0;
00431 }
00432
00433 void CKernel::compute_batch(
00434 int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00435 int32_t* IDX, float64_t* weights, float64_t factor)
00436 {
00437 SG_ERROR( "kernel does not support batch computation\n");
00438 }
00439
00440 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00441 {
00442 SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00443 }
00444
00445 void CKernel::clear_normal()
00446 {
00447 SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00448 }
00449
00450 int32_t CKernel::get_num_subkernels()
00451 {
00452 return 1;
00453 }
00454
00455 void CKernel::compute_by_subkernel(
00456 int32_t vector_idx, float64_t * subkernel_contrib)
00457 {
00458 SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00459 }
00460
00461 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00462 {
00463 num_weights=1 ;
00464 return &combined_kernel_weight ;
00465 }
00466
00467 void CKernel::set_subkernel_weights(float64_t* weights, int32_t num_weights)
00468 {
00469 combined_kernel_weight = weights[0] ;
00470 if (num_weights!=1)
00471 SG_ERROR( "number of subkernel weights should be one ...\n");
00472 }
00473
00474 bool CKernel::init_optimization_svm(CSVM * svm)
00475 {
00476 int32_t num_suppvec=svm->get_num_support_vectors();
00477 int32_t* sv_idx=new int32_t[num_suppvec];
00478 float64_t* sv_weight=new float64_t[num_suppvec];
00479
00480 for (int32_t i=0; i<num_suppvec; i++)
00481 {
00482 sv_idx[i] = svm->get_support_vector(i);
00483 sv_weight[i] = svm->get_alpha(i);
00484 }
00485 bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00486
00487 delete[] sv_idx;
00488 delete[] sv_weight;
00489 return ret;
00490 }
00491