// SHOGUN v0.9.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include "lib/common.h" 00012 #include "kernel/FixedDegreeStringKernel.h" 00013 #include "kernel/SqrtDiagKernelNormalizer.h" 00014 #include "features/Features.h" 00015 #include "features/StringFeatures.h" 00016 #include "lib/io.h" 00017 00018 using namespace shogun; 00019 00020 void 00021 CFixedDegreeStringKernel::init(void) 00022 { 00023 m_parameters->add(°ree, "degree", "The degree."); 00024 set_normalizer(new CSqrtDiagKernelNormalizer()); 00025 } 00026 00027 CFixedDegreeStringKernel::CFixedDegreeStringKernel(void) 00028 : CStringKernel<char>(0), degree(0) 00029 { 00030 init(); 00031 } 00032 00033 CFixedDegreeStringKernel::CFixedDegreeStringKernel(int32_t size, int32_t d) 00034 : CStringKernel<char>(size), degree(d) 00035 { 00036 init(); 00037 } 00038 00039 CFixedDegreeStringKernel::CFixedDegreeStringKernel( 00040 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d) 00041 : CStringKernel<char>(10), degree(d) 00042 { 00043 init(); 00044 init(l, r); 00045 } 00046 00047 CFixedDegreeStringKernel::~CFixedDegreeStringKernel() 00048 { 00049 cleanup(); 00050 } 00051 00052 bool CFixedDegreeStringKernel::init(CFeatures* l, CFeatures* r) 00053 { 00054 CStringKernel<char>::init(l, r); 00055 return init_normalizer(); 00056 } 00057 00058 void CFixedDegreeStringKernel::cleanup() 00059 { 00060 CKernel::cleanup(); 00061 } 00062 00063 float64_t CFixedDegreeStringKernel::compute(int32_t idx_a, int32_t idx_b) 00064 { 00065 int32_t alen, blen; 00066 bool free_avec, free_bvec; 00067 00068 char* avec = ((CStringFeatures<char>*) 
lhs)->get_feature_vector(idx_a, alen, free_avec); 00069 char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec); 00070 00071 // can only deal with strings of same length 00072 ASSERT(alen==blen); 00073 00074 int64_t sum = 0; 00075 for (int32_t i = 0; i<alen-degree+1; i++) 00076 { 00077 bool match = true; 00078 00079 for (int32_t j = i; j<i+degree && match; j++) 00080 match = avec[j]==bvec[j]; 00081 if (match) 00082 sum++; 00083 } 00084 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec); 00085 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); 00086 00087 return sum; 00088 }