SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Gunnar Raetsch 00008 * Written (W) 1999-2008 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/common.h" 00013 #include "lib/io.h" 00014 #include "kernel/LocalityImprovedStringKernel.h" 00015 #include "features/StringFeatures.h" 00016 00017 using namespace shogun; 00018 00019 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel() 00020 : CStringKernel<char>(0) 00021 { 00022 init(); 00023 } 00024 00025 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel( 00026 int32_t size, int32_t l, int32_t id, int32_t od) 00027 : CStringKernel<char>(size) 00028 { 00029 init(); 00030 00031 length=l; 00032 inner_degree=id; 00033 outer_degree=od; 00034 00035 SG_INFO( "LIK with parms: l=%d, id=%d, od=%d created!\n", l, id, od); 00036 } 00037 00038 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel( 00039 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t len, 00040 int32_t id, int32_t od) 00041 : CStringKernel<char>(10) 00042 { 00043 init(); 00044 00045 length=len; 00046 inner_degree=id; 00047 outer_degree=od; 00048 00049 SG_INFO( "LIK with parms: l=%d, id=%d, od=%d created!\n", len, id, od); 00050 00051 init(l, r); 00052 } 00053 00054 CLocalityImprovedStringKernel::~CLocalityImprovedStringKernel() 00055 { 00056 cleanup(); 00057 } 00058 00059 bool CLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r) 00060 { 00061 CStringKernel<char>::init(l,r); 00062 return init_normalizer(); 00063 } 00064 00065 float64_t CLocalityImprovedStringKernel::compute(int32_t idx_a, int32_t idx_b) 00066 { 00067 int32_t alen, blen; 00068 bool free_avec, free_bvec; 00069 00070 char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec); 00071 char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec); 00072 // can only deal with strings of same length 00073 ASSERT(alen==blen && alen>0); 00074 00075 int32_t i,t; 00076 float64_t* match=new float64_t[alen]; 00077 00078 // initialize match table 1 -> match; 0 -> no match 00079 for (i = 0; i<alen; i++) 00080 match[i] = (avec[i] == bvec[i])? 1 : 0; 00081 00082 float64_t outer_sum = 0; 00083 00084 for (t = 0; t<alen-length; t++) 00085 { 00086 float64_t sum = 0; 00087 for (i = 0; i<length && t+i+length+1<alen; i++) 00088 sum += (i+1)*match[t+i]+(length-i)*match[t+i+length+1]; 00089 //add middle element + normalize with sum_i=0^2l+1 i = (2l+1)(l+1) 00090 float64_t inner_sum = (sum + (length+1)*match[t+length]) / ((2*length+1)*(length+1)); 00091 inner_sum = pow(inner_sum, inner_degree + 1); 00092 outer_sum += inner_sum; 00093 } 00094 delete[] match; 00095 00096 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec); 00097 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); 00098 return pow(outer_sum, outer_degree + 1); 00099 } 00100 00101 void CLocalityImprovedStringKernel::init() 00102 { 00103 length = 0; 00104 inner_degree = 0; 00105 outer_degree = 0; 00106 00107 m_parameters->add(&length, "length", "Window Length."); 00108 m_parameters->add(&inner_degree, "inner_degree", "Inner degree."); 00109 m_parameters->add(&outer_degree, "outer_degree", "Outer degree."); 00110 }