00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "kernel/SimpleLocalityImprovedStringKernel.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016
00017 using namespace shogun;
00018
00019 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00020 int32_t size, int32_t l, int32_t id, int32_t od)
00021 : CStringKernel<char>(size), length(l), inner_degree(id), outer_degree(od),
00022 pyramid_weights(NULL)
00023 {
00024 }
00025
00026 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00027 CStringFeatures<char>* l, CStringFeatures<char>* r,
00028 int32_t len, int32_t id, int32_t od)
00029 : CStringKernel<char>(10), length(len), inner_degree(id), outer_degree(od),
00030 pyramid_weights(NULL)
00031 {
00032 init(l, r);
00033 }
00034
00035 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel()
00036 {
00037 cleanup();
00038 }
00039
00040 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00041 {
00042 bool result = CStringKernel<char>::init(l,r);
00043
00044 if (!result)
00045 return false;
00046 int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length();
00047 delete[] pyramid_weights;
00048 pyramid_weights = new float64_t[num_features];
00049
00050 SG_INFO("initializing pyramid weights: size=%ld length=%i\n",
00051 num_features, length);
00052
00053 const int32_t PYRAL = 2 * length - 1;
00054 float64_t PYRAL_pot;
00055 int32_t DEGREE1_1 = (inner_degree & 0x1)==0;
00056 int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0;
00057 int32_t DEGREE1_2 = (inner_degree & 0x2)!=0;
00058 int32_t DEGREE1_3 = (inner_degree & ~0x3)!=0;
00059 int32_t DEGREE1_4 = (inner_degree & 0x4)!=0;
00060 {
00061 float64_t PYRAL_ = PYRAL;
00062 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00063 if (DEGREE1_1n)
00064 {
00065 PYRAL_ *= PYRAL_;
00066 if (DEGREE1_2)
00067 PYRAL_pot *= PYRAL_;
00068 if (DEGREE1_3)
00069 {
00070 PYRAL_ *= PYRAL_;
00071 if (DEGREE1_4)
00072 PYRAL_pot *= PYRAL_;
00073 }
00074 }
00075 }
00076
00077 int32_t pyra_len = num_features-PYRAL+1;
00078 int32_t pyra_len2 = (int32_t) pyra_len/2;
00079 {
00080 int32_t j;
00081 for (j = 0; j < pyra_len; j++)
00082 pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len);
00083 for (j = 0; j < pyra_len; j++)
00084 pyramid_weights[j] /= PYRAL_pot;
00085 }
00086
00087 return init_normalizer();
00088 }
00089
00090 void CSimpleLocalityImprovedStringKernel::cleanup()
00091 {
00092 delete[] pyramid_weights;
00093 pyramid_weights = NULL;
00094
00095 CKernel::cleanup();
00096 }
00097
00098 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1,
00099 const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH,
00100 const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra)
00101 {
00102 const int32_t PYRAL = 2*NTWIDTH-1;
00103 int32_t pyra_len, pyra_len2;
00104 float64_t pot, PYRAL_pot;
00105 float64_t sum;
00106 int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0;
00107 int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
00108 int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0;
00109 int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
00110 int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0;
00111 {
00112 float64_t PYRAL_ = PYRAL;
00113 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00114 if (DEGREE1_1n)
00115 {
00116 PYRAL_ *= PYRAL_;
00117 if (DEGREE1_2) PYRAL_pot *= PYRAL_;
00118 if (DEGREE1_3)
00119 {
00120 PYRAL_ *= PYRAL_;
00121 if (DEGREE1_4) PYRAL_pot *= PYRAL_;
00122 }
00123 }
00124 }
00125
00126 ASSERT((DEGREE1 & ~0x7) == 0);
00127 ASSERT((DEGREE2 & ~0x7) == 0);
00128
00129 pyra_len = NOF_NTS-PYRAL+1;
00130 pyra_len2 = (int32_t) pyra_len/2;
00131 {
00132 int32_t j;
00133 for (j = 0; j < pyra_len; j++)
00134 pyra[j] = 4*((float64_t)((j < pyra_len2) ? j+1 : pyra_len-j))/((float64_t)pyra_len);
00135 for (j = 0; j < pyra_len; j++)
00136 pyra[j] /= PYRAL_pot;
00137 }
00138
00139 register int32_t conv;
00140 register int32_t i;
00141 register int32_t j;
00142
00143 sum = 0.0;
00144 conv = 0;
00145 for (j = 0; j < PYRAL; j++)
00146 conv += (x1[j] == x2[j]) ? 1 : 0;
00147
00148 for (i = 0; i < NOF_NTS-PYRAL+1; i++)
00149 {
00150 register float64_t pot2;
00151 if (i>0)
00152 conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) -
00153 ((x1[i-1] == x2[i-1]) ? 1 : 0);
00154 {
00155 register float64_t conv2 = conv;
00156 pot2 = (DEGREE1_1) ? 1.0 : conv2;
00157 if (DEGREE1_1n)
00158 {
00159 conv2 *= conv2;
00160 if (DEGREE1_2)
00161 pot2 *= conv2;
00162 if (DEGREE1_3 && DEGREE1_4)
00163 pot2 *= conv2*conv2;
00164 }
00165 }
00166 sum += pot2*pyra[i];
00167 }
00168
00169 pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
00170 if ((DEGREE2 & ~0x1) != 0)
00171 {
00172 sum *= sum;
00173 if ((DEGREE2 & 0x2) != 0)
00174 pot *= sum;
00175 if ((DEGREE2 & ~0x3) != 0)
00176 {
00177 sum *= sum;
00178 if ((DEGREE2 & 0x4) != 0)
00179 pot *= sum;
00180 }
00181 }
00182 return pot;
00183 }
00184
00185 float64_t CSimpleLocalityImprovedStringKernel::compute(
00186 int32_t idx_a, int32_t idx_b)
00187 {
00188 int32_t alen, blen;
00189 bool free_avec, free_bvec;
00190
00191 char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00192 char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00193
00194
00195 ASSERT(alen==blen);
00196
00197 float64_t dpt;
00198
00199 dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
00200 dpt = dpt / pow((float64_t)alen, (float64_t)outer_degree);
00201
00202 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00203 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00204 return (float64_t) dpt;
00205 }