SimpleLocalityImprovedStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "kernel/SimpleLocalityImprovedStringKernel.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 
00017 using namespace shogun;
00018 
00019 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00020     int32_t size, int32_t l, int32_t id, int32_t od)
00021 : CStringKernel<char>(size), length(l), inner_degree(id), outer_degree(od),
00022     pyramid_weights(NULL)
00023 {
00024 }
00025 
00026 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00027     CStringFeatures<char>* l, CStringFeatures<char>* r,
00028     int32_t len, int32_t id, int32_t od)
00029 : CStringKernel<char>(10), length(len), inner_degree(id), outer_degree(od),
00030     pyramid_weights(NULL)
00031 {
00032     init(l, r);
00033 }
00034 
00035 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel()
00036 {
00037     cleanup();
00038 }
00039 
00040 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00041 {
00042     bool result = CStringKernel<char>::init(l,r);
00043 
00044     if (!result)
00045         return false;
00046     int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length();
00047     delete[] pyramid_weights;
00048     pyramid_weights = new float64_t[num_features];
00049 
00050     SG_INFO("initializing pyramid weights: size=%ld length=%i\n",
00051         num_features, length);
00052 
00053     const int32_t PYRAL = 2 * length - 1; // total window length
00054     float64_t PYRAL_pot;
00055     int32_t DEGREE1_1  = (inner_degree & 0x1)==0;
00056     int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0;
00057     int32_t DEGREE1_2  = (inner_degree & 0x2)!=0;
00058     int32_t DEGREE1_3  = (inner_degree & ~0x3)!=0;
00059     int32_t DEGREE1_4  = (inner_degree & 0x4)!=0;
00060     {
00061     float64_t PYRAL_ = PYRAL;
00062     PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00063     if (DEGREE1_1n)
00064     {
00065         PYRAL_ *= PYRAL_;
00066         if (DEGREE1_2)
00067             PYRAL_pot *= PYRAL_;
00068         if (DEGREE1_3)
00069         {
00070             PYRAL_ *= PYRAL_;
00071             if (DEGREE1_4)
00072                 PYRAL_pot *= PYRAL_;
00073         }
00074     }
00075     }
00076 
00077     int32_t pyra_len  = num_features-PYRAL+1;
00078     int32_t pyra_len2 = (int32_t) pyra_len/2;
00079     {
00080     int32_t j;
00081     for (j = 0; j < pyra_len; j++)
00082         pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len);
00083     for (j = 0; j < pyra_len; j++)
00084         pyramid_weights[j] /= PYRAL_pot;
00085     }
00086 
00087     return init_normalizer();
00088 }
00089 
00090 void CSimpleLocalityImprovedStringKernel::cleanup()
00091 {
00092     delete[] pyramid_weights;
00093     pyramid_weights = NULL;
00094 
00095     CKernel::cleanup();
00096 }
00097 
00098 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1,
00099          const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH,
00100          const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra)
00101 {
00102     const int32_t PYRAL = 2*NTWIDTH-1; // total window length
00103     int32_t pyra_len, pyra_len2;
00104     float64_t pot, PYRAL_pot;
00105     float64_t sum;
00106     int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0;
00107     int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
00108     int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0;
00109     int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
00110     int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0;
00111     {
00112     float64_t PYRAL_ = PYRAL;
00113     PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00114     if (DEGREE1_1n)
00115     {
00116         PYRAL_ *= PYRAL_;
00117         if (DEGREE1_2) PYRAL_pot *= PYRAL_;
00118         if (DEGREE1_3)
00119         {
00120             PYRAL_ *= PYRAL_;
00121             if (DEGREE1_4) PYRAL_pot *= PYRAL_;
00122         }
00123     }
00124     }
00125 
00126     ASSERT((DEGREE1 & ~0x7) == 0);
00127     ASSERT((DEGREE2 & ~0x7) == 0);
00128 
00129     pyra_len = NOF_NTS-PYRAL+1;
00130     pyra_len2 = (int32_t) pyra_len/2;
00131     {
00132     int32_t j;
00133     for (j = 0; j < pyra_len; j++)
00134         pyra[j] = 4*((float64_t)((j < pyra_len2) ? j+1 : pyra_len-j))/((float64_t)pyra_len);
00135     for (j = 0; j < pyra_len; j++)
00136         pyra[j] /= PYRAL_pot;
00137     }
00138 
00139     register int32_t conv;
00140     register int32_t i;
00141     register int32_t j;
00142 
00143     sum = 0.0;
00144     conv = 0;
00145     for (j = 0; j < PYRAL; j++)
00146         conv += (x1[j] == x2[j]) ? 1 : 0;
00147 
00148     for (i = 0; i < NOF_NTS-PYRAL+1; i++)
00149     {
00150         register float64_t pot2;
00151         if (i>0)
00152             conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) - 
00153                 ((x1[i-1] == x2[i-1]) ? 1 : 0);
00154         { /* potencing of conv -- float64_t is faster*/
00155         register float64_t conv2 = conv;
00156         pot2 = (DEGREE1_1) ? 1.0 : conv2;
00157             if (DEGREE1_1n)
00158             {
00159                 conv2 *= conv2;
00160                 if (DEGREE1_2)
00161                     pot2 *= conv2;
00162                 if (DEGREE1_3 && DEGREE1_4)
00163                     pot2 *= conv2*conv2;
00164             }
00165         }
00166         sum += pot2*pyra[i];
00167     }
00168 
00169     pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
00170     if ((DEGREE2 & ~0x1) != 0)
00171     {
00172         sum *= sum;
00173         if ((DEGREE2 & 0x2) != 0)
00174             pot *= sum;
00175         if ((DEGREE2 & ~0x3) != 0)
00176         {
00177             sum *= sum;
00178             if ((DEGREE2 & 0x4) != 0)
00179                 pot *= sum;
00180         }
00181     }
00182     return pot;
00183 }
00184 
00185 float64_t CSimpleLocalityImprovedStringKernel::compute(
00186     int32_t idx_a, int32_t idx_b)
00187 {
00188     int32_t alen, blen;
00189     bool free_avec, free_bvec;
00190 
00191     char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00192     char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00193 
00194     // can only deal with strings of same length
00195     ASSERT(alen==blen);
00196 
00197     float64_t dpt;
00198 
00199     dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
00200     dpt = dpt / pow((float64_t)alen, (float64_t)outer_degree);
00201 
00202     ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00203     ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00204     return (float64_t) dpt;
00205 }

SHOGUN Machine Learning Toolbox - Documentation