SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SimpleLocalityImprovedStringKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
11 #include <shogun/lib/common.h>
12 #include <shogun/io/SGIO.h>
16 
17 using namespace shogun;
18 
20 : CStringKernel<char>()
21 {
22  init();
23 }
24 
26  int32_t size, int32_t l, int32_t id, int32_t od)
27 : CStringKernel<char>(size)
28 {
29  init();
30 
31  length=l;
32  inner_degree=id;
33  outer_degree=od;
34 }
35 
38  int32_t len, int32_t id, int32_t od)
39 : CStringKernel<char>()
40 {
41  init();
42 
43  length=len;
44  inner_degree=id;
45  outer_degree=od;
46 
47  init(l, r);
48 }
49 
51 {
52  cleanup();
53 }
54 
55 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
56 {
57  bool result = CStringKernel<char>::init(l,r);
58 
59  if (!result)
60  return false;
61  const int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length();
62  const int32_t PYRAL = 2 * length - 1; // total window length
63  const int32_t pyra_len = num_features-PYRAL+1;
64  const int32_t pyra_len2 = (int32_t) pyra_len/2;
65 
67 
68  pyramid_weights = SG_MALLOC(float64_t, pyra_len);
69  num_pyramid_weights=pyra_len;
70 
71  SG_INFO("initializing pyramid weights: size=%ld length=%i\n",
72  num_features, length);
73 
74  float64_t PYRAL_pot;
75  int32_t DEGREE1_1 = (inner_degree & 0x1)==0;
76  int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0;
77  int32_t DEGREE1_2 = (inner_degree & 0x2)!=0;
78  int32_t DEGREE1_3 = (inner_degree & ~0x3)!=0;
79  int32_t DEGREE1_4 = (inner_degree & 0x4)!=0;
80  {
81  float64_t PYRAL_ = PYRAL;
82  PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
83  if (DEGREE1_1n)
84  {
85  PYRAL_ *= PYRAL_;
86  if (DEGREE1_2)
87  PYRAL_pot *= PYRAL_;
88  if (DEGREE1_3)
89  {
90  PYRAL_ *= PYRAL_;
91  if (DEGREE1_4)
92  PYRAL_pot *= PYRAL_;
93  }
94  }
95  }
96 
97  {
98  int32_t j;
99  for (j = 0; j < pyra_len; j++)
100  pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len);
101  for (j = 0; j < pyra_len; j++)
102  pyramid_weights[j] /= PYRAL_pot;
103  }
104 
105  return init_normalizer();
106 }
107 
109 {
111  pyramid_weights = NULL;
113 
115 }
116 
117 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1,
118  const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH,
119  const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra)
120 {
121  const int32_t PYRAL = 2*NTWIDTH-1; // total window length
122  int32_t pyra_len, pyra_len2;
123  float64_t pot, PYRAL_pot;
124  float64_t sum;
125  int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0;
126  int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
127  int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0;
128  int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
129  int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0;
130  {
131  float64_t PYRAL_ = PYRAL;
132  PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
133  if (DEGREE1_1n)
134  {
135  PYRAL_ *= PYRAL_;
136  if (DEGREE1_2) PYRAL_pot *= PYRAL_;
137  if (DEGREE1_3)
138  {
139  PYRAL_ *= PYRAL_;
140  if (DEGREE1_4) PYRAL_pot *= PYRAL_;
141  }
142  }
143  }
144 
145  ASSERT((DEGREE1 & ~0x7) == 0);
146  ASSERT((DEGREE2 & ~0x7) == 0);
147 
148  pyra_len = NOF_NTS-PYRAL+1;
149  pyra_len2 = (int32_t) pyra_len/2;
150  {
151  int32_t j;
152  for (j = 0; j < pyra_len; j++)
153  pyra[j] = 4*((float64_t)((j < pyra_len2) ? j+1 : pyra_len-j))/((float64_t)pyra_len);
154  for (j = 0; j < pyra_len; j++)
155  pyra[j] /= PYRAL_pot;
156  }
157 
158  register int32_t conv;
159  register int32_t i;
160  register int32_t j;
161 
162  sum = 0.0;
163  conv = 0;
164  for (j = 0; j < PYRAL; j++)
165  conv += (x1[j] == x2[j]) ? 1 : 0;
166 
167  for (i = 0; i < NOF_NTS-PYRAL+1; i++)
168  {
169  register float64_t pot2;
170  if (i>0)
171  conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) -
172  ((x1[i-1] == x2[i-1]) ? 1 : 0);
173  { /* potencing of conv -- float64_t is faster*/
174  register float64_t conv2 = conv;
175  pot2 = (DEGREE1_1) ? 1.0 : conv2;
176  if (DEGREE1_1n)
177  {
178  conv2 *= conv2;
179  if (DEGREE1_2)
180  pot2 *= conv2;
181  if (DEGREE1_3 && DEGREE1_4)
182  pot2 *= conv2*conv2;
183  }
184  }
185  sum += pot2*pyra[i];
186  }
187 
188  pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
189  if ((DEGREE2 & ~0x1) != 0)
190  {
191  sum *= sum;
192  if ((DEGREE2 & 0x2) != 0)
193  pot *= sum;
194  if ((DEGREE2 & ~0x3) != 0)
195  {
196  sum *= sum;
197  if ((DEGREE2 & 0x4) != 0)
198  pot *= sum;
199  }
200  }
201  return pot;
202 }
203 
205  int32_t idx_a, int32_t idx_b)
206 {
207  int32_t alen, blen;
208  bool free_avec, free_bvec;
209 
210  char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
211  char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
212 
213  // can only deal with strings of same length
214  ASSERT(alen==blen);
215 
216  float64_t dpt;
217 
218  dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
219  dpt = dpt / pow((float64_t) alen, (float64_t) outer_degree);
220 
221  ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
222  ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
223  return (float64_t) dpt;
224 }
225 
// Shared set-up helper invoked from every visible constructor body: assigns
// the default hyper-parameters, clears the weight-table pointer and registers
// the members with m_parameters under the names given in the string literals.
226 void CSimpleLocalityImprovedStringKernel::init()
227 {
// Defaults; the parameterised constructors overwrite these after calling init().
228  length = 3;
229  inner_degree = 3;
230  outer_degree = 1;
// No weight table yet; it is allocated in init(CFeatures*, CFeatures*).
231  pyramid_weights=NULL;
// NOTE(review): listing line 232 is absent from this extract.
233 
234  m_parameters->add(&length, "length", "Window Length.");
235  m_parameters->add(&inner_degree, "inner_degree", "Inner degree.");
236  m_parameters->add(&outer_degree, "outer_degree", "Outer degree.");
237 
// NOTE(review): listing line 238 is absent from this extract; the two string
// arguments below are the tail of a call whose first line is not visible here
// (presumably the registration of the pyramid weight array -- confirm against
// the original source file).
239  "pyramid_weights", "Pyramid weights.");
240 }

SHOGUN Machine Learning Toolbox - Documentation