SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DistantSegmentsKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Heiko Strathmann
8  * DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3
9  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
10  */
11 
13 #include <string>
14 
15 using namespace shogun;
16 
18  m_delta(0), m_theta(0)
19 {
20  init();
21 }
22 
24  int32_t theta) : CStringKernel<char>(), m_delta(delta), m_theta(theta)
25 {
26  init();
27 }
28 
30  CStringFeatures<char>* r, int32_t size, int32_t delta, int32_t theta) :
31  CStringKernel<char>(), m_delta(delta), m_theta(theta)
32 {
33  init();
35 }
36 
37 bool CDistantSegmentsKernel::init(CFeatures* l, CFeatures* r)
38 {
39  CKernel::init(l, r);
40  return init_normalizer();
41 }
42 
43 void CDistantSegmentsKernel::init()
44 {
45  SG_ADD(&m_delta, "delta", "Delta parameter of the DS-Kernel", MS_AVAILABLE);
46  SG_ADD(&m_theta, "theta", "Theta parameter of the DS-Kernel", MS_AVAILABLE);
47 }
48 
49 float64_t CDistantSegmentsKernel::compute(int32_t idx_a, int32_t idx_b)
50 {
51  bool free_a, free_b;
52  int32_t aLength=0, bLength=0;
53  char* a=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, aLength,
54  free_a);
55  char* b=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, bLength,
56  free_b);
57  ASSERT(a && b);
58 
59  if ((aLength<1)||(bLength<1))
60  SG_ERROR("Empty sequences");
61 
62  float64_t result=compute(a, aLength, b, bLength, m_delta, m_theta);
63 
64  ((CStringFeatures<char>*) lhs)->free_feature_vector(a, idx_a, free_a);
65  ((CStringFeatures<char>*) rhs)->free_feature_vector(b, idx_b, free_b);
66 
67  return result;
68 }
69 
70 int32_t CDistantSegmentsKernel::bin(int32_t j, int32_t i)
71 {
72  if (i>j)
73  return 0;
74  if (i==3 && j>=3)
75  {
76  return j*(j-1)*(j-2)/6;
77  }
78  else if (i==2 && j>=2)
79  {
80  return j*(j-1)/2;
81  }
82  return 0;
83 }
84 
85 int32_t CDistantSegmentsKernel::compute(char* s, int32_t sLength, char* t,
86  int32_t tLength, int32_t delta_m, int32_t theta_m)
87 {
88  int32_t c=0;
89  int32_t* i_=SG_MALLOC(int32_t, delta_m+1);
90  int32_t* l_=SG_MALLOC(int32_t, delta_m+1);
91  for (int32_t j_s=0; j_s<=(int32_t) sLength-1; j_s++)
92  {
93  for (int32_t j_t=0; j_t<=(int32_t) tLength-1; j_t++)
94  {
95  if (s[j_s-1+1]==t[j_t-1+1])
96  {
97  int32_t n=CMath::min(CMath::min(sLength-j_s, tLength-j_t), delta_m);
98  int32_t k=-1;
99  int32_t i=1;
100  while (i<=n)
101  {
102  k++;
103  i_[2*k]=i;
104  i++;
105  while (i<=n&&s[j_s-1+i]==t[j_t-1+i])
106  i++;
107  i_[2*k+1]=i;
108  l_[k]=i_[2*k+1]-i_[2*k]+1;
109  i++;
110  while (i<=n&&s[j_s-1+i]!=t[j_t-1+i])
111  i++;
112  }
113  c+=bin(l_[0], 3)-2*bin(l_[0]-theta_m, 3)
114  +bin(l_[0]-2*theta_m, 3);
115  int32_t c1=0;
116  for (int32_t r=1; r<=k; r++)
117  {
118  c1+=bin(l_[r], 2)-bin(l_[r]-theta_m, 2);
119  }
120  c+=CMath::min(theta_m, i_[1]-i_[0])*c1;
121  }
122  }
123  }
124  delete l_;
125  delete i_;
126  return c;
127 }

SHOGUN Machine Learning Toolbox - Documentation