SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
PositionalPWM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Soeren Sonnenburg
8  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
9  */
12 #include <shogun/base/Parameter.h>
15 
16 using namespace shogun;
17 
19  m_pwm_rows(0), m_pwm_cols(0), m_pwm(NULL),
20  m_sigma(0), m_mean(0),
21  m_w_rows(0), m_w_cols(0), m_w(NULL), m_poim(NULL)
22 
23 {
24  register_params();
25 }
26 
28 {
29  SG_FREE(m_pwm);
30  SG_FREE(m_w);
31 }
32 
34 {
36  return true;
37 }
38 
40 {
41  return m_pwm_rows*m_pwm_cols+2;
42 }
43 
45 {
46  ASSERT(num_param>0 && num_param<=m_pwm_rows*m_pwm_cols+2);
47 
48  if (num_param<m_pwm_rows*m_pwm_cols)
49  {
50  ASSERT(m_pwm);
51  return m_pwm[num_param];
52  }
53  else if (num_param<m_pwm_rows*m_pwm_cols+1)
54  return CMath::log(m_sigma);
55  else
56  return CMath::log(m_mean);
57 }
58 
59 float64_t CPositionalPWM::get_log_derivative(int32_t num_param, int32_t num_example)
60 {
62  return 0;
63 }
64 
66 {
70 
72 
73  float64_t lik=0;
74  int32_t len=0;
75  bool do_free=false;
76 
77  uint8_t* str = strs->get_feature_vector(num_example, len, do_free);
78 
79  if (!(m_w && m_w_cols==len))
80  return 0; //TODO
81 
82  for (int32_t i=0; i<len; i++)
83  lik+=m_w[4*i+str[i]];
84 
85  strs->free_feature_vector(str, num_example, do_free);
86  return lik;
87 }
88 
90 {
91  ASSERT(m_pwm_cols == len);
92  float64_t score = CMath::log(1/(m_sigma*CMath::sqrt(2*M_PI))) -
94 
95  for (int32_t i=0; i<m_pwm_cols; i++)
96  score+=m_pwm[m_pwm_rows*i+window[i]];
97 
98  return score;
99 }
100 
101 void CPositionalPWM::compute_w(int32_t num_pos)
102 {
104 
106  m_w_cols=num_pos;
107 
108  SG_FREE(m_w);
110 
111  uint8_t* window=SG_MALLOC(uint8_t, m_pwm_cols);
112  CMath::fill_vector(window, m_pwm_cols, (uint8_t) 0);
113 
114  const int32_t last_idx=m_pwm_cols-1;
115  for (int32_t i=0; i<m_w_rows; i++)
116  {
117  for (int32_t j=0; j<m_w_cols; j++)
118  m_w[j*m_w_rows+i]=get_log_likelihood_window(window, m_pwm_cols, j);
119 
120  window[last_idx]++;
121  int32_t window_ptr=last_idx;
122  while (window[window_ptr]==m_pwm_rows && window_ptr>0)
123  {
124  window[window_ptr]=0;
125  window_ptr--;
126  window[window_ptr]++;
127  }
128 
129  }
130 }
131 
132 void CPositionalPWM::register_params()
133 {
134  m_parameters->add_vector(&m_poim, &m_poim_len, "poim", "POIM Scoring Matrix");
135  m_parameters->add_matrix(&m_w, &m_w_rows, &m_w_cols, "w", "Scoring Matrix");
136  m_parameters->add_matrix(&m_pwm, &m_pwm_rows, &m_pwm_cols, "pwm", "Positional Weight Matrix.");
137  m_parameters->add(&m_sigma, "sigma", "Standard Deviation.");
138  m_parameters->add(&m_mean, "mean", "Mean.");
139 }
140 
141 void CPositionalPWM::compute_scoring(int32_t max_degree)
142 {
143  ASSERT(m_w);
144 
145  int32_t num_feat=m_w_cols;
146  int32_t num_sym=0;
147  int32_t order=m_pwm_cols;
148  int32_t num_words=m_pwm_cols;
149 
150  CAlphabet* alpha=new CAlphabet(DNA);
152  int32_t num_bits=alpha->get_num_bits();
153  str->compute_symbol_mask_table(num_bits);
154 
155  for (int32_t i=0; i<order; i++)
156  num_sym+=CMath::pow((int32_t) num_words,i+1);
157 
158  SG_FREE(m_poim);
159  m_poim_len=num_feat*num_sym;
160  m_poim=SG_MALLOC(float64_t, num_feat*num_sym);
161  memset(m_poim,0, size_t(num_feat)*size_t(num_sym));
162 
163  uint32_t kmer_mask=0;
164  uint32_t words=CMath::pow((int32_t) num_words,(int32_t) order);
165  int32_t offset=0;
166 
167  for (int32_t o=0; o<max_degree; o++)
168  {
169  float64_t* contrib=&m_poim[offset];
170  offset+=CMath::pow((int32_t) num_words,(int32_t) o+1);
171 
172  kmer_mask=(kmer_mask<<(num_bits)) | str->get_masked_symbols(0xffff, 1);
173 
174  for (int32_t p=-o; p<order; p++)
175  {
176  int32_t o_sym=0, m_sym=0, il=0,ir=0, jl=0;
177  uint32_t imer_mask=kmer_mask;
178  uint32_t jmer_mask=kmer_mask;
179 
180  if (p<0)
181  {
182  il=-p;
183  m_sym=order-o-p-1;
184  o_sym=-p;
185  }
186  else if (p<order-o)
187  {
188  ir=p;
189  m_sym=order-o-1;
190  }
191  else
192  {
193  ir=p;
194  m_sym=p;
195  o_sym=p-order+o+1;
196  jl=order-ir;
197  imer_mask=(kmer_mask>>(num_bits*o_sym));
198  jmer_mask=(kmer_mask>>(num_bits*jl));
199  }
200 
201  float64_t marginalizer=
202  1.0/CMath::pow((int32_t) num_words,(int32_t) m_sym);
203 
204  for (uint32_t i=0; i<words; i++)
205  {
206  uint16_t x= ((i << (num_bits*il)) >> (num_bits*ir)) & imer_mask;
207 
208  if (p>=0 && p<order-o)
209  {
210  contrib[x]+=m_w[m_w_cols*ir+i]*marginalizer;
211  }
212  else
213  {
214  for (uint32_t j=0; j< (uint32_t) CMath::pow((int32_t) num_words, (int32_t) o_sym); j++)
215  {
216  uint32_t c=x | ((j & jmer_mask) << (num_bits*jl));
217  contrib[c]+=m_w[m_w_cols*il+i]*marginalizer;
218  }
219  }
220  }
221  }
222  }
223 }
224 
226 {
227  int32_t offs=0;
228  for (int32_t i=0; i<d-1; i++)
229  offs+=CMath::pow((int32_t) m_w_rows,i+1);
230  int32_t rows=CMath::pow((int32_t) m_w_rows,d);
231  int32_t cols=m_w_cols;
232  return SGMatrix<float64_t>(&m_poim[offs],rows,cols);
233 }

SHOGUN Machine Learning Toolbox - Documentation