ViennaCL - The Vienna Computing Library  1.2.0
fft.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_FFT_HPP
2 #define VIENNACL_FFT_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2011, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
24 #include <viennacl/vector.hpp>
25 #include <viennacl/matrix.hpp>
26 
28 
29 #include <cmath>
30 
31 #include <stdexcept>
32 
33 namespace viennacl
34 {
35  namespace detail
36  {
37  namespace fft
38  {
39  const std::size_t MAX_LOCAL_POINTS_NUM = 512;
40 
41  namespace FFT_DATA_ORDER {
42  enum DATA_ORDER {
45  };
46  }
47  }
48  }
49 }
50 
52 namespace viennacl {
53  namespace detail {
54  namespace fft {
55 
56  inline bool is_radix2(std::size_t data_size) {
57  return !((data_size > 2) && (data_size & (data_size - 1)));
58 
59  }
60 
61  inline std::size_t next_power_2(std::size_t n) {
62  n = n - 1;
63 
64  std::size_t power = 1;
65 
66  while(power < sizeof(std::size_t) * 8) {
67  n = n | (n >> power);
68  power *= 2;
69  }
70 
71  return n + 1;
72  }
73 
74  inline std::size_t num_bits(std::size_t size)
75  {
76  std::size_t bits_datasize = 0;
77  std::size_t ds = 1;
78 
79  while(ds < size)
80  {
81  ds = ds << 1;
82  bits_datasize++;
83  }
84 
85  return bits_datasize;
86  }
87 
88 
95  template<class SCALARTYPE>
96  void direct(const viennacl::ocl::handle<cl_mem>& in,
98  std::size_t size,
99  std::size_t stride,
100  std::size_t batch_num,
101  SCALARTYPE sign = -1.0f,
103  )
104  {
105  viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::init();
106  std::string program_string = viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::program_name();
107  if (data_order == FFT_DATA_ORDER::COL_MAJOR)
108  {
109  viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::init();
110  program_string = viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::program_name();
111  }
112  viennacl::ocl::kernel& kernel = viennacl::ocl::current_context().get_program(program_string).get_kernel("fft_direct");
113  viennacl::ocl::enqueue(kernel(in, out, static_cast<cl_uint>(size), static_cast<cl_uint>(stride), static_cast<cl_uint>(batch_num), sign));
114  }
115 
116  /*
117  * This function performs reorder of input data. Indexes are sorted in bit-reversal order.
118  * Such reordering should be done before in-place FFT.
119  */
120  template <typename SCALARTYPE>
121  void reorder(const viennacl::ocl::handle<cl_mem>& in,
122  std::size_t size,
123  std::size_t stride,
124  std::size_t bits_datasize,
125  std::size_t batch_num,
127  )
128  {
129  viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::init();
130  std::string program_string = viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::program_name();
131  if (data_order == FFT_DATA_ORDER::COL_MAJOR)
132  {
133  viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::init();
134  program_string = viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::program_name();
135  }
136 
138  .get_program(program_string)
139  .get_kernel("fft_reorder");
140  viennacl::ocl::enqueue(kernel(in,
141  static_cast<cl_uint>(bits_datasize),
142  static_cast<cl_uint>(size),
143  static_cast<cl_uint>(stride),
144  static_cast<cl_uint>(batch_num)
145  )
146  );
147  }
148 
156  template<class SCALARTYPE>
157  void radix2(const viennacl::ocl::handle<cl_mem>& in,
158  std::size_t size,
159  std::size_t stride,
160  std::size_t batch_num,
161  SCALARTYPE sign = -1.0f,
163  )
164  {
165  viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
166 
167  assert(batch_num != 0);
168  assert(is_radix2(size));
169 
170  viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::init();
171  std::string program_string = viennacl::linalg::kernels::matrix_row<SCALARTYPE, 1>::program_name();
172  if (data_order == FFT_DATA_ORDER::COL_MAJOR)
173  {
174  viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::init();
175  program_string = viennacl::linalg::kernels::matrix_col<SCALARTYPE, 1>::program_name();
176  }
177 
178  std::size_t bits_datasize = num_bits(size);
179 
180  if(size <= MAX_LOCAL_POINTS_NUM)
181  {
183  .get_program(program_string)
184  .get_kernel("fft_radix2_local");
185  viennacl::ocl::enqueue(kernel(in,
186  viennacl::ocl::local_mem((size * 4) * sizeof(SCALARTYPE)),
187  static_cast<cl_uint>(bits_datasize),
188  static_cast<cl_uint>(size),
189  static_cast<cl_uint>(stride),
190  static_cast<cl_uint>(batch_num),
191  sign));
192  }
193  else
194  {
195  reorder<SCALARTYPE>(in, size, stride, bits_datasize, batch_num);
196 
197  for(std::size_t step = 0; step < bits_datasize; step++)
198  {
200  .get_program(program_string)
201  .get_kernel("fft_radix2");
202  viennacl::ocl::enqueue(kernel(in,
203  static_cast<cl_uint>(step),
204  static_cast<cl_uint>(bits_datasize),
205  static_cast<cl_uint>(size),
206  static_cast<cl_uint>(stride),
207  static_cast<cl_uint>(batch_num),
208  sign));
209  }
210 
211  }
212  }
213 
221  template<class SCALARTYPE, unsigned int ALIGNMENT>
222  void bluestein(viennacl::vector<SCALARTYPE, ALIGNMENT>& in,
224  std::size_t batch_num,
225  SCALARTYPE sign = -1.0
226  )
227  {
228  viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
229 
230  std::size_t size = in.size() >> 1;
231  std::size_t ext_size = next_power_2(2 * size - 1);
232 
235 
237 
238  {
240  .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
241  .get_kernel("zero2");
242  viennacl::ocl::enqueue(kernel(
243  A,
244  B,
245  static_cast<cl_uint>(ext_size)
246  ));
247 
248  }
249  {
251  .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
252  .get_kernel("bluestein_pre");
253  viennacl::ocl::enqueue(kernel(
254  in,
255  A,
256  B,
257  static_cast<cl_uint>(size),
258  static_cast<cl_uint>(ext_size)
259  ));
260  }
261 
263 
264  {
266  .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
267  .get_kernel("bluestein_post");
268  viennacl::ocl::enqueue(kernel(
269  Z,
270  out,
271  static_cast<cl_uint>(size)
272  ));
273  }
274  }
275 
276  template<class SCALARTYPE, unsigned int ALIGNMENT>
277  void multiply(viennacl::vector<SCALARTYPE, ALIGNMENT> const & input1,
280  {
281  viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
282  std::size_t size = input1.size() >> 1;
284  .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
285  .get_kernel("fft_mult_vec");
286  viennacl::ocl::enqueue(kernel(input1, input2, output, static_cast<cl_uint>(size)));
287  }
288 
289  template<class SCALARTYPE, unsigned int ALIGNMENT>
290  void normalize(viennacl::vector<SCALARTYPE, ALIGNMENT> & input)
291  {
292  viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
294  .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
295  .get_kernel("fft_div_vec_scalar");
296  std::size_t size = input.size() >> 1;
297  SCALARTYPE norm_factor = static_cast<SCALARTYPE>(size);
298  viennacl::ocl::enqueue(kernel(input, static_cast<cl_uint>(size), norm_factor));
299  }
300 
301  template<class SCALARTYPE, unsigned int ALIGNMENT>
303  {
304  viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
306  .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
307  .get_kernel("transpose_inplace");
308  viennacl::ocl::enqueue(kernel(input,
309  static_cast<cl_uint>(input.internal_size1()),
310  static_cast<cl_uint>(input.internal_size2()) >> 1));
311  }
312 
313  template<class SCALARTYPE, unsigned int ALIGNMENT>
316  {
317  viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
318 
320  .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
321  .get_kernel("transpose");
322  viennacl::ocl::enqueue(kernel(input,
323  output,
324  static_cast<cl_uint>(input.internal_size1()),
325  static_cast<cl_uint>(input.internal_size2() >> 1))
326  );
327  }
328 
329  template<class SCALARTYPE, unsigned int ALIGNMENT>
330  void real_to_complex(viennacl::vector<SCALARTYPE, ALIGNMENT> const & in,
332  std::size_t size)
333  {
334  viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
336  .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
337  .get_kernel("real_to_complex");
338  viennacl::ocl::enqueue(kernel(in, out, static_cast<cl_uint>(size)));
339  }
340 
341  template<class SCALARTYPE, unsigned int ALIGNMENT>
342  void complex_to_real(viennacl::vector<SCALARTYPE, ALIGNMENT> const & in,
344  std::size_t size)
345  {
346  viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
348  .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
349  .get_kernel("complex_to_real");
350  viennacl::ocl::enqueue(kernel(in, out, static_cast<cl_uint>(size)));
351  }
352 
353  template<class SCALARTYPE, unsigned int ALIGNMENT>
355  {
356  viennacl::linalg::kernels::fft<SCALARTYPE, 1>::init();
357  std::size_t size = in.size();
359  .get_program(viennacl::linalg::kernels::fft<SCALARTYPE, 1>::program_name())
360  .get_kernel("reverse_inplace");
361  viennacl::ocl::enqueue(kernel(in, static_cast<cl_uint>(size)));
362  }
363 
364 
365  } //namespace fft
366  } //namespace detail
367 
375  template<class SCALARTYPE, unsigned int ALIGNMENT>
376  void inplace_fft(viennacl::vector<SCALARTYPE, ALIGNMENT>& input,
377  std::size_t batch_num = 1,
378  SCALARTYPE sign = -1.0)
379  {
380  std::size_t size = (input.size() >> 1) / batch_num;
381 
382  if(!detail::fft::is_radix2(size))
383  {
385  detail::fft::direct(input.handle(),
386  output.handle(),
387  size,
388  size,
389  batch_num,
390  sign);
391 
392  viennacl::copy(output, input);
393  } else {
394  detail::fft::radix2(input.handle(), size, size, batch_num, sign);
395  }
396  }
397 
406  template<class SCALARTYPE, unsigned int ALIGNMENT>
409  std::size_t batch_num = 1,
410  SCALARTYPE sign = -1.0
411  )
412  {
413  std::size_t size = (input.size() >> 1) / batch_num;
414 
415  if(detail::fft::is_radix2(size))
416  {
417  viennacl::copy(input, output);
418  detail::fft::radix2(output.handle(), size, size, batch_num, sign);
419  } else {
420  detail::fft::direct(input.handle(),
421  output.handle(),
422  size,
423  size,
424  batch_num,
425  sign);
426  }
427  }
428 
435  template<class SCALARTYPE, unsigned int ALIGNMENT>
437  SCALARTYPE sign = -1.0)
438  {
439  std::size_t rows_num = input.size1();
440  std::size_t cols_num = input.size2() >> 1;
441 
442  std::size_t cols_int = input.internal_size2() >> 1;
443 
444  // batch with rows
445  if(detail::fft::is_radix2(cols_num))
446  {
447  detail::fft::radix2(input.handle(), cols_num, cols_int, rows_num, sign, detail::fft::FFT_DATA_ORDER::ROW_MAJOR);
448  }
449  else
450  {
452 
453  detail::fft::direct(input.handle(),
454  output.handle(),
455  cols_num,
456  cols_int,
457  rows_num,
458  sign,
460  );
461 
462  input = output;
463  }
464 
465  // batch with cols
466  if (detail::fft::is_radix2(rows_num)) {
467  detail::fft::radix2(input.handle(), rows_num, cols_int, cols_num, sign, detail::fft::FFT_DATA_ORDER::COL_MAJOR);
468  } else {
470 
471  detail::fft::direct(input.handle(),
472  output.handle(),
473  rows_num,
474  cols_int,
475  cols_num,
476  sign,
478 
479  input = output;
480  }
481 
482  }
483 
491  template<class SCALARTYPE, unsigned int ALIGNMENT>
494  SCALARTYPE sign = -1.0)
495  {
496  std::size_t rows_num = input.size1();
497  std::size_t cols_num = input.size2() >> 1;
498 
499  std::size_t cols_int = input.internal_size2() >> 1;
500 
501  // batch with rows
502  if(detail::fft::is_radix2(cols_num))
503  {
504  output = input;
505  detail::fft::radix2(output.handle(), cols_num, cols_int, rows_num, sign, detail::fft::FFT_DATA_ORDER::ROW_MAJOR);
506  }
507  else
508  {
509  detail::fft::direct(input.handle(),
510  output.handle(),
511  cols_num,
512  cols_int,
513  rows_num,
514  sign,
516  );
517  }
518 
519  // batch with cols
520  if(detail::fft::is_radix2(rows_num))
521  {
522  detail::fft::radix2(output.handle(), rows_num, cols_int, cols_num, sign, detail::fft::FFT_DATA_ORDER::COL_MAJOR);
523  }
524  else
525  {
527  tmp = output;
528 
529  detail::fft::direct(tmp.handle(),
530  output.handle(),
531  rows_num,
532  cols_int,
533  cols_num,
534  sign,
536  }
537  }
538 
548  template<class SCALARTYPE, unsigned int ALIGNMENT>
549  void inplace_ifft(viennacl::vector<SCALARTYPE, ALIGNMENT>& input,
550  std::size_t batch_num = 1)
551  {
552  viennacl::inplace_fft(input, batch_num, SCALARTYPE(1.0));
553  detail::fft::normalize(input);
554  }
555 
566  template<class SCALARTYPE, unsigned int ALIGNMENT>
569  std::size_t batch_num = 1
570  )
571  {
572  viennacl::fft(input, output, batch_num, SCALARTYPE(1.0));
573  detail::fft::normalize(output);
574  }
575 
576  namespace linalg
577  {
587  template<class SCALARTYPE, unsigned int ALIGNMENT>
588  void convolve(viennacl::vector<SCALARTYPE, ALIGNMENT>& input1,
591  )
592  {
593  assert(input1.size() == input2.size());
594  assert(input1.size() == output.size());
595  //temporal arrays
599 
600  // align input arrays to equal size
601  // FFT of input data
602  viennacl::fft(input1, tmp1);
603  viennacl::fft(input2, tmp2);
604 
605  // multiplication of input data
606  viennacl::detail::fft::multiply(tmp1, tmp2, tmp3);
607  // inverse FFT of input data
608  viennacl::ifft(tmp3, output);
609  }
610 
620  template<class SCALARTYPE, unsigned int ALIGNMENT>
624  )
625  {
626  assert(input1.size() == input2.size());
627  assert(input1.size() == output.size());
628 
629  viennacl::inplace_fft(input1);
630  viennacl::inplace_fft(input2);
631 
632  viennacl::detail::fft::multiply(input1, input2, output);
633 
634  viennacl::inplace_ifft(output);
635  }
636  }
637 } //namespace linalg
638 
640 #endif