ViennaCL - The Vienna Computing Library  1.2.0
fft_source.h
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_KERNELS_FFT_SOURCE_HPP_
2 #define VIENNACL_LINALG_KERNELS_FFT_SOURCE_HPP_
3 //Automatically generated file from auxiliary-directory, do not edit manually!
4 namespace viennacl
5 {
6  namespace linalg
7  {
8  namespace kernels
9  {
// OpenCL C source of the kernel `reverse_inplace`.
// Reverses the order of the `size` floats stored in `vec`, in place.
// Each work-item starts at its global id and advances by the global size,
// swapping vec[i] with vec[size - i - 1] for every i below size/2
// (size >> 1); with an odd `size` the middle element is never touched.
10 const char * const fft_align1_reverse_inplace =
11 "// reverses the entries in a vector\n"
12 "__kernel void reverse_inplace(__global float* vec, uint size) {\n"
13 " for(uint i = get_global_id(0); i < (size >> 1); i+=get_global_size(0)) {\n"
14 " float val1 = vec[i];\n"
15 " float val2 = vec[size - i - 1];\n"
16 " vec[i] = val2;\n"
17 " vec[size - i - 1] = val1;\n"
18 " }\n"
19 "}\n"
20 ; //fft_align1_reverse_inplace
21 
// OpenCL C source of the kernel `transpose` (out-of-place).
// Walks all row_num * col_num float2 elements of `input`; element i is
// decomposed as row = i / col_num, col = i - row * col_num and copied to
// output[col * row_num + row].
// NOTE(review): presumably `input` and `output` are distinct buffers, since
// the kernel reads and writes different positions without synchronisation -
// confirm at the call site.
22 const char * const fft_align1_transpose =
23 "// simplistic matrix transpose function\n"
24 "__kernel void transpose(__global float2* input,\n"
25 " __global float2* output,\n"
26 " unsigned int row_num,\n"
27 " unsigned int col_num) {\n"
28 " unsigned int size = row_num * col_num;\n"
29 " for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
30 " unsigned int row = i / col_num;\n"
31 " unsigned int col = i - row*col_num;\n"
32 " unsigned int new_pos = col * row_num + row;\n"
33 " output[new_pos] = input[i];\n"
34 " }\n"
35 "}\n"
36 ; //fft_align1_transpose
37 
// OpenCL C source of the kernel `bluestein_pre` (Bluestein preprocessing).
// For every i < size, with angle = pi * ((i*i) mod (2*size)) / size:
//   A[i]            = input[i] * (cos(angle) - j*sin(angle))
//   B[i]            =             cos(angle) + j*sin(angle)
//   B[ext_size - i] = B[i]   for i > 0 (mirrored copy at the end of B)
// All other entries of A and B are left untouched by this kernel.
//
// The square i*i is formed in 64 bits (ulong) before the reduction modulo
// 2*size: a 32-bit product wraps for i >= 65536, and the wrapped value only
// gives the right remainder when 2*size is a power of two.
// NUM_PI carries an 'f' suffix so the kernel does not depend on
// double-precision literal support on the device.
const char * const fft_align1_bluestein_pre =
"// Preprocessing phase of Bluestein algorithm\n"
"__kernel void bluestein_pre(__global float2* input,\n"
" __global float2* A,\n"
" __global float2* B,\n"
" unsigned int size,\n"
" unsigned int ext_size\n"
" ) {\n"
" unsigned int glb_id = get_global_id(0);\n"
" unsigned int glb_sz = get_global_size(0);\n"
" unsigned int double_size = size << 1;\n"
" float sn_a, cs_a;\n"
" const float NUM_PI = 3.14159265358979323846f;\n"
" for(unsigned int i = glb_id; i < size; i += glb_sz) {\n"
" // 64-bit product: i * i overflows 32 bits for i >= 65536\n"
" unsigned int rm = (unsigned int)(((ulong)i * i) % double_size);\n"
" float angle = (float)rm / size * NUM_PI;\n"
" sn_a = sincos(-angle, &cs_a);\n"
" float2 a_i = (float2)(cs_a, sn_a);\n"
" float2 b_i = (float2)(cs_a, -sn_a);\n"
" A[i] = (float2)(input[i].x * a_i.x - input[i].y * a_i.y, input[i].x * a_i.y + input[i].y * a_i.x);\n"
" B[i] = b_i;\n"
" // very bad instruction, to be fixed\n"
" if(i) \n"
" B[ext_size - i] = b_i;\n"
" }\n"
"}\n"
; //fft_align1_bluestein_pre
65 
// OpenCL C source of the kernel `bluestein_post` (Bluestein postprocessing).
// For every i < size, with angle = -pi * ((i*i) mod (2*size)) / size:
//   out[i] = Z[i] * (cos(angle) + j*sin(angle))
//
// The square i*i is formed in 64 bits (ulong) before the reduction modulo
// 2*size: a 32-bit product wraps for i >= 65536, and the wrapped value only
// gives the right remainder when 2*size is a power of two.
// NUM_PI carries an 'f' suffix so the kernel does not depend on
// double-precision literal support on the device.
const char * const fft_align1_bluestein_post =
"// Postprocessing phase of Bluestein algorithm\n"
"__kernel void bluestein_post(__global float2* Z,\n"
" __global float2* out,\n"
" unsigned int size) \n"
"{\n"
" unsigned int glb_id = get_global_id(0);\n"
" unsigned int glb_sz = get_global_size(0);\n"
" unsigned int double_size = size << 1;\n"
" float sn_a, cs_a;\n"
" const float NUM_PI = 3.14159265358979323846f;\n"
" for(unsigned int i = glb_id; i < size; i += glb_sz) {\n"
" // 64-bit product: i * i overflows 32 bits for i >= 65536\n"
" unsigned int rm = (unsigned int)(((ulong)i * i) % double_size);\n"
" float angle = (float)rm / size * (-NUM_PI);\n"
" sn_a = sincos(angle, &cs_a);\n"
" float2 b_i = (float2)(cs_a, sn_a);\n"
" out[i] = (float2)(Z[i].x * b_i.x - Z[i].y * b_i.y, Z[i].x * b_i.y + Z[i].y * b_i.x);\n"
" }\n"
"}\n"
; //fft_align1_bluestein_post
86 
// OpenCL C source of the kernel `fft_div_vec_scalar`.
// Divides each of the first `size` float2 entries of `input1` by the scalar
// `factor`, in place (both components of every entry are divided).
// `factor` is used as passed; the kernel performs no zero check.
87 const char * const fft_align1_fft_div_vec_scalar =
88 "// divide a vector by a scalar (to be removed...)\n"
89 "__kernel void fft_div_vec_scalar(__global float2* input1, unsigned int size, float factor) {\n"
90 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
91 " input1[i] /= factor;\n"
92 " }\n"
93 "}\n"
94 ; //fft_align1_fft_div_vec_scalar
95 
// OpenCL C source of the kernel `real_to_complex`.
// For every i < size writes out[i] = (in[i], 0): the real input value goes
// into the .x component of a float2 and the .y component is set to zero.
96 const char * const fft_align1_real_to_complex =
97 "// embedd a real-valued vector into a complex one\n"
98 "__kernel void real_to_complex(__global float* in,\n"
99 " __global float2* out,\n"
100 " unsigned int size) {\n"
101 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
102 " float2 val = 0;\n"
103 " val.x = in[i];\n"
104 " out[i] = val;\n"
105 " }\n"
106 "}\n"
107 ; //fft_align1_real_to_complex
108 
// OpenCL C source of the kernel `complex_to_real`.
// For every i < size writes out[i] = in[i].x, i.e. keeps the .x component
// of each float2 entry and drops the .y component.
109 const char * const fft_align1_complex_to_real =
110 "__kernel void complex_to_real(__global float2* in,\n"
111 " __global float* out,\n"
112 " unsigned int size) {\n"
113 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
114 " out[i] = in[i].x;\n"
115 " }\n"
116 "}\n"
117 ; //fft_align1_complex_to_real
118 
// OpenCL C source of the kernel `fft_mult_vec`.
// For every i < size writes the complex product of input1[i] and input2[i]
// to output[i], with .x as real part and .y as imaginary part:
//   (a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x)
// Both operands are loaded into locals before the result is stored.
119 const char * const fft_align1_fft_mult_vec =
120 "// elementwise product of two complex vectors\n"
121 "__kernel void fft_mult_vec(__global const float2* input1,\n"
122 " __global const float2* input2,\n"
123 " __global float2* output,\n"
124 " unsigned int size) {\n"
125 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
126 " float2 in1 = input1[i];\n"
127 " float2 in2 = input2[i];\n"
128 " output[i] = (float2)(in1.x * in2.x - in1.y * in2.y, in1.x * in2.y + in1.y * in2.x);\n"
129 " }\n"
130 "}\n"
131 ; //fft_align1_fft_mult_vec
132 
// OpenCL C source of the kernel `transpose_inplace`.
// For every element index i < row_num * col_num the kernel computes
// row = i / col_num, col = i - row * col_num and the transposed position
// new_pos = col * row_num + row, then swaps input[i] with input[new_pos]
// when i < new_pos, so each (i, new_pos) pair is handled by one iteration.
// The kernel source also carries two commented-out debugging statements.
// NOTE(review): exchanging pairs produces a transpose only if mapping
// new_pos back yields i again, which holds for row_num == col_num. For
// rectangular shapes the index mapping forms longer cycles and several
// work-items may touch the same element - confirm that callers only pass
// square matrices.
133 const char * const fft_align1_transpose_inplace =
134 "// inplace-transpose of a matrix\n"
135 "__kernel void transpose_inplace(__global float2* input,\n"
136 " unsigned int row_num,\n"
137 " unsigned int col_num) {\n"
138 " unsigned int size = row_num * col_num;\n"
139 " for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
140 " unsigned int row = i / col_num;\n"
141 " unsigned int col = i - row*col_num;\n"
142 " unsigned int new_pos = col * row_num + row;\n"
143 " //new_pos = col < row?0:1;\n"
144 " //input[i] = new_pos;\n"
145 " if(i < new_pos) {\n"
146 " float2 val = input[i];\n"
147 " input[i] = input[new_pos];\n"
148 " input[new_pos] = val;\n"
149 " }\n"
150 " }\n"
151 "}\n"
152 ; //fft_align1_transpose_inplace
153 
// OpenCL C source of the kernel `zero2`.
// Sets the first `size` float2 entries of both `input1` and `input2` to
// zero in a single launch; the same `size` is applied to both buffers.
154 const char * const fft_align1_zero2 =
155 "// Zero two complex vectors (to avoid kernel launch overhead)\n"
156 "__kernel void zero2(__global float2* input1,\n"
157 " __global float2* input2,\n"
158 " unsigned int size) {\n"
159 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
160 " input1[i] = 0;\n"
161 " input2[i] = 0;\n"
162 " }\n"
163 "}\n"
164 ; //fft_align1_zero2
165 
// OpenCL C source of the kernel `vandermonde_prod`.
// For every i < size computes
//   result[i] = sum over j in [0, size) of vector[j] * vander[i]^j
// The power vander[i]^j is built incrementally: `pwr` starts at 1 and is
// multiplied by vander[i] after each term, so every output entry costs
// `size` multiply-add steps.
166 const char * const fft_align1_vandermonde_prod =
167 "// computes the matrix vector product with a Vandermonde matrix\n"
168 "__kernel void vandermonde_prod(__global float* vander,\n"
169 " __global float* vector,\n"
170 " __global float* result,\n"
171 " uint size) {\n"
172 " for(uint i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
173 " float mul = vander[i];\n"
174 " float pwr = 1;\n"
175 " float val = 0;\n"
176 " for(uint j = 0; j < size; j++) {\n"
177 " val = val + pwr * vector[j];\n"
178 " pwr *= mul;\n"
179 " }\n"
180 " \n"
181 " result[i] = val;\n"
182 " }\n"
183 "}\n"
184 ; //fft_align1_vandermonde_prod
185 
186  } //namespace kernels
187  } //namespace linalg
188 } //namespace viennacl
189 #endif