ViennaCL - The Vienna Computing Library
1.2.0
Main Page
Namespaces
Data Structures
Files
File List
Globals
viennacl
linalg
kernels
fft_source.h
Go to the documentation of this file.
1
#ifndef VIENNACL_LINALG_KERNELS_FFT_SOURCE_HPP_
2
#define VIENNACL_LINALG_KERNELS_FFT_SOURCE_HPP_
3
//Automatically generated file from auxiliary-directory, do not edit manually!
4
namespace
viennacl
5
{
6
namespace
linalg
7
{
8
namespace
kernels
9
{
10
const
char
*
const
fft_align1_reverse_inplace
=
11
"// reverses the entries in a vector\n"
12
"__kernel void reverse_inplace(__global float* vec, uint size) {\n"
13
" for(uint i = get_global_id(0); i < (size >> 1); i+=get_global_size(0)) {\n"
14
" float val1 = vec[i];\n"
15
" float val2 = vec[size - i - 1];\n"
16
" vec[i] = val2;\n"
17
" vec[size - i - 1] = val1;\n"
18
" }\n"
19
"}\n"
20
;
//fft_align1_reverse_inplace
21
22
const
char
*
const
fft_align1_transpose
=
23
"// simplistic matrix transpose function\n"
24
"__kernel void transpose(__global float2* input,\n"
25
" __global float2* output,\n"
26
" unsigned int row_num,\n"
27
" unsigned int col_num) {\n"
28
" unsigned int size = row_num * col_num;\n"
29
" for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
30
" unsigned int row = i / col_num;\n"
31
" unsigned int col = i - row*col_num;\n"
32
" unsigned int new_pos = col * row_num + row;\n"
33
" output[new_pos] = input[i];\n"
34
" }\n"
35
"}\n"
36
;
//fft_align1_transpose
37
38
const
char
*
const
fft_align1_bluestein_pre
=
39
"// Preprocessing phase of Bluestein algorithm\n"
40
"__kernel void bluestein_pre(__global float2* input,\n"
41
" __global float2* A,\n"
42
" __global float2* B,\n"
43
" unsigned int size,\n"
44
" unsigned int ext_size\n"
45
" ) {\n"
46
" unsigned int glb_id = get_global_id(0);\n"
47
" unsigned int glb_sz = get_global_size(0);\n"
48
" unsigned int double_size = size << 1;\n"
49
" float sn_a, cs_a;\n"
50
" const float NUM_PI = 3.14159265358979323846;\n"
51
" for(unsigned int i = glb_id; i < size; i += glb_sz) {\n"
52
" unsigned int rm = i * i % (double_size);\n"
53
" float angle = (float)rm / size * NUM_PI;\n"
54
" sn_a = sincos(-angle, &cs_a);\n"
55
" float2 a_i = (float2)(cs_a, sn_a);\n"
56
" float2 b_i = (float2)(cs_a, -sn_a);\n"
57
" A[i] = (float2)(input[i].x * a_i.x - input[i].y * a_i.y, input[i].x * a_i.y + input[i].y * a_i.x);\n"
58
" B[i] = b_i;\n"
59
" // very bad instruction, to be fixed\n"
60
" if(i) \n"
61
" B[ext_size - i] = b_i;\n"
62
" }\n"
63
"}\n"
64
;
//fft_align1_bluestein_pre
65
66
const
char
*
const
fft_align1_bluestein_post
=
67
"// Postprocessing phase of Bluestein algorithm\n"
68
"__kernel void bluestein_post(__global float2* Z,\n"
69
" __global float2* out,\n"
70
" unsigned int size) \n"
71
"{\n"
72
" unsigned int glb_id = get_global_id(0);\n"
73
" unsigned int glb_sz = get_global_size(0);\n"
74
" unsigned int double_size = size << 1;\n"
75
" float sn_a, cs_a;\n"
76
" const float NUM_PI = 3.14159265358979323846;\n"
77
" for(unsigned int i = glb_id; i < size; i += glb_sz) {\n"
78
" unsigned int rm = i * i % (double_size);\n"
79
" float angle = (float)rm / size * (-NUM_PI);\n"
80
" sn_a = sincos(angle, &cs_a);\n"
81
" float2 b_i = (float2)(cs_a, sn_a);\n"
82
" out[i] = (float2)(Z[i].x * b_i.x - Z[i].y * b_i.y, Z[i].x * b_i.y + Z[i].y * b_i.x);\n"
83
" }\n"
84
"}\n"
85
;
//fft_align1_bluestein_post
86
87
const
char
*
const
fft_align1_fft_div_vec_scalar
=
88
"// divide a vector by a scalar (to be removed...)\n"
89
"__kernel void fft_div_vec_scalar(__global float2* input1, unsigned int size, float factor) {\n"
90
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
91
" input1[i] /= factor;\n"
92
" }\n"
93
"}\n"
94
;
//fft_align1_fft_div_vec_scalar
95
96
const
char
*
const
fft_align1_real_to_complex
=
97
"// embedd a real-valued vector into a complex one\n"
98
"__kernel void real_to_complex(__global float* in,\n"
99
" __global float2* out,\n"
100
" unsigned int size) {\n"
101
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
102
" float2 val = 0;\n"
103
" val.x = in[i];\n"
104
" out[i] = val;\n"
105
" }\n"
106
"}\n"
107
;
//fft_align1_real_to_complex
108
109
const
char
*
const
fft_align1_complex_to_real
=
110
"__kernel void complex_to_real(__global float2* in,\n"
111
" __global float* out,\n"
112
" unsigned int size) {\n"
113
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
114
" out[i] = in[i].x;\n"
115
" }\n"
116
"}\n"
117
;
//fft_align1_complex_to_real
118
119
const
char
*
const
fft_align1_fft_mult_vec
=
120
"// elementwise product of two complex vectors\n"
121
"__kernel void fft_mult_vec(__global const float2* input1,\n"
122
" __global const float2* input2,\n"
123
" __global float2* output,\n"
124
" unsigned int size) {\n"
125
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
126
" float2 in1 = input1[i];\n"
127
" float2 in2 = input2[i];\n"
128
" output[i] = (float2)(in1.x * in2.x - in1.y * in2.y, in1.x * in2.y + in1.y * in2.x);\n"
129
" }\n"
130
"}\n"
131
;
//fft_align1_fft_mult_vec
132
133
const
char
*
const
fft_align1_transpose_inplace
=
134
"// inplace-transpose of a matrix\n"
135
"__kernel void transpose_inplace(__global float2* input,\n"
136
" unsigned int row_num,\n"
137
" unsigned int col_num) {\n"
138
" unsigned int size = row_num * col_num;\n"
139
" for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
140
" unsigned int row = i / col_num;\n"
141
" unsigned int col = i - row*col_num;\n"
142
" unsigned int new_pos = col * row_num + row;\n"
143
" //new_pos = col < row?0:1;\n"
144
" //input[i] = new_pos;\n"
145
" if(i < new_pos) {\n"
146
" float2 val = input[i];\n"
147
" input[i] = input[new_pos];\n"
148
" input[new_pos] = val;\n"
149
" }\n"
150
" }\n"
151
"}\n"
152
;
//fft_align1_transpose_inplace
153
154
const
char
*
const
fft_align1_zero2
=
155
"// Zero two complex vectors (to avoid kernel launch overhead)\n"
156
"__kernel void zero2(__global float2* input1,\n"
157
" __global float2* input2,\n"
158
" unsigned int size) {\n"
159
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
160
" input1[i] = 0;\n"
161
" input2[i] = 0;\n"
162
" }\n"
163
"}\n"
164
;
//fft_align1_zero2
165
166
const
char
*
const
fft_align1_vandermonde_prod
=
167
"// computes the matrix vector product with a Vandermonde matrix\n"
168
"__kernel void vandermonde_prod(__global float* vander,\n"
169
" __global float* vector,\n"
170
" __global float* result,\n"
171
" uint size) {\n"
172
" for(uint i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
173
" float mul = vander[i];\n"
174
" float pwr = 1;\n"
175
" float val = 0;\n"
176
" for(uint j = 0; j < size; j++) {\n"
177
" val = val + pwr * vector[j];\n"
178
" pwr *= mul;\n"
179
" }\n"
180
" \n"
181
" result[i] = val;\n"
182
" }\n"
183
"}\n"
184
;
//fft_align1_vandermonde_prod
185
186
}
//namespace kernels
187
}
//namespace linalg
188
}
//namespace viennacl
189
#endif
Generated on Wed Oct 10 2012 09:58:14 for ViennaCL - The Vienna Computing Library by
1.8.1.2