ViennaCL - The Vienna Computing Library  1.2.0
matrix_solve_row_col_source.h
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_ROW_COL_SOURCE_HPP_
2 #define VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_ROW_COL_SOURCE_HPP_
3 //Automatically generated file from auxiliary-directory, do not edit manually!
4 namespace viennacl
5 {
6  namespace linalg
7  {
8  namespace kernels
9  {
// OpenCL C source for the kernel lower_trans_solve, kept as adjacent string literals.
// Per the header embedded in the string: in-place solve of A against B^T,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = 0 up to A_rows-1:
//   - barrier, then the work-item with local id 0 divides
//     B[row * B_internal_rows + group id] by A[row + row*A_internal_cols];
//   - barrier, then every work-item loads that B entry into temp;
//   - each work-item starts at elim = row + local id + 1 and steps by the local
//     size while elim < A_rows, subtracting temp * A[elim * A_internal_cols + row]
//     from B[elim * B_internal_rows + group id].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_lower_trans_solve - confirm.
11 "// file automatically generated - do not edit!\n"
12 "// inplace solve A \\ B^T\n"
13 "// matrix layouts: A...row_major, B...col_major\n"
14 "__kernel void lower_trans_solve(\n"
15 " __global const float * A,\n"
16 " unsigned int A_rows,\n"
17 " unsigned int A_cols,\n"
18 " unsigned int A_internal_rows,\n"
19 " unsigned int A_internal_cols,\n"
20 " __global float * B, \n"
21 " unsigned int B_rows,\n"
22 " unsigned int B_cols,\n"
23 " unsigned int B_internal_rows,\n"
24 " unsigned int B_internal_cols)\n"
25 "{ \n"
26 " float temp; \n"
27 " for (int row = 0; row < A_rows; ++row) \n"
28 " { \n"
29 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
30 " if (get_local_id(0) == 0) \n"
31 " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
32 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
33 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
34 " //eliminate column of op(A) with index 'row' in parallel: \n"
35 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
36 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
37 " }\n"
38 "}\n"
39 ; //matrix_solve_row_col_align1_lower_trans_solve
40 
// OpenCL C source for the kernel unit_lower_solve, kept as adjacent string literals.
// Per the header embedded in the string: in-place solve of A against B,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = 0 up to A_rows-1:
//   - barrier, then every work-item loads B[row + group id * B_internal_rows]
//     into temp (this variant contains no division by a diagonal entry of A;
//     presumably the diagonal is treated as all ones - confirm with the caller);
//   - each work-item starts at elim = row + local id + 1 and steps by the local
//     size while elim < A_rows, subtracting temp * A[elim * A_internal_cols + row]
//     from B[elim + group id * B_internal_rows].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_unit_lower_solve - confirm.
42 "// file automatically generated - do not edit!\n"
43 "// inplace solve A \\ B\n"
44 "// matrix layouts: A...row_major, B...col_major\n"
45 "__kernel void unit_lower_solve(\n"
46 " __global const float * A,\n"
47 " unsigned int A_rows,\n"
48 " unsigned int A_cols,\n"
49 " unsigned int A_internal_rows,\n"
50 " unsigned int A_internal_cols,\n"
51 " __global float * B, \n"
52 " unsigned int B_rows,\n"
53 " unsigned int B_cols,\n"
54 " unsigned int B_internal_rows,\n"
55 " unsigned int B_internal_cols)\n"
56 "{ \n"
57 " float temp; \n"
58 " for (int row = 0; row < A_rows; ++row) \n"
59 " { \n"
60 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
61 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
62 " //eliminate column of op(A) with index 'row' in parallel: \n"
63 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
64 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_cols + row];\n"
65 " }\n"
66 "}\n"
67 ; //matrix_solve_row_col_align1_unit_lower_solve
68 
// OpenCL C source for the kernel trans_unit_upper_trans_solve, kept as adjacent
// string literals.
// Per the header embedded in the string: in-place solve of A^T against B^T,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = A_rows-1 down to 0:
//   - barrier, then every work-item loads B[row * B_internal_rows + group id]
//     into temp (this variant contains no division by a diagonal entry of A;
//     presumably the diagonal is treated as all ones - confirm with the caller);
//   - each work-item starts at elim = local id and steps by the local size while
//     elim < row, subtracting temp * A[elim + row * A_internal_cols]
//     from B[elim * B_internal_rows + group id].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_trans_unit_upper_trans_solve.
70 "// file automatically generated - do not edit!\n"
71 "// inplace solve A^T \\ B^T\n"
72 "// matrix layouts: A...row_major, B...col_major\n"
73 "__kernel void trans_unit_upper_trans_solve(\n"
74 " __global const float * A,\n"
75 " unsigned int A_rows,\n"
76 " unsigned int A_cols,\n"
77 " unsigned int A_internal_rows,\n"
78 " unsigned int A_internal_cols,\n"
79 " __global float * B, \n"
80 " unsigned int B_rows,\n"
81 " unsigned int B_cols,\n"
82 " unsigned int B_internal_rows,\n"
83 " unsigned int B_internal_cols)\n"
84 "{ \n"
85 " float temp; \n"
86 " for (int row = A_rows-1; row > -1; --row) \n"
87 " { \n"
88 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
89 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
90 " //eliminate column of op(A) with index 'row' in parallel: \n"
91 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
92 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
93 " }\n"
94 "}\n"
95 ; //matrix_solve_row_col_align1_trans_unit_upper_trans_solve
96 
// OpenCL C source for the kernel unit_upper_trans_solve, kept as adjacent string
// literals.
// Per the header embedded in the string: in-place solve of A against B^T,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = A_rows-1 down to 0:
//   - barrier, then every work-item loads B[row * B_internal_rows + group id]
//     into temp (this variant contains no division by a diagonal entry of A;
//     presumably the diagonal is treated as all ones - confirm with the caller);
//   - each work-item starts at elim = local id and steps by the local size while
//     elim < row, subtracting temp * A[elim * A_internal_cols + row]
//     from B[elim * B_internal_rows + group id].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_unit_upper_trans_solve.
98 "// file automatically generated - do not edit!\n"
99 "// inplace solve A \\ B^T\n"
100 "// matrix layouts: A...row_major, B...col_major\n"
101 "__kernel void unit_upper_trans_solve(\n"
102 " __global const float * A,\n"
103 " unsigned int A_rows,\n"
104 " unsigned int A_cols,\n"
105 " unsigned int A_internal_rows,\n"
106 " unsigned int A_internal_cols,\n"
107 " __global float * B, \n"
108 " unsigned int B_rows,\n"
109 " unsigned int B_cols,\n"
110 " unsigned int B_internal_rows,\n"
111 " unsigned int B_internal_cols)\n"
112 "{ \n"
113 " float temp; \n"
114 " for (int row = A_rows-1; row > -1; --row) \n"
115 " { \n"
116 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
117 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
118 " //eliminate column of op(A) with index 'row' in parallel: \n"
119 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
120 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
121 " }\n"
122 "}\n"
123 ; //matrix_solve_row_col_align1_unit_upper_trans_solve
124 
// OpenCL C source for the kernel unit_lower_trans_solve, kept as adjacent string
// literals.
// Per the header embedded in the string: in-place solve of A against B^T,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = 0 up to A_rows-1:
//   - barrier, then every work-item loads B[row * B_internal_rows + group id]
//     into temp (this variant contains no division by a diagonal entry of A;
//     presumably the diagonal is treated as all ones - confirm with the caller);
//   - each work-item starts at elim = row + local id + 1 and steps by the local
//     size while elim < A_rows, subtracting temp * A[elim * A_internal_cols + row]
//     from B[elim * B_internal_rows + group id].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_unit_lower_trans_solve.
126 "// file automatically generated - do not edit!\n"
127 "// inplace solve A \\ B^T\n"
128 "// matrix layouts: A...row_major, B...col_major\n"
129 "__kernel void unit_lower_trans_solve(\n"
130 " __global const float * A,\n"
131 " unsigned int A_rows,\n"
132 " unsigned int A_cols,\n"
133 " unsigned int A_internal_rows,\n"
134 " unsigned int A_internal_cols,\n"
135 " __global float * B, \n"
136 " unsigned int B_rows,\n"
137 " unsigned int B_cols,\n"
138 " unsigned int B_internal_rows,\n"
139 " unsigned int B_internal_cols)\n"
140 "{ \n"
141 " float temp; \n"
142 " for (int row = 0; row < A_rows; ++row) \n"
143 " { \n"
144 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
145 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
146 " //eliminate column of op(A) with index 'row' in parallel: \n"
147 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
148 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
149 " }\n"
150 "}\n"
151 ; //matrix_solve_row_col_align1_unit_lower_trans_solve
152 
// OpenCL C source for the kernel trans_upper_trans_solve, kept as adjacent string
// literals.
// Per the header embedded in the string: in-place solve of A^T against B^T,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = A_rows-1 down to 0:
//   - barrier, then the work-item with local id 0 divides
//     B[row * B_internal_rows + group id] by A[row + row*A_internal_cols];
//   - barrier, then every work-item loads that B entry into temp;
//   - each work-item starts at elim = local id and steps by the local size while
//     elim < row, subtracting temp * A[elim + row * A_internal_cols]
//     from B[elim * B_internal_rows + group id].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_trans_upper_trans_solve.
154 "// file automatically generated - do not edit!\n"
155 "// inplace solve A^T \\ B^T\n"
156 "// matrix layouts: A...row_major, B...col_major\n"
157 "__kernel void trans_upper_trans_solve(\n"
158 " __global const float * A,\n"
159 " unsigned int A_rows,\n"
160 " unsigned int A_cols,\n"
161 " unsigned int A_internal_rows,\n"
162 " unsigned int A_internal_cols,\n"
163 " __global float * B, \n"
164 " unsigned int B_rows,\n"
165 " unsigned int B_cols,\n"
166 " unsigned int B_internal_rows,\n"
167 " unsigned int B_internal_cols)\n"
168 "{ \n"
169 " float temp; \n"
170 " for (int row = A_rows-1; row > -1; --row) \n"
171 " { \n"
172 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
173 " if (get_local_id(0) == 0) \n"
174 " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
175 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
176 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
177 " //eliminate column of op(A) with index 'row' in parallel: \n"
178 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
179 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
180 " }\n"
181 "}\n"
182 ; //matrix_solve_row_col_align1_trans_upper_trans_solve
183 
// OpenCL C source for the kernel upper_solve, kept as adjacent string literals.
// Per the header embedded in the string: in-place solve of A against B,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = A_rows-1 down to 0:
//   - barrier, then the work-item with local id 0 divides
//     B[row + group id * B_internal_rows] by A[row + row*A_internal_cols];
//   - barrier, then every work-item loads that B entry into temp;
//   - each work-item starts at elim = local id and steps by the local size while
//     elim < row, subtracting temp * A[elim * A_internal_cols + row]
//     from B[elim + group id * B_internal_rows].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_upper_solve - confirm.
185 "// file automatically generated - do not edit!\n"
186 "// inplace solve A \\ B\n"
187 "// matrix layouts: A...row_major, B...col_major\n"
188 "__kernel void upper_solve(\n"
189 " __global const float * A,\n"
190 " unsigned int A_rows,\n"
191 " unsigned int A_cols,\n"
192 " unsigned int A_internal_rows,\n"
193 " unsigned int A_internal_cols,\n"
194 " __global float * B, \n"
195 " unsigned int B_rows,\n"
196 " unsigned int B_cols,\n"
197 " unsigned int B_internal_rows,\n"
198 " unsigned int B_internal_cols)\n"
199 "{ \n"
200 " float temp; \n"
201 " for (int row = A_rows-1; row > -1; --row) \n"
202 " { \n"
203 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
204 " if (get_local_id(0) == 0) \n"
205 " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_cols]; \n"
206 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
207 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
208 " //eliminate column of op(A) with index 'row' in parallel: \n"
209 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
210 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_cols + row];\n"
211 " }\n"
212 "}\n"
213 ; //matrix_solve_row_col_align1_upper_solve
214 
// OpenCL C source for the kernel lower_solve, kept as adjacent string literals.
// Per the header embedded in the string: in-place solve of A against B,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = 0 up to A_rows-1:
//   - barrier, then the work-item with local id 0 divides
//     B[row + group id * B_internal_rows] by A[row + row*A_internal_cols];
//   - barrier, then every work-item loads that B entry into temp;
//   - each work-item starts at elim = row + local id + 1 and steps by the local
//     size while elim < A_rows, subtracting temp * A[elim * A_internal_cols + row]
//     from B[elim + group id * B_internal_rows].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_lower_solve - confirm.
216 "// file automatically generated - do not edit!\n"
217 "// inplace solve A \\ B\n"
218 "// matrix layouts: A...row_major, B...col_major\n"
219 "__kernel void lower_solve(\n"
220 " __global const float * A,\n"
221 " unsigned int A_rows,\n"
222 " unsigned int A_cols,\n"
223 " unsigned int A_internal_rows,\n"
224 " unsigned int A_internal_cols,\n"
225 " __global float * B, \n"
226 " unsigned int B_rows,\n"
227 " unsigned int B_cols,\n"
228 " unsigned int B_internal_rows,\n"
229 " unsigned int B_internal_cols)\n"
230 "{ \n"
231 " float temp; \n"
232 " for (int row = 0; row < A_rows; ++row) \n"
233 " { \n"
234 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
235 " if (get_local_id(0) == 0) \n"
236 " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_cols]; \n"
237 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
238 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
239 " //eliminate column of op(A) with index 'row' in parallel: \n"
240 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
241 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_cols + row];\n"
242 " }\n"
243 "}\n"
244 ; //matrix_solve_row_col_align1_lower_solve
245 
// OpenCL C source for the kernel trans_unit_lower_solve, kept as adjacent string
// literals.
// Per the header embedded in the string: in-place solve of A^T against B,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = 0 up to A_rows-1:
//   - barrier, then every work-item loads B[row + group id * B_internal_rows]
//     into temp (this variant contains no division by a diagonal entry of A;
//     presumably the diagonal is treated as all ones - confirm with the caller);
//   - each work-item starts at elim = row + local id + 1 and steps by the local
//     size while elim < A_rows, subtracting temp * A[elim + row * A_internal_cols]
//     from B[elim + group id * B_internal_rows].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_trans_unit_lower_solve.
247 "// file automatically generated - do not edit!\n"
248 "// inplace solve A^T \\ B\n"
249 "// matrix layouts: A...row_major, B...col_major\n"
250 "__kernel void trans_unit_lower_solve(\n"
251 " __global const float * A,\n"
252 " unsigned int A_rows,\n"
253 " unsigned int A_cols,\n"
254 " unsigned int A_internal_rows,\n"
255 " unsigned int A_internal_cols,\n"
256 " __global float * B, \n"
257 " unsigned int B_rows,\n"
258 " unsigned int B_cols,\n"
259 " unsigned int B_internal_rows,\n"
260 " unsigned int B_internal_cols)\n"
261 "{ \n"
262 " float temp; \n"
263 " for (int row = 0; row < A_rows; ++row) \n"
264 " { \n"
265 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
266 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
267 " //eliminate column of op(A) with index 'row' in parallel: \n"
268 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
269 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_cols];\n"
270 " }\n"
271 "}\n"
272 ; //matrix_solve_row_col_align1_trans_unit_lower_solve
273 
// OpenCL C source for the kernel trans_lower_trans_solve, kept as adjacent string
// literals.
// Per the header embedded in the string: in-place solve of A^T against B^T,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = 0 up to A_rows-1:
//   - barrier, then the work-item with local id 0 divides
//     B[row * B_internal_rows + group id] by A[row + row*A_internal_cols];
//   - barrier, then every work-item loads that B entry into temp;
//   - each work-item starts at elim = row + local id + 1 and steps by the local
//     size while elim < A_rows, subtracting temp * A[elim + row * A_internal_cols]
//     from B[elim * B_internal_rows + group id].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_trans_lower_trans_solve.
275 "// file automatically generated - do not edit!\n"
276 "// inplace solve A^T \\ B^T\n"
277 "// matrix layouts: A...row_major, B...col_major\n"
278 "__kernel void trans_lower_trans_solve(\n"
279 " __global const float * A,\n"
280 " unsigned int A_rows,\n"
281 " unsigned int A_cols,\n"
282 " unsigned int A_internal_rows,\n"
283 " unsigned int A_internal_cols,\n"
284 " __global float * B, \n"
285 " unsigned int B_rows,\n"
286 " unsigned int B_cols,\n"
287 " unsigned int B_internal_rows,\n"
288 " unsigned int B_internal_cols)\n"
289 "{ \n"
290 " float temp; \n"
291 " for (int row = 0; row < A_rows; ++row) \n"
292 " { \n"
293 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
294 " if (get_local_id(0) == 0) \n"
295 " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
296 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
297 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
298 " //eliminate column of op(A) with index 'row' in parallel: \n"
299 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
300 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
301 " }\n"
302 "}\n"
303 ; //matrix_solve_row_col_align1_trans_lower_trans_solve
304 
// OpenCL C source for the kernel trans_lower_solve, kept as adjacent string literals.
// Per the header embedded in the string: in-place solve of A^T against B,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = 0 up to A_rows-1:
//   - barrier, then the work-item with local id 0 divides
//     B[row + group id * B_internal_rows] by A[row + row*A_internal_cols];
//   - barrier, then every work-item loads that B entry into temp;
//   - each work-item starts at elim = row + local id + 1 and steps by the local
//     size while elim < A_rows, subtracting temp * A[elim + row * A_internal_cols]
//     from B[elim + group id * B_internal_rows].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_trans_lower_solve - confirm.
306 "// file automatically generated - do not edit!\n"
307 "// inplace solve A^T \\ B\n"
308 "// matrix layouts: A...row_major, B...col_major\n"
309 "__kernel void trans_lower_solve(\n"
310 " __global const float * A,\n"
311 " unsigned int A_rows,\n"
312 " unsigned int A_cols,\n"
313 " unsigned int A_internal_rows,\n"
314 " unsigned int A_internal_cols,\n"
315 " __global float * B, \n"
316 " unsigned int B_rows,\n"
317 " unsigned int B_cols,\n"
318 " unsigned int B_internal_rows,\n"
319 " unsigned int B_internal_cols)\n"
320 "{ \n"
321 " float temp; \n"
322 " for (int row = 0; row < A_rows; ++row) \n"
323 " { \n"
324 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
325 " if (get_local_id(0) == 0) \n"
326 " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_cols]; \n"
327 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
328 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
329 " //eliminate column of op(A) with index 'row' in parallel: \n"
330 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
331 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_cols];\n"
332 " }\n"
333 "}\n"
334 ; //matrix_solve_row_col_align1_trans_lower_solve
335 
// OpenCL C source for the kernel unit_upper_solve, kept as adjacent string literals.
// Per the header embedded in the string: in-place solve of A against B,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = A_rows-1 down to 0:
//   - barrier, then every work-item loads B[row + group id * B_internal_rows]
//     into temp (this variant contains no division by a diagonal entry of A;
//     presumably the diagonal is treated as all ones - confirm with the caller);
//   - each work-item starts at elim = local id and steps by the local size while
//     elim < row, subtracting temp * A[elim * A_internal_cols + row]
//     from B[elim + group id * B_internal_rows].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_unit_upper_solve - confirm.
337 "// file automatically generated - do not edit!\n"
338 "// inplace solve A \\ B\n"
339 "// matrix layouts: A...row_major, B...col_major\n"
340 "__kernel void unit_upper_solve(\n"
341 " __global const float * A,\n"
342 " unsigned int A_rows,\n"
343 " unsigned int A_cols,\n"
344 " unsigned int A_internal_rows,\n"
345 " unsigned int A_internal_cols,\n"
346 " __global float * B, \n"
347 " unsigned int B_rows,\n"
348 " unsigned int B_cols,\n"
349 " unsigned int B_internal_rows,\n"
350 " unsigned int B_internal_cols)\n"
351 "{ \n"
352 " float temp; \n"
353 " for (int row = A_rows-1; row > -1; --row) \n"
354 " { \n"
355 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
356 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
357 " //eliminate column of op(A) with index 'row' in parallel: \n"
358 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
359 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_cols + row];\n"
360 " }\n"
361 "}\n"
362 ; //matrix_solve_row_col_align1_unit_upper_solve
363 
// OpenCL C source for the kernel trans_upper_solve, kept as adjacent string literals.
// Per the header embedded in the string: in-place solve of A^T against B,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = A_rows-1 down to 0:
//   - barrier, then the work-item with local id 0 divides
//     B[row + group id * B_internal_rows] by A[row + row*A_internal_cols];
//   - barrier, then every work-item loads that B entry into temp;
//   - each work-item starts at elim = local id and steps by the local size while
//     elim < row, subtracting temp * A[elim + row * A_internal_cols]
//     from B[elim + group id * B_internal_rows].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_trans_upper_solve - confirm.
365 "// file automatically generated - do not edit!\n"
366 "// inplace solve A^T \\ B\n"
367 "// matrix layouts: A...row_major, B...col_major\n"
368 "__kernel void trans_upper_solve(\n"
369 " __global const float * A,\n"
370 " unsigned int A_rows,\n"
371 " unsigned int A_cols,\n"
372 " unsigned int A_internal_rows,\n"
373 " unsigned int A_internal_cols,\n"
374 " __global float * B, \n"
375 " unsigned int B_rows,\n"
376 " unsigned int B_cols,\n"
377 " unsigned int B_internal_rows,\n"
378 " unsigned int B_internal_cols)\n"
379 "{ \n"
380 " float temp; \n"
381 " for (int row = A_rows-1; row > -1; --row) \n"
382 " { \n"
383 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
384 " if (get_local_id(0) == 0) \n"
385 " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_cols]; \n"
386 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
387 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
388 " //eliminate column of op(A) with index 'row' in parallel: \n"
389 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
390 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_cols];\n"
391 " }\n"
392 "}\n"
393 ; //matrix_solve_row_col_align1_trans_upper_solve
394 
// OpenCL C source for the kernel upper_trans_solve, kept as adjacent string literals.
// Per the header embedded in the string: in-place solve of A against B^T,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = A_rows-1 down to 0:
//   - barrier, then the work-item with local id 0 divides
//     B[row * B_internal_rows + group id] by A[row + row*A_internal_cols];
//   - barrier, then every work-item loads that B entry into temp;
//   - each work-item starts at elim = local id and steps by the local size while
//     elim < row, subtracting temp * A[elim * A_internal_cols + row]
//     from B[elim * B_internal_rows + group id].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_upper_trans_solve - confirm.
396 "// file automatically generated - do not edit!\n"
397 "// inplace solve A \\ B^T\n"
398 "// matrix layouts: A...row_major, B...col_major\n"
399 "__kernel void upper_trans_solve(\n"
400 " __global const float * A,\n"
401 " unsigned int A_rows,\n"
402 " unsigned int A_cols,\n"
403 " unsigned int A_internal_rows,\n"
404 " unsigned int A_internal_cols,\n"
405 " __global float * B, \n"
406 " unsigned int B_rows,\n"
407 " unsigned int B_cols,\n"
408 " unsigned int B_internal_rows,\n"
409 " unsigned int B_internal_cols)\n"
410 "{ \n"
411 " float temp; \n"
412 " for (int row = A_rows-1; row > -1; --row) \n"
413 " { \n"
414 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
415 " if (get_local_id(0) == 0) \n"
416 " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
417 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
418 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
419 " //eliminate column of op(A) with index 'row' in parallel: \n"
420 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
421 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
422 " }\n"
423 "}\n"
424 ; //matrix_solve_row_col_align1_upper_trans_solve
425 
// OpenCL C source for the kernel trans_unit_lower_trans_solve, kept as adjacent
// string literals.
// Per the header embedded in the string: in-place solve of A^T against B^T,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = 0 up to A_rows-1:
//   - barrier, then every work-item loads B[row * B_internal_rows + group id]
//     into temp (this variant contains no division by a diagonal entry of A;
//     presumably the diagonal is treated as all ones - confirm with the caller);
//   - each work-item starts at elim = row + local id + 1 and steps by the local
//     size while elim < A_rows, subtracting temp * A[elim + row * A_internal_cols]
//     from B[elim * B_internal_rows + group id].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_trans_unit_lower_trans_solve.
427 "// file automatically generated - do not edit!\n"
428 "// inplace solve A^T \\ B^T\n"
429 "// matrix layouts: A...row_major, B...col_major\n"
430 "__kernel void trans_unit_lower_trans_solve(\n"
431 " __global const float * A,\n"
432 " unsigned int A_rows,\n"
433 " unsigned int A_cols,\n"
434 " unsigned int A_internal_rows,\n"
435 " unsigned int A_internal_cols,\n"
436 " __global float * B, \n"
437 " unsigned int B_rows,\n"
438 " unsigned int B_cols,\n"
439 " unsigned int B_internal_rows,\n"
440 " unsigned int B_internal_cols)\n"
441 "{ \n"
442 " float temp; \n"
443 " for (int row = 0; row < A_rows; ++row) \n"
444 " { \n"
445 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
446 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
447 " //eliminate column of op(A) with index 'row' in parallel: \n"
448 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
449 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
450 " }\n"
451 "}\n"
452 ; //matrix_solve_row_col_align1_trans_unit_lower_trans_solve
453 
// OpenCL C source for the kernel trans_unit_upper_solve, kept as adjacent string
// literals.
// Per the header embedded in the string: in-place solve of A^T against B,
// with A stored row-major and B stored column-major.
// Steps in the kernel text, for row = A_rows-1 down to 0:
//   - barrier, then every work-item loads B[row + group id * B_internal_rows]
//     into temp (this variant contains no division by a diagonal entry of A;
//     presumably the diagonal is treated as all ones - confirm with the caller);
//   - each work-item starts at elim = local id and steps by the local size while
//     elim < row, subtracting temp * A[elim + row * A_internal_cols]
//     from B[elim + group id * B_internal_rows].
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are declared but
// never read in the body.
// NOTE(review): the declaration naming this literal is not visible in this listing;
// the closing comment suggests matrix_solve_row_col_align1_trans_unit_upper_solve.
455 "// file automatically generated - do not edit!\n"
456 "// inplace solve A^T \\ B\n"
457 "// matrix layouts: A...row_major, B...col_major\n"
458 "__kernel void trans_unit_upper_solve(\n"
459 " __global const float * A,\n"
460 " unsigned int A_rows,\n"
461 " unsigned int A_cols,\n"
462 " unsigned int A_internal_rows,\n"
463 " unsigned int A_internal_cols,\n"
464 " __global float * B, \n"
465 " unsigned int B_rows,\n"
466 " unsigned int B_cols,\n"
467 " unsigned int B_internal_rows,\n"
468 " unsigned int B_internal_cols)\n"
469 "{ \n"
470 " float temp; \n"
471 " for (int row = A_rows-1; row > -1; --row) \n"
472 " { \n"
473 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
474 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
475 " //eliminate column of op(A) with index 'row' in parallel: \n"
476 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
477 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_cols];\n"
478 " }\n"
479 "}\n"
480 ; //matrix_solve_row_col_align1_trans_unit_upper_solve
481 
482  } //namespace kernels
483  } //namespace linalg
484 } //namespace viennacl
485 #endif