ViennaCL - The Vienna Computing Library
1.2.0
Main Page
Namespaces
Data Structures
Files
File List
Globals
viennacl
linalg
kernels
matrix_solve_col_col_source.h
Go to the documentation of this file.
1
#ifndef VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_COL_COL_SOURCE_HPP_
2
#define VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_COL_COL_SOURCE_HPP_
3
//Automatically generated file from auxiliary-directory, do not edit manually!
4
namespace
viennacl
5
{
6
namespace
linalg
7
{
8
namespace
kernels
9
{
10
/**
 * @brief OpenCL source text of the kernel `lower_trans_solve`
 *        (in-place solve A \ B^T, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in ascending order (forward substitution). For each row the work item with
 * local id 0 divides B[row * B_internal_rows + get_group_id(0)] by the diagonal entry of A.
 * After a barrier, all work items of the group subtract temp * A[elim + row * A_internal_rows]
 * from the entries with elim > row, striding by get_local_size(0).
 *
 * The diagonal entry is read as A[row + row * A_internal_rows], i.e. with the same column stride
 * as the off-diagonal accesses. Using A_internal_cols there would only address the diagonal when
 * both padded sizes coincide.
 *
 * The kernel text is marked as generated; keep the generator input in sync with it.
 */
const char * const matrix_solve_col_col_align1_lower_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B^T\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void lower_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
// Diagonal entry (row,row) of column-major A: the column stride is A_internal_rows.
" B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_rows + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_lower_trans_solve
40
41
/**
 * @brief OpenCL source text of the kernel `unit_lower_solve`
 *        (in-place solve A \ B, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in ascending order (forward substitution). The diagonal of A is never read,
 * so no division takes place. After the barrier each work item reads
 * temp = B[row + get_group_id(0) * B_internal_rows] and subtracts
 * temp * A[elim + row * A_internal_rows] from the entries with elim > row, striding by
 * get_local_size(0).
 */
const char * const matrix_solve_col_col_align1_unit_lower_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void unit_lower_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_rows]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_unit_lower_solve
68
69
/**
 * @brief OpenCL source text of the kernel `trans_unit_upper_trans_solve`
 *        (in-place solve A^T \ B^T, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in descending order (back substitution). The diagonal of A is never read,
 * so no division takes place. After the barrier each work item reads
 * temp = B[row * B_internal_rows + get_group_id(0)] and subtracts
 * temp * A[elim * A_internal_rows + row] from the entries with elim < row, striding by
 * get_local_size(0).
 */
const char * const matrix_solve_col_col_align1_trans_unit_upper_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B^T\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void trans_unit_upper_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_rows + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_trans_unit_upper_trans_solve
96
97
/**
 * @brief OpenCL source text of the kernel `unit_upper_trans_solve`
 *        (in-place solve A \ B^T, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in descending order (back substitution). The diagonal of A is never read,
 * so no division takes place. After the barrier each work item reads
 * temp = B[row * B_internal_rows + get_group_id(0)] and subtracts
 * temp * A[elim + row * A_internal_rows] from the entries with elim < row, striding by
 * get_local_size(0).
 */
const char * const matrix_solve_col_col_align1_unit_upper_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B^T\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void unit_upper_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_rows + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_unit_upper_trans_solve
124
125
/**
 * @brief OpenCL source text of the kernel `unit_lower_trans_solve`
 *        (in-place solve A \ B^T, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in ascending order (forward substitution). The diagonal of A is never read,
 * so no division takes place. After the barrier each work item reads
 * temp = B[row * B_internal_rows + get_group_id(0)] and subtracts
 * temp * A[elim + row * A_internal_rows] from the entries with elim > row, striding by
 * get_local_size(0).
 */
const char * const matrix_solve_col_col_align1_unit_lower_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B^T\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void unit_lower_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_rows + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_unit_lower_trans_solve
152
153
/**
 * @brief OpenCL source text of the kernel `trans_upper_trans_solve`
 *        (in-place solve A^T \ B^T, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in descending order (back substitution). For each row the work item with
 * local id 0 divides B[row * B_internal_rows + get_group_id(0)] by the diagonal entry of A.
 * After a barrier, all work items of the group subtract temp * A[elim * A_internal_rows + row]
 * from the entries with elim < row, striding by get_local_size(0).
 *
 * The diagonal entry is read as A[row + row * A_internal_rows], i.e. with the same column stride
 * as the off-diagonal accesses. Using A_internal_cols there would only address the diagonal when
 * both padded sizes coincide.
 *
 * The kernel text is marked as generated; keep the generator input in sync with it.
 */
const char * const matrix_solve_col_col_align1_trans_upper_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B^T\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void trans_upper_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
// Diagonal entry (row,row) of column-major A: the column stride is A_internal_rows.
" B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_rows + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_trans_upper_trans_solve
183
184
/**
 * @brief OpenCL source text of the kernel `upper_solve`
 *        (in-place solve A \ B, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in descending order (back substitution). For each row the work item with
 * local id 0 divides B[row + get_group_id(0) * B_internal_rows] by the diagonal entry of A.
 * After a barrier, all work items of the group subtract temp * A[elim + row * A_internal_rows]
 * from the entries with elim < row, striding by get_local_size(0).
 *
 * The diagonal entry is read as A[row + row * A_internal_rows], i.e. with the same column stride
 * as the off-diagonal accesses. Using A_internal_cols there would only address the diagonal when
 * both padded sizes coincide.
 *
 * The kernel text is marked as generated; keep the generator input in sync with it.
 */
const char * const matrix_solve_col_col_align1_upper_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void upper_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
// Diagonal entry (row,row) of column-major A: the column stride is A_internal_rows.
" B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_rows]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_upper_solve
214
215
/**
 * @brief OpenCL source text of the kernel `lower_solve`
 *        (in-place solve A \ B, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in ascending order (forward substitution). For each row the work item with
 * local id 0 divides B[row + get_group_id(0) * B_internal_rows] by the diagonal entry of A.
 * After a barrier, all work items of the group subtract temp * A[elim + row * A_internal_rows]
 * from the entries with elim > row, striding by get_local_size(0).
 *
 * The diagonal entry is read as A[row + row * A_internal_rows], i.e. with the same column stride
 * as the off-diagonal accesses. Using A_internal_cols there would only address the diagonal when
 * both padded sizes coincide.
 *
 * The kernel text is marked as generated; keep the generator input in sync with it.
 */
const char * const matrix_solve_col_col_align1_lower_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void lower_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
// Diagonal entry (row,row) of column-major A: the column stride is A_internal_rows.
" B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_rows]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_lower_solve
245
246
/**
 * @brief OpenCL source text of the kernel `trans_unit_lower_solve`
 *        (in-place solve A^T \ B, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in ascending order (forward substitution). The diagonal of A is never read,
 * so no division takes place. After the barrier each work item reads
 * temp = B[row + get_group_id(0) * B_internal_rows] and subtracts
 * temp * A[elim * A_internal_rows + row] from the entries with elim > row, striding by
 * get_local_size(0).
 */
const char * const matrix_solve_col_col_align1_trans_unit_lower_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void trans_unit_lower_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_rows]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_trans_unit_lower_solve
273
274
/**
 * @brief OpenCL source text of the kernel `trans_lower_trans_solve`
 *        (in-place solve A^T \ B^T, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in ascending order (forward substitution). For each row the work item with
 * local id 0 divides B[row * B_internal_rows + get_group_id(0)] by the diagonal entry of A.
 * After a barrier, all work items of the group subtract temp * A[elim * A_internal_rows + row]
 * from the entries with elim > row, striding by get_local_size(0).
 *
 * The diagonal entry is read as A[row + row * A_internal_rows], i.e. with the same column stride
 * as the off-diagonal accesses. Using A_internal_cols there would only address the diagonal when
 * both padded sizes coincide.
 *
 * The kernel text is marked as generated; keep the generator input in sync with it.
 */
const char * const matrix_solve_col_col_align1_trans_lower_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B^T\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void trans_lower_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
// Diagonal entry (row,row) of column-major A: the column stride is A_internal_rows.
" B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_rows + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_trans_lower_trans_solve
304
305
/**
 * @brief OpenCL source text of the kernel `trans_lower_solve`
 *        (in-place solve A^T \ B, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in ascending order (forward substitution). For each row the work item with
 * local id 0 divides B[row + get_group_id(0) * B_internal_rows] by the diagonal entry of A.
 * After a barrier, all work items of the group subtract temp * A[elim * A_internal_rows + row]
 * from the entries with elim > row, striding by get_local_size(0).
 *
 * The diagonal entry is read as A[row + row * A_internal_rows], i.e. with the same column stride
 * as the off-diagonal accesses. Using A_internal_cols there would only address the diagonal when
 * both padded sizes coincide.
 *
 * The kernel text is marked as generated; keep the generator input in sync with it.
 */
const char * const matrix_solve_col_col_align1_trans_lower_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void trans_lower_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
// Diagonal entry (row,row) of column-major A: the column stride is A_internal_rows.
" B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_rows]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_trans_lower_solve
335
336
/**
 * @brief OpenCL source text of the kernel `unit_upper_solve`
 *        (in-place solve A \ B, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in descending order (back substitution). The diagonal of A is never read,
 * so no division takes place. After the barrier each work item reads
 * temp = B[row + get_group_id(0) * B_internal_rows] and subtracts
 * temp * A[elim + row * A_internal_rows] from the entries with elim < row, striding by
 * get_local_size(0).
 */
const char * const matrix_solve_col_col_align1_unit_upper_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void unit_upper_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_rows]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_unit_upper_solve
363
364
/**
 * @brief OpenCL source text of the kernel `trans_upper_solve`
 *        (in-place solve A^T \ B, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in descending order (back substitution). For each row the work item with
 * local id 0 divides B[row + get_group_id(0) * B_internal_rows] by the diagonal entry of A.
 * After a barrier, all work items of the group subtract temp * A[elim * A_internal_rows + row]
 * from the entries with elim < row, striding by get_local_size(0).
 *
 * The diagonal entry is read as A[row + row * A_internal_rows], i.e. with the same column stride
 * as the off-diagonal accesses. Using A_internal_cols there would only address the diagonal when
 * both padded sizes coincide.
 *
 * The kernel text is marked as generated; keep the generator input in sync with it.
 */
const char * const matrix_solve_col_col_align1_trans_upper_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void trans_upper_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
// Diagonal entry (row,row) of column-major A: the column stride is A_internal_rows.
" B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_rows]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_trans_upper_solve
394
395
/**
 * @brief OpenCL source text of the kernel `upper_trans_solve`
 *        (in-place solve A \ B^T, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in descending order (back substitution). For each row the work item with
 * local id 0 divides B[row * B_internal_rows + get_group_id(0)] by the diagonal entry of A.
 * After a barrier, all work items of the group subtract temp * A[elim + row * A_internal_rows]
 * from the entries with elim < row, striding by get_local_size(0).
 *
 * The diagonal entry is read as A[row + row * A_internal_rows], i.e. with the same column stride
 * as the off-diagonal accesses. Using A_internal_cols there would only address the diagonal when
 * both padded sizes coincide.
 *
 * The kernel text is marked as generated; keep the generator input in sync with it.
 */
const char * const matrix_solve_col_col_align1_upper_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B^T\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void upper_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
// Diagonal entry (row,row) of column-major A: the column stride is A_internal_rows.
" B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_rows + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_upper_trans_solve
425
426
/**
 * @brief OpenCL source text of the kernel `trans_unit_lower_trans_solve`
 *        (in-place solve A^T \ B^T, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in ascending order (forward substitution). The diagonal of A is never read,
 * so no division takes place. After the barrier each work item reads
 * temp = B[row * B_internal_rows + get_group_id(0)] and subtracts
 * temp * A[elim * A_internal_rows + row] from the entries with elim > row, striding by
 * get_local_size(0).
 */
const char * const matrix_solve_col_col_align1_trans_unit_lower_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B^T\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void trans_unit_lower_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_rows + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_trans_unit_lower_trans_solve
453
454
/**
 * @brief OpenCL source text of the kernel `trans_unit_upper_solve`
 *        (in-place solve A^T \ B, A and B column-major, per the kernel's own header comment).
 *
 * Rows are visited in descending order (back substitution). The diagonal of A is never read,
 * so no division takes place. After the barrier each work item reads
 * temp = B[row + get_group_id(0) * B_internal_rows] and subtracts
 * temp * A[elim * A_internal_rows + row] from the entries with elim < row, striding by
 * get_local_size(0).
 */
const char * const matrix_solve_col_col_align1_trans_unit_upper_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B\n"
"// matrix layouts: A...col_major, B...col_major\n"
"__kernel void trans_unit_upper_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B,  \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_rows]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
" }\n"
"}\n"
; //matrix_solve_col_col_align1_trans_unit_upper_solve
481
482
}
//namespace kernels
483
}
//namespace linalg
484
}
//namespace viennacl
485
#endif
Generated on Wed Oct 10 2012 09:58:14 for ViennaCL - The Vienna Computing Library by Doxygen 1.8.1.2