ViennaCL - The Vienna Computing Library 1.2.0
viennacl/linalg/kernels/compressed_matrix_source.h
#ifndef VIENNACL_LINALG_KERNELS_COMPRESSED_MATRIX_SOURCE_HPP_
#define VIENNACL_LINALG_KERNELS_COMPRESSED_MATRIX_SOURCE_HPP_
//Automatically generated file from auxiliary-directory, do not edit manually!
namespace viennacl
{
namespace linalg
{
namespace kernels
{
const char * const compressed_matrix_align4_vec_mul =
"__kernel void vec_mul(\n"
" __global const unsigned int * row_indices,\n"
" __global const uint4 * column_indices, \n"
" __global const float4 * elements,\n"
" __global const float * vector, \n"
" __global float * result,\n"
" unsigned int size)\n"
"{ \n"
" float dot_prod;\n"
" unsigned int start, next_stop;\n"
" uint4 col_idx;\n"
" float4 tmp_vec;\n"
" float4 tmp_entries;\n"
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
" {\n"
" dot_prod = 0.0f;\n"
" start = row_indices[row] / 4;\n"
" next_stop = row_indices[row+1] / 4;\n"
" for (unsigned int i = start; i < next_stop; ++i)\n"
" {\n"
" col_idx = column_indices[i];\n"
" tmp_entries = elements[i];\n"
" tmp_vec.x = vector[col_idx.x];\n"
" tmp_vec.y = vector[col_idx.y];\n"
" tmp_vec.z = vector[col_idx.z];\n"
" tmp_vec.w = vector[col_idx.w];\n"
" dot_prod += dot(tmp_entries, tmp_vec);\n"
" }\n"
" result[row] = dot_prod;\n"
" }\n"
"}\n"
; //compressed_matrix_align4_vec_mul
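// Reference note (not part of ViennaCL): the vec_mul kernels compute y = A*x for a matrix
// in CSR (compressed row) storage. The align4 variant above divides the CSR row offsets
// by 4, i.e. it expects each row to be padded so that its column indices and values can
// be fetched as uint4/float4 blocks. A minimal sequential host-side sketch of the same
// product on unpadded CSR data (all names hypothetical, not ViennaCL API):
inline void csr_spmv_reference(unsigned int const * row_indices,   // size rows+1
                               unsigned int const * column_indices,
                               float const * elements,
                               float const * x,
                               float * y,
                               unsigned int rows)
{
  for (unsigned int row = 0; row < rows; ++row)
  {
    float dot_prod = 0.0f;
    // entries of row 'row' live in [row_indices[row], row_indices[row+1])
    for (unsigned int i = row_indices[row]; i < row_indices[row + 1]; ++i)
      dot_prod += elements[i] * x[column_indices[i]];
    y[row] = dot_prod;
  }
}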

const char * const compressed_matrix_align1_jacobi_precond =
"__kernel void jacobi_precond(\n"
" __global const unsigned int * row_indices,\n"
" __global const unsigned int * column_indices, \n"
" __global const float * elements,\n"
" __global float * diag_M_inv,\n"
" unsigned int size) \n"
"{ \n"
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
" {\n"
" float diag = 1.0f;\n"
" unsigned int row_end = row_indices[row+1];\n"
" for (unsigned int i = row_indices[row]; i < row_end; ++i)\n"
" {\n"
" if (row == column_indices[i])\n"
" {\n"
" diag = elements[i];\n"
" break;\n"
" }\n"
" }\n"
" diag_M_inv[row] = 1.0f / diag;\n"
" }\n"
"}\n"
; //compressed_matrix_align1_jacobi_precond
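// Reference note (not part of ViennaCL): jacobi_precond stores 1/a_ii per row, so applying
// the Jacobi (diagonal) preconditioner z = M^{-1} r is a component-wise product with that
// vector. Hypothetical host-side sketch:
inline void apply_jacobi_preconditioner(float const * diag_M_inv,
                                        float const * r,
                                        float * z,
                                        unsigned int size)
{
  for (unsigned int i = 0; i < size; ++i)
    z[i] = diag_M_inv[i] * r[i];   // z = D^{-1} r
}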

const char * const compressed_matrix_align1_lu_forward =
" \n"
"// compute y in Ly = z for incomplete LU factorizations of a sparse matrix in compressed format\n"
"__kernel void lu_forward(\n"
" __global const unsigned int * row_indices,\n"
" __global const unsigned int * column_indices, \n"
" __global const float * elements,\n"
" __local int * buffer, \n"
" __local float * vec_entries, //a memory block from vector\n"
" __global float * vector,\n"
" unsigned int size) \n"
"{\n"
" int waiting_for; //block index that must be finished before the current thread can start\n"
" unsigned int waiting_for_index;\n"
" int block_offset;\n"
" unsigned int col;\n"
" unsigned int row;\n"
" unsigned int row_index_end;\n"
" \n"
" //forward substitution: one thread per row in blocks of get_global_size(0)\n"
" for (unsigned int block_num = 0; block_num <= size / get_global_size(0); ++block_num)\n"
" {\n"
" block_offset = block_num * get_global_size(0);\n"
" row = block_offset + get_global_id(0);\n"
" buffer[get_global_id(0)] = 0; //set flag to 'undone'\n"
" waiting_for = -1;\n"
" if (row < size)\n"
" {\n"
" vec_entries[get_global_id(0)] = vector[row];\n"
" waiting_for_index = row_indices[row];\n"
" row_index_end = row_indices[row+1];\n"
" }\n"
" \n"
" if (get_global_id(0) == 0)\n"
" buffer[get_global_size(0)] = 1;\n"
" //try to eliminate all lines in the block. \n"
" //in worst case scenarios, in each step only one line can be substituted, thus loop\n"
" for (unsigned int k = 0; k<get_global_size(0); ++k)\n"
" {\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" if (row < size) //valid index?\n"
" {\n"
" if (waiting_for >= 0)\n"
" {\n"
" if (buffer[waiting_for] == 1)\n"
" waiting_for = -1;\n"
" }\n"
" \n"
" if (waiting_for == -1) //substitution not yet done, check whether possible\n"
" {\n"
" //check whether reduction is possible:\n"
" for (unsigned int j = waiting_for_index; j < row_index_end; ++j)\n"
" {\n"
" col = column_indices[j];\n"
" if (col < block_offset) //index valid, but not from current block\n"
" vec_entries[get_global_id(0)] -= elements[j] * vector[col];\n"
" else if (col < row) //index is from current block\n"
" {\n"
" if (buffer[col - block_offset] == 0) //entry is not yet calculated\n"
" {\n"
" waiting_for = col - block_offset;\n"
" waiting_for_index = j;\n"
" break;\n"
" }\n"
" else //updated entry is available in shared memory:\n"
" vec_entries[get_global_id(0)] -= elements[j] * vec_entries[col - block_offset];\n"
" }\n"
" }\n"
" \n"
" if (waiting_for == -1) //this row is done\n"
" {\n"
" buffer[get_global_id(0)] = 1;\n"
" waiting_for = -2; //magic number: thread is finished\n"
" }\n"
" } \n"
" } //row < size\n"
" else\n"
" buffer[get_global_id(0)] = 1; //work done (because there is no work to be done at all...)\n"
" ///////// check whether all threads are done. If yes, exit loop /////////////\n"
" \n"
" if (buffer[get_global_id(0)] == 0)\n"
" buffer[get_global_size(0)] = 0;\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" \n"
" if (buffer[get_global_size(0)] > 0) //all threads break this loop simultaneously\n"
" break;\n"
" if (get_global_id(0) == 0)\n"
" buffer[get_global_size(0)] = 1;\n"
" } //for k\n"
" \n"
" //write to vector:\n"
" if (row < size)\n"
" vector[row] = vec_entries[get_global_id(0)];\n"
" \n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" } //for block_num\n"
"}\n"
; //compressed_matrix_align1_lu_forward
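// Reference note (not part of ViennaCL): lu_forward solves L y = z in place on 'vector',
// processing one block of rows per pass and using the local buffers to wait until in-block
// dependencies are resolved. The kernel never divides by a diagonal entry, i.e. it treats
// the lower-triangular factor as having a unit diagonal. A sequential sketch of the same
// substitution on CSR data (hypothetical names):
inline void unit_lower_solve_reference(unsigned int const * row_indices,
                                       unsigned int const * column_indices,
                                       float const * elements,
                                       float * vec,          // holds z on entry, y on exit
                                       unsigned int size)
{
  for (unsigned int row = 0; row < size; ++row)
    for (unsigned int i = row_indices[row]; i < row_indices[row + 1]; ++i)
      if (column_indices[i] < row)                            // strictly lower part only
        vec[row] -= elements[i] * vec[column_indices[i]];
}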

const char * const compressed_matrix_align1_bicgstab_kernel1 =
"void helper_bicgstab_kernel1_parallel_reduction( __local float * tmp_buffer )\n"
"{\n"
" for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2)\n"
" {\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" if (get_local_id(0) < stride)\n"
" tmp_buffer[get_local_id(0)] += tmp_buffer[get_local_id(0)+stride];\n"
" }\n"
"}\n"
"//////// inner products:\n"
"float bicgstab_kernel1_inner_prod(\n"
" __global const float * vec1,\n"
" __global const float * vec2,\n"
" unsigned int size,\n"
" __local float * tmp_buffer)\n"
"{\n"
" float tmp = 0;\n"
" unsigned int i_end = ((size - 1) / get_local_size(0) + 1) * get_local_size(0);\n"
" for (unsigned int i = get_local_id(0); i < i_end; i += get_local_size(0))\n"
" {\n"
" if (i < size)\n"
" tmp += vec1[i] * vec2[i];\n"
" }\n"
" tmp_buffer[get_local_id(0)] = tmp;\n"
" \n"
" helper_bicgstab_kernel1_parallel_reduction(tmp_buffer);\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" return tmp_buffer[0];\n"
"}\n"
"__kernel void bicgstab_kernel1(\n"
" __global const float * tmp0,\n"
" __global const float * r0star, \n"
" __global const float * residual,\n"
" __global float * s,\n"
" __global float * alpha,\n"
" __global const float * ip_rr0star,\n"
" __local float * tmp_buffer,\n"
" unsigned int size) \n"
"{ \n"
" float alpha_local = ip_rr0star[0] / bicgstab_kernel1_inner_prod(tmp0, r0star, size, tmp_buffer);\n"
" \n"
" for (unsigned int i = get_local_id(0); i < size; i += get_local_size(0))\n"
" s[i] = residual[i] - alpha_local * tmp0[i];\n"
" \n"
" if (get_global_id(0) == 0)\n"
" alpha[0] = alpha_local;\n"
"}\n"
; //compressed_matrix_align1_bicgstab_kernel1
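// Reference note (not part of ViennaCL): in the usual BiCGStab formulation this kernel
// computes alpha = (r, r0*) / (A p, r0*), with tmp0 playing the role of A p, and the
// intermediate vector s = r - alpha * A p. Sequential sketch (hypothetical names):
inline float bicgstab_step1_reference(float const * Ap,       // corresponds to tmp0
                                      float const * r0star,
                                      float const * residual,
                                      float * s,
                                      float ip_rr0star,       // (r, r0*) from the host
                                      unsigned int size)
{
  float denom = 0.0f;
  for (unsigned int i = 0; i < size; ++i)
    denom += Ap[i] * r0star[i];                  // (A p, r0*)
  float alpha = ip_rr0star / denom;
  for (unsigned int i = 0; i < size; ++i)
    s[i] = residual[i] - alpha * Ap[i];          // s = r - alpha * A p
  return alpha;
}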

const char * const compressed_matrix_align1_lu_backward =
"// compute x in Ux = y for incomplete LU factorizations of a sparse matrix in compressed format\n"
"__kernel void lu_backward(\n"
" __global const unsigned int * row_indices,\n"
" __global const unsigned int * column_indices, \n"
" __global const float * elements,\n"
" __local int * buffer, \n"
" __local float * vec_entries, //a memory block from vector\n"
" __global float * vector,\n"
" unsigned int size) \n"
"{\n"
" int waiting_for; //block index that must be finished before the current thread can start\n"
" unsigned int waiting_for_index;\n"
" unsigned int block_offset;\n"
" unsigned int col;\n"
" unsigned int row;\n"
" unsigned int row_index_end;\n"
" float diagonal_entry = 42;\n"
" \n"
" //backward substitution: one thread per row in blocks of get_global_size(0)\n"
" for (int block_num = size / get_global_size(0); block_num > -1; --block_num)\n"
" {\n"
" block_offset = block_num * get_global_size(0);\n"
" row = block_offset + get_global_id(0);\n"
" buffer[get_global_id(0)] = 0; //set flag to 'undone'\n"
" waiting_for = -1;\n"
" \n"
" if (row < size)\n"
" {\n"
" vec_entries[get_global_id(0)] = vector[row];\n"
" waiting_for_index = row_indices[row];\n"
" row_index_end = row_indices[row+1];\n"
" diagonal_entry = column_indices[waiting_for_index];\n"
" }\n"
" \n"
" if (get_global_id(0) == 0)\n"
" buffer[get_global_size(0)] = 1;\n"
" //try to eliminate all lines in the block. \n"
" //in worst case scenarios, in each step only one line can be substituted, thus loop\n"
" for (unsigned int k = 0; k<get_global_size(0); ++k)\n"
" {\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" if (row < size) //valid index?\n"
" {\n"
" if (waiting_for >= 0)\n"
" {\n"
" if (buffer[waiting_for] == 1)\n"
" waiting_for = -1;\n"
" }\n"
" \n"
" if (waiting_for == -1) //substitution not yet done, check whether possible\n"
" {\n"
" //check whether reduction is possible:\n"
" for (unsigned int j = waiting_for_index; j < row_index_end; ++j)\n"
" {\n"
" col = column_indices[j];\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" if (col >= block_offset + get_global_size(0)) //index valid, but not from current block\n"
" vec_entries[get_global_id(0)] -= elements[j] * vector[col];\n"
" else if (col > row) //index is from current block\n"
" {\n"
" if (buffer[col - block_offset] == 0) //entry is not yet calculated\n"
" {\n"
" waiting_for = col - block_offset;\n"
" waiting_for_index = j;\n"
" break;\n"
" }\n"
" else //updated entry is available in shared memory:\n"
" vec_entries[get_global_id(0)] -= elements[j] * vec_entries[col - block_offset];\n"
" }\n"
" else if (col == row)\n"
" diagonal_entry = elements[j];\n"
" }\n"
" \n"
" if (waiting_for == -1) //this row is done\n"
" {\n"
" if (row == 0)\n"
" vec_entries[get_global_id(0)] /= elements[0];\n"
" else\n"
" vec_entries[get_global_id(0)] /= diagonal_entry;\n"
" buffer[get_global_id(0)] = 1;\n"
" waiting_for = -2; //magic number: thread is finished\n"
" }\n"
" } \n"
" } //row < size\n"
" else\n"
" buffer[get_global_id(0)] = 1; //work done (because there is no work to be done at all...)\n"
" \n"
" ///////// check whether all threads are done. If yes, exit loop /////////////\n"
" if (buffer[get_global_id(0)] == 0)\n"
" buffer[get_global_size(0)] = 0;\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" \n"
" if (buffer[get_global_size(0)] > 0) //all threads break the loop simultaneously\n"
" break;\n"
" if (get_global_id(0) == 0)\n"
" buffer[get_global_size(0)] = 1;\n"
" } //for k\n"
" if (row < size)\n"
" vector[row] = vec_entries[get_global_id(0)];\n"
" //vector[row] = diagonal_entry;\n"
" \n"
" //if (row == 0)\n"
" //vector[0] = diagonal_entry;\n"
" //vector[0] = elements[0];\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" } //for block_num\n"
"}\n"
; //compressed_matrix_align1_lu_backward
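// Reference note (not part of ViennaCL): lu_backward solves U x = y in place on 'vector',
// walking the row blocks from the last one upwards and dividing each row by its diagonal
// entry once all dependencies inside and beyond the block have been substituted.
// Sequential sketch of the same substitution (hypothetical names):
inline void upper_solve_reference(unsigned int const * row_indices,
                                  unsigned int const * column_indices,
                                  float const * elements,
                                  float * vec,          // holds y on entry, x on exit
                                  unsigned int size)
{
  for (unsigned int r = size; r > 0; --r)               // rows size-1, ..., 0
  {
    unsigned int row = r - 1;
    float diagonal_entry = 1.0f;
    for (unsigned int i = row_indices[row]; i < row_indices[row + 1]; ++i)
    {
      unsigned int col = column_indices[i];
      if (col > row)                                    // strictly upper part
        vec[row] -= elements[i] * vec[col];
      else if (col == row)
        diagonal_entry = elements[i];
    }
    vec[row] /= diagonal_entry;
  }
}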

const char * const compressed_matrix_align1_row_scaling_2 =
"__kernel void row_scaling_2(\n"
" __global const unsigned int * row_indices,\n"
" __global const unsigned int * column_indices, \n"
" __global const float * elements,\n"
" __global float * diag_M_inv,\n"
" unsigned int size) \n"
"{ \n"
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
" {\n"
" float dot_prod = 0.0f;\n"
" float temp = 0.0f;\n"
" unsigned int row_end = row_indices[row+1];\n"
" for (unsigned int i = row_indices[row]; i < row_end; ++i)\n"
" {\n"
" temp = elements[i];\n"
" dot_prod += temp * temp;\n"
" }\n"
" diag_M_inv[row] = 1.0f / sqrt(dot_prod);\n"
" }\n"
"}\n"
; //compressed_matrix_align1_row_scaling_2

const char * const compressed_matrix_align1_vec_mul =
"__kernel void vec_mul(\n"
" __global const unsigned int * row_indices,\n"
" __global const unsigned int * column_indices, \n"
" __global const float * elements,\n"
" __global const float * vector, \n"
" __global float * result,\n"
" unsigned int size) \n"
"{ \n"
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
" {\n"
" float dot_prod = 0.0f;\n"
" unsigned int row_end = row_indices[row+1];\n"
" for (unsigned int i = row_indices[row]; i < row_end; ++i)\n"
" dot_prod += elements[i] * vector[column_indices[i]];\n"
" result[row] = dot_prod;\n"
" }\n"
"}\n"
; //compressed_matrix_align1_vec_mul

const char * const compressed_matrix_align1_jacobi =
"__kernel void jacobi(\n"
" __global const unsigned int * row_indices,\n"
" __global const unsigned int * column_indices,\n"
" __global const float * elements,\n"
" float weight,\n"
" __global const float * old_result,\n"
" __global float * new_result,\n"
" __global const float * rhs,\n"
" unsigned int size)\n"
" {\n"
" float sum, diag=1;\n"
" int col;\n"
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
" {\n"
" sum = 0;\n"
" for (unsigned int j = row_indices[i]; j<row_indices[i+1]; j++)\n"
" {\n"
" col = column_indices[j];\n"
" if (i == col)\n"
" diag = elements[j];\n"
" else \n"
" sum += elements[j] * old_result[col]; \n"
" } \n"
" new_result[i] = weight * (rhs[i]-sum) / diag + (1-weight) * old_result[i]; \n"
" } \n"
" } \n"
; //compressed_matrix_align1_jacobi
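// Reference note (not part of ViennaCL): the jacobi kernel performs one damped Jacobi
// sweep, x_new[i] = w * (b[i] - sum_{j != i} a_ij * x_old[j]) / a_ii + (1 - w) * x_old[i].
// Sequential sketch on CSR data (hypothetical names):
inline void damped_jacobi_sweep_reference(unsigned int const * row_indices,
                                          unsigned int const * column_indices,
                                          float const * elements,
                                          float weight,
                                          float const * x_old,
                                          float * x_new,
                                          float const * rhs,
                                          unsigned int size)
{
  for (unsigned int i = 0; i < size; ++i)
  {
    float sum = 0.0f;
    float diag = 1.0f;
    for (unsigned int j = row_indices[i]; j < row_indices[i + 1]; ++j)
    {
      unsigned int col = column_indices[j];
      if (col == i)
        diag = elements[j];               // diagonal entry a_ii
      else
        sum += elements[j] * x_old[col];  // off-diagonal contribution
    }
    x_new[i] = weight * (rhs[i] - sum) / diag + (1.0f - weight) * x_old[i];
  }
}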

const char * const compressed_matrix_align1_bicgstab_kernel2 =
"void helper_bicgstab_kernel2_parallel_reduction( __local float * tmp_buffer )\n"
"{\n"
" for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2)\n"
" {\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" if (get_local_id(0) < stride)\n"
" tmp_buffer[get_local_id(0)] += tmp_buffer[get_local_id(0)+stride];\n"
" }\n"
"}\n"
"//////// inner products:\n"
"float bicgstab_kernel2_inner_prod(\n"
" __global const float * vec1,\n"
" __global const float * vec2,\n"
" unsigned int size,\n"
" __local float * tmp_buffer)\n"
"{\n"
" float tmp = 0;\n"
" unsigned int i_end = ((size - 1) / get_local_size(0) + 1) * get_local_size(0);\n"
" for (unsigned int i = get_local_id(0); i < i_end; i += get_local_size(0))\n"
" {\n"
" if (i < size)\n"
" tmp += vec1[i] * vec2[i];\n"
" }\n"
" tmp_buffer[get_local_id(0)] = tmp;\n"
" \n"
" helper_bicgstab_kernel2_parallel_reduction(tmp_buffer);\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" return tmp_buffer[0];\n"
"}\n"
"__kernel void bicgstab_kernel2(\n"
" __global const float * tmp0,\n"
" __global const float * tmp1,\n"
" __global const float * r0star, \n"
" __global const float * s, \n"
" __global float * p, \n"
" __global float * result,\n"
" __global float * residual,\n"
" __global const float * alpha,\n"
" __global float * ip_rr0star,\n"
" __global float * error_estimate,\n"
" __local float * tmp_buffer,\n"
" unsigned int size) \n"
"{ \n"
" float omega_local = bicgstab_kernel2_inner_prod(tmp1, s, size, tmp_buffer) / bicgstab_kernel2_inner_prod(tmp1, tmp1, size, tmp_buffer);\n"
" float alpha_local = alpha[0];\n"
" \n"
" //result += alpha * p + omega * s;\n"
" for (unsigned int i = get_local_id(0); i < size; i += get_local_size(0))\n"
" result[i] += alpha_local * p[i] + omega_local * s[i];\n"
" //residual = s - omega * tmp1;\n"
" for (unsigned int i = get_local_id(0); i < size; i += get_local_size(0))\n"
" residual[i] = s[i] - omega_local * tmp1[i];\n"
" //new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star);\n"
" float new_ip_rr0star = bicgstab_kernel2_inner_prod(residual, r0star, size, tmp_buffer);\n"
" float beta = (new_ip_rr0star / ip_rr0star[0]) * (alpha_local / omega_local);\n"
" \n"
" //p = residual + beta * (p - omega*tmp0);\n"
" for (unsigned int i = get_local_id(0); i < size; i += get_local_size(0))\n"
" p[i] = residual[i] + beta * (p[i] - omega_local * tmp0[i]);\n"
" //compute norm of residual:\n"
" float new_error_estimate = bicgstab_kernel2_inner_prod(residual, residual, size, tmp_buffer);\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" //update values:\n"
" if (get_global_id(0) == 0)\n"
" {\n"
" error_estimate[0] = new_error_estimate;\n"
" ip_rr0star[0] = new_ip_rr0star;\n"
" }\n"
"}\n"
; //compressed_matrix_align1_bicgstab_kernel2
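// Reference note (not part of ViennaCL): bicgstab_kernel2 finishes one BiCGStab iteration,
// with tmp0 and tmp1 corresponding to A p and A s in the usual formulation:
// omega = (A s, s) / (A s, A s), x += alpha*p + omega*s, r = s - omega*A s,
// beta = ((r, r0*)_new / (r, r0*)_old) * (alpha / omega), p = r + beta*(p - omega*A p).
// Sequential sketch (hypothetical names):
inline void bicgstab_step2_reference(float const * Ap, float const * As,
                                     float const * r0star, float const * s,
                                     float * p, float * x, float * r,
                                     float alpha, float & ip_rr0star,
                                     float & error_estimate, unsigned int size)
{
  float ts = 0.0f, tt = 0.0f;
  for (unsigned int i = 0; i < size; ++i) { ts += As[i] * s[i]; tt += As[i] * As[i]; }
  float omega = ts / tt;

  float new_ip = 0.0f, res_norm2 = 0.0f;
  for (unsigned int i = 0; i < size; ++i)
  {
    x[i] += alpha * p[i] + omega * s[i];   // update solution
    r[i]  = s[i] - omega * As[i];          // update residual
    new_ip += r[i] * r0star[i];
    res_norm2 += r[i] * r[i];
  }
  float beta = (new_ip / ip_rr0star) * (alpha / omega);
  for (unsigned int i = 0; i < size; ++i)
    p[i] = r[i] + beta * (p[i] - omega * Ap[i]);

  ip_rr0star = new_ip;                     // (r, r0*) for the next iteration
  error_estimate = res_norm2;              // squared residual norm
}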

const char * const compressed_matrix_align1_row_scaling_1 =
"__kernel void row_scaling_1(\n"
" __global const unsigned int * row_indices,\n"
" __global const unsigned int * column_indices, \n"
" __global const float * elements,\n"
" __global float * diag_M_inv,\n"
" unsigned int size) \n"
"{ \n"
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
" {\n"
" float dot_prod = 0.0f;\n"
" unsigned int row_end = row_indices[row+1];\n"
" for (unsigned int i = row_indices[row]; i < row_end; ++i)\n"
" dot_prod += fabs(elements[i]);\n"
" diag_M_inv[row] = 1.0f / dot_prod;\n"
" }\n"
"}\n"
; //compressed_matrix_align1_row_scaling_1

const char * const compressed_matrix_align8_vec_mul =
"__kernel void vec_mul(\n"
" __global const unsigned int * row_indices,\n"
" __global const uint8 * column_indices, \n"
" __global const float8 * elements,\n"
" __global const float * vector, \n"
" __global float * result,\n"
" unsigned int size)\n"
"{ \n"
" float dot_prod;\n"
" unsigned int start, next_stop;\n"
" uint8 col_idx;\n"
" float8 tmp_vec;\n"
" float8 tmp_entries;\n"
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
" {\n"
" dot_prod = 0.0f;\n"
" start = row_indices[row] / 8;\n"
" next_stop = row_indices[row+1] / 8;\n"
" for (unsigned int i = start; i < next_stop; ++i)\n"
" {\n"
" col_idx = column_indices[i];\n"
" tmp_entries = elements[i];\n"
" tmp_vec.s0 = vector[col_idx.s0];\n"
" tmp_vec.s1 = vector[col_idx.s1];\n"
" tmp_vec.s2 = vector[col_idx.s2];\n"
" tmp_vec.s3 = vector[col_idx.s3];\n"
" tmp_vec.s4 = vector[col_idx.s4];\n"
" tmp_vec.s5 = vector[col_idx.s5];\n"
" tmp_vec.s6 = vector[col_idx.s6];\n"
" tmp_vec.s7 = vector[col_idx.s7];\n"
" dot_prod += dot(tmp_entries.lo, tmp_vec.lo);\n"
" dot_prod += dot(tmp_entries.hi, tmp_vec.hi);\n"
" }\n"
" result[row] = dot_prod;\n"
" }\n"
"}\n"
; //compressed_matrix_align8_vec_mul
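// Usage note (not part of ViennaCL; within the library these source strings are compiled
// by its OpenCL kernel management layer): with the plain OpenCL C API, one of the strings
// could be built roughly as sketched below. <CL/cl.h> must be available at file scope,
// 'context' and 'device' are assumed to be set up elsewhere, and error handling is omitted.
inline cl_kernel build_vec_mul_kernel_sketch(cl_context context, cl_device_id device)
{
  cl_int err = CL_SUCCESS;
  const char * source = compressed_matrix_align1_vec_mul;    // entry point is "vec_mul"
  cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, &err);
  clBuildProgram(program, 1, &device, "", NULL, NULL);        // compile for this device
  return clCreateKernel(program, "vec_mul", &err);            // arguments are then set via
                                                              // clSetKernelArg() and the kernel
                                                              // enqueued with clEnqueueNDRangeKernel()
}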

} //namespace kernels
} //namespace linalg
} //namespace viennacl
#endif