ViennaCL - The Vienna Computing Library  1.2.0
direct_solve.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_DIRECT_SOLVE_HPP_
2 #define VIENNACL_DIRECT_SOLVE_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2011, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
24 #include "viennacl/vector.hpp"
25 #include "viennacl/matrix.hpp"
28 #include "viennacl/ocl/kernel.hpp"
29 #include "viennacl/ocl/device.hpp"
30 #include "viennacl/ocl/handle.hpp"
31 
32 
33 namespace viennacl
34 {
35  namespace linalg
36  {
38 
43  template<typename SCALARTYPE, typename F1, typename F2, unsigned int A1, unsigned int A2, typename SOLVERTAG>
46  SOLVERTAG)
47  {
48  assert(mat.size1() == mat.size2());
49  assert(mat.size2() == B.size1());
50 
52  matrix<SCALARTYPE, F2, A2> >::ResultType KernelClass;
53  KernelClass::init();
54 
55  std::stringstream ss;
56  ss << SOLVERTAG::name() << "_solve";
57  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), ss.str());
58 
59  k.global_work_size(0, B.size2() * k.local_work_size());
60  viennacl::ocl::enqueue(k(mat, cl_uint(mat.size1()), cl_uint(mat.size2()),
61  cl_uint(mat.internal_size1()), cl_uint(mat.internal_size2()),
62  B, cl_uint(B.size1()), cl_uint(B.size2()),
63  cl_uint(B.internal_size1()), cl_uint(B.internal_size2()))
64  );
65  }
66 
72  template<typename SCALARTYPE, typename F1, typename F2, unsigned int A1, unsigned int A2, typename SOLVERTAG>
76  op_trans> & B,
77  SOLVERTAG)
78  {
79  assert(mat.size1() == mat.size2());
80  assert(mat.size2() == B.lhs().size2());
81 
83  matrix<SCALARTYPE, F2, A2> >::ResultType KernelClass;
84  KernelClass::init();
85 
86  std::stringstream ss;
87  ss << SOLVERTAG::name() << "_trans_solve";
88  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), ss.str());
89 
90  k.global_work_size(0, B.lhs().size1() * k.local_work_size());
91  viennacl::ocl::enqueue(k(mat, cl_uint(mat.size1()), cl_uint(mat.size2()),
92  cl_uint(mat.internal_size1()), cl_uint(mat.internal_size2()),
93  B.lhs(), cl_uint(B.lhs().size1()), cl_uint(B.lhs().size2()),
94  cl_uint(B.lhs().internal_size1()), cl_uint(B.lhs().internal_size2()))
95  );
96  }
97 
98  //upper triangular solver for transposed lower triangular matrices
104  template<typename SCALARTYPE, typename F1, typename F2, unsigned int A1, unsigned int A2, typename SOLVERTAG>
107  op_trans> & proxy,
109  SOLVERTAG)
110  {
111  assert(proxy.lhs().size1() == proxy.lhs().size2());
112  assert(proxy.lhs().size2() == B.size1());
113 
115  matrix<SCALARTYPE, F2, A2> >::ResultType KernelClass;
116  KernelClass::init();
117 
118  std::stringstream ss;
119  ss << "trans_" << SOLVERTAG::name() << "_solve";
120  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), ss.str());
121 
122  k.global_work_size(0, B.size2() * k.local_work_size());
123  viennacl::ocl::enqueue(k(proxy.lhs(), cl_uint(proxy.lhs().size1()), cl_uint(proxy.lhs().size2()),
124  cl_uint(proxy.lhs().internal_size1()), cl_uint(proxy.lhs().internal_size2()),
125  B, cl_uint(B.size1()), cl_uint(B.size2()),
126  cl_uint(B.internal_size1()), cl_uint(B.internal_size2()))
127  );
128  }
129 
135  template<typename SCALARTYPE, typename F1, typename F2, unsigned int A1, unsigned int A2, typename SOLVERTAG>
138  op_trans> & proxy,
141  op_trans> & B,
142  SOLVERTAG)
143  {
144  assert(proxy.lhs().size1() == proxy.lhs().size2());
145  assert(proxy.lhs().size2() == B.lhs().size2());
146 
148  matrix<SCALARTYPE, F2, A2> >::ResultType KernelClass;
149  KernelClass::init();
150 
151  std::stringstream ss;
152  ss << "trans_" << SOLVERTAG::name() << "_trans_solve";
153  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), ss.str());
154 
155  k.global_work_size(0, B.lhs().size1() * k.local_work_size());
156  viennacl::ocl::enqueue(k(proxy.lhs(), cl_uint(proxy.lhs().size1()), cl_uint(proxy.lhs().size2()),
157  cl_uint(proxy.lhs().internal_size1()), cl_uint(proxy.lhs().internal_size2()),
158  B.lhs(), cl_uint(B.lhs().size1()), cl_uint(B.lhs().size2()),
159  cl_uint(B.lhs().internal_size1()), cl_uint(B.lhs().internal_size2()))
160  );
161  }
162 
163  template<typename SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VEC_ALIGNMENT, typename SOLVERTAG>
166  SOLVERTAG)
167  {
168  assert(mat.size1() == vec.size());
169  assert(mat.size2() == vec.size());
170 
172 
173  std::stringstream ss;
174  ss << SOLVERTAG::name() << "_triangular_substitute_inplace";
175  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), ss.str());
176 
178  viennacl::ocl::enqueue(k(mat, cl_uint(mat.size1()), cl_uint(mat.size2()),
179  cl_uint(mat.internal_size1()), cl_uint(mat.internal_size2()), vec));
180  }
181 
187  template<typename SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VEC_ALIGNMENT, typename SOLVERTAG>
190  op_trans> & proxy,
192  SOLVERTAG)
193  {
194  assert(proxy.lhs().size1() == vec.size());
195  assert(proxy.lhs().size2() == vec.size());
196 
198 
199  std::stringstream ss;
200  ss << "trans_" << SOLVERTAG::name() << "_triangular_substitute_inplace";
201  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), ss.str());
202 
204  viennacl::ocl::enqueue(k(proxy.lhs(), cl_uint(proxy.lhs().size1()), cl_uint(proxy.lhs().size2()),
205  cl_uint(proxy.lhs().internal_size1()), cl_uint(proxy.lhs().internal_size2()), vec));
206  }
207 
209 
216  template<typename SCALARTYPE, typename F1, typename F2, unsigned int ALIGNMENT_A, unsigned int ALIGNMENT_B, typename TAG>
219  TAG const & tag)
220  {
221  // do an inplace solve on the result vector:
223  result = B;
224 
225  inplace_solve(A, result, tag);
226 
227  return result;
228  }
229 
236  template<typename SCALARTYPE, typename F1, typename F2, unsigned int ALIGNMENT_A, unsigned int ALIGNMENT_B, typename TAG>
240  op_trans> & proxy,
241  TAG const & tag)
242  {
243  // do an inplace solve on the result vector:
244  matrix<SCALARTYPE, F2, ALIGNMENT_B> result(proxy.lhs().size2(), proxy.lhs().size1());
245  result = proxy;
246 
247  inplace_solve(A, result, tag);
248 
249  return result;
250  }
251 
258  template<typename SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VEC_ALIGNMENT, typename TAG>
261  TAG const & tag)
262  {
263  // do an inplace solve on the result vector:
265  result = vec;
266 
267  inplace_solve(mat, result, tag);
268 
269  return result;
270  }
271 
272 
274 
280  template<typename SCALARTYPE, typename F1, typename F2, unsigned int ALIGNMENT_A, unsigned int ALIGNMENT_B, typename TAG>
283  op_trans> & proxy,
285  TAG const & tag)
286  {
287  // do an inplace solve on the result vector:
289  result = B;
290 
291  inplace_solve(proxy, result, tag);
292 
293  return result;
294  }
295 
296 
303  template<typename SCALARTYPE, typename F1, typename F2, unsigned int ALIGNMENT_A, unsigned int ALIGNMENT_B, typename TAG>
306  op_trans> & proxy_A,
309  op_trans> & proxy_B,
310  TAG const & tag)
311  {
312  // do an inplace solve on the result vector:
313  matrix<SCALARTYPE, F2, ALIGNMENT_B> result(proxy_B.lhs().size2(), proxy_B.lhs().size1());
314  result = trans(proxy_B.lhs());
315 
316  inplace_solve(proxy_A, result, tag);
317 
318  return result;
319  }
320 
327  template<typename SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VEC_ALIGNMENT, typename TAG>
330  op_trans> & proxy,
332  TAG const & tag)
333  {
334  // do an inplace solve on the result vector:
336  result = vec;
337 
338  inplace_solve(proxy, result, tag);
339 
340  return result;
341  }
342 
343 
345 
349  template<typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
351  {
352  assert(mat.size1() == mat.size2());
353 
355 
356  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "lu_factorize");
357 
359  viennacl::ocl::enqueue(k(mat, cl_uint(mat.size1()), cl_uint(mat.size2()),
360  cl_uint(mat.internal_size1()), cl_uint(mat.internal_size2())) );
361  }
362 
363 
369  template<typename SCALARTYPE, typename F1, typename F2, unsigned int ALIGNMENT_A, unsigned int ALIGNMENT_B>
372  {
373  assert(A.size1() == A.size2());
374  assert(A.size1() == A.size2());
375  inplace_solve(A, B, unit_lower_tag());
376  inplace_solve(A, B, upper_tag());
377  }
378 
384  template<typename SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VEC_ALIGNMENT>
387  {
388  assert(mat.size1() == mat.size2());
389  inplace_solve(mat, vec, unit_lower_tag());
390  inplace_solve(mat, vec, upper_tag());
391  }
392 
393  }
394 }
395 
396 #endif