ViennaCL - The Vienna Computing Library  1.2.0
coordinate_matrix_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_COORDINATE_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_COORDINATE_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2011, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
24 #include "viennacl/forwards.h"
25 #include "viennacl/ocl/device.hpp"
26 #include "viennacl/ocl/handle.hpp"
27 #include "viennacl/ocl/kernel.hpp"
28 #include "viennacl/scalar.hpp"
29 #include "viennacl/vector.hpp"
30 #include "viennacl/tools/tools.hpp"
32 
33 namespace viennacl
34 {
35  namespace linalg
36  {
37 
38 
39  // A * x
47  template<class SCALARTYPE, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
48  vector_expression<const coordinate_matrix<SCALARTYPE, ALIGNMENT>,
49  const vector<SCALARTYPE, VECTOR_ALIGNMENT>,
52  {
55  op_prod >(mat, vec);
56  }
57 
58  // A * x
67  template<class SCALARTYPE, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
72  size_t NUM_THREADS)
73  {
76  viennacl::op_prod >(mat, vec);
77  }
78 
79  //namespace {
88  template<class TYPE, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
92  {
93  assert(mat.size1() == result.size());
94  assert(mat.size2() == vec.size());
95  result.clear();
96 
97  //std::cout << "prod(coordinate_matrix" << ALIGNMENT << ", vector) called with internal_nnz=" << mat.internal_nnz() << std::endl;
98 
99  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::coordinate_matrix<TYPE, ALIGNMENT>::program_name(), "vec_mul");
100  unsigned int thread_num = 256; //k.local_work_size(0);
101 
102  k.local_work_size(0, thread_num);
103 
104  k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases
105  //k.global_work_size(0, thread_num); //Only one work group
106  viennacl::ocl::enqueue(k(mat.handle12(), mat, mat.handle3(),
107  vec,
108  result,
109  viennacl::ocl::local_mem(sizeof(cl_uint)*thread_num),
110  viennacl::ocl::local_mem(sizeof(TYPE)*thread_num)) );
111 
112  }
113  //};
114 
115  } //namespace linalg
116 
117 
118 
123  template <typename SCALARTYPE, unsigned int ALIGNMENT>
124  template <unsigned int MAT_ALIGNMENT>
128  viennacl::op_prod> & proxy)
129  {
130  // check for the special case x = A * x
131  if (proxy.rhs().handle() == this->handle())
132  {
133  viennacl::vector<SCALARTYPE, ALIGNMENT> result(proxy.rhs().size());
134  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
135  *this = result;
136  return *this;
137  }
138  else
139  {
140  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
141  return *this;
142  }
143  return *this;
144  }
145 
146  //v += A * x
151  template <typename SCALARTYPE, unsigned int ALIGNMENT>
152  template <unsigned int MAT_ALIGNMENT>
156  op_prod> & proxy)
157  {
158  vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1());
159  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
160  *this += result;
161  return *this;
162  }
163 
168  template <typename SCALARTYPE, unsigned int ALIGNMENT>
169  template <unsigned int MAT_ALIGNMENT>
173  op_prod> & proxy)
174  {
175  vector<SCALARTYPE, ALIGNMENT> result(proxy.get_lhs().size1());
176  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
177  *this -= result;
178  return *this;
179  }
180 
181 
182  //free functions:
187  template <typename SCALARTYPE, unsigned int ALIGNMENT>
188  template <unsigned int MAT_ALIGNMENT>
192  op_prod> & proxy)
193  {
194  assert(proxy.get_lhs().size1() == size());
196  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
197  result += *this;
198  return result;
199  }
200 
205  template <typename SCALARTYPE, unsigned int ALIGNMENT>
206  template <unsigned int MAT_ALIGNMENT>
210  op_prod> & proxy)
211  {
212  assert(proxy.get_lhs().size1() == size());
214  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
215  result = *this - result;
216  return result;
217  }
218 
219 } //namespace viennacl
220 
221 
222 #endif