ViennaCL - The Vienna Computing Library  1.2.0
coordinate_matrix.hpp
#ifndef VIENNACL_COORDINATE_MATRIX_HPP_
#define VIENNACL_COORDINATE_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2011, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file coordinate_matrix.hpp
    @brief Implementation of the coordinate_matrix class
*/

#include <map>
#include <vector>
#include <list>

#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/linalg/kernels/coordinate_matrix_kernels.h"

namespace viennacl
{

  //provide copy-operation:
  /** @brief Copies a sparse matrix from the host (CPU) to the OpenCL device (GPU or multi-core CPU)

      For the requirements on the CPU_MATRIX type, see the documentation of the copy() functions for compressed_matrix.

      @param cpu_matrix   A sparse matrix on the host.
      @param gpu_matrix   A coordinate_matrix from ViennaCL.
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix,
            coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    std::size_t group_num = 64;

    // Step 1: Determine the number of nonzeros:
    if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
    {
      std::size_t num_entries = 0;
      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
            row_it != cpu_matrix.end1();
            ++row_it)
      {
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
              col_it != row_it.end();
              ++col_it)
        {
          ++num_entries;
        }
      }

      // Step 2: Set up matrix data:
      //std::cout << "Number of entries: " << num_entries << std::endl;
      gpu_matrix.nonzeros_ = num_entries;
      gpu_matrix.rows_ = cpu_matrix.size1();
      gpu_matrix.cols_ = cpu_matrix.size2();

      std::vector<cl_uint> coord_buffer(2*gpu_matrix.internal_nnz());
      std::vector<cl_uint> group_boundaries(group_num + 1);
      std::vector<SCALARTYPE> elements(gpu_matrix.internal_nnz());

      std::size_t data_index = 0;
      std::size_t current_fraction = 0;

      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
            row_it != cpu_matrix.end1();
            ++row_it)
      {
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
              col_it != row_it.end();
              ++col_it)
        {
          coord_buffer[2*data_index] = static_cast<cl_uint>(col_it.index1());
          coord_buffer[2*data_index + 1] = static_cast<cl_uint>(col_it.index2());
          elements[data_index] = *col_it;
          ++data_index;
        }

        if (data_index > (current_fraction + 1) / static_cast<double>(group_num) * num_entries) //split data equally over the work groups
          group_boundaries[++current_fraction] = data_index;
      }

      //write end of last group:
      group_boundaries[group_num] = data_index;
      //group_boundaries[1] = data_index; //for one compute unit

      /*std::cout << "Group boundaries: " << std::endl;
      for (std::size_t i=0; i<group_boundaries.size(); ++i)
        std::cout << group_boundaries[i] << std::endl;*/

      gpu_matrix.coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, coord_buffer);
      gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, elements);
      gpu_matrix.group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, group_boundaries);
    }
  }

  /** @brief Copies a sparse matrix in the std::vector< std::map< > > format to the OpenCL device.

      @param cpu_matrix   A sparse matrix on the host.
      @param gpu_matrix   A coordinate_matrix from ViennaCL.
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
            coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix), gpu_matrix);
  }

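  // A minimal usage sketch for the two host-to-device copy() overloads above,
  // assuming an initialized OpenCL context (names below are illustrative only):
  //
  //   std::vector< std::map<unsigned int, float> > host_A(4);   //a 4x4 matrix
  //   host_A[0][0] = 1.0f;  host_A[1][2] = 2.0f;  host_A[3][3] = 3.0f;
  //
  //   viennacl::coordinate_matrix<float> device_A(4, 4);
  //   viennacl::copy(host_A, device_A);   //triplets are assembled and transferred
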
  //gpu to cpu:
  /** @brief Copies a sparse matrix from the OpenCL device (GPU or multi-core CPU) to the host.

      For the requirements on the CPU_MATRIX type, see the documentation of the copy() functions for compressed_matrix.

      @param gpu_matrix   A coordinate_matrix from ViennaCL.
      @param cpu_matrix   A sparse matrix on the host.
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            CPU_MATRIX & cpu_matrix )
  {
    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2(), false);

      //get raw data from memory:
      std::vector<cl_uint> coord_buffer(2*gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      //std::cout << "GPU nonzeros: " << gpu_matrix.nnz() << std::endl;

      cl_int err;
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle12(), CL_TRUE, 0, sizeof(cl_uint) * 2 * gpu_matrix.nnz(), &(coord_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      //fill the cpu_matrix:
      for (std::size_t index = 0; index < gpu_matrix.nnz(); ++index)
      {
        cpu_matrix(coord_buffer[2*index], coord_buffer[2*index+1]) = elements[index];
      }
    }
  }

  /** @brief Copies a sparse matrix from the OpenCL device (GPU or multi-core CPU) to the host, using the std::vector< std::map< > > format.

      @param gpu_matrix   A coordinate_matrix from ViennaCL.
      @param cpu_matrix   A sparse matrix on the host.
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
  {
    tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix);
    copy(gpu_matrix, temp);
  }

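  // The reverse direction mirrors the host-to-device transfer; a sketch,
  // assuming device_A was filled as in the previous example:
  //
  //   std::vector< std::map<unsigned int, float> > host_B(device_A.size1());
  //   viennacl::copy(device_A, host_B);   //blocking reads, then triplet-wise scatter
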
  /** @brief A sparse matrix class using the coordinate format: entries are stored as triplets (i, j, value), where i and j are the row and column indices and value is the nonzero entry.

      @tparam SCALARTYPE   The underlying floating point type (either float or double)
      @tparam ALIGNMENT    The internal buffer length is the number of nonzeros rounded up to a multiple of ALIGNMENT
  */
  template<class SCALARTYPE, unsigned int ALIGNMENT /* see forwards.h */ >
  class coordinate_matrix
  {
    public:
      typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;

      /** @brief Default construction of the matrix object */
      coordinate_matrix() : rows_(0), cols_(0), nonzeros_(0) { viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, ALIGNMENT>::init(); }

      /** @brief Construction of the matrix with the supplied dimensions. Memory is allocated if 'nonzeros' is larger than zero.

          @param rows       Number of rows
          @param cols       Number of columns
          @param nonzeros   Optional number of nonzeros for memory preallocation
      */
      coordinate_matrix(std::size_t rows, std::size_t cols, std::size_t nonzeros = 0) :
        rows_(rows), cols_(cols), nonzeros_(nonzeros)
      {
        viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, ALIGNMENT>::init();
        if (nonzeros > 0)
        {
          coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz());
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz());
          group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * (rows + 1));
        }
      }

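      // Construction sketch: preallocating memory for a known number of nonzeros
      // avoids a later reallocation (the sizes here are illustrative only):
      //
      //   viennacl::coordinate_matrix<float> A(1000, 1000, 3000); //1000x1000, room for 3000 entries
      //   viennacl::coordinate_matrix<float> B;                   //empty, filled later via copy()
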
      /** @brief Allocates memory for the supplied number of nonzeros. Old values are preserved. */
      void reserve(std::size_t new_nonzeros)
      {
        if (new_nonzeros > nonzeros_)
        {
          std::size_t old_nonzeros = nonzeros_;
          nonzeros_ = new_nonzeros;  //internal_nnz() now reflects the enlarged, padded capacity

          viennacl::ocl::handle<cl_mem> coord_buffer_old = coord_buffer_;
          viennacl::ocl::handle<cl_mem> elements_old = elements_;
          coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz());
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz());

          //preserve the old entries:
          cl_int err;
          err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_old, coord_buffer_, 0, 0, sizeof(cl_uint) * 2 * old_nonzeros, 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
          err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), elements_old, elements_, 0, 0, sizeof(SCALARTYPE) * old_nonzeros, 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);

          //new memory must be padded with zeros:
          std::vector<cl_uint> zero_coords(2 * (internal_nnz() - old_nonzeros), 0);
          std::vector<SCALARTYPE> zero_elements(internal_nnz() - old_nonzeros, 0);
          err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_, CL_TRUE, sizeof(cl_uint) * 2 * old_nonzeros, sizeof(cl_uint) * zero_coords.size(), &(zero_coords[0]), 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
          err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(), elements_, CL_TRUE, sizeof(SCALARTYPE) * old_nonzeros, sizeof(SCALARTYPE) * zero_elements.size(), &(zero_elements[0]), 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
        }
      }

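      // Note on padding: internal_nnz() rounds the capacity up to a multiple of
      // ALIGNMENT, so reserve() zero-fills the tail of both buffers and the kernels
      // can process full ALIGNMENT-sized chunks without bounds checks on the last one.
      //
      //   A.reserve(5000);   //grows the buffers, preserving the existing entries
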
      /** @brief Resizes the matrix.

          @param new_size1   New number of rows
          @param new_size2   New number of columns
          @param preserve    If true, the old values are preserved; entries outside the new bounds are dropped.
      */
      void resize(std::size_t new_size1, std::size_t new_size2, bool preserve = true)
      {
        assert (new_size1 > 0 && new_size2 > 0);

        if (new_size1 < rows_ || new_size2 < cols_) //shrink: entries outside the new bounds must be removed
        {
          std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
          if (rows_ > 0)
            stl_sparse_matrix.resize(rows_);

          if (preserve && rows_ > 0)
            viennacl::copy(*this, stl_sparse_matrix);

          stl_sparse_matrix.resize(new_size1);

          //std::cout << "Cropping STL matrix of size " << stl_sparse_matrix.size() << std::endl;
          if (new_size2 < cols_ && rows_ > 0)
          {
            for (std::size_t i=0; i<stl_sparse_matrix.size(); ++i)
            {
              std::list<unsigned int> to_delete;
              for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
                   it != stl_sparse_matrix[i].end();
                   ++it)
              {
                if (it->first >= new_size2)
                  to_delete.push_back(it->first);
              }

              for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
                stl_sparse_matrix[i].erase(*it);
            }
            //std::cout << "Cropping done..." << std::endl;
          }

          rows_ = new_size1;
          cols_ = new_size2;
          viennacl::copy(stl_sparse_matrix, *this);
        }

        //copy() above resets the dimensions from the (square) host adapter, so set them again:
        rows_ = new_size1;
        cols_ = new_size2;
      }

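      // Resize sketch: shrinking routes through a host-side std::vector<std::map<...>>
      // round trip, dropping entries whose indices fall outside the new bounds:
      //
      //   A.resize(500, 500);     //keeps only entries with row < 500 and column < 500
      //   A.resize(2000, 2000);   //enlarging only updates the stored dimensions
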
      /** @brief Returns the number of rows */
      std::size_t size1() const { return rows_; }
      /** @brief Returns the number of columns */
      std::size_t size2() const { return cols_; }
      /** @brief Returns the number of nonzero entries */
      std::size_t nnz() const { return nonzeros_; }
      /** @brief Returns the number of internal entries (nonzeros padded to a multiple of ALIGNMENT) */
      std::size_t internal_nnz() const { return viennacl::tools::roundUpToNextMultiple<std::size_t>(nonzeros_, ALIGNMENT); }

      /** @brief Returns the OpenCL handle to the (row, column) index buffer */
      const viennacl::ocl::handle<cl_mem> & handle12() const { return coord_buffer_; }
      /** @brief Returns the OpenCL handle to the matrix entry buffer */
      const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; }
      /** @brief Returns the OpenCL handle to the group boundary buffer */
      const viennacl::ocl::handle<cl_mem> & handle3() const { return group_boundaries_; }

      #if defined(_MSC_VER) && _MSC_VER < 1500      //Visual Studio 2005 needs special treatment
      template <typename CPU_MATRIX>
      friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix & gpu_matrix );
      #else
      template <typename CPU_MATRIX, typename SCALARTYPE2, unsigned int ALIGNMENT2>
      friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix<SCALARTYPE2, ALIGNMENT2> & gpu_matrix );
      #endif

    private:
      /** @brief Copy constructor is not available at present */
      coordinate_matrix(coordinate_matrix const &);

      /** @brief Assignment is not available at present */
      coordinate_matrix & operator=(coordinate_matrix const &);


      std::size_t rows_;
      std::size_t cols_;
      std::size_t nonzeros_;
      viennacl::ocl::handle<cl_mem> coord_buffer_;
      viennacl::ocl::handle<cl_mem> elements_;
      viennacl::ocl::handle<cl_mem> group_boundaries_;
  };
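
  // Typical use of the assembled matrix is the sparse matrix-vector product,
  // e.g. inside an iterative solver; a sketch, assuming matching dimensions:
  //
  //   viennacl::vector<float> x(A.size2());
  //   viennacl::vector<float> y(A.size1());
  //   y = viennacl::linalg::prod(A, x);   //requires viennacl/linalg/prod.hpp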

}

#endif