compressed_matrix.hpp (ViennaCL 1.2.0)
#ifndef VIENNACL_COMPRESSED_MATRIX_HPP_
#define VIENNACL_COMPRESSED_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2011, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.

                            -----------------
                ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include <vector>
#include <list>
#include <map>
#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/linalg/kernels/compressed_matrix_kernels.h"   //assumed include: provides viennacl::linalg::kernels::compressed_matrix<>::init() used below

#include "viennacl/tools/tools.hpp"

namespace viennacl
{

  //provide copy-operation:

  /** @brief Copies a sparse host matrix (any type offering the uBLAS-like iterator1/iterator2 interface) to a compressed_matrix in OpenCL memory. */
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    //std::cout << "copy for (" << cpu_matrix.size1() << ", " << cpu_matrix.size2() << ", " << cpu_matrix.nnz() << ")" << std::endl;

    if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
    {
      //determine nonzeros:
      long num_entries = 0;
      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        std::size_t entries_per_row = 0;
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          ++entries_per_row;
        }
        num_entries += viennacl::tools::roundUpToNextMultiple<std::size_t>(entries_per_row, ALIGNMENT);
      }

      if (num_entries == 0) //we copy an empty matrix
      {
        num_entries = 1;
      }

      //set up matrix entries:
      std::vector<cl_uint> row_buffer(cpu_matrix.size1() + 1);
      std::vector<cl_uint> col_buffer(num_entries);
      std::vector<SCALARTYPE> elements(num_entries);

      std::size_t row_index = 0;
      std::size_t data_index = 0;

      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        row_buffer[row_index] = static_cast<cl_uint>(data_index);
        ++row_index;

        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          col_buffer[data_index] = static_cast<cl_uint>(col_it.index2());
          elements[data_index] = *col_it;
          ++data_index;
        }
        data_index = viennacl::tools::roundUpToNextMultiple<std::size_t>(data_index, ALIGNMENT); //take care of alignment
      }
      row_buffer[row_index] = static_cast<cl_uint>(data_index);

      gpu_matrix.set(&row_buffer[0],
                     &col_buffer[0],
                     &elements[0],
                     cpu_matrix.size1(),
                     cpu_matrix.size2(),
                     num_entries);
    }
  }


  //adapted for std::vector< std::map < > > argument:

  /** @brief Convenience overload for a sparse host matrix given as std::vector< std::map<> >. */
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix), gpu_matrix);
  }
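
  /* Usage sketch (a minimal example of the overloads above; the float type, sizes and values
     are made up for illustration):

       std::vector< std::map<unsigned int, float> > host_A(4);   //4x4 sparse matrix assembled on the host
       host_A[0][0] =  1.0f;
       host_A[1][2] =  2.5f;
       host_A[3][3] = -1.0f;

       viennacl::compressed_matrix<float> gpu_A(4, 4);           //CSR matrix in OpenCL memory
       viennacl::copy(host_A, gpu_A);                            //dispatches to the overload above
  */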

  #ifdef VIENNACL_HAVE_EIGEN
  template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
  void copy(const Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(eigen_matrix.rows());

    for (int k=0; k < eigen_matrix.outerSize(); ++k)
      for (typename Eigen::SparseMatrix<SCALARTYPE, flags>::InnerIterator it(eigen_matrix, k); it; ++it)
        stl_matrix[it.row()][it.col()] = it.value();

    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix), gpu_matrix);
  }
  #endif


  #ifdef VIENNACL_HAVE_MTL4
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const mtl::compressed2D<SCALARTYPE> & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    typedef mtl::compressed2D<SCALARTYPE>  MatrixType;

    std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(cpu_matrix.num_rows());

    using mtl::traits::range_generator;
    using mtl::traits::range::min;

    // Choose between row and column traversal
    typedef typename min<range_generator<mtl::tag::row, MatrixType>,
                         range_generator<mtl::tag::col, MatrixType> >::type   range_type;
    range_type my_range;

    // Type of outer cursor
    typedef typename range_type::type                                         c_type;
    // Type of inner cursor
    typedef typename mtl::traits::range_generator<mtl::tag::nz, c_type>::type ic_type;

    // Define the property maps
    typename mtl::traits::row<MatrixType>::type                               row(cpu_matrix);
    typename mtl::traits::col<MatrixType>::type                               col(cpu_matrix);
    typename mtl::traits::const_value<MatrixType>::type                       value(cpu_matrix);

    // Now iterate over the matrix
    for (c_type cursor(my_range.begin(cpu_matrix)), cend(my_range.end(cpu_matrix)); cursor != cend; ++cursor)
      for (ic_type icursor(mtl::begin<mtl::tag::nz>(cursor)), icend(mtl::end<mtl::tag::nz>(cursor)); icursor != icend; ++icursor)
        stl_matrix[row(*icursor)][col(*icursor)] = value(*icursor);

    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix), gpu_matrix);
  }
  #endif





  //
  // gpu to cpu:
  //

  /** @brief Copies a compressed_matrix from OpenCL memory back into a sparse host matrix type. */
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            CPU_MATRIX & cpu_matrix )
  {
    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2(), false);

      //get raw data from memory:
      std::vector<cl_uint> row_buffer(gpu_matrix.size1() + 1);
      std::vector<cl_uint> col_buffer(gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl;

      cl_int err;
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(), CL_TRUE, 0, sizeof(cl_uint)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(), CL_TRUE, 0, sizeof(cl_uint)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      //fill the cpu_matrix:
      std::size_t data_index = 0;
      for (std::size_t row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          if (col_buffer[data_index] >= gpu_matrix.size2())
          {
            std::cerr << "ViennaCL encountered invalid data at col_buffer[" << data_index << "]: " << col_buffer[data_index] << std::endl;
            return;
          }

          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            cpu_matrix(row-1, col_buffer[data_index]) = elements[data_index];
          ++data_index;
        }
      }
    }
  }


  /** @brief Convenience overload: copies a compressed_matrix from the device into a std::vector< std::map<> > host matrix. */
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
  {
    tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix);
    copy(gpu_matrix, temp);
  }
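
  /* Usage sketch (a minimal example; the names and sizes are illustrative): reading a device
     matrix back into a host-side std::vector< std::map<> >:

       viennacl::compressed_matrix<float> gpu_A(4, 4);
       // ... fill gpu_A, e.g. via viennacl::copy() from a host matrix ...
       std::vector< std::map<unsigned int, float> > host_B(gpu_A.size1());
       viennacl::copy(gpu_A, host_B);   //blocking reads of row pointers, column indices and values
  */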


  #ifdef VIENNACL_HAVE_EIGEN
  template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
  void copy(compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix)
  {
    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      assert(static_cast<unsigned int>(eigen_matrix.rows()) >= gpu_matrix.size1()
             && static_cast<unsigned int>(eigen_matrix.cols()) >= gpu_matrix.size2()
             && "Provided Eigen compressed matrix is too small!");

      //get raw data from memory:
      std::vector<cl_uint> row_buffer(gpu_matrix.size1() + 1);
      std::vector<cl_uint> col_buffer(gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      cl_int err;
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(),
                                CL_TRUE, 0, sizeof(cl_uint)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(),
                                CL_TRUE, 0, sizeof(cl_uint)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(),
                                CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      eigen_matrix.setZero();
      eigen_matrix.startFill();
      std::size_t data_index = 0;
      for (std::size_t row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          assert(col_buffer[data_index] < gpu_matrix.size2() && "ViennaCL encountered invalid data at col_buffer");
          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            eigen_matrix.fill(row-1, col_buffer[data_index]) = elements[data_index];
          ++data_index;
        }
      }
      eigen_matrix.endFill();
    }
  }
  #endif



  #ifdef VIENNACL_HAVE_MTL4
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            mtl::compressed2D<SCALARTYPE> & mtl4_matrix)
  {
    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      assert(mtl4_matrix.num_rows() >= gpu_matrix.size1()
             && mtl4_matrix.num_cols() >= gpu_matrix.size2()
             && "Provided MTL4 compressed matrix is too small!");

      //get raw data from memory:
      std::vector<cl_uint> row_buffer(gpu_matrix.size1() + 1);
      std::vector<cl_uint> col_buffer(gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      cl_int err;
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(),
                                CL_TRUE, 0, sizeof(cl_uint)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(),
                                CL_TRUE, 0, sizeof(cl_uint)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(),
                                CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      //set_to_zero(mtl4_matrix);
      //mtl4_matrix.change_dim(gpu_matrix.size1(), gpu_matrix.size2());

      mtl::matrix::inserter< mtl::compressed2D<SCALARTYPE> > ins(mtl4_matrix);
      std::size_t data_index = 0;
      for (std::size_t row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          assert(col_buffer[data_index] < gpu_matrix.size2() && "ViennaCL encountered invalid data at col_buffer");
          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            ins(row-1, col_buffer[data_index]) << typename mtl::Collection< mtl::compressed2D<SCALARTYPE> >::value_type(elements[data_index]);
          ++data_index;
        }
      }
    }
  }
  #endif




  /** @brief A sparse matrix in compressed sparse row (CSR) format, with data held in OpenCL memory. */
  template<class SCALARTYPE, unsigned int ALIGNMENT /* see VCLForwards.h */>
  class compressed_matrix
  {
  public:

    /** @brief Default constructor: creates an empty matrix and makes sure the OpenCL kernels are set up. */
    compressed_matrix() : _rows(0), _cols(0), _nonzeros(0) { viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, ALIGNMENT>::init(); }

    /** @brief Constructs a matrix of the given dimensions and optionally pre-allocates memory for the nonzeros. */
    explicit compressed_matrix(std::size_t rows, std::size_t cols, std::size_t nonzeros = 0) :
      _rows(rows), _cols(cols), _nonzeros(nonzeros)
    {
      viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, ALIGNMENT>::init();

      if (rows > 0)
        _row_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * rows);
      if (nonzeros > 0)
      {
        _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * nonzeros);
        _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * nonzeros);
      }
    }

    /** @brief Wraps existing OpenCL buffers holding CSR data; the matrix does not take ownership of them. */
    explicit compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_col_buffer, cl_mem mem_elements,
                               std::size_t rows, std::size_t cols, std::size_t nonzeros) :
      _rows(rows), _cols(cols), _nonzeros(nonzeros)
    {
      _row_buffer = mem_row_buffer;
      _row_buffer.inc();   //prevents the user-provided memory from being deleted once the matrix object is destroyed.
      _col_buffer = mem_col_buffer;
      _col_buffer.inc();   //prevents the user-provided memory from being deleted once the matrix object is destroyed.
      _elements = mem_elements;
      _elements.inc();     //prevents the user-provided memory from being deleted once the matrix object is destroyed.
    }


    /** @brief Populates the matrix from existing host-side CSR arrays (row_jumper must hold rows + 1 entries). */
    void set(cl_uint * row_jumper,
             cl_uint * col_buffer,
             SCALARTYPE * elements,
             std::size_t rows,
             std::size_t cols,
             std::size_t nonzeros)
    {
      assert(cols > 0);
      assert(nonzeros > 0);
      //std::cout << "Setting memory: " << rows + 1 << ", " << nonzeros << std::endl;
      _row_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * (rows + 1), row_jumper);
      _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * nonzeros, col_buffer);
      _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * nonzeros, elements);
      _nonzeros = nonzeros;
      _rows = rows;
      _cols = cols;
    }

    /** @brief Allocates memory for at least new_nonzeros nonzero entries, preserving the existing data. */
    void reserve(std::size_t new_nonzeros)
    {
      if (new_nonzeros > _nonzeros)
      {
        viennacl::ocl::handle<cl_mem> _col_buffer_old = _col_buffer;
        viennacl::ocl::handle<cl_mem> _elements_old = _elements;
        _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * new_nonzeros);
        _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * new_nonzeros);

        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), _col_buffer_old, _col_buffer, 0, 0, sizeof(cl_uint)*_nonzeros, 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), _elements_old, _elements, 0, 0, sizeof(SCALARTYPE)*_nonzeros, 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);

        _nonzeros = new_nonzeros;
      }
    }

    /** @brief Resizes the matrix to new_size1 x new_size2; if preserve is true, the existing entries are kept. */
    void resize(std::size_t new_size1, std::size_t new_size2, bool preserve = true)
    {
      assert(new_size1 > 0 && new_size2 > 0);
      //std::cout << "Resizing from (" << _rows << ", " << _cols << ") to (" << new_size1 << ", " << new_size2 << ")" << std::endl;

      if (new_size1 != _rows || new_size2 != _cols)
      {
        std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
        if (_rows > 0)
          stl_sparse_matrix.resize(_rows);

        if (preserve && _rows > 0)
          viennacl::copy(*this, stl_sparse_matrix);

        stl_sparse_matrix.resize(new_size1);

        //discard entries with column index new_size2 or beyond
        if (new_size2 < _cols && _rows > 0)
        {
          for (std::size_t i=0; i<stl_sparse_matrix.size(); ++i)
          {
            std::list<unsigned int> to_delete;
            for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
                 it != stl_sparse_matrix[i].end();
                 ++it)
            {
              if (it->first >= new_size2)
                to_delete.push_back(it->first);
            }

            for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
              stl_sparse_matrix[i].erase(*it);
          }
        }

        copy(stl_sparse_matrix, *this);

        _rows = new_size1;
        _cols = new_size2;
      }
    }

    /** @brief Returns the number of rows. */
    const std::size_t & size1() const { return _rows; }
    /** @brief Returns the number of columns. */
    const std::size_t & size2() const { return _cols; }
    /** @brief Returns the number of nonzero entries. */
    const std::size_t & nnz() const { return _nonzeros; }

    /** @brief Returns the OpenCL handle of the row index (row pointer) array. */
    const viennacl::ocl::handle<cl_mem> & handle1() const { return _row_buffer; }
    /** @brief Returns the OpenCL handle of the column index array. */
    const viennacl::ocl::handle<cl_mem> & handle2() const { return _col_buffer; }
    /** @brief Returns the OpenCL handle of the nonzero entries. */
    const viennacl::ocl::handle<cl_mem> & handle() const { return _elements; }

  private:
    // /** @brief Copy constructor is for now not available. */
    //compressed_matrix(compressed_matrix const &);

    /** @brief Assignment is not available (declared private, not implemented). */
    compressed_matrix & operator=(compressed_matrix const &);


    std::size_t _rows;
    std::size_t _cols;
    std::size_t _nonzeros;
    viennacl::ocl::handle<cl_mem> _row_buffer;
    viennacl::ocl::handle<cl_mem> _col_buffer;
    viennacl::ocl::handle<cl_mem> _elements;
  };
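
  /* Usage sketch (a minimal example; sizes and values are illustrative): constructing, filling
     and resizing a compressed_matrix directly through the interface above:

       std::vector< std::map<unsigned int, float> > host_A(1000);   //assemble on the host
       host_A[0][0]     = 1.0f;
       host_A[999][999] = 2.0f;

       viennacl::compressed_matrix<float> A(1000, 1000);
       viennacl::copy(host_A, A);                    //transfer to OpenCL memory
       A.resize(2000, 2000, true);                   //enlarge, preserving the existing entries
       std::cout << A.size1() << " x " << A.size2() << ", nnz: " << A.nnz() << std::endl;
  */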



}

#endif