#ifndef VIENNACL_MATRIX_HPP_
#define VIENNACL_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2011, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file matrix.hpp
    @brief Implementation of the dense matrix class
*/

#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/linalg/matrix_operations.hpp"
#include "viennacl/tools/tools.hpp"

namespace viennacl
{
  /** @brief A tag class for row-major storage of a dense matrix */
  struct row_major
  {
    /** @brief Returns the memory offset of entry (i,j); num_rows and num_cols are the internal (padded) dimensions */
    static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t num_cols)
    {
      return i * num_cols + j;
    }

    static vcl_size_t internal_size1(vcl_size_t rows, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(rows, alignment);
    }

    static vcl_size_t internal_size2(vcl_size_t cols, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(cols, alignment);
    }
  };

  /** @brief A tag class for column-major storage of a dense matrix */
  struct column_major
  {
    /** @brief Returns the memory offset of entry (i,j); num_rows and num_cols are the internal (padded) dimensions */
    static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t num_cols)
    {
      return i + j * num_rows;
    }

    static vcl_size_t internal_size1(vcl_size_t rows, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(rows, alignment);
    }

    static vcl_size_t internal_size2(vcl_size_t cols, vcl_size_t alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(cols, alignment);
    }
  };

  /** @brief Expression template class representing a binary operation A OP B on matrices */
  template <typename LHS, typename RHS, typename OP>
  class matrix_expression
  {
    public:
      //typedef typename viennacl::tools::VECTOR_EXTRACTOR<LHS, RHS>::ResultType    VectorType;

      matrix_expression(LHS & lhs, RHS & rhs) : _lhs(lhs), _rhs(rhs) {}

      /** @brief Returns the left hand side operand */
      LHS & lhs() const { return _lhs; }
      /** @brief Returns the right hand side operand */
      RHS & rhs() const { return _rhs; }

      /** @brief Returns the number of rows of the result */
      std::size_t size1() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size1(_lhs, _rhs); }
      /** @brief Returns the number of columns of the result */
      std::size_t size2() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size2(_lhs, _rhs); }

    private:
      LHS & _lhs;
      RHS & _rhs;
  };

  struct row_iteration {};

  struct col_iteration {};

  //STL-like iterator. TODO: STL-compliance...
  template <typename ROWCOL, typename MATRIXTYPE>
  class matrix_iterator
  {
      typedef matrix_iterator<ROWCOL, MATRIXTYPE>    self_type;
    public:
      typedef typename MATRIXTYPE::value_type        value_type;

      matrix_iterator(MATRIXTYPE & mat,
                      std::size_t start_row,
                      std::size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {}

      value_type operator*(void) { return mat_(row_, col_); }
      self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }

      bool operator==(self_type const & other) { return (row_ == other.row_) && (col_ == other.col_); }
      bool operator!=(self_type const & other) { return !(*this == other); }

      vcl_size_t index1() { return row_; }
      vcl_size_t index2() { return col_; }

      MATRIXTYPE & operator()(void) const { return mat_; }

    private:
      MATRIXTYPE & mat_;
      vcl_size_t row_;
      vcl_size_t col_;
  };

  /** @brief A dense matrix class with OpenCL-backed storage */
  template <class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  class matrix
  {
    public:
      typedef matrix_iterator<row_iteration, matrix<SCALARTYPE, F, ALIGNMENT> >   iterator1;
      typedef matrix_iterator<col_iteration, matrix<SCALARTYPE, F, ALIGNMENT> >   iterator2;
      typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;
      typedef vcl_size_t                                                          size_type;

      /** @brief Creates an empty matrix without allocating memory */
      matrix() : rows_(0), columns_(0)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
        KernelClass::init();
      }

      /** @brief Creates a matrix with the given dimensions (entries are uninitialized)
      *
      * @param rows     Number of rows
      * @param columns  Number of columns
      */
      explicit matrix(size_type rows, size_type columns) :
        rows_(rows), columns_(columns)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
        KernelClass::init();
        elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());
      }

      /** @brief Wraps an existing OpenCL buffer as a matrix with the given dimensions */
      explicit matrix(cl_mem mem, size_type rows, size_type columns) :
        rows_(rows), columns_(columns)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
        KernelClass::init();
        elements_ = mem;
        elements_.inc(); //prevents the user-provided memory from being deleted once the matrix object is destroyed
      }

      /** @brief Creates the matrix from a matrix expression (e.g. a product or a sum) */
      template <typename LHS, typename RHS, typename OP>
      matrix(matrix_expression< LHS, RHS, OP> const & proxy) : rows_(proxy.size1()), columns_(proxy.size2())
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
        KernelClass::init();
        elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());

        *this = proxy;
      }

      //copy constructor:
      matrix(const matrix<SCALARTYPE, F, ALIGNMENT> & mat) :
        rows_(mat.size1()), columns_(mat.size2()),
        elements_(viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size()))
      {
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator=(const matrix<SCALARTYPE, F, ALIGNMENT> & mat)
      {
        resize(mat.size1(), mat.size2(), false);
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        return *this;
      }

      /** @brief Assigns the transpose of another matrix (via a temporary host buffer) */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator=(const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                            const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                            op_trans> & proxy)
      {
        assert(handle() != proxy.lhs().handle() && "Self-assignment of matrix transpose not implemented");
        assert(proxy.lhs().size1() == size2() && "Matrix dimensions do not match!");
        assert(proxy.lhs().size2() == size1() && "Matrix dimensions do not match!");

        resize(proxy.lhs().size2(), proxy.lhs().size1(), false);

        std::vector<SCALARTYPE> temp(proxy.lhs().internal_size());

        cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                         proxy.lhs().handle(), CL_TRUE, 0,
                                         sizeof(SCALARTYPE)*proxy.lhs().internal_size(),
                                         &(temp[0]), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);

        std::vector<SCALARTYPE> temp_trans(internal_size());

        for (vcl_size_t i=0; i<proxy.lhs().size1(); ++i)
          for (vcl_size_t j=0; j<proxy.lhs().size2(); ++j)
            temp_trans[F::mem_index(j, i, internal_size1(), internal_size2())]
              = temp[F::mem_index(i, j, proxy.lhs().internal_size1(), proxy.lhs().internal_size2())];

        elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
                                                                   sizeof(SCALARTYPE)*internal_size(),
                                                                   &(temp_trans[0]));

        return *this;
      }

      /** @brief Resizes the matrix.
      *
      * @param rows      New number of rows
      * @param columns   New number of columns
      * @param preserve  If true (default), existing entries are kept; otherwise the new matrix is zero-initialized
      */
      void resize(size_type rows, size_type columns, bool preserve = true)
      {
        assert(rows > 0 && columns > 0);
        if (preserve)
        {
          //get old entries:
          std::vector< SCALARTYPE > old_entries(internal_size());
          cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),  //queue
                                           handle(),                             //source buffer
                                           CL_TRUE,                              //blocking
                                           0,                                    //offset
                                           sizeof(SCALARTYPE)*internal_size(),   //size in bytes
                                           &(old_entries[0]),                    //destination
                                           0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);

          //set up entries of new matrix:
          std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT));
          for (size_type i=0; i<rows; ++i)
          {
            if (i >= rows_)
              continue;

            for (size_type j=0; j<columns; ++j)
            {
              if (j >= columns_)
                continue;
              new_entries[F::mem_index(i, j, F::internal_size1(rows, ALIGNMENT), F::internal_size2(columns, ALIGNMENT))]
                = old_entries[F::mem_index(i, j, internal_size1(), internal_size2())];
            }
          }

          //copy new entries to GPU:
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries);
          rows_ = rows;
          columns_ = columns;
        }
        else //discard old entries:
        {
          rows_ = rows;
          columns_ = columns;

          std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT));
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries);
        }
      }

      //read-write access to an element of the matrix
      /** @brief Read-write access to a single entry of the matrix */
      entry_proxy<SCALARTYPE> operator()(size_type row_index, size_type col_index)
      {
        return entry_proxy<SCALARTYPE>(F::mem_index(row_index, col_index, internal_size1(), internal_size2()), elements_);
      }

      /** @brief Read access to a single matrix entry, returned as a device scalar */
      scalar<SCALARTYPE> operator()(size_type row_index, size_type col_index) const
      {
        scalar<SCALARTYPE> tmp;
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(),
                                  elements_,
                                  tmp.handle(),
                                  sizeof(SCALARTYPE) * F::mem_index(row_index, col_index, internal_size1(), internal_size2()),
                                  0,
                                  sizeof(SCALARTYPE),
                                  0,
                                  NULL,
                                  NULL);
        VIENNACL_ERR_CHECK(err);
        return tmp;
      }


      /** @brief Returns the expression this + other */
      matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                         const matrix<SCALARTYPE, F, ALIGNMENT>,
                         op_add >
      operator + (const matrix<SCALARTYPE, F, ALIGNMENT> & other)
      {
        return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  op_add > (*this, other);
      }

      /** @brief Adds another matrix to this matrix in place */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix<SCALARTYPE, F, ALIGNMENT> & other)
      {
        viennacl::linalg::inplace_add(*this, other);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_range<matrix<SCALARTYPE, F, ALIGNMENT> > & other)
      {
        viennacl::linalg::inplace_add(*this, other);
        return *this;
      }

      /** @brief Returns the expression this - other */
      matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                         const matrix<SCALARTYPE, F, ALIGNMENT>,
                         op_sub >
      operator - (const matrix<SCALARTYPE, F, ALIGNMENT> & other)
      {
        return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  op_sub > (*this, other);
      }

      /** @brief Subtracts another matrix from this matrix in place */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix<SCALARTYPE, F, ALIGNMENT> & other)
      {
        viennacl::linalg::inplace_sub(*this, other);
        return *this;
      }

      //rank-1 update: this += outer_prod(v1, v2)
      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                               const vector<SCALARTYPE, A2>,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::rank_1_update(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      //rank-1 update: this -= outer_prod(v1, v2)
      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                               const vector<SCALARTYPE, A2>,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0), proxy.lhs(), proxy.rhs());
        return *this;
      }

      //scaled rank-1 update: this += val * outer_prod(v1, v2)
      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                                                         const vector<SCALARTYPE, A2>,
                                                                                                         op_prod >,
                                                                               const SCALARTYPE,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs());
        return *this;
      }

      //scaled rank-1 update: this -= val * outer_prod(v1, v2)
      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                                                         const vector<SCALARTYPE, A2>,
                                                                                                         op_prod >,
                                                                               const SCALARTYPE,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0) * proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs());
        return *this;
      }

      /** @brief Scales this matrix by a CPU scalar value */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (SCALARTYPE val)
      {
        viennacl::linalg::inplace_mult(*this, val);
        return *this;
      }

      /** @brief Scales this matrix by a GPU scalar value */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (scalar<SCALARTYPE> const & val)
      {
        viennacl::linalg::inplace_mult(*this, val);
        return *this;
      }

      /** @brief Scales this matrix by 1/val for a CPU scalar value */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (SCALARTYPE val)
      {
        viennacl::linalg::inplace_mult(*this, SCALARTYPE(1.0) / val);
        return *this;
      }

      /** @brief Scales this matrix by 1/val for a GPU scalar value */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (scalar<SCALARTYPE> const & val)
      {
        viennacl::linalg::inplace_divide(*this, val);
        return *this;
      }


      //this = A * B and related (with trans())
      template <typename MatrixType1, typename MatrixType2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< MatrixType1,
                                                                              MatrixType2,
                                                                              op_prod > & proxy)
      {
        viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      //this = A + B
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              op_add > & proxy)
      {
        viennacl::linalg::add(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      //this = A - B
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              op_sub > & proxy)
      {
        viennacl::linalg::sub(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      /** @brief Returns the number of rows */
      const size_type & size1() const { return rows_; }
      /** @brief Returns the number of columns */
      const size_type & size2() const { return columns_; }

      /** @brief Resets all entries to zero; does not change the size of the matrix */
      void clear()
      {
        std::size_t internal_size = internal_size1() * internal_size2();

        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;

        viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "clear");
        viennacl::ocl::enqueue(k(elements_, static_cast<cl_uint>(internal_size)));
      }

      //const unsigned int row_stride() const { return roundUpToNextMultiple<unsigned int>(columns(), ALIGNMENT); }
      /** @brief Returns the internal number of rows, including alignment padding */
      const size_type internal_size1() const { return F::internal_size1(size1(), ALIGNMENT); }
      /** @brief Returns the internal number of columns, including alignment padding */
      const size_type internal_size2() const { return F::internal_size2(size2(), ALIGNMENT); }
      /** @brief Returns the total number of internally stored entries (including padding) */
      const size_type internal_size() const { return internal_size1() * internal_size2(); }

      /** @brief Returns the OpenCL memory handle */
      const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; }

      #if defined(_MSC_VER) && _MSC_VER < 1500      //Visual Studio 2005 needs special treatment
      template <typename CPU_MATRIX>
      friend void copy(const CPU_MATRIX & cpu_matrix,
                       matrix & gpu_matrix );

      template <typename SCALARTYPE2, typename A1, typename A2>
      friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,
                       matrix & gpu_matrix );

      template <typename SCALARTYPE2>
      friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
                            SCALARTYPE2 * cpu_matrix_end,
                            matrix & gpu_matrix);

      #ifdef VIENNACL_HAVE_EIGEN
      friend void copy(const Eigen::MatrixXf & cpu_matrix,
                       matrix & gpu_matrix);

      friend void copy(const Eigen::MatrixXd & cpu_matrix,
                       matrix & gpu_matrix);
      #endif

      #ifdef VIENNACL_HAVE_MTL4
      template <typename SCALARTYPE2, typename T>
      friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix,
                       matrix & gpu_matrix);
      #endif
      #else
      template <typename CPU_MATRIX, typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const CPU_MATRIX & cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix );

      template <typename SCALARTYPE2, typename A1, typename A2, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix );

      template <typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
      friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
                            SCALARTYPE2 * cpu_matrix_end,
                            matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix);

      #ifdef VIENNACL_HAVE_EIGEN
      template <typename F2, unsigned int ALIGNMENT2>
      friend void copy(const Eigen::MatrixXf & cpu_matrix,
                       matrix<float, F2, ALIGNMENT2> & gpu_matrix);

      template <typename F2, unsigned int ALIGNMENT2>
      friend void copy(const Eigen::MatrixXd & cpu_matrix,
                       matrix<double, F2, ALIGNMENT2> & gpu_matrix);
      #endif

      #ifdef VIENNACL_HAVE_MTL4
      template <typename SCALARTYPE2, typename T, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix);
      #endif
      #endif

    private:
      size_type rows_;
      size_type columns_;
      viennacl::ocl::handle<cl_mem> elements_;
  }; //matrix
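
  // Usage sketch (illustration only; assumes a working OpenCL context has been set up):
  //
  //   viennacl::matrix<float> A(4, 4), B(4, 4);   // default layout/alignment come from forwards.h
  //   A.clear();                                  // set all entries to zero
  //   A(2, 3) = 7.0f;                             // element-wise write access via entry_proxy (one transfer per entry, slow)
  //   B = A;                                      // deep copy on the device
  //   B += A;                                     // viennacl::linalg::inplace_add
  //   B *= 2.0f;                                  // viennacl::linalg::inplace_mult
  //   A.resize(8, 8);                             // existing entries are preserved by default
  //   std::cout << B << std::endl;                // prints [rows,cols]((row0),(row1),...)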

  /** @brief Prints the matrix to the output stream, row by row */
  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  std::ostream & operator<<(std::ostream & s, const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type      size_type;

    std::vector<SCALARTYPE> tmp(gpu_matrix.internal_size());
    cl_int err;
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.internal_size(), &tmp[0], 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);

    s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";

    s << "(";
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      s << "(";
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
      {
        s << tmp[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
        if (j < gpu_matrix.size2() - 1)
          s << ",";
      }
      s << ")";
      if (i < gpu_matrix.size1() - 1)
        s << ",";
    }
    s << ")";
    return s;
  }

  /** @brief Prints a matrix expression by evaluating it into a temporary matrix first */
  template<typename LHS, typename RHS, typename OP>
  std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
  {
    typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER< typename tools::CONST_REMOVER<LHS>::ResultType >::ResultType     ScalarType;

    matrix<ScalarType> temp = expr;
    s << temp;
    return s;
  }

  /** @brief Returns an expression template representing the transpose of the given matrix */
  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                     const matrix<SCALARTYPE, F, ALIGNMENT>,
                     op_trans> trans(const matrix<SCALARTYPE, F, ALIGNMENT> & mat)
  {
    return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                              op_trans>(mat, mat);
  }
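
  // Usage sketch (illustration only): trans() only builds a lightweight expression object;
  // the actual transposition happens in the corresponding assignment operator of matrix.
  //
  //   viennacl::matrix<float> A(3, 5);
  //   viennacl::matrix<float> At(5, 3);
  //   At = viennacl::trans(A);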


  //
  //cpu to gpu, generic type:
  //
  /** @brief Copies a dense matrix from the host to the OpenCL device
  *
  * @param cpu_matrix   A dense host-side matrix type offering size1(), size2() and operator()(i,j) access
  * @param gpu_matrix   A viennacl::matrix; it is resized if still empty, otherwise the sizes must match
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type      size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.size1(),
                        cpu_matrix.size2(), false);
    }
    else
    {
      assert( (gpu_matrix.size1() == cpu_matrix.size1())
              && (gpu_matrix.size2() == cpu_matrix.size2())
            );
    }

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }

  //
  //cpu to gpu, STL type:
  //
  /** @brief Copies a dense STL-style matrix (vector of vectors) from the host to the device
  *
  * @param cpu_matrix   A std::vector of std::vector entries; all inner vectors must have the same length
  * @param gpu_matrix   A viennacl::matrix; it is resized if still empty, otherwise the sizes must match
  */
  template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
  void copy(const std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type      size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.size(),
                        cpu_matrix[0].size(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == cpu_matrix.size())
              && (gpu_matrix.size2() == cpu_matrix[0].size())
            );
    }

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }
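
  // Usage sketch (illustration only):
  //
  //   std::vector< std::vector<float> > host(64, std::vector<float>(64, 1.0f));
  //   viennacl::matrix<float> device;        // still empty; copy() resizes it to 64x64
  //   viennacl::copy(host, device);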

  //
  //cpu to gpu, raw pointer range (fast_copy):
  //
  /** @brief Copies a contiguous host buffer that already uses the internal (padded) memory layout of the matrix directly to the device */
  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void fast_copy(SCALARTYPE * cpu_matrix_begin,
                 SCALARTYPE * cpu_matrix_end,
                 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
                                                                          sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin),
                                                                          cpu_matrix_begin);
  }
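
  // Usage sketch (illustration only): fast_copy() performs no index translation, so the host
  // buffer must already hold internal_size() entries laid out according to F::mem_index().
  //
  //   viennacl::matrix<float> M(4, 4);
  //   std::vector<float> host(M.internal_size(), 0.0f);
  //   viennacl::fast_copy(&host[0], &host[0] + host.size(), M);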


#ifdef VIENNACL_HAVE_EIGEN
  template <typename F, unsigned int ALIGNMENT>
  void copy(const Eigen::MatrixXf & cpu_matrix,
            matrix<float, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<float, F, ALIGNMENT>::size_type      size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.rows(),
                        cpu_matrix.cols(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == static_cast<std::size_t>(cpu_matrix.rows()))
              && (gpu_matrix.size2() == static_cast<std::size_t>(cpu_matrix.cols()))
            );
    }

    std::vector<float> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }

  template <typename F, unsigned int ALIGNMENT>
  void copy(const Eigen::MatrixXd & cpu_matrix,
            matrix<double, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<double, F, ALIGNMENT>::size_type      size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.rows(),
                        cpu_matrix.cols(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == static_cast<std::size_t>(cpu_matrix.rows()))
              && (gpu_matrix.size2() == static_cast<std::size_t>(cpu_matrix.cols()))
            );
    }

    std::vector<double> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }
#endif

#ifdef VIENNACL_HAVE_MTL4
  template <typename SCALARTYPE, typename T, typename F, unsigned int ALIGNMENT>
  void copy(const mtl::dense2D<SCALARTYPE, T>& cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type      size_type;

    if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    {
      gpu_matrix.resize(cpu_matrix.num_rows(),
                        cpu_matrix.num_cols(),
                        false);
    }
    else
    {
      assert( (gpu_matrix.size1() == cpu_matrix.num_rows())
              && (gpu_matrix.size2() == cpu_matrix.num_cols())
            );
    }

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
    }

    gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }
#endif


  //
  //gpu to cpu, generic type
  //
  /** @brief Copies a dense matrix from the OpenCL device back to the host
  *
  * @param gpu_matrix   A viennacl::matrix
  * @param cpu_matrix   A dense host-side matrix type offering operator()(i,j) write access and at least the same dimensions
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
            CPU_MATRIX & cpu_matrix )
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type      size_type;

    if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
    {
      std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      //now copy entries to cpu_matrix:
      for (size_type i = 0; i < gpu_matrix.size1(); ++i)
        for (size_type j = 0; j < gpu_matrix.size2(); ++j)
          cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
    }
  }

  //gpu to cpu, STL type
  /** @brief Copies a matrix from the device into a host-side std::vector of std::vector entries (which must already be large enough) */
  template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
  void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
            std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix)
  {
    typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type      size_type;

    if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0)
         && (cpu_matrix.size() >= gpu_matrix.size1()) && (cpu_matrix[0].size() >= gpu_matrix.size2()))
    {
      std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      //now copy entries to cpu_matrix:
      for (size_type i = 0; i < gpu_matrix.size1(); ++i)
        for (size_type j = 0; j < gpu_matrix.size2(); ++j)
          cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
    }
  }
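
  // Usage sketch (illustration only): the host matrix must already provide at least
  // gpu.size1() x gpu.size2() entries, since this overload does not resize it.
  //
  //   viennacl::matrix<float> device(32, 32);
  //   std::vector< std::vector<float> > host(32, std::vector<float>(32));
  //   viennacl::copy(device, host);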

  //gpu to cpu, raw pointer (fast_copy)
  /** @brief Copies the internal (padded) matrix buffer from the device directly into a contiguous host buffer of at least internal_size() entries */
  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void fast_copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
                 SCALARTYPE * cpu_matrix_begin)
  {
    cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                     gpu_matrix.handle(),
                                     CL_TRUE, 0,
                                     sizeof(SCALARTYPE)*gpu_matrix.internal_size(),
                                     cpu_matrix_begin, 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
  }


  // outer_prod(v1, v2) * val;
  template <typename CPU_SCALAR, typename SCALARTYPE, unsigned int VECTOR_ALIGNMENT>
  viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                  const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                  op_prod>,
                               const SCALARTYPE,
                               op_prod> operator*(const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                                     const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                                     op_prod> & proxy,
                                                  CPU_SCALAR const & val)
  {
    return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                           const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                           op_prod>,
                                        const SCALARTYPE,
                                        op_prod>(proxy, static_cast<SCALARTYPE>(val));
  }

  // val * outer_prod(v1, v2);
  template <typename CPU_SCALAR, typename SCALARTYPE, unsigned int VA1, unsigned int VA2>
  viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                  const viennacl::vector<SCALARTYPE, VA2>,
                                                                  op_prod>,
                               const SCALARTYPE,
                               op_prod> operator*(CPU_SCALAR const & val,
                                                  viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                               const viennacl::vector<SCALARTYPE, VA2>,
                                                                               op_prod> const & proxy)
  {
    return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                           const viennacl::vector<SCALARTYPE, VA2>,
                                                                           op_prod>,
                                        const SCALARTYPE,
                                        op_prod>(proxy, static_cast<SCALARTYPE>(val));
  }
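
  // Usage sketch (illustration only, assuming viennacl::linalg::outer_prod() from
  // viennacl/linalg/matrix_operations.hpp): together with the rank-1 operators of the matrix
  // class, these overloads enable scaled rank-1 updates such as
  //
  //   viennacl::vector<float> v1(100), v2(100);
  //   viennacl::matrix<float> A(100, 100);
  //   A += 2.0f * viennacl::linalg::outer_prod(v1, v2);
  //   A -= viennacl::linalg::outer_prod(v1, v2) * 0.5f;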


} //namespace viennacl

#endif