#ifndef VIENNACL_MATRIX_HPP_
#define VIENNACL_MATRIX_HPP_
return i * num_cols + j;
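// Row-major layout: element (i, j) maps to the flat offset i * num_cols + j.
// For example, with num_cols = 4, entry (2, 1) lands at offset 2*4 + 1 = 9.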
return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(rows, alignment);
return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(cols, alignment);
return i + j * num_rows;
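// Column-major layout: element (i, j) maps to the flat offset i + j * num_rows,
// so consecutive entries of a column are adjacent in memory. With num_rows = 4,
// entry (2, 1) lands at offset 2 + 1*4 = 6.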
return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(rows, alignment);
return viennacl::tools::roundUpToNextMultiple<vcl_size_t>(cols, alignment);
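// The internal (padded) dimensions round the logical row and column counts up
// to the next multiple of ALIGNMENT; the extra elements are unused padding that
// lets the OpenCL kernels assume evenly divisible work sizes.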
template <typename LHS, typename RHS, typename OP>
LHS & lhs() const { return _lhs; }
RHS & rhs() const { return _rhs; }
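// matrix_expression is a classic expression-template node: it stores only
// references to its operands plus the operation tag OP, so building an
// expression allocates nothing and copies nothing. Evaluation is deferred
// until the expression is assigned to a matrix, e.g. (sketch):
//   viennacl::matrix<float> A(4, 4), B(4, 4), C(4, 4);
//   C = A + B;   // builds matrix_expression<..., op_add>, evaluated at '='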
template <typename ROWCOL, typename MATRIXTYPE>
std::size_t start_row,
std::size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {}
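// The iterator only records its matrix reference and the current (row, column)
// position. Dereferencing goes through per-element GPU access, which is
// expensive, so these iterators are a convenience rather than a fast path.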
template <class SCALARTYPE, typename F, unsigned int ALIGNMENT>
rows_(rows), columns_(columns)

rows_(rows), columns_(columns)
template <typename LHS, typename RHS, typename OP>
assert(handle() != proxy.lhs().handle() && "Self-assignment of matrix transpose not implemented");
assert(proxy.lhs().size1() == size2() && "Matrix dimensions do not match!");
assert(proxy.lhs().size2() == size1() && "Matrix dimensions do not match!");
resize(proxy.lhs().size2(), proxy.lhs().size1(), false);
std::vector<SCALARTYPE> temp(proxy.lhs().internal_size());
                    proxy.lhs().handle(), CL_TRUE, 0,
                    sizeof(SCALARTYPE) * proxy.lhs().internal_size(),
                    &(temp[0]), 0, NULL, NULL);
for (vcl_size_t i = 0; i < proxy.lhs().size1(); ++i)
  for (vcl_size_t j = 0; j < proxy.lhs().size2(); ++j)
      = temp[F::mem_index(i, j, proxy.lhs().internal_size1(), proxy.lhs().internal_size2())];
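// Host-side transpose fallback: the source buffer is read into the temporary
// host vector above, re-indexed through F::mem_index with the row and column
// roles swapped, and then (in the elided code) written back to the device.
// Correct, but it costs two full host<->device transfers.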
assert(rows > 0 && columns > 0);
op_add > (*this, other);

op_sub > (*this, other);
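// operator+ and operator- compute nothing themselves: each returns a
// matrix_expression tagged op_add / op_sub, and the actual kernel is launched
// when the expression is assigned, e.g. (sketch):
//   C = A - B;   // one kernel launch at '=', no host-side temporaries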
template <unsigned int A1, unsigned int A2>

template <unsigned int A1, unsigned int A2>

template <unsigned int A1, unsigned int A2>

template <unsigned int A1, unsigned int A2>
template <typename MatrixType1, typename MatrixType2>
#if defined(_MSC_VER) && _MSC_VER < 1500  // Visual Studio 2005 needs special treatment
template <typename CPU_MATRIX>
friend void copy(const CPU_MATRIX & cpu_matrix,

template <typename SCALARTYPE2, typename A1, typename A2>
friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,

template <typename SCALARTYPE2>
friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
                      SCALARTYPE2 * cpu_matrix_end,
#ifdef VIENNACL_HAVE_EIGEN
friend void copy(const Eigen::MatrixXf & cpu_matrix,
friend void copy(const Eigen::MatrixXd & cpu_matrix,
#ifdef VIENNACL_HAVE_MTL4
template <typename SCALARTYPE2, typename T>
friend void copy(const mtl::dense2D<SCALARTYPE2, T> & cpu_matrix,
template <typename CPU_MATRIX, typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
friend void copy(const CPU_MATRIX & cpu_matrix,

template <typename SCALARTYPE2, typename A1, typename A2, typename F2, unsigned int ALIGNMENT2>
friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,

template <typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
                      SCALARTYPE2 * cpu_matrix_end,
#ifdef VIENNACL_HAVE_EIGEN
template <typename F2, unsigned int ALIGNMENT2>
friend void copy(const Eigen::MatrixXf & cpu_matrix,

template <typename F2, unsigned int ALIGNMENT2>
friend void copy(const Eigen::MatrixXd & cpu_matrix,
#ifdef VIENNACL_HAVE_MTL4
template <typename SCALARTYPE2, typename T, typename F2, unsigned int ALIGNMENT2>
friend void copy(const mtl::dense2D<SCALARTYPE2, T> & cpu_matrix,
template <class SCALARTYPE, typename F, unsigned int ALIGNMENT>
std::ostream & operator<<(std::ostream & s, const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  std::vector<SCALARTYPE> tmp(gpu_matrix.internal_size());
  s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";
  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    for (size_type j = 0; j < gpu_matrix.size2(); ++j)
      s << tmp[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
      if (j < gpu_matrix.size2() - 1)
    if (i < gpu_matrix.size1() - 1)
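// Printing copies the entire device buffer into the host vector tmp first,
// then walks the logical size1() x size2() range while indexing the padded
// buffer via F::mem_index. Useful for debugging, but it synchronizes with the
// device, so keep it out of timed sections.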
template <typename LHS, typename RHS, typename OP>
std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
template <class SCALARTYPE, typename F, unsigned int ALIGNMENT>
matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                   const matrix<SCALARTYPE, F, ALIGNMENT>,
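// trans() is lazy as well: it returns a matrix_expression wrapping the
// argument (tagged op_trans) instead of touching any data, e.g. (sketch):
//   B = viennacl::trans(A);   // transpose materialized on assignment to B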
template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
void copy(const CPU_MATRIX & cpu_matrix,
  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    gpu_matrix.resize(cpu_matrix.size1(),
                      cpu_matrix.size2(),
                      false);

  assert( (gpu_matrix.size1() == cpu_matrix.size1())
          && (gpu_matrix.size2() == cpu_matrix.size2())

  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    for (size_type j = 0; j < gpu_matrix.size2(); ++j)
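// This generic overload needs cpu_matrix to offer size1() and size2() (used in
// the assert above); the elided loop body is assumed to read elements via
// cpu_matrix(i, j), which is what e.g. Boost.uBLAS dense matrices provide:
//   boost::numeric::ublas::matrix<float> cpu(64, 64);
//   viennacl::matrix<float> gpu(64, 64);
//   viennacl::copy(cpu, gpu);   // stage into padded host buffer, then transfer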
template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
void copy(const std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix,
  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    gpu_matrix.resize(cpu_matrix.size(),
                      cpu_matrix[0].size(),

  assert( (gpu_matrix.size1() == cpu_matrix.size())
          && (gpu_matrix.size2() == cpu_matrix[0].size())

  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    for (size_type j = 0; j < gpu_matrix.size2(); ++j)
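// Same staging pattern for nested std::vector input. Note that the column
// count is taken from cpu_matrix[0], so every inner vector is expected to
// have the same length.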
template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
               SCALARTYPE * cpu_matrix_end,
    sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin),
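// fast_copy skips per-element staging: the host range is transferred verbatim,
// so [cpu_matrix_begin, cpu_matrix_end) must already hold internal_size()
// elements laid out in the padded device format given by F::mem_index.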
#ifdef VIENNACL_HAVE_EIGEN
template <typename F, unsigned int ALIGNMENT>
void copy(const Eigen::MatrixXf & cpu_matrix,
          matrix<float, F, ALIGNMENT> & gpu_matrix)
  typedef typename matrix<float, F, ALIGNMENT>::size_type size_type;
  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    gpu_matrix.resize(cpu_matrix.rows(),

  assert( (gpu_matrix.size1() == static_cast<std::size_t>(cpu_matrix.rows()))
          && (gpu_matrix.size2() == static_cast<std::size_t>(cpu_matrix.cols()))

  std::vector<float> data(gpu_matrix.internal_size());
  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    for (size_type j = 0; j < gpu_matrix.size2(); ++j)
      data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
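// Eigen matrices are staged element by element because Eigen's default
// (column-major) storage need not match the layout F of the target matrix,
// and the device buffer additionally carries alignment padding.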
template <typename F, unsigned int ALIGNMENT>
void copy(const Eigen::MatrixXd & cpu_matrix,
          matrix<double, F, ALIGNMENT> & gpu_matrix)
  typedef typename matrix<double, F, ALIGNMENT>::size_type size_type;

  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    gpu_matrix.resize(cpu_matrix.rows(),

  assert( (gpu_matrix.size1() == static_cast<std::size_t>(cpu_matrix.rows()))
          && (gpu_matrix.size2() == static_cast<std::size_t>(cpu_matrix.cols()))

  std::vector<double> data(gpu_matrix.internal_size());
  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    for (size_type j = 0; j < gpu_matrix.size2(); ++j)
      data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
#ifdef VIENNACL_HAVE_MTL4
template <typename SCALARTYPE, typename T, typename F, unsigned int ALIGNMENT>
void copy(const mtl::dense2D<SCALARTYPE, T> & cpu_matrix,
          matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;
  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
    gpu_matrix.resize(cpu_matrix.num_rows(),
                      cpu_matrix.num_cols(),

  assert( (gpu_matrix.size1() == cpu_matrix.num_rows())
          && (gpu_matrix.size2() == cpu_matrix.num_cols())

  std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
    for (size_type j = 0; j < gpu_matrix.size2(); ++j)
      data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
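// The MTL4 overload follows the same staging scheme, reading elements through
// dense2D's cpu_matrix[i][j] access.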
template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
          CPU_MATRIX & cpu_matrix )
  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
    std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
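// Device-to-host direction: the padded buffer is fetched into temp_buffer in a
// single transfer, then scattered into cpu_matrix element by element through
// F::mem_index (the per-element writes are in the elided loop body).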
template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
          std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix)
  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0)
       && (cpu_matrix.size() >= gpu_matrix.size1()) && (cpu_matrix[0].size() >= gpu_matrix.size2()))
    std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
    for (size_type i = 0; i < gpu_matrix.size1(); ++i)
      for (size_type j = 0; j < gpu_matrix.size2(); ++j)
template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
               SCALARTYPE * cpu_matrix_begin)
    cpu_matrix_begin, 0, NULL, NULL);
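// As in the host-to-device case, fast_copy hands back the raw padded buffer,
// so cpu_matrix_begin must point to storage for at least internal_size()
// elements in the internal layout.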
template <typename CPU_SCALAR, typename SCALARTYPE, unsigned int VECTOR_ALIGNMENT>
          CPU_SCALAR const & val)
    op_prod>(proxy, static_cast<SCALARTYPE>(val));
template <typename CPU_SCALAR, typename SCALARTYPE, unsigned int VA1, unsigned int VA2>
                      op_prod> const & proxy)
    op_prod>(proxy, static_cast<SCALARTYPE>(val));
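// Scaling by a CPU scalar is also folded into the expression: the scalar is
// cast to the device scalar type and rewrapped with op_prod, so nothing is
// evaluated until assignment. Judging from the vector alignment parameters,
// these overloads target outer-product expressions, e.g. (sketch):
//   A += 2.0f * viennacl::linalg::outer_prod(v1, v2);   // rank-1 update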