1 #ifndef VIENNACL_MATRIX_PROXY_HPP_
2 #define VIENNACL_MATRIX_PROXY_HPP_
// NOTE(review): this excerpt is mangled -- the original file's line numbers
// ("32", "43", "44") are fused into the text and intermediate lines are missing.
// Fragment of the class template matrix_range<MatrixType>: tail of its
// constructor. The visible initializer list stores the wrapped matrix
// reference (A_) and the row/column index ranges (row_range_, col_range_).
// Presumably the elided head is `matrix_range(MatrixType & A, ...)` --
// confirm against the full file.
32 template <
typename MatrixType>
43 range const & row_range,
44 range const & col_range) : A_(A), row_range_(row_range), col_range_(col_range) {}
// Fragment: template header of a two-matrix-type operator/function whose
// declaration and body are entirely elided from this excerpt. Cannot tell
// from here which operator it introduces -- consult the full file.
52 template <
typename MatrixType1,
typename MatrixType2>
// Fragment of an assignment-style operator taking a proxy/expression object.
// The visible line materializes the (presumably lazy) expression into a
// temporary via the copy-conversion `MatrixType1 temp = proxy;`; the elided
// remainder presumably assigns or accumulates the temporary -- confirm
// against the full file.
68 template <
typename MatrixType1,
typename MatrixType2>
73 MatrixType1 temp = proxy;
// Fragment of another operator on a proxy/expression object. Here the
// temporary is constructed with the proxy's dimensions
// (`temp(proxy.size1(), proxy.size2())`) rather than copy-converted,
// presumably so the elided code can evaluate the expression into it --
// confirm against the full file.
81 template <
typename MatrixType1,
typename MatrixType2>
86 MatrixType1 temp(proxy.size1(), proxy.size2());
98 MatrixType &
get() {
return A_; }
99 const MatrixType &
get()
const {
return A_; }
// Fragment: return type of a free function (presumably `project` or an
// operator) producing a lazy matrix_expression over two matrix_range
// operands. The operation tag (third template argument) and the function
// body are elided from this excerpt.
109 template <
typename MatrixType>
110 matrix_expression< const matrix_range<MatrixType>,
111 const matrix_range<MatrixType>,
127 template <
typename CPU_MATRIX,
typename SCALARTYPE>
128 void copy(
const CPU_MATRIX & cpu_matrix,
131 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
132 && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
134 if ( gpu_matrix_range.start2() != 0 || gpu_matrix_range.size2() != gpu_matrix_range.get().size2())
136 std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());
139 for (
size_t i=0; i < gpu_matrix_range.size1(); ++i)
141 for (
size_t j=0; j < gpu_matrix_range.size2(); ++j)
142 entries[j] = cpu_matrix(i,j);
144 size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.get().internal_size2() + gpu_matrix_range.start2();
145 size_t num_entries = gpu_matrix_range.size2();
147 gpu_matrix_range.get().
handle(), CL_TRUE,
148 sizeof(SCALARTYPE)*start_offset,
149 sizeof(SCALARTYPE)*num_entries,
150 &(entries[0]), 0, NULL, NULL);
158 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
161 for (
size_t i=0; i < gpu_matrix_range.size1(); ++i)
162 for (
size_t j=0; j < gpu_matrix_range.size2(); ++j)
163 entries[i*gpu_matrix_range.get().internal_size2() + j] = cpu_matrix(i,j);
165 size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.get().internal_size2();
166 size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
169 gpu_matrix_range.get().
handle(), CL_TRUE,
170 sizeof(SCALARTYPE)*start_offset,
171 sizeof(SCALARTYPE)*num_entries,
172 &(entries[0]), 0, NULL, NULL);
179 template <
typename CPU_MATRIX,
typename SCALARTYPE>
180 void copy(
const CPU_MATRIX & cpu_matrix,
183 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
184 && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
186 if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.get().size1())
188 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());
191 for (
size_t j=0; j < gpu_matrix_range.size2(); ++j)
193 for (
size_t i=0; i < gpu_matrix_range.size1(); ++i)
194 entries[i] = cpu_matrix(i,j);
196 size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.get().internal_size1() + gpu_matrix_range.start1();
197 size_t num_entries = gpu_matrix_range.size1();
199 gpu_matrix_range.get().
handle(), CL_TRUE,
200 sizeof(SCALARTYPE)*start_offset,
201 sizeof(SCALARTYPE)*num_entries,
202 &(entries[0]), 0, NULL, NULL);
210 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
213 for (
size_t i=0; i < gpu_matrix_range.size1(); ++i)
214 for (
size_t j=0; j < gpu_matrix_range.size2(); ++j)
215 entries[i + j*gpu_matrix_range.get().internal_size1()] = cpu_matrix(i,j);
217 size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.get().internal_size1();
218 size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
221 gpu_matrix_range.get().
handle(), CL_TRUE,
222 sizeof(SCALARTYPE)*start_offset,
223 sizeof(SCALARTYPE)*num_entries,
224 &(entries[0]), 0, NULL, NULL);
238 template <
typename CPU_MATRIX,
typename SCALARTYPE>
240 CPU_MATRIX & cpu_matrix)
242 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
243 && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
245 if ( gpu_matrix_range.start2() != 0 || gpu_matrix_range.size2() != gpu_matrix_range.get().size2())
247 std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());
250 for (
size_t i=0; i < gpu_matrix_range.size1(); ++i)
252 size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.get().internal_size2() + gpu_matrix_range.start2();
253 size_t num_entries = gpu_matrix_range.size2();
255 gpu_matrix_range.get().
handle(), CL_TRUE,
256 sizeof(SCALARTYPE)*start_offset,
257 sizeof(SCALARTYPE)*num_entries,
258 &(entries[0]), 0, NULL, NULL);
262 for (
size_t j=0; j < gpu_matrix_range.size2(); ++j)
263 cpu_matrix(i,j) = entries[j];
270 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
272 size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.get().internal_size2();
273 size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
276 gpu_matrix_range.get().
handle(), CL_TRUE,
277 sizeof(SCALARTYPE)*start_offset,
278 sizeof(SCALARTYPE)*num_entries,
279 &(entries[0]), 0, NULL, NULL);
283 for (
size_t i=0; i < gpu_matrix_range.size1(); ++i)
284 for (
size_t j=0; j < gpu_matrix_range.size2(); ++j)
285 cpu_matrix(i,j) = entries[i*gpu_matrix_range.get().internal_size2() + j];
292 template <
typename CPU_MATRIX,
typename SCALARTYPE>
294 CPU_MATRIX & cpu_matrix)
296 assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
297 && (cpu_matrix.size2() == gpu_matrix_range.size2()) );
299 if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.get().size1())
301 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());
304 for (
size_t j=0; j < gpu_matrix_range.size2(); ++j)
306 size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.get().internal_size1() + gpu_matrix_range.start1();
307 size_t num_entries = gpu_matrix_range.size1();
309 gpu_matrix_range.get().
handle(), CL_TRUE,
310 sizeof(SCALARTYPE)*start_offset,
311 sizeof(SCALARTYPE)*num_entries,
312 &(entries[0]), 0, NULL, NULL);
316 for (
size_t i=0; i < gpu_matrix_range.size1(); ++i)
317 cpu_matrix(i,j) = entries[i];
323 std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());
326 size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.get().internal_size1();
327 size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
330 gpu_matrix_range.get().
handle(), CL_TRUE,
331 sizeof(SCALARTYPE)*start_offset,
332 sizeof(SCALARTYPE)*num_entries,
333 &(entries[0]), 0, NULL, NULL);
337 for (
size_t i=0; i < gpu_matrix_range.size1(); ++i)
338 for (
size_t j=0; j < gpu_matrix_range.size2(); ++j)
339 cpu_matrix(i,j) = entries[i + j*gpu_matrix_range.get().internal_size1()];