1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP
2 #define VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP
37 #include "boost/numeric/ublas/vector.hpp"
38 #include "boost/numeric/ublas/matrix.hpp"
39 #include "boost/numeric/ublas/matrix_proxy.hpp"
40 #include "boost/numeric/ublas/vector_proxy.hpp"
41 #include "boost/numeric/ublas/storage.hpp"
42 #include "boost/numeric/ublas/io.hpp"
43 #include "boost/numeric/ublas/lu.hpp"
44 #include "boost/numeric/ublas/triangular.hpp"
45 #include "boost/numeric/ublas/matrix_expression.hpp"
74 typedef std::pair<unsigned int, double>
PairT;
78 return static_cast<double>(left.second) > static_cast<double>(right.second);
89 template<
typename SparseMatrixType,
typename DenseMatrixType>
90 void initProjectSubMatrix(
const SparseMatrixType& A_in,
const std::vector<unsigned int>& J, std::vector<unsigned int>& I,
91 DenseMatrixType& A_out){
92 typedef typename DenseMatrixType::value_type ScalarType;
93 A_out.resize(I.size(), J.size(),
false);
94 for(
size_t j = 0; j < J.size(); ++j){
95 for(
size_t i = 0; i < I.size(); ++i){
96 A_out(i,j) = A_in(I[i],J[j]);
105 bool isInIndexSet(
const std::vector<unsigned int>& J,
const unsigned int& ind){
106 return (std::find(J.begin(), J.end(), ind) != J.end());
114 template<
typename MatrixType>
115 void composeNewR(
const MatrixType& A,
const MatrixType& R_n, MatrixType& R){
116 typedef typename MatrixType::value_type ScalarType;
117 size_t row_n = R_n.size1() - (A.size1() - R.size2());
118 MatrixType C = boost::numeric::ublas::zero_matrix<ScalarType>(R.size1() + row_n, R.size2() + A.size2());
123 R.size2() + A.size2())) +=
126 if(R_n.size1() > 0 && R_n.size2() > 0)
136 template<
typename VectorType>
138 typedef typename VectorType::value_type ScalarType;
139 VectorType w = boost::numeric::ublas::zero_vector<ScalarType>(v.size() + v_n.size());
149 template<
typename SparseVectorType,
typename ScalarType>
151 for(
typename SparseVectorType::const_iterator vec_it = v.begin(); vec_it != v.end(); ++vec_it){
152 norm += (vec_it->second)*(vec_it->second);
154 norm = std::sqrt(norm);
162 template<
typename SparseVectorType,
typename ScalarType>
163 void sparse_inner_prod(
const SparseVectorType& v1,
const SparseVectorType& v2, ScalarType& res_v){
164 typename SparseVectorType::const_iterator v_it1 = v1.begin();
165 typename SparseVectorType::const_iterator v_it2 = v2.begin();
166 while((v_it1 != v1.end())&&(v_it2 != v2.end())){
167 if(v_it1->first == v_it2->first){
168 res_v += (v_it1->second)*(v_it2->second);
172 else if(v_it1->first < v_it2->first){
189 template <
typename SparseVectorType,
typename ScalarType>
191 const SparseVectorType& res,
192 std::vector<unsigned int>& J,
193 std::vector<unsigned int>& J_u,
195 std::vector<std::pair<unsigned int, ScalarType> > p;
197 ScalarType inprod, norm2;
199 for(
typename SparseVectorType::const_iterator res_it = res.begin(); res_it != res.end(); ++res_it){
204 p.push_back(std::pair<size_t, ScalarType>(res_it->first, (inprod*inprod)/(norm2*norm2)));
209 while ((cur_size < J.size())&&(p.size() > 0)) {
210 J_u.push_back(p[0].first);
215 return (cur_size > 0);
224 template<
typename SparseVectorType>
225 void buildNewRowSet(
const std::vector<SparseVectorType>& A_v_c,
const std::vector<unsigned int>& I,
226 const std::vector<unsigned int>& J_n, std::vector<unsigned int>& I_n){
227 for(
size_t i = 0; i < J_n.size(); ++i){
228 for(
typename SparseVectorType::const_iterator col_it = A_v_c[J_n[i]].begin(); col_it!=A_v_c[J_n[i]].end(); ++col_it){
230 I_n.push_back(col_it->first);
241 template<
typename MatrixType>
243 typedef typename MatrixType::value_type ScalarType;
244 size_t row_n1 = A_I_J_u.size1() - A_I_J.size2();
245 size_t row_n2 = A_I_u_J_u.size1();
246 size_t row_n = row_n1 + row_n2;
247 size_t col_n = A_I_J_u.size2();
248 MatrixType C = boost::numeric::ublas::zero_matrix<ScalarType>(row_n, col_n);
269 template<
typename SparseMatrixType,
typename SparseVectorType,
typename DenseMatrixType,
typename VectorType>
270 void block_update(
const SparseMatrixType& A,
const std::vector<SparseVectorType>& A_v_c,
271 std::vector<SparseVectorType>& g_res,
272 std::vector<bool>& g_is_update,
273 std::vector<std::vector<unsigned int> >& g_I,
274 std::vector<std::vector<unsigned int> >& g_J,
275 std::vector<VectorType>& g_b_v,
276 std::vector<DenseMatrixType>& g_A_I_J,
278 typedef typename DenseMatrixType::value_type ScalarType;
280 std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
282 std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
284 std::vector<DenseMatrixType> g_A_I_J_u(g_J.size());
286 std::vector<DenseMatrixType> g_A_I_u_J_u(g_J.size());
288 std::vector<VectorType> g_b_v_u(g_J.size());
290 #pragma omp parallel for
292 for(std::size_t i = 0; i < g_J.size(); ++i){
294 if(buildAugmentedIndexSet<SparseVectorType, ScalarType>(A_v_c, g_res[i], g_J[i], g_J_u[i], tag)){
307 composeNewR(g_A_I_J_u[i], g_A_I_u_J_u[i], g_A_I_J[i]);
310 g_J[i].insert(g_J[i].end(), g_J_u[i].begin(), g_J_u[i].end());
311 g_I[i].insert(g_I[i].end(), g_I_u[i].begin(), g_I_u[i].end());
313 g_is_update[i] =
false;
331 template<
typename ScalarType>
333 const std::vector<std::vector<unsigned int> >& g_I,
337 std::vector<cl_uint>& g_is_update,
338 const unsigned int cur_iter){
339 unsigned int local_r_n, local_c_n, sz_blocks;
343 std::vector<cl_uint> matrix_dims(g_I.size()*2,
static_cast<cl_uint
>(0));
344 std::vector<cl_uint> blocks_ind(g_I.size() + 1,
static_cast<cl_uint
>(0));
346 std::vector<ScalarType> con_A_I_J(sz_blocks, static_cast<ScalarType>(0));
358 static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
367 static_cast<cl_uint
>(g_I.size())));
376 void assemble_qr_row_inds(
const std::vector<std::vector<unsigned int> >& g_I,
const std::vector<std::vector<unsigned int> > g_J,
377 const std::vector<std::vector<unsigned int> >& g_I_u,
378 std::vector<std::vector<unsigned int> >& g_I_q){
380 #pragma omp parallel for
382 for(std::size_t i = 0; i < g_I.size(); ++i){
383 for(std::size_t j = g_J[i].
size(); j < g_I[i].size(); ++j){
384 g_I_q[i].push_back(g_I[i][j]);
387 for(std::size_t j = 0; j < g_I_u[i].size(); ++j){
388 g_I_q[i].push_back(g_I_u[i][j]);
407 template<
typename ScalarType>
409 const std::vector<std::vector<unsigned int> >& g_J,
410 const std::vector<std::vector<unsigned int> >& g_I,
411 const std::vector<std::vector<unsigned int> >& g_J_u,
412 const std::vector<std::vector<unsigned int> >& g_I_u,
413 std::vector<std::vector<unsigned int> >& g_I_q,
417 std::vector<cl_uint>& g_is_update,
418 const bool is_empty_block,
419 const unsigned int cur_iter){
422 unsigned int sz_blocks;
423 std::vector<cl_uint> matrix_dims(g_I.size()*2,
static_cast<cl_uint
>(0));
424 std::vector<cl_uint> blocks_ind(g_I.size() + 1,
static_cast<cl_uint
>(0));
426 std::vector<ScalarType> con_A_I_J_q(sz_blocks, static_cast<ScalarType>(0));
451 static_cast<unsigned int>(
sizeof(ScalarType)*sz_blocks),
454 static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
457 static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size() + 1)),
460 static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
478 static_cast<unsigned int>(g_I.size())));
488 static_cast<unsigned int>(g_I.size())));
506 template<
typename ScalarType>
507 void assemble_r(std::vector<std::vector<unsigned int> >& g_I, std::vector<std::vector<unsigned int> >& g_J,
513 std::vector<cl_uint>& g_is_update,
514 const unsigned int cur_iter){
515 std::vector<cl_uint> matrix_dims(g_I.size()*2,
static_cast<cl_uint
>(0));
516 std::vector<cl_uint> blocks_ind(g_I.size() + 1,
static_cast<cl_uint
>(0));
517 std::vector<cl_uint> start_bv_r_inds(g_I.size() + 1, 0);
518 unsigned int sz_blocks, bv_size;
522 std::vector<ScalarType> con_A_I_J_r(sz_blocks, static_cast<ScalarType>(0));
523 std::vector<ScalarType> b_v_r(bv_size, static_cast<ScalarType>(0));
542 static_cast<unsigned int>(
sizeof(ScalarType)*sz_blocks),
545 static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
548 static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size() + 1)),
551 static_cast<unsigned int>(
sizeof(ScalarType)*bv_size),
554 static_cast<unsigned int>(
sizeof(cl_uint)*(g_I.size() + 1)),
555 &(start_bv_r_inds[0]));
557 static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
567 g_is_update_vcl,
static_cast<cl_uint
>(g_I.size())));
570 bv_assembly_kernel.local_work_size(0, 1);
571 bv_assembly_kernel.global_work_size(0, 256);
575 static_cast<cl_uint
>(g_I.size())));
596 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT,
typename SparseVectorType>
598 std::vector<cl_uint>& g_is_update,
599 std::vector<SparseVectorType>& g_res,
600 std::vector<std::vector<unsigned int> >& g_J,
601 std::vector<std::vector<unsigned int> >& g_I,
605 const unsigned int cur_iter){
607 std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
609 std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
611 std::vector<std::vector<unsigned int> > g_I_q(g_J.size());
620 #pragma omp parallel for
622 for(std::size_t i = 0; i < g_J.size(); ++i){
624 if(buildAugmentedIndexSet<SparseVectorType, ScalarType>(A_v_c, g_res[i], g_J[i], g_J_u[i], tag)){
630 block_assembly(A, g_J_u, g_I, g_A_I_J_u_vcl, g_is_update, is_empty_block, cur_iter);
632 block_q_multiplication<ScalarType>(g_J_u, g_I, g_A_I_J_vcl, g_bv_vcl, g_A_I_J_u_vcl, g_is_update, cur_iter);
634 block_assembly(A, g_J_u, g_I_u, g_A_I_u_J_u_vcl, g_is_update, is_empty_block, cur_iter);
635 assemble_qr_block<ScalarType>(g_J, g_I, g_J_u, g_I_u, g_I_q, g_A_I_J_u_vcl, g_A_I_J_vcl.handle1(),
636 g_A_I_u_J_u_vcl, g_is_update, is_empty_block, cur_iter);
638 block_qr<ScalarType>(g_I_q, g_J_u, g_A_I_u_J_u_vcl, g_bv_u_vcl, g_is_update, cur_iter);
641 #pragma omp parallel for
643 for(std::size_t i = 0; i < g_J.size(); ++i){
644 g_J[i].insert(g_J[i].end(), g_J_u[i].begin(), g_J_u[i].end());
645 g_I[i].insert(g_I[i].end(), g_I_u[i].begin(), g_I_u[i].end());
647 assemble_r<ScalarType>(g_I, g_J, g_A_I_J_vcl, g_A_I_J_u_vcl, g_A_I_u_J_u_vcl, g_bv_vcl, g_bv_u_vcl, g_is_update, cur_iter);