ViennaCL - The Vienna Computing Library  1.2.0
qr.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
2 #define VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2011, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
26 #include <utility>
27 #include <iostream>
28 #include <fstream>
29 #include <string>
30 #include <algorithm>
31 #include <vector>
32 #include <math.h>
33 #include <cmath>
34 #include <sstream>
35 #include "viennacl/ocl/backend.hpp"
36 #include "boost/numeric/ublas/vector.hpp"
37 #include "boost/numeric/ublas/matrix.hpp"
38 #include "boost/numeric/ublas/matrix_proxy.hpp"
39 #include "boost/numeric/ublas/storage.hpp"
40 #include "boost/numeric/ublas/io.hpp"
41 #include "boost/numeric/ublas/matrix_expression.hpp"
42 #include "boost/numeric/ublas/detail/matrix_assign.hpp"
43 //#include "boost/thread/thread.hpp"
44 
45 #include "viennacl/vector.hpp"
46 #include "viennacl/matrix.hpp"
47 
52 
53 namespace viennacl
54 {
55  namespace linalg
56  {
57  namespace detail
58  {
59  namespace spai
60  {
61 
62 
63 
64  //********** DEBUG FUNCTIONS *****************//
65  template< typename T, typename InputIterator>
66  void Print(std::ostream& ostr, InputIterator it_begin, InputIterator it_end){
67  //std::ostream_iterator<int> it_os(ostr, delimiter);
68  std::string delimiters = " ";
69  std::copy(it_begin, it_end, std::ostream_iterator<T>(ostr, delimiters.c_str()));
70  ostr<<std::endl;
71  }
72 
73  template<typename VectorType, typename MatrixType>
74  void write_to_block(VectorType& con_A_I_J, unsigned int start_ind, const std::vector<unsigned int>& I, const std::vector<unsigned int>& J, MatrixType& m){
75  m.resize(I.size(), J.size(), false);
76  for(size_t i = 0; i < J.size(); ++i){
77  for(size_t j = 0; j < I.size(); ++j){
78  m(j,i) = con_A_I_J[start_ind + i*I.size() + j];
79  }
80  }
81  }
82 
83  template<typename VectorType>
84  void print_continious_matrix(VectorType& con_A_I_J, std::vector<cl_uint>& blocks_ind,
85  const std::vector<std::vector<unsigned int> >& g_I, const std::vector<std::vector<unsigned int> >& g_J){
86  typedef typename VectorType::value_type ScalarType;
87  std::vector<boost::numeric::ublas::matrix<ScalarType> > com_A_I_J(g_I.size());
88  for(size_t i = 0; i < g_I.size(); ++i){
89  write_to_block( con_A_I_J, blocks_ind[i], g_I[i], g_J[i], com_A_I_J[i]);
90  std::cout<<com_A_I_J[i]<<std::endl;
91  }
92  }
93  template<typename VectorType>
94  void print_continious_vector(VectorType& con_v, std::vector<cl_uint>& block_ind, const std::vector<std::vector<unsigned int> >& g_J){
95  typedef typename VectorType::value_type ScalarType;
96  std::vector<boost::numeric::ublas::vector<ScalarType> > com_v(g_J.size());
97  //Print<ScalarType>(std::cout, con_v.begin(), con_v.end());
98  for(size_t i = 0; i < g_J.size(); ++i){
99  com_v[i].resize(g_J[i].size());
100  for(size_t j = 0; j < g_J[i].size(); ++j){
101  com_v[i](j) = con_v[block_ind[i] + j];
102  }
103  std::cout<<com_v[i]<<std::endl;
104  }
105  }
106 
108 
115  void compute_blocks_size(const std::vector<std::vector<unsigned int> >& g_I, const std::vector<std::vector<unsigned int> >& g_J,
116  unsigned int& sz, std::vector<cl_uint>& blocks_ind, std::vector<cl_uint>& matrix_dims){
117  sz = 0;
118  for(size_t i = 0; i < g_I.size(); ++i){
119  sz += static_cast<unsigned int>(g_I[i].size()*g_J[i].size());
120  matrix_dims[2*i] = static_cast<cl_uint>(g_I[i].size());
121  matrix_dims[2*i + 1] = static_cast<cl_uint>(g_J[i].size());
122  blocks_ind[i+1] = blocks_ind[i] + static_cast<cl_uint>(g_I[i].size()*g_J[i].size());
123 
124  }
125  }
130  void get_size(const std::vector<std::vector<unsigned int> >& inds, unsigned int& size){
131  size = 0;
132  for (size_t i = 0; i < inds.size(); ++i) {
133  size += static_cast<unsigned int>(inds[i].size());
134  }
135  }
136 
141  void init_start_inds(const std::vector<std::vector<unsigned int> >& inds, std::vector<cl_uint>& start_inds){
142  for(size_t i = 0; i < inds.size(); ++i){
143  start_inds[i+1] = start_inds[i] + static_cast<cl_uint>(inds[i].size());
144  }
145  }
146 
147  //************************************* QR FUNCTIONS ***************************************//
153  template<typename MatrixType, typename ScalarType>
154  void dot_prod(const MatrixType& A, unsigned int beg_ind, ScalarType& res){
155  res = static_cast<ScalarType>(0);
156  for(size_t i = beg_ind; i < A.size1(); ++i){
157  res += A(i, beg_ind-1)*A(i, beg_ind-1);
158  }
159  }
167  template<typename MatrixType, typename VectorType, typename ScalarType>
168  void custom_inner_prod(const MatrixType& A, const VectorType& v, unsigned int col_ind, unsigned int start_ind, ScalarType& res){
169  res = static_cast<ScalarType>(0);
170  for(unsigned int i = start_ind; i < static_cast<unsigned int>(A.size1()); ++i){
171  res += A(i, col_ind)*v(i);
172  }
173  }
174 
180  template<typename MatrixType, typename VectorType>
181  void copy_vector(const MatrixType & A, VectorType & v, const unsigned int beg_ind){
182  for(unsigned int i = beg_ind; i < static_cast<unsigned int>(A.size1()); ++i){
183  v(i) = A( i, beg_ind-1);
184  }
185  }
186 
187  //householder reflection c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.210
194  template<typename MatrixType, typename VectorType, typename ScalarType>
195  void householder_vector(const MatrixType& A, unsigned int j, VectorType& v, ScalarType& b){
196  ScalarType sg;
197  //
198  dot_prod(A, j+1, sg);
199  copy_vector(A, v, j+1);
200  ScalarType mu;
201  v(j) = static_cast<ScalarType>(1.0);
202  if(sg == 0){
203  b = 0;
204  }
205  else{
206  mu = std::sqrt(A(j,j)*A(j, j) + sg);
207  if(A(j, j) <= 0){
208  v(j) = A(j, j) - mu;
209  }else{
210  v(j) = -sg/(A(j, j) + mu);
211  }
212  b = 2*(v(j)*v(j))/(sg + v(j)*v(j));
213  v = v/v(j);
214  }
215  }
222  template<typename MatrixType, typename VectorType, typename ScalarType>
223  void apply_householder_reflection(MatrixType& A, unsigned int iter_cnt, VectorType& v, ScalarType b){
224  //update every column of matrix A
225  ScalarType in_prod_res;
226  for(unsigned int i = iter_cnt; i < static_cast<unsigned int>(A.size2()); ++i){
227  //update each column in a fashion: ai = ai - b*v*(v'*ai)
228  custom_inner_prod(A, v, i, iter_cnt, in_prod_res);
229  for(unsigned int j = iter_cnt; j < static_cast<unsigned int>(A.size1()); ++j){
230  A(j, i) -= b*in_prod_res*v(j);
231  }
232  }
233  }
234 
240  template<typename MatrixType, typename VectorType>
241  void store_householder_vector(MatrixType& A, unsigned int ind, VectorType& v){
242  for(unsigned int i = ind; i < static_cast<unsigned int>(A.size1()); ++i){
243  A(i, ind-1) = v(i);
244  }
245  }
246 
247 
248  //QR algorithm
253  template<typename MatrixType, typename VectorType>
254  void single_qr(MatrixType& R, VectorType& b_v){
255  typedef typename MatrixType::value_type ScalarType;
256  if((R.size1() > 0) && (R.size2() > 0)){
257  VectorType v = (VectorType)boost::numeric::ublas::zero_vector<ScalarType>(R.size1());
258  b_v = (VectorType)boost::numeric::ublas::zero_vector<ScalarType>(R.size2());
259  for(unsigned int i = 0; i < static_cast<unsigned int>(R.size2()); ++i){
260  householder_vector(R, i, v, b_v[i]);
261  apply_householder_reflection(R, i, v, b_v[i]);
262  if(i < R.size1()) store_householder_vector(R, i+1, v);
263  }
264  }
265  }
266 
267  //********************** HELP FUNCTIONS FOR GPU-based QR factorization *************************//
272  void read_kernel_from_file(std::string& file_name, std::string& kernel_source){
273  std::ifstream ifs(file_name.c_str(), std::ifstream::in);
274 
275  if (!ifs)
276  std::cerr << "WARNING: Cannot open file " << file_name << std::endl;
277 
278  std::string line;
279  std::ostringstream ost;
280  while (std::getline(ifs, line)) {
281  ost<<line<<std::endl;
282  }
283  kernel_source = ost.str();
284  }
285 
290  void get_max_block_size(const std::vector<std::vector<unsigned int> >& inds, unsigned int& max_size){
291  max_size = 0;
292  for(unsigned int i = 0; i < inds.size(); ++i){
293  if(inds[i].size() > max_size){
294  max_size = static_cast<unsigned int>(inds[i].size());
295  }
296  }
297  }
298 
305  template<typename MatrixType, typename VectorType, typename ScalarType>
306  void custom_dot_prod(const MatrixType& A, const VectorType& v, unsigned int ind, ScalarType& res){
307  res = static_cast<ScalarType>(0);
308  for(unsigned int j = ind; j < A.size1(); ++j){
309  if(j == ind){
310  res += v(j);
311  }else{
312  res += A(j, ind)*v(j);
313  }
314  }
315  }
316 
322  template<typename MatrixType, typename VectorType>
323  void apply_q_trans_vec(const MatrixType& R, const VectorType& b_v, VectorType& y){
324  typedef typename MatrixType::value_type ScalarType;
325  ScalarType inn_prod = static_cast<ScalarType>(0);
326  for(size_t i = 0; i < R.size2(); ++i){
327  custom_dot_prod(R, y, static_cast<unsigned int>(i), inn_prod);
328  for(size_t j = i; j < R.size1(); ++j){
329  if(i == j){
330  y(j) -= b_v(i)*inn_prod;
331  }
332  else{
333  y(j) -= b_v(i)*inn_prod*R(j,i);
334  }
335  }
336  }
337  }
338 
344  template<typename MatrixType, typename VectorType>
345  void apply_q_trans_mat(const MatrixType& R, const VectorType& b_v, MatrixType& A){
346  VectorType tmp_v;
347  for(size_t i = 0; i < A.size2(); ++i){
348  tmp_v = (VectorType)column(A,i);
349  apply_q_trans_vec(R, b_v, tmp_v);
350  column(A,i) = tmp_v;
351  }
352  }
353 
354  //parallel QR for GPU
364  template<typename ScalarType>
365  void block_qr(std::vector<std::vector<unsigned int> >& g_I,
366  std::vector<std::vector<unsigned int> >& g_J,
367  block_matrix& g_A_I_J_vcl,
368  block_vector& g_bv_vcl,
369  std::vector<cl_uint>& g_is_update,
370  const unsigned int cur_iter){
371  //typedef typename MatrixType::value_type ScalarType;
372  unsigned int bv_size;
373  unsigned int v_size;
374  //set up arguments for GPU
375  //find maximum size of rows/columns
376  unsigned int local_r_n, local_c_n;
377  //find max size for blocks
378  get_max_block_size(g_I, local_r_n);
379  get_max_block_size(g_J, local_c_n);
380  //get size
381  get_size(g_J, bv_size);
382  get_size(g_I, v_size);
383  //get start indices
384  std::vector<cl_uint> start_bv_inds(g_I.size() + 1, 0);
385  std::vector<cl_uint> start_v_inds(g_I.size() + 1, 0);
386  init_start_inds(g_J, start_bv_inds);
387  init_start_inds(g_I, start_v_inds);
388  //init arrays
389  std::vector<ScalarType> b_v(bv_size, static_cast<ScalarType>(0));
390  std::vector<ScalarType> v(v_size, static_cast<ScalarType>(0));
391  //call qr program
392  block_vector v_vcl;
393  /*if(cur_iter == 0)
394  {
395  //if first run - compile the program
396  std::string qr_kernel_file_name = "kernels/spai/qr3_a_n.cl";
397  std::string qr_kernel_source;
398  read_kernel_from_file(qr_kernel_file_name, qr_kernel_source);
399  viennacl::ocl::program & qr_prog = viennacl::ocl::current_context().add_program(qr_kernel_source.c_str(), "qr_kernel_source");
400  qr_prog.add_kernel("block_qr");
401  //
402  }*/
403 
404  g_bv_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
405  static_cast<unsigned int>(sizeof(ScalarType)*bv_size),
406  &(b_v[0]));
407 
408  v_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
409  static_cast<unsigned int>(sizeof(ScalarType)*v_size),
410  &(v[0]));
411  //the same as j_start_inds
412  g_bv_vcl.handle1() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
413  static_cast<unsigned int>(sizeof(cl_uint)*g_I.size()),
414  &(start_bv_inds[0]));
415 
416  v_vcl.handle1() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
417  static_cast<unsigned int>(sizeof(cl_uint)*g_I.size()),
418  &(start_v_inds[0]));
419  viennacl::ocl::handle<cl_mem> g_is_update_vcl = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
420  static_cast<unsigned int>(sizeof(cl_uint)*g_is_update.size()),
421  &(g_is_update[0]));
422  //local memory
423  //viennacl::ocl::enqueue(k(vcl_vec, size, viennacl::ocl::local_mem(sizeof(SCALARTYPE) * k.local_work_size()), temp));
424  viennacl::ocl::kernel& qr_kernel = viennacl::ocl::get_kernel(viennacl::linalg::kernels::spai<ScalarType, 1>::program_name(), "block_qr");
425  qr_kernel.local_work_size(0, local_c_n);
426  qr_kernel.global_work_size(0, 256);
427  viennacl::ocl::enqueue(qr_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle1(), g_bv_vcl.handle(),
428  v_vcl.handle(), g_A_I_J_vcl.handle2(),
429  g_bv_vcl.handle1(), v_vcl.handle1(), g_is_update_vcl,
430  viennacl::ocl::local_mem(static_cast<unsigned int>(sizeof(ScalarType)*(local_r_n*local_c_n))),
431  static_cast<cl_uint>(g_I.size())));
432 
433  }
434  }
435  }
436  }
437 }
438 #endif