ViennaCL - The Vienna Computing Library  1.2.0
bicgstab.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_BICGSTAB_HPP_
2 #define VIENNACL_BICGSTAB_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2011, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
24 #include <vector>
25 #include <cmath>
26 #include "viennacl/forwards.h"
27 #include "viennacl/tools/tools.hpp"
28 #include "viennacl/linalg/prod.hpp"
31 #include "viennacl/traits/size.hpp"
33 
34 namespace viennacl
35 {
36  namespace linalg
37  {
38 
42  {
43  public:
49  bicgstab_tag(double tol = 1e-8, unsigned int max_iterations = 300) : _tol(tol), _iterations(max_iterations) {};
50 
52  double tolerance() const { return _tol; }
54  unsigned int max_iterations() const { return _iterations; }
55 
57  unsigned int iters() const { return iters_taken_; }
58  void iters(unsigned int i) const { iters_taken_ = i; }
59 
61  double error() const { return last_error_; }
63  void error(double e) const { last_error_ = e; }
64 
65  private:
66  double _tol;
67  unsigned int _iterations;
68 
69  //return values from solver
70  mutable unsigned int iters_taken_;
71  mutable double last_error_;
72  };
73 
74 
84  template <typename MatrixType, typename VectorType>
85  VectorType solve(const MatrixType & matrix, VectorType const & rhs, bicgstab_tag const & tag)
86  {
87  typedef typename viennacl::result_of::value_type<VectorType>::type ScalarType;
88  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
89  unsigned int problem_size = viennacl::traits::size(rhs);
90  VectorType result(problem_size);
92 
93  VectorType residual = rhs;
94  VectorType p = rhs;
95  VectorType r0star = rhs;
96  VectorType tmp0(problem_size);
97  VectorType tmp1(problem_size);
98  VectorType s(problem_size);
99 
100  CPU_ScalarType ip_rr0star = viennacl::linalg::inner_prod(rhs,r0star);
101  CPU_ScalarType norm_rhs_host = ip_rr0star;
102  CPU_ScalarType beta;
103  CPU_ScalarType alpha;
104  CPU_ScalarType omega;
105  ScalarType inner_prod_temp; //temporary variable for inner product computation
106  ScalarType new_ip_rr0star = 0;
107 
108  for (unsigned int i = 0; i < tag.max_iterations(); ++i)
109  {
110  tag.iters(i+1);
111  tmp0 = viennacl::linalg::prod(matrix, p);
112  //alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star);
113  inner_prod_temp = viennacl::linalg::inner_prod(tmp0, r0star);
114  alpha = ip_rr0star / static_cast<CPU_ScalarType>(inner_prod_temp);
115 
116  //s = residual - alpha*tmp0;
117  s = residual;
118  s -= alpha*tmp0;
119 
120  tmp1 = viennacl::linalg::prod(matrix, s);
121  //omega = viennacl::linalg::inner_prod(tmp1, s) / viennacl::linalg::inner_prod(tmp1, tmp1);
122  inner_prod_temp = viennacl::linalg::inner_prod(tmp1, s);
123  omega = inner_prod_temp;
124  inner_prod_temp = viennacl::linalg::inner_prod(tmp1, tmp1);
125  omega /= inner_prod_temp;
126 
127  //result += alpha * p + omega * s;
128  result += alpha * p;
129  result += omega * s;
130 
131  //residual = s - omega * tmp1;
132  residual = s;
133  residual -= omega*tmp1;
134 
135  new_ip_rr0star = viennacl::linalg::inner_prod(residual,r0star);
136  if (fabs(CPU_ScalarType(viennacl::linalg::inner_prod(residual, residual)) / norm_rhs_host) < tag.tolerance() * tag.tolerance())
137  break;
138 
139  //beta = new_ip_rr0star / ip_rr0star * alpha/omega;
140  CPU_ScalarType cpu_temp = new_ip_rr0star; //read from device only once
141  beta = cpu_temp / ip_rr0star * alpha/omega;
142  ip_rr0star = cpu_temp;
143 
144  // Execution of
145  // p = residual + beta * (p - omega*tmp0);
146  // without introducing temporary vectors:
147  p -= omega * tmp0;
148  p *= beta;
149  p += residual;
150  }
151 
152  //store last error estimate:
153  tag.error(std::sqrt(fabs(CPU_ScalarType(viennacl::linalg::inner_prod(residual, residual)) / norm_rhs_host)));
154 
155  return result;
156  }
157 
158  template <typename MatrixType, typename VectorType>
159  VectorType solve(const MatrixType & matrix, VectorType const & rhs, bicgstab_tag const & tag, viennacl::linalg::no_precond)
160  {
161  return solve(matrix, rhs, tag);
162  }
163 
174  template <typename MatrixType, typename VectorType, typename PreconditionerType>
175  VectorType solve(const MatrixType & matrix, VectorType const & rhs, bicgstab_tag const & tag, PreconditionerType const & precond)
176  {
177  typedef typename viennacl::result_of::value_type<VectorType>::type ScalarType;
178  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
179  unsigned int problem_size = viennacl::traits::size(rhs);
180  VectorType result(problem_size);
181  result.clear();
182 
183  VectorType residual = rhs;
184  precond.apply(residual);
185  VectorType r0star = residual; //can be chosen arbitrarily in fact
186  VectorType tmp0(problem_size);
187  VectorType tmp1(problem_size);
188  VectorType s(problem_size);
189 
190  VectorType p = residual;
191 
192  CPU_ScalarType ip_rr0star = viennacl::linalg::inner_prod(residual,r0star);
193  CPU_ScalarType norm_rhs_host = ip_rr0star;
194  CPU_ScalarType beta;
195  CPU_ScalarType alpha;
196  CPU_ScalarType omega;
197  ScalarType new_ip_rr0star = 0;
198  ScalarType inner_prod_temp; //temporary variable for inner product
199 
200  for (unsigned int i = 0; i < tag.max_iterations(); ++i)
201  {
202  tag.iters(i+1);
203  tmp0 = viennacl::linalg::prod(matrix, p);
204  precond.apply(tmp0);
205  //alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star);
206  inner_prod_temp = viennacl::linalg::inner_prod(tmp0, r0star);
207  alpha = ip_rr0star / static_cast<CPU_ScalarType>(inner_prod_temp);
208 
209  //s = residual - alpha*tmp0;
210  s = residual;
211  s -= alpha*tmp0;
212 
213  tmp1 = viennacl::linalg::prod(matrix, s);
214  precond.apply(tmp1);
215  //omega = viennacl::linalg::inner_prod(tmp1, s) / viennacl::linalg::inner_prod(tmp1, tmp1);
216  inner_prod_temp = viennacl::linalg::inner_prod(tmp1, s);
217  omega = inner_prod_temp;
218  inner_prod_temp = viennacl::linalg::inner_prod(tmp1, tmp1);
219  omega /= inner_prod_temp;
220 
221  //result += alpha * p + omega * s;
222  result += alpha * p;
223  result += omega * s;
224  //residual = s - omega * tmp1;
225  residual = s;
226  residual -= omega*tmp1;
227 
228  new_ip_rr0star = viennacl::linalg::inner_prod(residual,r0star);
229  if (fabs(CPU_ScalarType(viennacl::linalg::inner_prod(residual, residual) / norm_rhs_host)) < tag.tolerance() * tag.tolerance() )
230  break;
231 
232  //beta = new_ip_rr0star / ip_rr0star * alpha/omega;
233  CPU_ScalarType cpu_temp = new_ip_rr0star; //read from device only once
234  beta = cpu_temp / ip_rr0star * alpha/omega;
235  ip_rr0star = cpu_temp;
236 
237  // Execution of
238  // p = residual + beta * (p - omega*tmp0);
239  // without introducing temporary vectors:
240  p -= omega * tmp0;
241  p *= beta;
242  p += residual;
243 
244  //std::cout << "Rel. Residual in current step: " << std::sqrt(std::fabs(viennacl::linalg::inner_prod(residual, residual) / norm_rhs_host)) << std::endl;
245  }
246 
247  //store last error estimate:
248  tag.error(std::sqrt(fabs(CPU_ScalarType(viennacl::linalg::inner_prod(residual, residual)) / norm_rhs_host)));
249 
250  return result;
251  }
252 
253  }
254 }
255 
256 #endif