ViennaCL - The Vienna Computing Library  1.2.0
gmres.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_GMRES_HPP_
2 #define VIENNACL_GMRES_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2011, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
24 #include <vector>
25 #include <cmath>
26 #include <limits>
27 #include "viennacl/forwards.h"
28 #include "viennacl/tools/tools.hpp"
30 #include "viennacl/linalg/prod.hpp"
33 #include "viennacl/traits/size.hpp"
35 
36 namespace viennacl
37 {
38  namespace linalg
39  {
40 
43  class gmres_tag //generalized minimum residual
44  {
45  public:
52  gmres_tag(double tol = 1e-10, unsigned int max_iterations = 300, unsigned int krylov_dim = 20)
53  : _tol(tol), _iterations(max_iterations), _krylov_dim(krylov_dim), iters_taken_(0) {};
54 
56  double tolerance() const { return _tol; }
58  unsigned int max_iterations() const { return _iterations; }
60  unsigned int krylov_dim() const { return _krylov_dim; }
62  unsigned int max_restarts() const
63  {
64  unsigned int ret = _iterations / _krylov_dim;
65  if (ret > 0 && (ret * _krylov_dim == _iterations) )
66  return ret - 1;
67  return ret;
68  }
69 
71  unsigned int iters() const { return iters_taken_; }
73  void iters(unsigned int i) const { iters_taken_ = i; }
74 
76  double error() const { return last_error_; }
78  void error(double e) const { last_error_ = e; }
79 
80  private:
81  double _tol;
82  unsigned int _iterations;
83  unsigned int _krylov_dim;
84 
85  //return values from solver
86  mutable unsigned int iters_taken_;
87  mutable double last_error_;
88  };
89 
90  namespace
91  {
92 
93  template <typename SRC_VECTOR, typename DEST_VECTOR>
94  void gmres_copy_helper(SRC_VECTOR const & src, DEST_VECTOR & dest, unsigned int len)
95  {
96  for (unsigned int i=0; i<len; ++i)
97  dest[i] = src[i];
98  }
99 
100  template <typename ScalarType, typename DEST_VECTOR>
101  void gmres_copy_helper(viennacl::vector<ScalarType> const & src, DEST_VECTOR & dest, unsigned int len)
102  {
103  viennacl::copy(src.begin(), src.begin() + len, dest.begin());
104  }
105 
106  template <typename ScalarType>
107  void gmres_copy_helper(viennacl::vector<ScalarType> const & src, viennacl::vector<ScalarType> & dest, unsigned int len)
108  {
109  viennacl::copy(src.begin(), src.begin() + len, dest.begin());
110  }
111 
112  }
113 
124  template <typename MatrixType, typename VectorType, typename PreconditionerType>
125  VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag, PreconditionerType const & precond)
126  {
127  typedef typename viennacl::result_of::value_type<VectorType>::type ScalarType;
128  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
129  unsigned int problem_size = viennacl::traits::size(rhs);
130  VectorType result(problem_size);
131  viennacl::traits::clear(result);
132  unsigned int krylov_dim = tag.krylov_dim();
133  if (problem_size < tag.krylov_dim())
134  krylov_dim = problem_size; //A Krylov space larger than the matrix would lead to seg-faults (mathematically, error is certain to be zero already)
135 
136  VectorType res(problem_size);
137  VectorType v_k_tilde(problem_size);
138  VectorType v_k_tilde_temp(problem_size);
139 
140  std::vector< std::vector<CPU_ScalarType> > R(krylov_dim);
141  std::vector<CPU_ScalarType> projection_rhs(krylov_dim);
142  std::vector<VectorType> U(krylov_dim);
143 
144  const CPU_ScalarType gpu_scalar_minus_1 = static_cast<CPU_ScalarType>(-1); //representing the scalar '-1' on the GPU. Prevents blocking write operations
145  const CPU_ScalarType gpu_scalar_1 = static_cast<CPU_ScalarType>(1); //representing the scalar '1' on the GPU. Prevents blocking write operations
146  const CPU_ScalarType gpu_scalar_2 = static_cast<CPU_ScalarType>(2); //representing the scalar '2' on the GPU. Prevents blocking write operations
147 
148  CPU_ScalarType norm_rhs = viennacl::linalg::norm_2(rhs);
149 
150  unsigned int k;
151  for (k = 0; k < krylov_dim; ++k)
152  {
153  R[k].resize(tag.krylov_dim());
154  viennacl::traits::resize(U[k], problem_size);
155  }
156 
157  //std::cout << "Starting GMRES..." << std::endl;
158  tag.iters(0);
159 
160  for (unsigned int it = 0; it <= tag.max_restarts(); ++it)
161  {
162  //std::cout << "-- GMRES Start " << it << " -- " << std::endl;
163 
164  res = rhs;
165  res -= viennacl::linalg::prod(matrix, result); //initial guess zero
166  precond.apply(res);
167  //std::cout << "Residual: " << res << std::endl;
168 
169  CPU_ScalarType rho_0 = viennacl::linalg::norm_2(res);
170  CPU_ScalarType rho = static_cast<CPU_ScalarType>(1.0);
171  //std::cout << "rho_0: " << rho_0 << std::endl;
172 
173  if (rho_0 / norm_rhs < tag.tolerance() || (norm_rhs == CPU_ScalarType(0.0)) )
174  {
175  //std::cout << "Allowed Error reached at begin of loop" << std::endl;
176  tag.error(rho_0 / norm_rhs);
177  return result;
178  }
179 
180  res /= rho_0;
181  //std::cout << "Normalized Residual: " << res << std::endl;
182 
183  for (k=0; k<krylov_dim; ++k)
184  {
187  R[k].resize(krylov_dim);
188  viennacl::traits::resize(U[k], problem_size);
189  }
190 
191  for (k = 0; k < krylov_dim; ++k)
192  {
193  tag.iters( tag.iters() + 1 ); //increase iteration counter
194 
195  //compute v_k = A * v_{k-1} via Householder matrices
196  if (k == 0)
197  {
198  v_k_tilde = viennacl::linalg::prod(matrix, res);
199  precond.apply(v_k_tilde);
200  }
201  else
202  {
203  viennacl::traits::clear(v_k_tilde);
204  v_k_tilde[k-1] = gpu_scalar_1;
205  //Householder rotations part 1
206  for (int i = k-1; i > -1; --i)
207  v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2);
208 
209  v_k_tilde_temp = viennacl::linalg::prod(matrix, v_k_tilde);
210  precond.apply(v_k_tilde_temp);
211  v_k_tilde = v_k_tilde_temp;
212 
213  //Householder rotations part 2
214  for (unsigned int i = 0; i < k; ++i)
215  v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2);
216  }
217 
218  //std::cout << "v_k_tilde: " << v_k_tilde << std::endl;
219 
221  viennacl::traits::resize(U[k], problem_size);
222  //copy first k entries from v_k_tilde to U[k]:
223  gmres_copy_helper(v_k_tilde, U[k], k);
224 
225  U[k][k] = std::sqrt( viennacl::linalg::inner_prod(v_k_tilde, v_k_tilde) - viennacl::linalg::inner_prod(U[k], U[k]) );
226 
227  if (fabs(U[k][k]) < CPU_ScalarType(10 * std::numeric_limits<CPU_ScalarType>::epsilon()))
228  break; //Note: Solution is essentially (up to round-off error) already in Krylov space. No need to proceed.
229 
230  //copy first k+1 entries from U[k] to R[k]
231  gmres_copy_helper(U[k], R[k], k+1);
232 
233  U[k] -= v_k_tilde;
234  //std::cout << "U[k] before normalization: " << U[k] << std::endl;
235  U[k] *= gpu_scalar_minus_1 / viennacl::linalg::norm_2( U[k] );
236  //std::cout << "Householder vector U[k]: " << U[k] << std::endl;
237 
238  //DEBUG: Make sure that P_k v_k_tilde equals (rho_{1,k}, ... , rho_{k,k}, 0, 0 )
239 #ifdef VIENNACL_GMRES_DEBUG
240  std::cout << "P_k v_k_tilde: " << (v_k_tilde - 2.0 * U[k] * inner_prod(U[k], v_k_tilde)) << std::endl;
241  std::cout << "R[k]: [" << R[k].size() << "](";
242  for (size_t i=0; i<R[k].size(); ++i)
243  std::cout << R[k][i] << ",";
244  std::cout << ")" << std::endl;
245 #endif
246  //std::cout << "P_k res: " << (res - 2.0 * U[k] * inner_prod(U[k], res)) << std::endl;
247  res -= U[k] * (viennacl::linalg::inner_prod( U[k], res ) * gpu_scalar_2);
248  //std::cout << "zeta_k: " << viennacl::linalg::inner_prod( U[k], res ) * gpu_scalar_2 << std::endl;
249  //std::cout << "Updated res: " << res << std::endl;
250 
251 #ifdef VIENNACL_GMRES_DEBUG
252  VectorType v1(U[k].size()); v1.clear(); v1.resize(U[k].size());
253  v1(0) = 1.0;
254  v1 -= U[k] * (viennacl::linalg::inner_prod( U[k], v1 ) * gpu_scalar_2);
255  std::cout << "v1: " << v1 << std::endl;
256  boost::numeric::ublas::matrix<ScalarType> P = -2.0 * outer_prod(U[k], U[k]);
257  P(0,0) += 1.0; P(1,1) += 1.0; P(2,2) += 1.0;
258  std::cout << "P: " << P << std::endl;
259 #endif
260 
261  if (res[k] > rho) //machine precision reached
262  res[k] = rho;
263 
264  if (res[k] < -1.0 * rho) //machine precision reached
265  res[k] = -1.0 * rho;
266 
267  projection_rhs[k] = res[k];
268 
269  rho *= std::sin( std::acos(projection_rhs[k] / rho) );
270 
271 #ifdef VIENNACL_GMRES_DEBUG
272  std::cout << "k-th component of r: " << res[k] << std::endl;
273  std::cout << "New rho (norm of res): " << rho << std::endl;
274 #endif
275 
276  if (std::fabs(rho * rho_0 / norm_rhs) < tag.tolerance())
277  {
278  //std::cout << "Krylov space big enough" << endl;
279  tag.error( std::fabs(rho*rho_0 / norm_rhs) );
280  ++k;
281  break;
282  }
283 
284  //std::cout << "Current residual: " << rho * rho_0 << std::endl;
285  //std::cout << " - End of Krylov space setup - " << std::endl;
286  } // for k
287 
288 #ifdef VIENNACL_GMRES_DEBUG
289  //inplace solution of the upper triangular matrix:
290  std::cout << "Upper triangular system:" << std::endl;
291  std::cout << "Size of Krylov space: " << k << std::endl;
292  for (size_t i=0; i<k; ++i)
293  {
294  for (size_t j=0; j<k; ++j)
295  {
296  std::cout << R[j][i] << ", ";
297  }
298  std::cout << " | " << projection_rhs[i] << std::endl;
299  }
300 #endif
301 
302  for (int i=k-1; i>-1; --i)
303  {
304  for (unsigned int j=i+1; j<k; ++j)
305  //temp_rhs[i] -= R[i][j] * temp_rhs[j]; //if R is not transposed
306  projection_rhs[i] -= R[j][i] * projection_rhs[j]; //R is transposed
307 
308  projection_rhs[i] /= R[i][i];
309  }
310 
311 #ifdef VIENNACL_GMRES_DEBUG
312  std::cout << "Result of triangular solver: ";
313  for (size_t i=0; i<k; ++i)
314  std::cout << projection_rhs[i] << ", ";
315  std::cout << std::endl;
316 #endif
317  res *= projection_rhs[0];
318 
319  if (k > 0)
320  {
321  for (unsigned int i = 0; i < k-1; ++i)
322  {
323  res[i] += projection_rhs[i+1];
324  }
325  }
326 
327  for (int i = k-1; i > -1; --i)
328  res -= U[i] * (viennacl::linalg::inner_prod(U[i], res) * gpu_scalar_2);
329 
330  res *= rho_0;
331  result += res;
332 
333  if ( std::fabs(rho*rho_0 / norm_rhs) < tag.tolerance() )
334  {
335  //std::cout << "Allowed Error reached at end of loop" << std::endl;
336  tag.error(std::fabs(rho*rho_0 / norm_rhs));
337  return result;
338  }
339 
340  //res = rhs;
341  //res -= viennacl::linalg::prod(matrix, result);
342  //std::cout << "norm_2(r)=" << norm_2(r) << std::endl;
343  //std::cout << "std::abs(rho*rho_0)=" << std::abs(rho*rho_0) << std::endl;
344  //std::cout << r << std::endl;
345 
346  tag.error(std::fabs(rho*rho_0));
347  }
348 
349  return result;
350  }
351 
354  template <typename MatrixType, typename VectorType>
355  VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag)
356  {
357  return solve(matrix, rhs, tag, no_precond());
358  }
359 
360 
361  }
362 }
363 
364 #endif