1 #ifndef VIENNACL_GMRES_HPP_
2 #define VIENNACL_GMRES_HPP_
64 unsigned int ret = _iterations / _krylov_dim;
65 if (ret > 0 && (ret * _krylov_dim == _iterations) )
71 unsigned int iters()
const {
return iters_taken_; }
73 void iters(
unsigned int i)
const { iters_taken_ = i; }
76 double error()
const {
return last_error_; }
78 void error(
double e)
const { last_error_ = e; }
82 unsigned int _iterations;
83 unsigned int _krylov_dim;
86 mutable unsigned int iters_taken_;
87 mutable double last_error_;
93 template <
typename SRC_VECTOR,
typename DEST_VECTOR>
94 void gmres_copy_helper(SRC_VECTOR
const & src, DEST_VECTOR & dest,
unsigned int len)
96 for (
unsigned int i=0; i<len; ++i)
100 template <
typename ScalarType,
typename DEST_VECTOR>
106 template <
typename ScalarType>
124 template <
typename MatrixType,
typename VectorType,
typename PreconditionerType>
125 VectorType
solve(
const MatrixType &
matrix, VectorType
const & rhs,
gmres_tag const & tag, PreconditionerType
const & precond)
130 VectorType result(problem_size);
134 krylov_dim = problem_size;
136 VectorType res(problem_size);
137 VectorType v_k_tilde(problem_size);
138 VectorType v_k_tilde_temp(problem_size);
140 std::vector< std::vector<CPU_ScalarType> > R(krylov_dim);
141 std::vector<CPU_ScalarType> projection_rhs(krylov_dim);
142 std::vector<VectorType> U(krylov_dim);
144 const CPU_ScalarType gpu_scalar_minus_1 =
static_cast<CPU_ScalarType
>(-1);
145 const CPU_ScalarType gpu_scalar_1 =
static_cast<CPU_ScalarType
>(1);
146 const CPU_ScalarType gpu_scalar_2 =
static_cast<CPU_ScalarType
>(2);
151 for (k = 0; k < krylov_dim; ++k)
160 for (
unsigned int it = 0; it <= tag.
max_restarts(); ++it)
170 CPU_ScalarType rho =
static_cast<CPU_ScalarType
>(1.0);
173 if (rho_0 / norm_rhs < tag.
tolerance() || (norm_rhs == CPU_ScalarType(0.0)) )
176 tag.
error(rho_0 / norm_rhs);
183 for (k=0; k<krylov_dim; ++k)
187 R[k].resize(krylov_dim);
191 for (k = 0; k < krylov_dim; ++k)
199 precond.apply(v_k_tilde);
204 v_k_tilde[k-1] = gpu_scalar_1;
206 for (
int i = k-1; i > -1; --i)
210 precond.apply(v_k_tilde_temp);
211 v_k_tilde = v_k_tilde_temp;
214 for (
unsigned int i = 0; i < k; ++i)
223 gmres_copy_helper(v_k_tilde, U[k], k);
227 if (fabs(U[k][k]) < CPU_ScalarType(10 * std::numeric_limits<CPU_ScalarType>::epsilon()))
231 gmres_copy_helper(U[k], R[k], k+1);
239 #ifdef VIENNACL_GMRES_DEBUG
240 std::cout <<
"P_k v_k_tilde: " << (v_k_tilde - 2.0 * U[k] *
inner_prod(U[k], v_k_tilde)) << std::endl;
241 std::cout <<
"R[k]: [" << R[k].size() <<
"](";
242 for (
size_t i=0; i<R[k].size(); ++i)
243 std::cout << R[k][i] <<
",";
244 std::cout <<
")" << std::endl;
251 #ifdef VIENNACL_GMRES_DEBUG
252 VectorType v1(U[k].
size()); v1.clear(); v1.resize(U[k].
size());
255 std::cout <<
"v1: " << v1 << std::endl;
256 boost::numeric::ublas::matrix<ScalarType> P = -2.0 *
outer_prod(U[k], U[k]);
257 P(0,0) += 1.0; P(1,1) += 1.0; P(2,2) += 1.0;
258 std::cout <<
"P: " << P << std::endl;
264 if (res[k] < -1.0 * rho)
267 projection_rhs[k] = res[k];
269 rho *= std::sin( std::acos(projection_rhs[k] / rho) );
271 #ifdef VIENNACL_GMRES_DEBUG
272 std::cout <<
"k-th component of r: " << res[k] << std::endl;
273 std::cout <<
"New rho (norm of res): " << rho << std::endl;
276 if (std::fabs(rho * rho_0 / norm_rhs) < tag.
tolerance())
279 tag.
error( std::fabs(rho*rho_0 / norm_rhs) );
288 #ifdef VIENNACL_GMRES_DEBUG
290 std::cout <<
"Upper triangular system:" << std::endl;
291 std::cout <<
"Size of Krylov space: " << k << std::endl;
292 for (
size_t i=0; i<k; ++i)
294 for (
size_t j=0; j<k; ++j)
296 std::cout << R[j][i] <<
", ";
298 std::cout <<
" | " << projection_rhs[i] << std::endl;
302 for (
int i=k-1; i>-1; --i)
304 for (
unsigned int j=i+1; j<k; ++j)
306 projection_rhs[i] -= R[j][i] * projection_rhs[j];
308 projection_rhs[i] /= R[i][i];
311 #ifdef VIENNACL_GMRES_DEBUG
312 std::cout <<
"Result of triangular solver: ";
313 for (
size_t i=0; i<k; ++i)
314 std::cout << projection_rhs[i] <<
", ";
315 std::cout << std::endl;
317 res *= projection_rhs[0];
321 for (
unsigned int i = 0; i < k-1; ++i)
323 res[i] += projection_rhs[i+1];
327 for (
int i = k-1; i > -1; --i)
333 if ( std::fabs(rho*rho_0 / norm_rhs) < tag.
tolerance() )
336 tag.
error(std::fabs(rho*rho_0 / norm_rhs));
346 tag.
error(std::fabs(rho*rho_0));
354 template <
typename MatrixType,
typename VectorType>