
regression.hxx
/************************************************************************/
/*                                                                      */
/*                Copyright 2008 by Ullrich Koethe                      */
/*                                                                      */
/*    This file is part of the VIGRA computer vision library.           */
/*    The VIGRA Website is                                              */
/*        http://hci.iwr.uni-heidelberg.de/vigra/                       */
/*    Please direct questions, bug reports, and contributions to        */
/*        ullrich.koethe@iwr.uni-heidelberg.de    or                    */
/*        vigra@informatik.uni-hamburg.de                               */
/*                                                                      */
/*    Permission is hereby granted, free of charge, to any person       */
/*    obtaining a copy of this software and associated documentation    */
/*    files (the "Software"), to deal in the Software without           */
/*    restriction, including without limitation the rights to use,      */
/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
/*    sell copies of the Software, and to permit persons to whom the    */
/*    Software is furnished to do so, subject to the following          */
/*    conditions:                                                       */
/*                                                                      */
/*    The above copyright notice and this permission notice shall be    */
/*    included in all copies or substantial portions of the             */
/*    Software.                                                         */
/*                                                                      */
/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
/*                                                                      */
/************************************************************************/

#ifndef VIGRA_REGRESSION_HXX
#define VIGRA_REGRESSION_HXX

#include "matrix.hxx"
#include "linear_solve.hxx"
#include "singular_value_decomposition.hxx"
#include "numerictraits.hxx"
#include "functorexpression.hxx"


namespace vigra
{

namespace linalg
{

/** \addtogroup Optimization Optimization and Regression
 */
//@{
   /** Ordinary Least Squares Regression.

        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
        and a column vector \a b of length <tt>m</tt>, this function computes
        the column vector \a x of length <tt>n</tt> that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
        \f]

        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
        \a b. Note that all matrices must already have the correct shape.

        This function is just another name for \ref linearSolve(), perhaps
        leading to more readable code when \a A is a rectangular matrix. It returns
        <tt>false</tt> when the rank of \a A is less than <tt>n</tt>.
        See \ref linearSolve() for more documentation.

        <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
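
        <b>Usage</b> (a minimal sketch; assumes \a A and \a b are filled elsewhere):

        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1), x(n, 1);
        ... // fill A and b
        if(!leastSquares(A, b, x))   // uses the default "QR" method
            std::cerr << "A is rank deficient" << std::endl;
        \endcode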
   */
template <class T, class C1, class C2, class C3>
inline bool
leastSquares(MultiArrayView<2, T, C1> const & A,
             MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x,
             std::string method = "QR")
{
    return linearSolve(A, b, x, method);
}

   /** Weighted Least Squares Regression.

        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
        a vector \a b of length <tt>m</tt>, and a weight vector \a weights of length <tt>m</tt>
        with non-negative entries, this function computes the vector \a x of length <tt>n</tt>
        that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
             \textrm{diag}(\textrm{\bf weights})
             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)
        \f]

        where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
        The algorithm calls \ref leastSquares() on the equivalent problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
             \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2
        \f]

        where the square root of \a weights is just taken element-wise.

        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
        \a b. Note that all matrices must already have the correct shape.

        The function returns
        <tt>false</tt> when the rank of the weighted matrix \a A is less than <tt>n</tt>.

        <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
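
        <b>Usage</b> (a minimal sketch; the choice of weights, e.g. inverse error
        variances, is up to the application):

        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1), weights(m, 1), x(n, 1);
        ... // fill A, b, and the non-negative weights
        if(!weightedLeastSquares(A, b, weights, x))
            std::cerr << "weighted A is rank deficient" << std::endl;
        \endcode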
   */
template <class T, class C1, class C2, class C3, class C4>
bool
weightedLeastSquares(MultiArrayView<2, T, C1> const & A,
                     MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
                     MultiArrayView<2, T, C4> &x, std::string method = "QR")
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
       "weightedLeastSquares(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
       "weightedLeastSquares(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
       "weightedLeastSquares(): Weight matrix has wrong shape.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
       "weightedLeastSquares(): Result matrix x has wrong shape.");

    Matrix<T> wa(A.shape()), wb(b.shape());

    for(unsigned int k=0; k<rows; ++k)
    {
        vigra_precondition(weights(k,0) >= 0,
           "weightedLeastSquares(): Weights must be non-negative.");
        T w = std::sqrt(weights(k,0));
        for(unsigned int l=0; l<cols; ++l)
            wa(k,l) = w * A(k,l);
        for(unsigned int l=0; l<rhsCount; ++l)
            wb(k,l) = w * b(k,l);
    }

    return leastSquares(wa, wb, x, method);
}

   /** Ridge Regression.

        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
        a vector \a b of length <tt>m</tt>, and a regularization parameter <tt>lambda >= 0.0</tt>,
        this function computes the vector \a x of length <tt>n</tt>
        that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2 +
             \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

        This is implemented by means of \ref singularValueDecomposition().

        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
        \a b. Note that all matrices must already have the correct shape.

        The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
        and <tt>lambda == 0.0</tt>.

        <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
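
        <b>Usage</b> (a minimal sketch; the value of lambda is an arbitrary example):

        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1), x(n, 1);
        ... // fill A and b
        ridgeRegression(A, b, x, 0.1);   // lambda = 0.1, chosen here for illustration only
        \endcode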
   */
template <class T, class C1, class C2, class C3>
bool
ridgeRegression(MultiArrayView<2, T, C1> const & A,
                MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, double lambda)
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
       "ridgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
       "ridgeRegression(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
       "ridgeRegression(): Result matrix x has wrong shape.");
    vigra_precondition(lambda >= 0.0,
       "ridgeRegression(): lambda >= 0.0 required.");

    unsigned int m = rows;
    unsigned int n = cols;

    Matrix<T> u(m, n), s(n, 1), v(n, n);

    unsigned int rank = singularValueDecomposition(A, u, s, v);
    if(rank < n && lambda == 0.0)
        return false;

    Matrix<T> t = transpose(u)*b;
    for(unsigned int k=0; k<cols; ++k)
        for(unsigned int l=0; l<rhsCount; ++l)
            t(k,l) *= s(k,0) / (sq(s(k,0)) + lambda);
    x = v*t;
    return true;
}

   /** Weighted Ridge Regression.

        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
        a vector \a b of length <tt>m</tt>, a weight vector \a weights of length <tt>m</tt>
        with non-negative entries, and a regularization parameter <tt>lambda >= 0.0</tt>,
        this function computes the vector \a x of length <tt>n</tt>
        that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
             \textrm{diag}(\textrm{\bf weights})
             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right) +
             \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

        where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
        The algorithm calls \ref ridgeRegression() on the equivalent problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
             \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2 +
             \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

        where the square root of \a weights is just taken element-wise. This solution is
        computed by means of \ref singularValueDecomposition().

        When \a b is a matrix with <tt>k</tt> columns, \a x must also have
        <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
        \a b. Note that all matrices must already have the correct shape.

        The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
        and <tt>lambda == 0.0</tt>.

        <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
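
        <b>Usage</b> (a minimal sketch; weights and lambda are illustrative):

        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1), weights(m, 1), x(n, 1);
        ... // fill A, b, and the non-negative weights
        weightedRidgeRegression(A, b, weights, x, 0.1);   // lambda = 0.1
        \endcode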
   */
template <class T, class C1, class C2, class C3, class C4>
bool
weightedRidgeRegression(MultiArrayView<2, T, C1> const & A,
                        MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
                        MultiArrayView<2, T, C4> &x, double lambda)
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
       "weightedRidgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
       "weightedRidgeRegression(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
       "weightedRidgeRegression(): Weight matrix has wrong shape.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
       "weightedRidgeRegression(): Result matrix x has wrong shape.");
    vigra_precondition(lambda >= 0.0,
       "weightedRidgeRegression(): lambda >= 0.0 required.");

    Matrix<T> wa(A.shape()), wb(b.shape());

    for(unsigned int k=0; k<rows; ++k)
    {
        vigra_precondition(weights(k,0) >= 0,
           "weightedRidgeRegression(): Weights must be non-negative.");
        T w = std::sqrt(weights(k,0));
        for(unsigned int l=0; l<cols; ++l)
            wa(k,l) = w * A(k,l);
        for(unsigned int l=0; l<rhsCount; ++l)
            wb(k,l) = w * b(k,l);
    }

    return ridgeRegression(wa, wb, x, lambda);
}

   /** Ridge Regression with many lambdas.

        This executes \ref ridgeRegression() for a sequence of regularization parameters. It
        is implemented so that the \ref singularValueDecomposition() has to be executed only once.
        \a lambda must be an array conforming to the <tt>std::vector</tt> interface, i.e. it must
        support <tt>lambda.size()</tt> and <tt>lambda[k]</tt>. The columns of the matrix \a x
        will contain the solutions for the corresponding lambda, so the number of columns of
        the matrix \a x must be equal to <tt>lambda.size()</tt>, and \a b must be a column vector,
        i.e. it cannot contain several right hand sides at once.

        The function returns <tt>false</tt> when the matrix \a A is rank deficient. If this
        happens, and one of the lambdas is zero, the corresponding column of \a x will be skipped.

        <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
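
        <b>Usage</b> (a minimal sketch; the logarithmic lambda grid is an arbitrary example):

        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1);
        ... // fill A and b
        ArrayVector<double> lambda;              // any type with size() and operator[] works
        for(int k = -3; k <= 3; ++k)
            lambda.push_back(std::pow(10.0, k)); // lambdas from 0.001 to 1000
        Matrix<double> x(n, lambda.size());      // one solution column per lambda
        ridgeRegressionSeries(A, b, x, lambda);
        \endcode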
   */
template <class T, class C1, class C2, class C3, class Array>
bool
ridgeRegressionSeries(MultiArrayView<2, T, C1> const & A,
                      MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, Array const & lambda)
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int lambdaCount = lambda.size();
    vigra_precondition(rows >= cols,
       "ridgeRegressionSeries(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
       "ridgeRegressionSeries(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == lambdaCount,
       "ridgeRegressionSeries(): Result matrix x has wrong shape.");

    unsigned int m = rows;
    unsigned int n = cols;

    Matrix<T> u(m, n), s(n, 1), v(n, n);

    unsigned int rank = singularValueDecomposition(A, u, s, v);

    Matrix<T> xl = transpose(u)*b;
    Matrix<T> xt(cols,1);
    for(unsigned int i=0; i<lambdaCount; ++i)
    {
        vigra_precondition(lambda[i] >= 0.0,
           "ridgeRegressionSeries(): lambda >= 0.0 required.");
        if(lambda[i] == 0.0 && rank < n)  // A is rank deficient => skip this lambda
            continue;
        for(unsigned int k=0; k<cols; ++k)
            xt(k,0) = xl(k,0) * s(k,0) / (sq(s(k,0)) + lambda[i]);
        columnVector(x, i) = v*xt;
    }
    return (rank == n);
}

/** \brief Pass options to leastAngleRegression().

    <b>\#include</b> <vigra/regression.hxx>
    Namespaces: vigra and vigra::linalg
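
    Usage (a minimal sketch of option chaining; each setter returns <tt>*this</tt>):

    \code
    LeastAngleRegressionOptions opt;
    opt.nnlasso().maxSolutionCount(10);   // non-negative LASSO, at most 10 knot points
    \endcode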
*/
class LeastAngleRegressionOptions
{
  public:
    enum Mode { LARS, LASSO, NNLASSO };

        /** Initialize all options with default values.
        */
    LeastAngleRegressionOptions()
    : max_solution_count(0),
      unconstrained_dimension_count(0),
      mode(LASSO),
      least_squares_solutions(true)
    {}

        /** Maximum number of solutions to be computed.

            If \a n is 0 (the default), the number of solutions is determined by the length
            of the solution array. Otherwise, the minimum of maxSolutionCount() and that
            length is taken.<br>
            Default: 0 (use length of solution array)
        */
    LeastAngleRegressionOptions & maxSolutionCount(unsigned int n)
    {
        max_solution_count = (int)n;
        return *this;
    }

        /** Set the mode of the algorithm.

            Mode must be one of "lars", "lasso", "nnlasso". The function just calls
            the member function of the corresponding name to set the mode.

            Default: "lasso"
        */
    LeastAngleRegressionOptions & setMode(std::string mode)
    {
        for(unsigned int k=0; k<mode.size(); ++k)
            mode[k] = (std::string::value_type)tolower(mode[k]);
        if(mode == "lars")
            this->lars();
        else if(mode == "lasso")
            this->lasso();
        else if(mode == "nnlasso")
            this->nnlasso();
        else
            vigra_fail("LeastAngleRegressionOptions.setMode(): Invalid mode.");
        return *this;
    }

        /** Use the plain LARS algorithm.

            Default: inactive
        */
    LeastAngleRegressionOptions & lars()
    {
        mode = LARS;
        return *this;
    }

        /** Use the LASSO modification of the LARS algorithm.

            This allows features to be removed from the active set under certain conditions.<br>
            Default: active
        */
    LeastAngleRegressionOptions & lasso()
    {
        mode = LASSO;
        return *this;
    }

        /** Use the non-negative LASSO modification of the LARS algorithm.

            This enforces all non-zero entries in the solution to be positive.<br>
            Default: inactive
        */
    LeastAngleRegressionOptions & nnlasso()
    {
        mode = NNLASSO;
        return *this;
    }

        /** Compute least squares solutions.

            Use least angle regression to determine active sets, but
            return least squares solutions for the features in each active set,
            instead of constrained solutions.<br>
            Default: <tt>true</tt>
        */
    LeastAngleRegressionOptions & leastSquaresSolutions(bool select = true)
    {
        least_squares_solutions = select;
        return *this;
    }

    int max_solution_count, unconstrained_dimension_count;
    Mode mode;
    bool least_squares_solutions;
};

namespace detail {

template <class T, class C1, class C2>
struct LarsData
{
    typedef typename MultiArrayShape<2>::type Shape;

    int activeSetSize;
    MultiArrayView<2, T, C1> A;
    MultiArrayView<2, T, C2> b;
    Matrix<T> R, qtb, lars_solution, lars_prediction, next_lsq_solution, next_lsq_prediction, searchVector;
    ArrayVector<MultiArrayIndex> columnPermutation;

    // init data for a new run
    LarsData(MultiArrayView<2, T, C1> const & Ai, MultiArrayView<2, T, C2> const & bi)
    : activeSetSize(1),
      A(Ai), b(bi), R(A), qtb(b),
      lars_solution(A.shape(1), 1), lars_prediction(A.shape(0), 1),
      next_lsq_solution(A.shape(1), 1), next_lsq_prediction(A.shape(0), 1), searchVector(A.shape(0), 1),
      columnPermutation(A.shape(1))
    {
        for(unsigned int k=0; k<columnPermutation.size(); ++k)
            columnPermutation[k] = k;
    }

    // copy data for the recursive call in nnlassolsq
    LarsData(LarsData const & d, int asetSize)
    : activeSetSize(asetSize),
      A(d.R.subarray(Shape(0,0), Shape(d.A.shape(0), activeSetSize))), b(d.qtb), R(A), qtb(b),
      lars_solution(d.lars_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))), lars_prediction(d.lars_prediction),
      next_lsq_solution(d.next_lsq_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))),
      next_lsq_prediction(d.next_lsq_prediction), searchVector(d.searchVector),
      columnPermutation(A.shape(1))
    {
        for(unsigned int k=0; k<columnPermutation.size(); ++k)
            columnPermutation[k] = k;
    }
};

template <class T, class C1, class C2, class Array1, class Array2>
unsigned int leastAngleRegressionMainLoop(LarsData<T, C1, C2> & d,
                      Array1 & activeSets, Array2 * lars_solutions, Array2 * lsq_solutions,
                      LeastAngleRegressionOptions const & options)
{
    using namespace vigra::functor;

    typedef typename MultiArrayShape<2>::type Shape;
    typedef typename Matrix<T>::view_type Subarray;
    typedef ArrayVector<MultiArrayIndex> Permutation;
    typedef typename Permutation::view_type ColumnSet;

    vigra_precondition(d.activeSetSize > 0,
       "leastAngleRegressionMainLoop() must not be called with empty active set.");

    bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
    bool lasso_modification = (options.mode != LeastAngleRegressionOptions::LARS);

    const MultiArrayIndex rows = rowCount(d.R);
    const MultiArrayIndex cols = columnCount(d.R);
    const MultiArrayIndex maxRank = std::min(rows, cols);

    MultiArrayIndex maxSolutionCount = options.max_solution_count;
    if(maxSolutionCount == 0)
        maxSolutionCount = lasso_modification
                                ? 10*maxRank
                                : maxRank;

    bool needToRemoveColumn = false;
    MultiArrayIndex columnToBeAdded = 0, columnToBeRemoved = 0;
    MultiArrayIndex currentSolutionCount = 0;
    while(currentSolutionCount < maxSolutionCount)
    {
        ColumnSet activeSet = d.columnPermutation.subarray(0, (unsigned int)d.activeSetSize);
        ColumnSet inactiveSet = d.columnPermutation.subarray((unsigned int)d.activeSetSize, (unsigned int)cols);

        // find next dimension to be activated
        Matrix<T> cLARS = transpose(d.A) * (d.b - d.lars_prediction),     // correlation with LARS residual
                  cLSQ  = transpose(d.A) * (d.b - d.next_lsq_prediction); // correlation with LSQ residual

        // In theory, all vectors in the active set should have the same correlation C, and
        // the correlation of all others should not exceed this. In practice, we may find the
        // maximum correlation in any variable due to tiny numerical inaccuracies. Therefore, we
        // determine C from the entire set of variables.
        MultiArrayIndex cmaxIndex = enforce_positive
                                       ? argMax(cLARS)
                                       : argMax(abs(cLARS));
        T C = abs(cLARS(cmaxIndex, 0));

        Matrix<T> ac(cols - d.activeSetSize, 1);
        for(MultiArrayIndex k = 0; k<cols-d.activeSetSize; ++k)
        {
            T rho = cLSQ(inactiveSet[k], 0),
              cc  = C - sign(rho)*cLARS(inactiveSet[k], 0);

            if(rho == 0.0)       // make sure that 0/0 cannot happen in the other cases
                ac(k,0) = 1.0;   // variable k is linearly dependent on the active set
            else if(rho > 0.0)
                ac(k,0) = cc / (cc + rho); // variable k would enter the active set with positive sign
            else if(enforce_positive)
                ac(k,0) = 1.0;   // variable k cannot enter the active set because it would be negative
            else
                ac(k,0) = cc / (cc - rho); // variable k would enter the active set with negative sign
        }

        // in the non-negative case: make sure that a column just removed cannot re-enter right away
        // (in standard LASSO, this is allowed, because the variable may re-enter with opposite sign)
        if(enforce_positive && needToRemoveColumn)
            ac(columnToBeRemoved-d.activeSetSize,0) = 1.0;

        // find candidate
        // Note: R uses Arg1() > epsilon, but this is only possible because it allows several variables to
        // join the active set simultaneously, so that gamma = 0 cannot occur.
        columnToBeAdded = argMin(ac);

        // if no new column can be added, we do a full step gamma = 1.0 and then stop, unless a column is removed below
        T gamma = (d.activeSetSize == maxRank)
                     ? 1.0
                     : ac(columnToBeAdded, 0);

        // adjust columnToBeAdded: we skipped the active set
        if(columnToBeAdded >= 0)
            columnToBeAdded += d.activeSetSize;

        // check whether we have to remove a column from the active set
        needToRemoveColumn = false;
        if(lasso_modification)
        {
            // find dimensions whose weight changes sign below gamma*searchDirection
            Matrix<T> s(Shape(d.activeSetSize, 1), NumericTraits<T>::max());
            for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
            {
                if(( enforce_positive && d.next_lsq_solution(k,0) < 0.0) ||
                   (!enforce_positive && sign(d.lars_solution(k,0))*sign(d.next_lsq_solution(k,0)) == -1.0))
                    s(k,0) = d.lars_solution(k,0) / (d.lars_solution(k,0) - d.next_lsq_solution(k,0));
            }

            columnToBeRemoved = argMinIf(s, Arg1() <= Param(gamma));
            if(columnToBeRemoved >= 0)
            {
                needToRemoveColumn = true; // remove takes precedence over add
                gamma = s(columnToBeRemoved, 0);
            }
        }

        // compute the current solutions
        d.lars_prediction = gamma * d.next_lsq_prediction + (1.0 - gamma) * d.lars_prediction;
        d.lars_solution   = gamma * d.next_lsq_solution   + (1.0 - gamma) * d.lars_solution;
        if(needToRemoveColumn)
            d.lars_solution(columnToBeRemoved, 0) = 0.0; // turn possible epsilon into an exact zero

        // write the current solution
        ++currentSolutionCount;
        activeSets.push_back(typename Array1::value_type(d.columnPermutation.begin(), d.columnPermutation.begin()+d.activeSetSize));

        if(lsq_solutions != 0)
        {
            if(enforce_positive)
            {
                ArrayVector<Matrix<T> > nnresults;
                ArrayVector<ArrayVector<MultiArrayIndex> > nnactiveSets;
                LarsData<T, C1, C2> nnd(d, d.activeSetSize);

                leastAngleRegressionMainLoop(nnd, nnactiveSets, &nnresults, (Array2*)0,
                                             LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
                Matrix<T> nnlsq_solution(d.activeSetSize, 1);
                for(unsigned int k=0; k<nnactiveSets.back().size(); ++k)
                {
                    nnlsq_solution(nnactiveSets.back()[k],0) = nnresults.back()[k];
                }
                lsq_solutions->push_back(nnlsq_solution);
            }
            else
            {
                lsq_solutions->push_back(d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
            }
        }
        if(lars_solutions != 0)
        {
            lars_solutions->push_back(d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
        }

        // no further solutions possible
        if(gamma == 1.0)
            break;

        if(needToRemoveColumn)
        {
            --d.activeSetSize;
            if(columnToBeRemoved != d.activeSetSize)
            {
                // remove column 'columnToBeRemoved' and restore triangular form of R
                // note: columnPermutation is automatically swapped here
                detail::upperTriangularSwapColumns(columnToBeRemoved, d.activeSetSize, d.R, d.qtb, d.columnPermutation);

                // swap solution entries
                std::swap(d.lars_solution(columnToBeRemoved, 0), d.lars_solution(d.activeSetSize,0));
                std::swap(d.next_lsq_solution(columnToBeRemoved, 0), d.next_lsq_solution(d.activeSetSize,0));
                columnToBeRemoved = d.activeSetSize; // keep track of removed column
            }
            d.lars_solution(d.activeSetSize,0) = 0.0;
            d.next_lsq_solution(d.activeSetSize,0) = 0.0;
        }
        else
        {
            vigra_invariant(columnToBeAdded >= 0,
               "leastAngleRegression(): internal error (columnToBeAdded < 0)");
            // add column 'columnToBeAdded'
            if(d.activeSetSize != columnToBeAdded)
            {
                std::swap(d.columnPermutation[d.activeSetSize], d.columnPermutation[columnToBeAdded]);
                columnVector(d.R, d.activeSetSize).swapData(columnVector(d.R, columnToBeAdded));
                columnToBeAdded = d.activeSetSize; // keep track of added column
            }

            // zero the corresponding entries of the solutions
            d.next_lsq_solution(d.activeSetSize,0) = 0.0;
            d.lars_solution(d.activeSetSize,0) = 0.0;

            // reduce R (i.e. its newly added column) to triangular form
            detail::qrColumnHouseholderStep(d.activeSetSize, d.R, d.qtb);
            ++d.activeSetSize;
        }

        // compute the LSQ solution of the new active set
        Subarray Ractive = d.R.subarray(Shape(0,0), Shape(d.activeSetSize, d.activeSetSize));
        Subarray qtbactive = d.qtb.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
        Subarray next_lsq_solution_view = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
        linearSolveUpperTriangular(Ractive, qtbactive, next_lsq_solution_view);

        // compute the LSQ prediction of the new active set
        d.next_lsq_prediction.init(0.0);
        for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
            d.next_lsq_prediction += next_lsq_solution_view(k,0)*columnVector(d.A, d.columnPermutation[k]);
    }

    return (unsigned int)currentSolutionCount;
}

template <class T, class C1, class C2, class Array1, class Array2>
unsigned int
leastAngleRegressionImpl(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                         Array1 & activeSets, Array2 * lasso_solutions, Array2 * lsq_solutions,
                         LeastAngleRegressionOptions const & options)
{
    using namespace vigra::functor;

    const MultiArrayIndex rows = rowCount(A);

    vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
       "leastAngleRegression(): Shape mismatch between matrices A and b.");

    bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);

    detail::LarsData<T, C1, C2> d(A, b);

    // find dimension with largest correlation
    Matrix<T> c = transpose(A)*b;
    MultiArrayIndex initialColumn = enforce_positive
                                       ? argMaxIf(c, Arg1() > Param(0.0))
                                       : argMax(abs(c));
    if(initialColumn == -1)
        return 0; // no solution found

    // prepare initial active set and search direction etc.
    std::swap(d.columnPermutation[0], d.columnPermutation[initialColumn]);
    columnVector(d.R, 0).swapData(columnVector(d.R, initialColumn));
    detail::qrColumnHouseholderStep(0, d.R, d.qtb);
    d.next_lsq_solution(0,0) = d.qtb(0,0) / d.R(0,0);
    d.next_lsq_prediction = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
    d.searchVector = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);

    return leastAngleRegressionMainLoop(d, activeSets, lasso_solutions, lsq_solutions, options);
}

} // namespace detail

   /** Least Angle Regression.

        <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg

        <b> Declarations:</b>

        \code
        namespace vigra {
            namespace linalg {
                // compute either LASSO or least squares solutions
                template <class T, class C1, class C2, class Array1, class Array2>
                unsigned int
                leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                                     Array1 & activeSets, Array2 & solutions,
                                     LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());

                // compute LASSO and least squares solutions
                template <class T, class C1, class C2, class Array1, class Array2>
                unsigned int
                leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                                     Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
                                     LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
            }
            using linalg::leastAngleRegression;
        }
        \endcode

        This function implements Least Angle Regression (LARS) as described in

        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
        B. Efron, T. Hastie, I. Johnstone, and R. Tibshirani: <em>"Least Angle Regression"</em>,
        Annals of Statistics 32(2):407-499, 2004.

        It is an efficient algorithm to solve the L1-regularized least squares (LASSO) problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
             \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s
        \f]

        and the L1-regularized non-negative least squares (NN-LASSO) problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin} \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
             \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s \textrm{ and } \textrm{\bf x}\ge \textrm{\bf 0}
        \f]

        where \a A is a matrix with <tt>m</tt> rows and <tt>n</tt> columns (often with <tt>m < n</tt>),
        \a b is a vector of length <tt>m</tt>, and <tt>s >= 0.0</tt> is a regularization parameter.
        L1-regularization has the desirable effect that it causes the solution \a x to be sparse, i.e. only
        the most important variables (called the <em>active set</em>) have non-zero values. The
        key insight of the LARS algorithm is the following: When the solution vector is considered
        as a function of the regularization parameter s, then <b>x</b>(s) is a piecewise
        linear function, i.e. a polyline in n-dimensional space. The knots of the polyline
        occur precisely at those values of s where one variable enters or leaves the active set,
        and can be computed efficiently.

        Therefore, leastAngleRegression() returns the entire solution path as a sequence of knot points, starting
        at \f$\textrm{\bf x}(s=0)\f$ (where the only feasible solution is obviously <b>x</b> = 0) and ending at
        \f$\textrm{\bf x}(s=\infty)\f$ (where the solution becomes the ordinary least squares solution). Actually,
        the initial null solution is not explicitly returned, i.e. the sequence starts at the first non-zero
        solution with one variable in the active set. The function leastAngleRegression() returns the number
        of solutions (i.e. knot points) computed.

        The sequences of active sets and corresponding variable weights are returned in \a activeSets and
        \a solutions respectively. That is, <tt>activeSets[i]</tt> is an \ref vigra::ArrayVector "ArrayVector<int>"
        containing the indices of the variables that are active at the i-th knot, and <tt>solutions</tt> is a
        \ref vigra::linalg::Matrix "Matrix<T>" containing the weights of those variables, in the same order (see
        example below). Variables not contained in <tt>activeSets[i]</tt> are zero at this solution.

        The behavior of the algorithm can be adapted by \ref vigra::linalg::LeastAngleRegressionOptions
        "LeastAngleRegressionOptions":
        <DL>
        <DT><b>options.lasso()</b> (active by default)
        <DD> Compute the LASSO solution as described above.
        <DT><b>options.nnlasso()</b> (inactive by default)
        <DD> Compute non-negative LASSO solutions, i.e. use the additional constraint that
        <b>x</b> >= 0 in all solutions.
        <DT><b>options.lars()</b> (inactive by default)
        <DD> Compute a solution path according to the plain LARS rule, i.e. never remove
        a variable from the active set once it entered.
        <DT><b>options.leastSquaresSolutions(bool)</b> (default: true)
        <DD> Use the algorithm mode selected above
        to determine the sequence of active sets, but then compute and return an
        ordinary (unconstrained) least squares solution for every active set.<br>
        <b>Note:</b> The second form of leastAngleRegression() ignores this option and
        always computes both constrained and unconstrained solutions (returned in
        \a lasso_solutions and \a lsq_solutions respectively).
        <DT><b>options.maxSolutionCount(unsigned int n)</b> (default: n = 0, i.e. compute all solutions)
        <DD> Compute at most <tt>n</tt> solutions.
        </DL>

        <b>Usage:</b>

        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1);
        ... // fill A and b

        // normalize the input
        Matrix<double> offset(1,n), scaling(1,n);
        prepareColumns(A, A, offset, scaling, DataPreparationGoals(ZeroMean|UnitVariance));
        prepareColumns(b, b, DataPreparationGoals(ZeroMean));

        // arrays to hold the output
        ArrayVector<ArrayVector<int> > activeSets;
        ArrayVector<Matrix<double> > solutions;

        // run leastAngleRegression() in non-negative LASSO mode
        int numSolutions = leastAngleRegression(A, b, activeSets, solutions,
                                                LeastAngleRegressionOptions().nnlasso());

        // print results
        Matrix<double> denseSolution(1, n);
        for (MultiArrayIndex k = 0; k < numSolutions; ++k)
        {
            // transform the sparse solution into a dense vector
            denseSolution.init(0.0); // ensure that inactive variables are zero
            for (unsigned int i = 0; i < activeSets[k].size(); ++i)
            {
                // set the values of the active variables;
                // activeSets[k][i] is the true index of the i-th variable in the active set
                denseSolution(0, activeSets[k][i]) = solutions[k](i,0);
            }

            // invert the input normalization
            denseSolution = denseSolution * pointWise(scaling);

            // output the solution
            std::cout << "solution " << k << ":\n" << denseSolution << std::endl;
        }
        \endcode

        <b>Required Interface:</b>

        <ul>
        <li> <tt>T</tt> must be a numeric type (compatible with double)
        <li> <tt>Array1 a1;</tt><br>
             <tt>a1.push_back(ArrayVector<int>());</tt>
        <li> <tt>Array2 a2;</tt><br>
             <tt>a2.push_back(Matrix<T>());</tt>
        </ul>
   */
doxygen_overloaded_function(template <...> unsigned int leastAngleRegression)

template <class T, class C1, class C2, class Array1, class Array2>
inline unsigned int
leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                     Array1 & activeSets, Array2 & solutions,
                     LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
{
    if(options.least_squares_solutions)
        return detail::leastAngleRegressionImpl(A, b, activeSets, (Array2*)0, &solutions, options);
    else
        return detail::leastAngleRegressionImpl(A, b, activeSets, &solutions, (Array2*)0, options);
}

template <class T, class C1, class C2, class Array1, class Array2>
inline unsigned int
leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                     Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
                     LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
{
    return detail::leastAngleRegressionImpl(A, b, activeSets, &lasso_solutions, &lsq_solutions, options);
}

   /** Non-negative Least Squares Regression.

        Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
        and a column vector \a b of length <tt>m</tt>, this function computes
        a column vector \a x of length <tt>n</tt> with <b>non-negative entries</b> that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
             \textrm{ subject to } \textrm{\bf x} \ge \textrm{\bf 0}
        \f]

        Both \a b and \a x must be column vectors (i.e. matrices with <tt>1</tt> column).
        Note that all matrices must already have the correct shape. The solution is computed by means
        of \ref leastAngleRegression() with non-negativity constraint.

        <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
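
        <b>Usage</b> (a minimal sketch):

        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1), x(n, 1);
        ... // fill A and b
        nonnegativeLeastSquares(A, b, x);   // x now holds the non-negative solution
        \endcode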
   */
template <class T, class C1, class C2, class C3>
inline void
nonnegativeLeastSquares(MultiArrayView<2, T, C1> const & A,
                        MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x)
{
    vigra_precondition(columnCount(A) == rowCount(x) && rowCount(A) == rowCount(b),
       "nonnegativeLeastSquares(): Matrix shape mismatch.");
    vigra_precondition(columnCount(b) == 1 && columnCount(x) == 1,
       "nonnegativeLeastSquares(): RHS and solution must be vectors (i.e. columnCount == 1).");

    ArrayVector<ArrayVector<MultiArrayIndex> > activeSets;
    ArrayVector<Matrix<T> > results;

    leastAngleRegression(A, b, activeSets, results,
                         LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
    x.init(NumericTraits<T>::zero());
    if(activeSets.size() > 0)
        for(unsigned int k=0; k<activeSets.back().size(); ++k)
            x(activeSets.back()[k],0) = results.back()[k];
}

//@}

} // namespace linalg

using linalg::leastSquares;
using linalg::weightedLeastSquares;
using linalg::ridgeRegression;
using linalg::weightedRidgeRegression;
using linalg::ridgeRegressionSeries;
using linalg::nonnegativeLeastSquares;
using linalg::leastAngleRegression;
using linalg::LeastAngleRegressionOptions;

} // namespace vigra

#endif // VIGRA_REGRESSION_HXX
