
regression.hxx
1 /************************************************************************/
2 /* */
3 /* Copyright 2008 by Ullrich Koethe */
4 /* */
5 /* This file is part of the VIGRA computer vision library. */
6 /* The VIGRA Website is */
7 /* http://hci.iwr.uni-heidelberg.de/vigra/ */
8 /* Please direct questions, bug reports, and contributions to */
9 /* ullrich.koethe@iwr.uni-heidelberg.de or */
10 /* vigra@informatik.uni-hamburg.de */
11 /* */
12 /* Permission is hereby granted, free of charge, to any person */
13 /* obtaining a copy of this software and associated documentation */
14 /* files (the "Software"), to deal in the Software without */
15 /* restriction, including without limitation the rights to use, */
16 /* copy, modify, merge, publish, distribute, sublicense, and/or */
17 /* sell copies of the Software, and to permit persons to whom the */
18 /* Software is furnished to do so, subject to the following */
19 /* conditions: */
20 /* */
21 /* The above copyright notice and this permission notice shall be */
22 /* included in all copies or substantial portions of the */
23 /* Software. */
24 /* */
25 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */
26 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */
27 /* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */
28 /* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */
29 /* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */
30 /* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */
31 /* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */
32 /* OTHER DEALINGS IN THE SOFTWARE. */
33 /* */
34 /************************************************************************/
35 
36 
37 #ifndef VIGRA_REGRESSION_HXX
38 #define VIGRA_REGRESSION_HXX
39 
40 #include "matrix.hxx"
41 #include "linear_solve.hxx"
42 #include "singular_value_decomposition.hxx"
43 #include "numerictraits.hxx"
44 #include "functorexpression.hxx"
45 
46 
47 namespace vigra
48 {
49 
50 namespace linalg
51 {
52 
53 /** \addtogroup Optimization Optimization and Regression
54  */
55 //@{
56  /** Ordinary Least Squares Regression.
57 
58  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
 59  and a column vector \a b of length <tt>m</tt>, this function computes
 60  the column vector \a x of length <tt>n</tt> that solves the optimization problem
61 
62  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
63  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
64  \f]
65 
66  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
67  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
68  \a b. Note that all matrices must already have the correct shape.
69 
70  This function is just another name for \ref linearSolve(), perhaps
71  leading to more readable code when \a A is a rectangular matrix. It returns
72  <tt>false</tt> when the rank of \a A is less than <tt>n</tt>.
73  See \ref linearSolve() for more documentation.
74 
75  <b>\#include</b> <vigra/regression.hxx>
76  Namespaces: vigra and vigra::linalg
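
 <b>Usage</b> (a minimal sketch, fitting a straight line through four invented samples):

 \code
 // model: y = x0 + x1*t
 double t[] = { 0.0, 1.0, 2.0, 3.0 };
 double y[] = { 1.1, 2.9, 5.2, 6.8 };

 Matrix<double> A(4, 2), b(4, 1), x(2, 1);
 for(int k=0; k<4; ++k)
 {
     A(k, 0) = 1.0;   // intercept column
     A(k, 1) = t[k];  // slope column
     b(k, 0) = y[k];
 }

 if(leastSquares(A, b, x))
     std::cout << "intercept: " << x(0,0) << ", slope: " << x(1,0) << "\n";
 \endcode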
77  */
78 template <class T, class C1, class C2, class C3>
79 inline bool
 80 leastSquares(MultiArrayView<2, T, C1> const & A,
 81  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x,
 82  std::string method = "QR")
83 {
84  return linearSolve(A, b, x, method);
85 }
86 
87  /** Weighted Least Squares Regression.
88 
89  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
90  a vector \a b of length <tt>m</tt>, and a weight vector \a weights of length <tt>m</tt>
91  with non-negative entries, this function computes the vector \a x of length <tt>n</tt>
92  that solves the optimization problem
93 
94  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
95  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
96  \textrm{diag}(\textrm{\bf weights})
97  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)
98  \f]
99 
100  where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
101  The algorithm calls \ref leastSquares() on the equivalent problem
102 
103  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
104  \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
105  \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2
106  \f]
107 
108  where the square root of \a weights is just taken element-wise.
109 
110  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
111  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
112  \a b. Note that all matrices must already have the correct shape.
113 
114  The function returns
115  <tt>false</tt> when the rank of the weighted matrix \a A is less than <tt>n</tt>.
116 
117  <b>\#include</b> <vigra/regression.hxx>
118  Namespaces: vigra and vigra::linalg
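
 <b>Usage</b> (a minimal sketch; the shapes and weight values are invented for illustration):

 \code
 Matrix<double> A(4, 2), b(4, 1), x(2, 1);
 ... // fill A and b as for leastSquares()

 Matrix<double> weights(4, 1);
 weights(0,0) = 1.0;
 weights(1,0) = 1.0;
 weights(2,0) = 4.0;  // trust the third sample more
 weights(3,0) = 0.0;  // ignore the fourth sample entirely

 if(!weightedLeastSquares(A, b, weights, x))
     ... // the weighted matrix was rank deficient
 \endcode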
119  */
120 template <class T, class C1, class C2, class C3, class C4>
121 bool
122 weightedLeastSquares(MultiArrayView<2, T, C1> const & A,
123  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
124  MultiArrayView<2, T, C4> &x, std::string method = "QR")
125 {
126  typedef T Real;
127 
128  const unsigned int rows = rowCount(A);
129  const unsigned int cols = columnCount(A);
130  const unsigned int rhsCount = columnCount(b);
131  vigra_precondition(rows >= cols,
132  "weightedLeastSquares(): Input matrix A must be rectangular with rowCount >= columnCount.");
133  vigra_precondition(rowCount(b) == rows,
134  "weightedLeastSquares(): Shape mismatch between matrices A and b.");
135  vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
136  "weightedLeastSquares(): Weight matrix has wrong shape.");
137  vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
138  "weightedLeastSquares(): Result matrix x has wrong shape.");
139 
140  Matrix<T> wa(A.shape()), wb(b.shape());
141 
142  for(unsigned int k=0; k<rows; ++k)
143  {
144  vigra_precondition(weights(k,0) >= 0,
145  "weightedLeastSquares(): Weights must be non-negative.");
146  T w = std::sqrt(weights(k,0));
147  for(unsigned int l=0; l<cols; ++l)
148  wa(k,l) = w * A(k,l);
149  for(unsigned int l=0; l<rhsCount; ++l)
150  wb(k,l) = w * b(k,l);
151  }
152 
153  return leastSquares(wa, wb, x, method);
154 }
155 
156  /** Ridge Regression.
157 
158  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
159  a vector \a b of length <tt>m</tt>, and a regularization parameter <tt>lambda >= 0.0</tt>,
160  this function computes the vector \a x of length <tt>n</tt>
161  that solves the optimization problem
162 
163  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
164  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2 +
165  \lambda \textrm{\bf x}^T\textrm{\bf x}
166  \f]
167 
168  This is implemented by means of \ref singularValueDecomposition().
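
 Specifically, if \f$\textrm{\bf A} = \textrm{\bf U} \textrm{diag}(\textrm{\bf s}) \textrm{\bf V}^T\f$
 denotes the singular value decomposition of \a A, the solution is computed as

 \f[ \tilde \textrm{\bf x} = \textrm{\bf V} \textrm{diag}\left(\frac{s_i}{s_i^2 + \lambda}\right)
 \textrm{\bf U}^T \textrm{\bf b}
 \f]

 which reduces to the ordinary least squares solution when <tt>lambda == 0.0</tt>.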
169 
170  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
171  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
172  \a b. Note that all matrices must already have the correct shape.
173 
174  The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
175  and <tt>lambda == 0.0</tt>.
176 
177  <b>\#include</b> <vigra/regression.hxx>
178  Namespaces: vigra and vigra::linalg
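
 <b>Usage</b> (a minimal sketch; the value of lambda is an arbitrary illustration, in practice
 it is typically chosen by cross-validation):

 \code
 Matrix<double> A(4, 2), b(4, 1), x(2, 1);
 ... // fill A and b

 if(!ridgeRegression(A, b, x, 0.5))  // lambda = 0.5
     ... // can only happen when lambda == 0.0 and A is rank deficient
 \endcode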
179  */
180 template <class T, class C1, class C2, class C3>
181 bool
182 ridgeRegression(MultiArrayView<2, T, C1> const & A,
183  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, double lambda)
184 {
185  typedef T Real;
186 
187  const unsigned int rows = rowCount(A);
188  const unsigned int cols = columnCount(A);
189  const unsigned int rhsCount = columnCount(b);
190  vigra_precondition(rows >= cols,
191  "ridgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
192  vigra_precondition(rowCount(b) == rows,
193  "ridgeRegression(): Shape mismatch between matrices A and b.");
194  vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
195  "ridgeRegression(): Result matrix x has wrong shape.");
196  vigra_precondition(lambda >= 0.0,
197  "ridgeRegression(): lambda >= 0.0 required.");
198 
199  unsigned int m = rows;
200  unsigned int n = cols;
201 
202  Matrix<T> u(m, n), s(n, 1), v(n, n);
203 
204  unsigned int rank = singularValueDecomposition(A, u, s, v);
205  if(rank < n && lambda == 0.0)
206  return false;
207 
208  Matrix<T> t = transpose(u)*b;
209  for(unsigned int k=0; k<cols; ++k)
210  for(unsigned int l=0; l<rhsCount; ++l)
211  t(k,l) *= s(k,0) / (sq(s(k,0)) + lambda);
212  x = v*t;
213  return true;
214 }
215 
216  /** Weighted Ridge Regression.
217 
218  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
219  a vector \a b of length <tt>m</tt>, a weight vector \a weights of length <tt>m</tt>
220  with non-negative entries, and a regularization parameter <tt>lambda >= 0.0</tt>,
221  this function computes the vector \a x of length <tt>n</tt>
222  that solves the optimization problem
223 
224  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
225  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
226  \textrm{diag}(\textrm{\bf weights})
227  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right) +
228  \lambda \textrm{\bf x}^T\textrm{\bf x}
229  \f]
230 
231  where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
232  The algorithm calls \ref ridgeRegression() on the equivalent problem
233 
234  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
235  \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
236  \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2 +
237  \lambda \textrm{\bf x}^T\textrm{\bf x}
238  \f]
239 
240  where the square root of \a weights is just taken element-wise. This solution is
241  computed by means of \ref singularValueDecomposition().
242 
243  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
244  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
245  \a b. Note that all matrices must already have the correct shape.
246 
247  The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
248  and <tt>lambda == 0.0</tt>.
249 
250  <b>\#include</b> <vigra/regression.hxx>
251  Namespaces: vigra and vigra::linalg
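
 <b>Usage</b> (a minimal sketch; shapes, weights, and lambda are invented for illustration):

 \code
 Matrix<double> A(4, 2), b(4, 1), weights(4, 1), x(2, 1);
 ... // fill A, b, and the non-negative weights

 if(!weightedRidgeRegression(A, b, weights, x, 0.5))
     ... // can only happen when lambda == 0.0 and the weighted matrix is rank deficient
 \endcode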
252  */
253 template <class T, class C1, class C2, class C3, class C4>
254 bool
255 weightedRidgeRegression(MultiArrayView<2, T, C1> const & A,
256  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
257  MultiArrayView<2, T, C4> &x, double lambda)
258 {
259  typedef T Real;
260 
261  const unsigned int rows = rowCount(A);
262  const unsigned int cols = columnCount(A);
263  const unsigned int rhsCount = columnCount(b);
264  vigra_precondition(rows >= cols,
265  "weightedRidgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
266  vigra_precondition(rowCount(b) == rows,
267  "weightedRidgeRegression(): Shape mismatch between matrices A and b.");
268  vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
269  "weightedRidgeRegression(): Weight matrix has wrong shape.");
270  vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
271  "weightedRidgeRegression(): Result matrix x has wrong shape.");
272  vigra_precondition(lambda >= 0.0,
273  "weightedRidgeRegression(): lambda >= 0.0 required.");
274 
275  Matrix<T> wa(A.shape()), wb(b.shape());
276 
277  for(unsigned int k=0; k<rows; ++k)
278  {
279  vigra_precondition(weights(k,0) >= 0,
280  "weightedRidgeRegression(): Weights must be non-negative.");
281  T w = std::sqrt(weights(k,0));
282  for(unsigned int l=0; l<cols; ++l)
283  wa(k,l) = w * A(k,l);
284  for(unsigned int l=0; l<rhsCount; ++l)
285  wb(k,l) = w * b(k,l);
286  }
287 
288  return ridgeRegression(wa, wb, x, lambda);
289 }
290 
291  /** Ridge Regression with many lambdas.
292 
293  This executes \ref ridgeRegression() for a sequence of regularization parameters. This
294  is implemented so that the \ref singularValueDecomposition() has to be executed only once.
295  \a lambda must be an array conforming to the <tt>std::vector</tt> interface, i.e. must
296  support <tt>lambda.size()</tt> and <tt>lambda[k]</tt>. The columns of the matrix \a x
297  will contain the solutions for the corresponding lambda, so the number of columns of
298  the matrix \a x must be equal to <tt>lambda.size()</tt>, and \a b must be a column vector,
299  i.e. cannot contain several right hand sides at once.
300 
301  The function returns <tt>false</tt> when the matrix \a A is rank deficient. If this
302  happens, and one of the lambdas is zero, the corresponding column of \a x will be skipped.
303 
304  <b>\#include</b> <vigra/regression.hxx>
305  Namespaces: vigra and vigra::linalg
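
 <b>Usage</b> (a minimal sketch; the lambda values are arbitrary):

 \code
 Matrix<double> A(4, 2), b(4, 1);
 ... // fill A and b

 std::vector<double> lambdas;
 lambdas.push_back(0.01);
 lambdas.push_back(0.1);
 lambdas.push_back(1.0);

 Matrix<double> x(2, 3);  // columnCount(x) must equal lambdas.size()
 ridgeRegressionSeries(A, b, x, lambdas);
 // columnVector(x, i) now holds the solution for lambdas[i]
 \endcode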
306  */
307 template <class T, class C1, class C2, class C3, class Array>
308 bool
309 ridgeRegressionSeries(MultiArrayView<2, T, C1> const & A,
310  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, Array const & lambda)
311 {
312  typedef T Real;
313 
314  const unsigned int rows = rowCount(A);
315  const unsigned int cols = columnCount(A);
316  const unsigned int lambdaCount = lambda.size();
317  vigra_precondition(rows >= cols,
318  "ridgeRegressionSeries(): Input matrix A must be rectangular with rowCount >= columnCount.");
319  vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
320  "ridgeRegressionSeries(): Shape mismatch between matrices A and b.");
321  vigra_precondition(rowCount(x) == cols && columnCount(x) == lambdaCount,
322  "ridgeRegressionSeries(): Result matrix x has wrong shape.");
323 
324  unsigned int m = rows;
325  unsigned int n = cols;
326 
327  Matrix<T> u(m, n), s(n, 1), v(n, n);
328 
329  unsigned int rank = singularValueDecomposition(A, u, s, v);
330 
331  Matrix<T> xl = transpose(u)*b;
332  Matrix<T> xt(cols,1);
333  for(unsigned int i=0; i<lambdaCount; ++i)
334  {
335  vigra_precondition(lambda[i] >= 0.0,
336  "ridgeRegressionSeries(): lambda >= 0.0 required.");
 337  if(lambda[i] == 0.0 && rank < cols) // skip this lambda when A is rank deficient
338  continue;
339  for(unsigned int k=0; k<cols; ++k)
340  xt(k,0) = xl(k,0) * s(k,0) / (sq(s(k,0)) + lambda[i]);
341  columnVector(x, i) = v*xt;
342  }
343  return (rank == n);
344 }
345 
346 /** \brief Pass options to leastAngleRegression().
347 
348  <b>\#include</b> <vigra/regression.hxx>
349  Namespaces: vigra and vigra::linalg
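
 <b>Usage</b> (a minimal sketch; all setters return a reference, so calls can be chained,
 and the solution count of 10 is an arbitrary illustration):

 \code
 LeastAngleRegressionOptions options;
 options.nnlasso().maxSolutionCount(10);  // non-negative LASSO, at most 10 knots

 // or inline, as typically passed to leastAngleRegression():
 // leastAngleRegression(A, b, activeSets, solutions,
 //                      LeastAngleRegressionOptions().nnlasso().maxSolutionCount(10));
 \endcode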
350 */
351 class LeastAngleRegressionOptions
352 {
353  public:
354  enum Mode { LARS, LASSO, NNLASSO };
355 
356  /** Initialize all options with default values.
357  */
358  LeastAngleRegressionOptions()
359  : max_solution_count(0),
360  unconstrained_dimension_count(0),
361  mode(LASSO),
362  least_squares_solutions(true)
363  {}
364 
365  /** Maximum number of solutions to be computed.
366 
367  If \a n is 0 (the default), the number of solutions is determined by the length
368  of the solution array. Otherwise, the minimum of maxSolutionCount() and that
369  length is taken.<br>
370  Default: 0 (use length of solution array)
371  */
372  LeastAngleRegressionOptions & maxSolutionCount(unsigned int n)
373  {
374  max_solution_count = (int)n;
375  return *this;
376  }
377 
378  /** Set the mode of the algorithm.
379 
380  Mode must be one of "lars", "lasso", "nnlasso". The function just calls
381  the member function of the corresponding name to set the mode.
382 
383  Default: "lasso"
384  */
385  LeastAngleRegressionOptions & setMode(std::string mode)
386  {
387  mode = tolower(mode);
388  if(mode == "lars")
389  this->lars();
390  else if(mode == "lasso")
391  this->lasso();
392  else if(mode == "nnlasso")
393  this->nnlasso();
394  else
395  vigra_fail("LeastAngleRegressionOptions.setMode(): Invalid mode.");
396  return *this;
397  }
398 
399 
400  /** Use the plain LARS algorithm.
401 
402  Default: inactive
403  */
404  LeastAngleRegressionOptions & lars()
405  {
406  mode = LARS;
407  return *this;
408  }
409 
410  /** Use the LASSO modification of the LARS algorithm.
411 
412  This allows features to be removed from the active set under certain conditions.<br>
413  Default: active
414  */
415  LeastAngleRegressionOptions & lasso()
416  {
417  mode = LASSO;
418  return *this;
419  }
420 
421  /** Use the non-negative LASSO modification of the LARS algorithm.
422 
423  This enforces all non-zero entries in the solution to be positive.<br>
424  Default: inactive
425  */
426  LeastAngleRegressionOptions & nnlasso()
427  {
428  mode = NNLASSO;
429  return *this;
430  }
431 
432  /** Compute least squares solutions.
433 
434  Use least angle regression to determine active sets, but
435  return least squares solutions for the features in each active set,
436  instead of constrained solutions.<br>
437  Default: <tt>true</tt>
438  */
439  LeastAngleRegressionOptions & leastSquaresSolutions(bool select = true)
440  {
441  least_squares_solutions = select;
442  return *this;
443  }
444 
445  int max_solution_count, unconstrained_dimension_count;
446  Mode mode;
447  bool least_squares_solutions;
448 };
449 
450 namespace detail {
451 
452 template <class T, class C1, class C2>
453 struct LarsData
454 {
455  typedef typename MultiArrayShape<2>::type Shape;
456 
457  int activeSetSize;
458  MultiArrayView<2, T, C1> A;
459  MultiArrayView<2, T, C2> b;
460  Matrix<T> R, qtb, lars_solution, lars_prediction, next_lsq_solution, next_lsq_prediction, searchVector;
461  ArrayVector<MultiArrayIndex> columnPermutation;
462 
463  // init data for a new run
464  LarsData(MultiArrayView<2, T, C1> const & Ai, MultiArrayView<2, T, C2> const & bi)
465  : activeSetSize(1),
466  A(Ai), b(bi), R(A), qtb(b),
467  lars_solution(A.shape(1), 1), lars_prediction(A.shape(0), 1),
468  next_lsq_solution(A.shape(1), 1), next_lsq_prediction(A.shape(0), 1), searchVector(A.shape(0), 1),
469  columnPermutation(A.shape(1))
470  {
471  for(unsigned int k=0; k<columnPermutation.size(); ++k)
472  columnPermutation[k] = k;
473  }
474 
475  // copy data for the recursive call in nnlassolsq
476  LarsData(LarsData const & d, int asetSize)
477  : activeSetSize(asetSize),
478  A(d.R.subarray(Shape(0,0), Shape(d.A.shape(0), activeSetSize))), b(d.qtb), R(A), qtb(b),
479  lars_solution(d.lars_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))), lars_prediction(d.lars_prediction),
480  next_lsq_solution(d.next_lsq_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))),
481  next_lsq_prediction(d.next_lsq_prediction), searchVector(d.searchVector),
482  columnPermutation(A.shape(1))
483  {
484  for(unsigned int k=0; k<columnPermutation.size(); ++k)
485  columnPermutation[k] = k;
486  }
487 };
488 
489 template <class T, class C1, class C2, class Array1, class Array2, class Array3>
490 unsigned int
491 leastAngleRegressionMainLoop(LarsData<T, C1, C2> & d,
492  Array1 & activeSets,
493  Array2 * lars_solutions, Array3 * lsq_solutions,
494  LeastAngleRegressionOptions const & options)
495 {
496  using namespace vigra::functor;
497 
498  typedef typename MultiArrayShape<2>::type Shape;
499  typedef typename Matrix<T>::view_type Subarray;
500  typedef ArrayVector<MultiArrayIndex> Permutation;
501  typedef typename Permutation::view_type ColumnSet;
502 
503  vigra_precondition(d.activeSetSize > 0,
504  "leastAngleRegressionMainLoop() must not be called with empty active set.");
505 
506  bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
507  bool lasso_modification = (options.mode != LeastAngleRegressionOptions::LARS);
508 
509  const MultiArrayIndex rows = rowCount(d.R);
510  const MultiArrayIndex cols = columnCount(d.R);
511  const MultiArrayIndex maxRank = std::min(rows, cols);
512 
513  MultiArrayIndex maxSolutionCount = options.max_solution_count;
514  if(maxSolutionCount == 0)
515  maxSolutionCount = lasso_modification
516  ? 10*maxRank
517  : maxRank;
518 
519  bool needToRemoveColumn = false;
520  MultiArrayIndex columnToBeAdded = 0, columnToBeRemoved = 0;
521  MultiArrayIndex currentSolutionCount = 0;
522  while(currentSolutionCount < maxSolutionCount)
523  {
524  //ColumnSet activeSet = d.columnPermutation.subarray(0, (unsigned int)d.activeSetSize);
525  ColumnSet inactiveSet = d.columnPermutation.subarray((unsigned int)d.activeSetSize, (unsigned int)cols);
526 
527  // find next dimension to be activated
528  Matrix<T> cLARS = transpose(d.A) * (d.b - d.lars_prediction), // correlation with LARS residual
529  cLSQ = transpose(d.A) * (d.b - d.next_lsq_prediction); // correlation with LSQ residual
530 
531  // In theory, all vectors in the active set should have the same correlation C, and
532  // the correlation of all others should not exceed this. In practice, we may find the
533  // maximum correlation in any variable due to tiny numerical inaccuracies. Therefore, we
534  // determine C from the entire set of variables.
535  MultiArrayIndex cmaxIndex = enforce_positive
536  ? argMax(cLARS)
537  : argMax(abs(cLARS));
538  T C = abs(cLARS(cmaxIndex, 0));
539 
540  Matrix<T> ac(cols - d.activeSetSize, 1);
541  for(MultiArrayIndex k = 0; k<cols-d.activeSetSize; ++k)
542  {
543  T rho = cLSQ(inactiveSet[k], 0),
544  cc = C - sign(rho)*cLARS(inactiveSet[k], 0);
545 
546  if(rho == 0.0) // make sure that 0/0 cannot happen in the other cases
547  ac(k,0) = 1.0; // variable k is linearly dependent on the active set
548  else if(rho > 0.0)
549  ac(k,0) = cc / (cc + rho); // variable k would enter the active set with positive sign
550  else if(enforce_positive)
551  ac(k,0) = 1.0; // variable k cannot enter the active set because it would be negative
552  else
553  ac(k,0) = cc / (cc - rho); // variable k would enter the active set with negative sign
554  }
555 
556  // in the non-negative case: make sure that a column just removed cannot re-enter right away
557  // (in standard LASSO, this is allowed, because the variable may re-enter with opposite sign)
558  if(enforce_positive && needToRemoveColumn)
559  ac(columnToBeRemoved-d.activeSetSize,0) = 1.0;
560 
561  // find candidate
562  // Note: R uses Arg1() > epsilon, but this is only possible because it allows several variables to
563  // join the active set simultaneously, so that gamma = 0 cannot occur.
564  columnToBeAdded = argMin(ac);
565 
566  // if no new column can be added, we do a full step gamma = 1.0 and then stop, unless a column is removed below
567  T gamma = (d.activeSetSize == maxRank)
568  ? 1.0
569  : ac(columnToBeAdded, 0);
570 
571  // adjust columnToBeAdded: we skipped the active set
572  if(columnToBeAdded >= 0)
573  columnToBeAdded += d.activeSetSize;
574 
575  // check whether we have to remove a column from the active set
576  needToRemoveColumn = false;
577  if(lasso_modification)
578  {
579  // find dimensions whose weight changes sign below gamma*searchDirection
580  Matrix<T> s(Shape(d.activeSetSize, 1), NumericTraits<T>::max());
581  for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
582  {
583  if(( enforce_positive && d.next_lsq_solution(k,0) < 0.0) ||
584  (!enforce_positive && sign(d.lars_solution(k,0))*sign(d.next_lsq_solution(k,0)) == -1.0))
585  s(k,0) = d.lars_solution(k,0) / (d.lars_solution(k,0) - d.next_lsq_solution(k,0));
586  }
587 
588  columnToBeRemoved = argMinIf(s, Arg1() <= Param(gamma));
589  if(columnToBeRemoved >= 0)
590  {
591  needToRemoveColumn = true; // remove takes precedence over add
592  gamma = s(columnToBeRemoved, 0);
593  }
594  }
595 
596  // compute the current solutions
597  d.lars_prediction = gamma * d.next_lsq_prediction + (1.0 - gamma) * d.lars_prediction;
598  d.lars_solution = gamma * d.next_lsq_solution + (1.0 - gamma) * d.lars_solution;
599  if(needToRemoveColumn)
600  d.lars_solution(columnToBeRemoved, 0) = 0.0; // turn possible epsilon into an exact zero
601 
602  // write the current solution
603  ++currentSolutionCount;
604  activeSets.push_back(typename Array1::value_type(d.columnPermutation.begin(), d.columnPermutation.begin()+d.activeSetSize));
605 
606  if(lsq_solutions != 0)
607  {
608  if(enforce_positive)
609  {
610  ArrayVector<Matrix<T> > nnresults;
611  ArrayVector<ArrayVector<MultiArrayIndex> > nnactiveSets;
612  LarsData<T, C1, C2> nnd(d, d.activeSetSize);
613 
614  leastAngleRegressionMainLoop(nnd, nnactiveSets, &nnresults, (Array3*)0,
615  LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
616  //Matrix<T> nnlsq_solution(d.activeSetSize, 1);
617  typename Array2::value_type nnlsq_solution(Shape(d.activeSetSize, 1));
618  for(unsigned int k=0; k<nnactiveSets.back().size(); ++k)
619  {
620  nnlsq_solution(nnactiveSets.back()[k],0) = nnresults.back()[k];
621  }
622  //lsq_solutions->push_back(nnlsq_solution);
623  lsq_solutions->push_back(typename Array3::value_type());
624  lsq_solutions->back() = nnlsq_solution;
625  }
626  else
627  {
628  //lsq_solutions->push_back(d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
629  lsq_solutions->push_back(typename Array3::value_type());
630  lsq_solutions->back() = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
631  }
632  }
633  if(lars_solutions != 0)
634  {
635  //lars_solutions->push_back(d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
636  lars_solutions->push_back(typename Array2::value_type());
637  lars_solutions->back() = d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
638  }
639 
640  // no further solutions possible
641  if(gamma == 1.0)
642  break;
643 
644  if(needToRemoveColumn)
645  {
646  --d.activeSetSize;
647  if(columnToBeRemoved != d.activeSetSize)
648  {
649  // remove column 'columnToBeRemoved' and restore triangular form of R
650  // note: columnPermutation is automatically swapped here
651  detail::upperTriangularSwapColumns(columnToBeRemoved, d.activeSetSize, d.R, d.qtb, d.columnPermutation);
652 
653  // swap solution entries
654  std::swap(d.lars_solution(columnToBeRemoved, 0), d.lars_solution(d.activeSetSize,0));
655  std::swap(d.next_lsq_solution(columnToBeRemoved, 0), d.next_lsq_solution(d.activeSetSize,0));
656  columnToBeRemoved = d.activeSetSize; // keep track of removed column
657  }
658  d.lars_solution(d.activeSetSize,0) = 0.0;
659  d.next_lsq_solution(d.activeSetSize,0) = 0.0;
660  }
661  else
662  {
663  vigra_invariant(columnToBeAdded >= 0,
664  "leastAngleRegression(): internal error (columnToBeAdded < 0)");
665  // add column 'columnToBeAdded'
666  if(d.activeSetSize != columnToBeAdded)
667  {
668  std::swap(d.columnPermutation[d.activeSetSize], d.columnPermutation[columnToBeAdded]);
669  columnVector(d.R, d.activeSetSize).swapData(columnVector(d.R, columnToBeAdded));
670  columnToBeAdded = d.activeSetSize; // keep track of added column
671  }
672 
673  // zero the corresponding entries of the solutions
674  d.next_lsq_solution(d.activeSetSize,0) = 0.0;
675  d.lars_solution(d.activeSetSize,0) = 0.0;
676 
677  // reduce R (i.e. its newly added column) to triangular form
678  detail::qrColumnHouseholderStep(d.activeSetSize, d.R, d.qtb);
679  ++d.activeSetSize;
680  }
681 
682  // compute the LSQ solution of the new active set
683  Subarray Ractive = d.R.subarray(Shape(0,0), Shape(d.activeSetSize, d.activeSetSize));
684  Subarray qtbactive = d.qtb.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
685  Subarray next_lsq_solution_view = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
686  linearSolveUpperTriangular(Ractive, qtbactive, next_lsq_solution_view);
687 
688  // compute the LSQ prediction of the new active set
689  d.next_lsq_prediction.init(0.0);
690  for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
691  d.next_lsq_prediction += next_lsq_solution_view(k,0)*columnVector(d.A, d.columnPermutation[k]);
692  }
693 
694  return (unsigned int)currentSolutionCount;
695 }
696 
697 template <class T, class C1, class C2, class Array1, class Array2>
698 unsigned int
699 leastAngleRegressionImpl(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
700  Array1 & activeSets, Array2 * lasso_solutions, Array2 * lsq_solutions,
701  LeastAngleRegressionOptions const & options)
702 {
703  using namespace vigra::functor;
704 
705  const MultiArrayIndex rows = rowCount(A);
706 
707  vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
708  "leastAngleRegression(): Shape mismatch between matrices A and b.");
709 
710  bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
711 
712  detail::LarsData<T, C1, C2> d(A, b);
713 
714  // find dimension with largest correlation
715  Matrix<T> c = transpose(A)*b;
716  MultiArrayIndex initialColumn = enforce_positive
717  ? argMaxIf(c, Arg1() > Param(0.0))
718  : argMax(abs(c));
719  if(initialColumn == -1)
720  return 0; // no solution found
721 
722  // prepare initial active set and search direction etc.
723  std::swap(d.columnPermutation[0], d.columnPermutation[initialColumn]);
724  columnVector(d.R, 0).swapData(columnVector(d.R, initialColumn));
725  detail::qrColumnHouseholderStep(0, d.R, d.qtb);
726  d.next_lsq_solution(0,0) = d.qtb(0,0) / d.R(0,0);
727  d.next_lsq_prediction = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
728  d.searchVector = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
729 
730  return leastAngleRegressionMainLoop(d, activeSets, lasso_solutions, lsq_solutions, options);
731 }
732 
733 } // namespace detail
734 
735  /** Least Angle Regression.
736 
737  <b>\#include</b> <vigra/regression.hxx>
738  Namespaces: vigra and vigra::linalg
739 
740  <b> Declarations:</b>
741 
742  \code
743  namespace vigra {
744  namespace linalg {
745  // compute either LASSO or least squares solutions
746  template <class T, class C1, class C2, class Array1, class Array2>
747  unsigned int
748  leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
749  Array1 & activeSets, Array2 & solutions,
750  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
751 
752  // compute LASSO and least squares solutions
753  template <class T, class C1, class C2, class Array1, class Array2>
754  unsigned int
755  leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
756  Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
757  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
758  }
759  using linalg::leastAngleRegression;
760  }
761  \endcode
762 
763  This function implements Least Angle Regression (LARS) as described in
764 
765  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
766  B.Efron, T.Hastie, I.Johnstone, and R.Tibshirani: <i>"Least Angle Regression"</i>,
767  Annals of Statistics 32(2):407-499, 2004.
768 
769  It is an efficient algorithm to solve the L1-regularized least squares (LASSO) problem
770 
771  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
772  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
773  \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s
774  \f]
775 
776  and the L1-regularized non-negative least squares (NN-LASSO) problem
777 
778  \f[ \tilde \textrm{\bf x} = \textrm{argmin} \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
779  \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s \textrm{ and } \textrm{\bf x}\ge \textrm{\bf 0}
780  \f]
781 
782  where \a A is a matrix with <tt>m</tt> rows and <tt>n</tt> columns (often with <tt>m < n</tt>),
783  \a b a vector of length <tt>m</tt>, and a regularization parameter s >= 0.0.
784  L1-regularization has the desirable effect that it causes the solution \a x to be sparse, i.e. only
785  the most important variables (called the <em>active set</em>) have non-zero values. The
786  key insight of the LARS algorithm is the following: When the solution vector is considered
787  as a function of the regularization parameter s, then <b>x</b>(s) is a piecewise
788  linear function, i.e. a polyline in n-dimensional space. The knots of the polyline
789  occur precisely at those values of s where one variable enters or leaves the active set,
790  and can be efficiently computed.
791 
792  Therefore, leastAngleRegression() returns the entire solution path as a sequence of knot points, starting
793  at \f$\textrm{\bf x}(s=0)\f$ (where the only feasible solution is obviously <b>x</b> = 0) and ending at
794  \f$\textrm{\bf x}(s=\infty)\f$ (where the solution becomes the ordinary least squares solution). Actually,
795  the initial null solution is not explicitly returned, i.e. the sequence starts at the first non-zero
796  solution with one variable in the active set. The function leastAngleRegression() returns the number
797  of solutions (i.e. knot points) computed.
798 
799  The sequences of active sets and corresponding variable weights are returned in \a activeSets and
800  \a solutions respectively. That is, <tt>activeSets[i]</tt> is an \ref vigra::ArrayVector "ArrayVector<int>"
801  containing the indices of the variables that are active at the i-th knot, and <tt>solutions</tt> is a
 802  containing the indices of the variables that are active at the i-th knot, and <tt>solutions[i]</tt> is a
803  example below). Variables not contained in <tt>activeSets[i]</tt> are zero at this solution.
804 
805  The behavior of the algorithm can be adapted by \ref vigra::linalg::LeastAngleRegressionOptions
806  "LeastAngleRegressionOptions":
807  <DL>
808  <DT><b>options.lasso()</b> (active by default)
809  <DD> Compute the LASSO solution as described above.
810  <DT><b>options.nnlasso()</b> (inactive by default)
811  <DD> Compute non-negative LASSO solutions, i.e. use the additional constraint that
812  <b>x</b> >= 0 in all solutions.
813  <DT><b>options.lars()</b> (inactive by default)
814  <DD> Compute a solution path according to the plain LARS rule, i.e. never remove
815  a variable from the active set once it entered.
816  <DT><b>options.leastSquaresSolutions(bool)</b> (default: true)
817  <DD> Use the algorithm mode selected above
818  to determine the sequence of active sets, but then compute and return an
819  ordinary (unconstrained) least squares solution for every active set.<br>
820  <b>Note:</b> The second form of leastAngleRegression() ignores this option and
821  does always compute both constrained and unconstrained solutions (returned in
822  \a lasso_solutions and \a lsq_solutions respectively).
823  <DT><b>maxSolutionCount(unsigned int n)</b> (default: n = 0, i.e. compute all solutions)
824  <DD> Compute at most <tt>n</tt> solutions.
825  </DL>
826 
827  <b>Usage:</b>
828 
829  \code
830  int m = ..., n = ...;
831  Matrix<double> A(m, n), b(m, 1);
832  ... // fill A and b
833 
834  // normalize the input
835  Matrix<double> offset(1,n), scaling(1,n);
836  prepareColumns(A, A, offset, scaling, DataPreparationGoals(ZeroMean|UnitVariance));
837  prepareColumns(b, b, DataPreparationGoals(ZeroMean));
838 
839  // arrays to hold the output
840  ArrayVector<ArrayVector<int> > activeSets;
841  ArrayVector<Matrix<double> > solutions;
842 
843  // run leastAngleRegression() in non-negative LASSO mode
844  int numSolutions = leastAngleRegression(A, b, activeSets, solutions,
845  LeastAngleRegressionOptions().nnlasso());
846 
847  // print results
848  Matrix<double> denseSolution(1, n);
849  for (MultiArrayIndex k = 0; k < numSolutions; ++k)
850  {
851  // transform the sparse solution into a dense vector
852  denseSolution.init(0.0); // ensure that inactive variables are zero
853  for (unsigned int i = 0; i < activeSets[k].size(); ++i)
854  {
855  // set the values of the active variables;
856  // activeSets[k][i] is the true index of the i-th variable in the active set
857  denseSolution(0, activeSets[k][i]) = solutions[k](i,0);
858  }
859 
860  // invert the input normalization
861  denseSolution = denseSolution * pointWise(scaling);
862 
863  // output the solution
864  std::cout << "solution " << k << ":\n" << denseSolution << std::endl;
865  }
866  \endcode
867 
868  <b>Required Interface:</b>
869 
870  <ul>
 871  <li> <tt>T</tt> must be a numeric type (compatible with double)
872  <li> <tt>Array1 a1;</tt><br>
873  <tt>a1.push_back(ArrayVector<int>());</tt>
874  <li> <tt>Array2 a2;</tt><br>
875  <tt>a2.push_back(Matrix<T>());</tt>
876  </ul>
877  */
878 doxygen_overloaded_function(template <...> unsigned int leastAngleRegression)
879 
880 template <class T, class C1, class C2, class Array1, class Array2>
881 inline unsigned int
882 leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
883  Array1 & activeSets, Array2 & solutions,
884  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
885 {
886  if(options.least_squares_solutions)
887  return detail::leastAngleRegressionImpl(A, b, activeSets, (Array2*)0, &solutions, options);
888  else
889  return detail::leastAngleRegressionImpl(A, b, activeSets, &solutions, (Array2*)0, options);
890 }
891 
892 template <class T, class C1, class C2, class Array1, class Array2>
893 inline unsigned int
894 leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
895  Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
896  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
897 {
898  return detail::leastAngleRegressionImpl(A, b, activeSets, &lasso_solutions, &lsq_solutions, options);
899 }
900 
901  /** Non-negative Least Squares Regression.
902 
903  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
 904  and a column vector \a b of length <tt>m</tt>, this function computes
905  a column vector \a x of length <tt>n</tt> with <b>non-negative entries</b> that solves the optimization problem
906 
907  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
908  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
909  \textrm{ subject to } \textrm{\bf x} \ge \textrm{\bf 0}
910  \f]
911 
912  Both \a b and \a x must be column vectors (i.e. matrices with <tt>1</tt> column).
913  Note that all matrices must already have the correct shape. The solution is computed by means
914  of \ref leastAngleRegression() with non-negativity constraint.
915 
916  <b>\#include</b> <vigra/regression.hxx>
917  Namespaces: vigra and vigra::linalg
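
 <b>Usage</b> (a minimal sketch; the shapes are invented for illustration):

 \code
 Matrix<double> A(4, 2), b(4, 1), x(2, 1);
 ... // fill A and b

 nonnegativeLeastSquares(A, b, x);  // all entries of x are now >= 0
 \endcode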
918  */
919 template <class T, class C1, class C2, class C3>
920 inline void
921 nonnegativeLeastSquares(MultiArrayView<2, T, C1> const & A,
922  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x)
923 {
924  vigra_precondition(columnCount(A) == rowCount(x) && rowCount(A) == rowCount(b),
925  "nonnegativeLeastSquares(): Matrix shape mismatch.");
926  vigra_precondition(columnCount(b) == 1 && columnCount(x) == 1,
927  "nonnegativeLeastSquares(): RHS and solution must be vectors (i.e. columnCount == 1).");
928 
 929  ArrayVector<ArrayVector<MultiArrayIndex> > activeSets;
 930  ArrayVector<Matrix<T> > results;
931 
932  leastAngleRegression(A, b, activeSets, results,
933  LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
934  x.init(NumericTraits<T>::zero());
935  if(activeSets.size() > 0)
936  for(unsigned int k=0; k<activeSets.back().size(); ++k)
937  x(activeSets.back()[k],0) = results.back()[k];
938 }
939 
940 
941 //@}
942 
943 } // namespace linalg
944 
945 using linalg::leastSquares;
946 using linalg::weightedLeastSquares;
947 using linalg::ridgeRegression;
948 using linalg::weightedRidgeRegression;
949 using linalg::ridgeRegressionSeries;
950 using linalg::nonnegativeLeastSquares;
951 using linalg::leastAngleRegression;
952 using linalg::LeastAngleRegressionOptions;
953 
954 } // namespace vigra
955 
956 #endif // VIGRA_REGRESSION_HXX
