
Source Code for Module mvpa.clfs.gpr

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   Copyright (c) 2008 Emanuele Olivetti <emanuele@relativita.com>
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Gaussian Process Regression (GPR)."""

__docformat__ = 'restructuredtext'


import numpy as N

from mvpa.base import externals

from mvpa.misc.state import StateVariable
from mvpa.clfs.base import Classifier
from mvpa.misc.param import Parameter
from mvpa.clfs.kernel import KernelSquaredExponential, KernelLinear
from mvpa.measures.base import Sensitivity
from mvpa.misc.exceptions import InvalidHyperparameterError
from mvpa.datasets import Dataset

if externals.exists("scipy", raiseException=True):
    from scipy.linalg import cho_solve as SLcho_solve
    from scipy.linalg import cholesky as SLcholesky
    import scipy.linalg as SL
    # Some local binding for bits of speed up
    SLAError = SL.basic.LinAlgError

if __debug__:
    from mvpa.base import debug

# Some local bindings for bits of speed up
Nlog = N.log
Ndot = N.dot
Ndiag = N.diag
NLAcholesky = N.linalg.cholesky
NLAsolve = N.linalg.solve
NLAError = N.linalg.linalg.LinAlgError
eps64 = N.finfo(N.float64).eps

# Some precomputed items. log is relatively expensive
_halflog2pi = 0.5 * Nlog(2 * N.pi)

class GPR(Classifier):
    """Gaussian Process Regression (GPR)."""

    predicted_variances = StateVariable(enabled=False,
        doc="Variance per each predicted value")

    log_marginal_likelihood = StateVariable(enabled=False,
        doc="Log Marginal Likelihood")

    log_marginal_likelihood_gradient = StateVariable(enabled=False,
        doc="Log Marginal Likelihood Gradient")

    _clf_internals = [ 'gpr', 'regression', 'retrainable' ]


    # NOTE XXX Parameters of the classifier. Values available as
    # clf.parameter or clf.params.parameter, or as
    # clf.params['parameter'] (as the full Parameter object)
    #
    # __doc__ and __repr__ for the class are conveniently adjusted to
    # reflect values of those params

    # Kernel machines/classifiers should be refactored also to behave
    # the same and define kernel parameter appropriately... TODO, but SVMs
    # already kinda do it nicely ;-)

    sigma_noise = Parameter(0.001, allowedtype='float', min=1e-10,
        doc="The standard deviation of the Gaussian noise.")

    # XXX For now I don't introduce kernel parameter since yet to unify
    # kernel machines
    #kernel = Parameter(None, allowedtype='Kernel',
    #    doc="Kernel object defining the covariance between instances. "
    #        "(Defaults to KernelSquaredExponential if None in arguments)")

    lm = Parameter(0.0, min=0.0, allowedtype='float',
        doc="""The regularization term lambda.
        Increase this when the kernel matrix is not positive definite.""")

    def __init__(self, kernel=None, **kwargs):
        """Initialize a GPR regression analysis.

        :Parameters:
          kernel : Kernel
            a kernel object defining the covariance between instances.
            (Defaults to KernelSquaredExponential if None in arguments)
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        # It does not make sense to calculate a confusion matrix for a GPR
        # XXX it does ;) it will be a RegressionStatistics actually ;-)
        #     So if someone desires -- let him have it
        # self.states.enable('training_confusion', False)

        # set kernel:
        if kernel is None:
            kernel = KernelSquaredExponential()
        self.__kernel = kernel

        # append proper clf_internal depending on the kernel
        # TODO: unify finally all kernel-based machines.
        #       make SMLR to use kernels
        if isinstance(kernel, KernelLinear):
            self._clf_internals += ['linear']
        else:
            self._clf_internals += ['non-linear']
        if externals.exists('openopt'):
            self._clf_internals += ['has_sensitivity']

        # No need to initialize state variables. Unless they got set
        # they would raise an exception: self.predicted_variances = None,
        # self.log_marginal_likelihood = None
        self._init_internals()
        pass

    def _init_internals(self):
        """Reset some internal variables to None.

        To be used in constructor and untrain()
        """
        self._train_fv = None
        self._labels = None
        self._km_train_train = None
        self._train_labels = None
        self._alpha = None
        self._L = None
        self._LL = None
        self.__kernel.reset()
        pass

    def __repr__(self):
        """String summary of the object
        """
        return super(GPR, self).__repr__(
            prefixes=['kernel=%s' % self.__kernel])

154 """ 155 Compute log marginal likelihood using self.train_fv and self.labels. 156 """ 157 if __debug__: 158 debug("GPR", "Computing log_marginal_likelihood") 159 self.log_marginal_likelihood = \ 160 -0.5*Ndot(self._train_labels, self._alpha) - \ 161 Nlog(self._L.diagonal()).sum() - \ 162 self._km_train_train.shape[0] * _halflog2pi 163 return self.log_marginal_likelihood
164 165
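    # Illustrative note (added for reference): the expression in
    # compute_log_marginal_likelihood() is the standard Cholesky-based form
    # of the GP log marginal likelihood,
    #   log p(y|X) = -1/2 * y^T alpha - sum_i log(L_ii) - n/2 * log(2*pi)
    # with K_y = K(X, X) + sigma_noise^2 * I = L L^T and alpha = K_y^{-1} y,
    # i.e. exactly the self._L and self._alpha cached by _train().
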
167 """Compute gradient of the log marginal likelihood. This 168 version use a more compact formula provided by Williams and 169 Rasmussen book. 170 """ 171 # XXX EO: check whether the precomputed self.alpha self.Kinv 172 # are actually the ones corresponding to the hyperparameters 173 # used to compute this gradient! 174 # YYY EO: currently this is verified outside gpr.py but it is 175 # not an efficient solution. 176 # XXX EO: Do some memoizing since it could happen that some 177 # hyperparameters are kept constant by user request, so we 178 # don't need (somtimes) to recompute the corresponding 179 # gradient again. 180 181 # self.Kinv = N.linalg.inv(self._C) 182 # Faster: 183 Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0])) 184 185 alphalphaT = N.dot(self._alpha[:,None], self._alpha[None,:]) 186 tmp = alphalphaT - Kinv 187 # Pass tmp to __kernel and let it compute its gradient terms. 188 # This scales up to huge number of hyperparameters: 189 grad_LML_hypers = self.__kernel.compute_lml_gradient( 190 tmp, self._train_fv) 191 grad_K_sigma_n = 2.0*self.sigma_noise*N.eye(tmp.shape[0]) 192 # Add the term related to sigma_noise: 193 # grad_LML_sigma_n = 0.5 * N.trace(N.dot(tmp,grad_K_sigma_n)) 194 # Faster formula: tr(AB) = (A*B.T).sum() 195 grad_LML_sigma_n = 0.5 * (tmp * (grad_K_sigma_n).T).sum() 196 lml_gradient = N.hstack([grad_LML_sigma_n, grad_LML_hypers]) 197 self.log_marginal_likelihood_gradient = lml_gradient 198 return lml_gradient
199 200
202 """Compute gradient of the log marginal likelihood when 203 hyperparameters are in logscale. This version use a more 204 compact formula provided by Williams and Rasmussen book. 205 """ 206 # Kinv = N.linalg.inv(self._C) 207 # Faster: 208 Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0])) 209 alphalphaT = N.dot(self._alpha[:,None], self._alpha[None,:]) 210 tmp = alphalphaT - Kinv 211 grad_LML_log_hypers = \ 212 self.__kernel.compute_lml_gradient_logscale(tmp, self._train_fv) 213 grad_K_log_sigma_n = 2.0 * self.sigma_noise ** 2 * N.eye(Kinv.shape[0]) 214 # Add the term related to sigma_noise: 215 # grad_LML_log_sigma_n = 0.5 * N.trace(N.dot(tmp, grad_K_log_sigma_n)) 216 # Faster formula: tr(AB) = (A * B.T).sum() 217 grad_LML_log_sigma_n = 0.5 * (tmp * (grad_K_log_sigma_n).T).sum() 218 lml_gradient = N.hstack([grad_LML_log_sigma_n, grad_LML_log_hypers]) 219 self.log_marginal_likelihood_gradient = lml_gradient 220 return lml_gradient
221 222
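    # Illustrative note (added for reference): both gradient methods above
    # implement the compact expression
    #   d/dtheta log p(y|X) = 1/2 * tr( (alpha*alpha^T - K_y^{-1}) * dK_y/dtheta )
    # with alpha = K_y^{-1} y. For the noise hyperparameter,
    # dK_y/dsigma_noise = 2*sigma_noise*I, and when sigma_noise is optimized
    # in log scale dK_y/dlog(sigma_noise) = 2*sigma_noise^2*I, which is what
    # grad_K_sigma_n and grad_K_log_sigma_n encode.
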
    def getSensitivityAnalyzer(self, flavor='auto', **kwargs):
        """Returns a sensitivity analyzer for GPR.

        :Parameters:
          flavor : basestring
            What sensitivity to provide. Valid values are
            'linear', 'model_select', 'auto'.
            In case of 'auto' selects 'linear' for a linear kernel
            and 'model_select' for the rest. 'linear' corresponds to
            GPRLinearWeights and 'model_select' to GPRWeights
        """
        # XXX The following two lines do not work since
        # self.__kernel is an instance of kernel.KernelLinear and not
        # just KernelLinear. How to fix?
        # YYY yoh is not sure what is the problem... KernelLinear is actually
        #     kernel.KernelLinear so everything should be ok
        if flavor == 'auto':
            flavor = ('model_select', 'linear')\
                     [int(isinstance(self.__kernel, KernelLinear))]
        if __debug__:
            debug("GPR", "Returning '%s' sensitivity analyzer" % flavor)

        # Return proper sensitivity
        if flavor == 'linear':
            return GPRLinearWeights(self, **kwargs)
        elif flavor == 'model_select':
            # sanity check
            if not ('has_sensitivity' in self._clf_internals):
                raise ValueError, \
                      "model_select flavor is not available, probably " \
                      "because the 'openopt' module is not available"
            return GPRWeights(self, **kwargs)
        else:
            raise ValueError, "Flavor %s is not recognized" % flavor

    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """

        # local bindings for faster lookup
        retrainable = self.params.retrainable
        if retrainable:
            newkernel = False
            newL = False
            _changedData = self._changedData

        self._train_fv = train_fv = data.samples
        self._train_labels = train_labels = data.labels

        if not retrainable or _changedData['traindata'] \
               or _changedData.get('kernel_params', False):
            if __debug__:
                debug("GPR", "Computing train train kernel matrix")
            self._km_train_train = km_train_train = self.__kernel.compute(train_fv)
            newkernel = True
            if retrainable:
                self._km_train_test = None  # reset to facilitate recomputation
        else:
            if __debug__:
                debug("GPR", "Not recomputing kernel since retrainable and "
                      "nothing has changed")
            km_train_train = self._km_train_train  # reuse

        if not retrainable or newkernel or _changedData['params']:
            if __debug__:
                debug("GPR", "Computing L. sigma_noise=%g" % self.sigma_noise)
            # XXX it seems that we do not need binding to object, but may be
            # commented out code would return?
            self._C = km_train_train + \
                  self.sigma_noise ** 2 * N.identity(km_train_train.shape[0], 'd')
            # The following decomposition could raise
            # N.linalg.linalg.LinAlgError because of numerical
            # reasons, due to the too rapid decay of 'self._C'
            # eigenvalues. In that case we try adding a small constant
            # to self._C, e.g. epsilon=1.0e-20. It should be a form of
            # Tikhonov regularization. This is equivalent to adding
            # a little white Gaussian noise to the data.
            #
            # XXX EO: how to choose epsilon?
            #
            # Cholesky decomposition is provided by three different
            # NumPy/SciPy routines (fastest first):
            # 1) self._LL = scipy.linalg.cho_factor(self._C, lower=True)
            #    self._L = L = N.tril(self._LL[0])
            # 2) self._L = scipy.linalg.cholesky(self._C, lower=True)
            # 3) self._L = numpy.linalg.cholesky(self._C)
            # Even though 1 is the fastest we choose 2 since 1 does
            # not return a clean lower-triangular matrix (see docstring).

            # PBS: I just made it so the KernelMatrix is regularized
            # all the time. I figured that if ever you were going to
            # use regularization, you would want to set it yourself
            # and use the same value for all folds of your data.
            try:
                # apply regularization
                epsilon = self.params.lm * N.eye(self._C.shape[0])
                self._L = SLcholesky(self._C + epsilon, lower=True)
                self._LL = (self._L, True)
            except SLAError:
                raise SLAError("Kernel matrix is not positive definite. "
                               "Try increasing the lm parameter.")
            pass
            newL = True
        else:
            if __debug__:
                debug("GPR", "Not computing L since kernel, data and params "
                      "stayed the same")
            L = self._L  # reuse

        # XXX we leave _alpha being recomputed, although we could check
        #     if newL or _changedData['labels']
        #
        if __debug__:
            debug("GPR", "Computing alpha")
        # self._alpha = NLAsolve(L.transpose(),
        #                        NLAsolve(L, train_labels))
        # Faster:
        self._alpha = SLcho_solve(self._LL, train_labels)

        # compute only if the state is enabled
        if self.states.isEnabled('log_marginal_likelihood'):
            self.compute_log_marginal_likelihood()
            pass

        if retrainable:
            # we must assign it only if it is retrainable
            self.states.retrained = not newkernel or not newL

        if __debug__:
            debug("GPR", "Done training")

        pass

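    # Illustrative note (added for reference): _train() caches everything
    # prediction needs, namely the lower Cholesky factor L with
    #   K_y = K(X, X) + sigma_noise^2 * I = L L^T
    # and alpha = K_y^{-1} y obtained via cho_solve, so no explicit matrix
    # inverse is ever formed.
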
    def _predict(self, data):
        """
        Predict the output for the provided data.
        """
        retrainable = self.params.retrainable

        if not retrainable or self._changedData['testdata'] \
               or self._km_train_test is None:
            if __debug__:
                debug('GPR', "Computing train test kernel matrix")
            km_train_test = self.__kernel.compute(self._train_fv, data)
            if retrainable:
                self._km_train_test = km_train_test
                self.states.repredicted = False
        else:
            if __debug__:
                debug('GPR', "Not recomputing train test kernel matrix")
            km_train_test = self._km_train_test
            self.states.repredicted = True


        predictions = Ndot(km_train_test.transpose(), self._alpha)

        if self.states.isEnabled('predicted_variances'):
            # do computation only if state variable was enabled
            if not retrainable or self._km_test_test is None \
                   or self._changedData['testdata']:
                if __debug__:
                    debug('GPR', "Computing test test kernel matrix")
                km_test_test = self.__kernel.compute(data)
                if retrainable:
                    self._km_test_test = km_test_test
            else:
                if __debug__:
                    debug('GPR', "Not recomputing test test kernel matrix")
                km_test_test = self._km_test_test

            if __debug__:
                debug("GPR", "Computing predicted variances")
            L = self._L
            # v = NLAsolve(L, km_train_test)
            # Faster:
            piv = N.arange(L.shape[0])
            v = SL.lu_solve((L.T, piv), km_train_test, trans=1)
            # self.predicted_variances = \
            #     Ndiag(km_test_test - Ndot(v.T, v)) \
            #     + self.sigma_noise**2
            # Faster formula: N.diag(Ndot(v.T, v)) = (v**2).sum(0):
            self.predicted_variances = Ndiag(km_test_test) - (v ** 2).sum(0) \
                                       + self.sigma_noise ** 2
            pass

        if __debug__:
            debug("GPR", "Done predicting")
        return predictions

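    # Illustrative note (added for reference): _predict() implements the
    # usual GP predictive equations
    #   mean(x*) = k(X, x*)^T alpha
    #   var(x*)  = k(x*, x*) - || v ||^2 + sigma_noise^2,   v = L^{-1} k(X, x*)
    # where v is obtained by a triangular solve against the cached factor L
    # (here via lu_solve on the already-factored L) instead of inverting K_y.
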
    def _setRetrainable(self, value, force=False):
        """Internal function : need to set _km_test_test
        """
        super(GPR, self)._setRetrainable(value, force)
        if force or (value and value != self.params.retrainable):
            self._km_test_test = None

    def untrain(self):
        super(GPR, self).untrain()
        # XXX might need to take special care for retrainable. later
        self._init_internals()
        pass

    def set_hyperparameters(self, hyperparameter):
        """
        Set hyperparameters' values.

        Note that 'hyperparameter' is a sequence, so the order of its
        values is important. The first value must be sigma_noise, then
        the other kernel hyperparameters' values follow in the exact
        order the kernel expects them to be.
        """
        if hyperparameter[0] < GPR.sigma_noise.min:
            raise InvalidHyperparameterError()
        self.sigma_noise = hyperparameter[0]
        if hyperparameter.size > 1:
            self.__kernel.set_hyperparameters(hyperparameter[1:])
            pass
        return

    kernel = property(fget=lambda self: self.__kernel)
    pass

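# Illustrative usage sketch (commented out; keyword names such as
# `regression=True` and `length_scale` are assumed from the PyMVPA 0.x API):
#
#   import numpy as N
#   from mvpa.datasets import Dataset
#   from mvpa.clfs.gpr import GPR
#   from mvpa.clfs.kernel import KernelSquaredExponential
#
#   ds = Dataset(samples=N.random.rand(20, 4), labels=N.random.rand(20))
#   clf = GPR(KernelSquaredExponential(length_scale=1.0),
#             sigma_noise=0.01, regression=True)
#   clf.states.enable('predicted_variances')
#   clf.train(ds)
#   mean = clf.predict(ds.samples)       # posterior mean per sample
#   var = clf.predicted_variances        # posterior variance per sample
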
class GPRLinearWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights of a GPR trained
    on a given `Dataset`.

    In case of KernelLinear compute explicitly the coefficients
    of the linear regression, together with their variances (if
    requested).

    Note that the intercept is not computed.
    """

    variances = StateVariable(enabled=False,
        doc="Variances of the weights (for KernelLinear)")

    _LEGAL_CLFS = [ GPR ]

    def _call(self, dataset):
        """Extract weights from GPR
        """

        clf = self.clf
        kernel = clf.kernel
        train_fv = clf._train_fv

        weights = Ndot(kernel.Sigma_p,
                       Ndot(train_fv.T, clf._alpha))

        if self.states.isEnabled('variances'):
            # super ugly formulas that can quite surely be improved:
            tmp = N.linalg.inv(clf._L)
            Kyinv = Ndot(tmp.T, tmp)
            # XXX in such lengthy matrix manipulations you might be better off
            #     using N.matrix where * is a matrix product
            self.states.variances = Ndiag(
                kernel.Sigma_p -
                Ndot(kernel.Sigma_p,
                     Ndot(train_fv.T,
                          Ndot(Kyinv,
                               Ndot(train_fv, kernel.Sigma_p)))))
        return weights



if externals.exists('openopt'):

    from mvpa.clfs.model_selector import ModelSelector

    class GPRWeights(Sensitivity):
        """`SensitivityAnalyzer` that reports the weights of a GPR trained
        on a given `Dataset`.
        """

        _LEGAL_CLFS = [ GPR ]

        def _call(self, dataset):
            """Extract weights from GPR
            """

            clf = self.clf
            # normalize data:
            clf._train_labels = (clf._train_labels - clf._train_labels.mean()) \
                                / clf._train_labels.std()
            # clf._train_fv = (clf._train_fv - clf._train_fv.mean(0)) \
            #                 / clf._train_fv.std(0)
            dataset = Dataset(samples=clf._train_fv, labels=clf._train_labels)
            clf.states.enable("log_marginal_likelihood")
            ms = ModelSelector(clf, dataset)
            # Note that some kernels do not have a gradient yet!
            sigma_noise_initial = 1.0e-5
            sigma_f_initial = 1.0
            length_scale_initial = N.ones(dataset.nfeatures) * 1.0e4
            # length_scale_initial = N.random.rand(dataset.nfeatures)*1.0e4
            hyp_initial_guess = N.hstack([sigma_noise_initial,
                                          sigma_f_initial,
                                          length_scale_initial])
            fixedHypers = N.array([0] * hyp_initial_guess.size, dtype=bool)
            fixedHypers = None
            problem = ms.max_log_marginal_likelihood(
                hyp_initial_guess=hyp_initial_guess,
                optimization_algorithm="scipy_lbfgsb",
                ftol=1.0e-3, fixedHypers=fixedHypers,
                use_gradient=True, logscale=True)
            if __debug__ and 'GPR_WEIGHTS' in debug.active:
                problem.iprint = 1
            lml = ms.solve()
            weights = 1.0 / ms.hyperparameters_best[2:]  # weight = 1/length_scale
            if __debug__:
                debug("GPR",
                      "%s, train: shape %s, labels %s, min:max %g:%g, "
                      "sigma_noise %g, sigma_f %g" %
                      (clf, clf._train_fv.shape, N.unique(clf._train_labels),
                       clf._train_fv.min(), clf._train_fv.max(),
                       ms.hyperparameters_best[0], ms.hyperparameters_best[1]))

            return weights
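
    # Illustrative note (added for reference): the 1/length_scale weights
    # returned above follow the automatic relevance determination (ARD)
    # reading of a squared-exponential kernel with one length scale per
    # feature: the shorter the optimized length scale for a feature, the more
    # the function is allowed to vary along it, hence the larger its relevance.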