1 """
2 Approximation of a distribution as a mixture of gaussians with a zero mean but different sigmas
3 """
4
5 import numpy.random
6
7 import csb.core
8 import csb.statistics.ars
9 import csb.statistics.rand
10
11 from csb.core import typedproperty, validatedproperty
12 from abc import abstractmethod, ABCMeta
13
14 from csb.numeric import log, exp, approx_psi, inv_psi, d_approx_psi
15 from scipy.special import psi, kve
16 from csb.statistics import harmonic_mean, geometric_mean
17 from csb.statistics.pdf import AbstractEstimator, AbstractDensity, Gamma, InverseGamma, NullEstimator
21 """
22 Solve y = psi(alpha) - log(alpha) for alpha by fixed point
23 integration.
24 """
25 if y >= -log(6.):
26 x = 1 / (2 * (1 - exp(y)))
27 else:
28 x = 1.e-10
29 for _i in range(n_iter):
30 z = approx_psi(x) - log(x) - y
31 if abs(z) < tol:
32 break
33 x -= x * z / (x * d_approx_psi(x) - 1)
34 x = abs(x)
35 return x
36
38 """
39 Prior on the scales of a L{ScaleMixture}, which determines how the scales
40 are estimated.
41 """
42
43 __metaclass__ = ABCMeta
44
45 @abstractmethod
47 """
48 Return an appropriate estimator for the scales of the mixture distribution
49 under this prior.
50 """
51 pass
52
55 """
56 Prior on the scales of a L{ScaleMixture}, which determines how the scales
57 are estimated.
58 """
59
64
65 @property
67 return self._scale_estimator
68
69 @property
71 return self._estimator
72
73 @estimator.setter
82
85 """
86 Robust probabilistic superposition and comparison of protein structures
87 Martin Mechelke and Michael Habeck
88
89 Representation of a distribution as a mixture of gaussians with a mean of
90 zero and different inverse variances/scales. The number of scales equals
91 the number of datapoints.
92
93 The underlying family is determined by a prior L{ScaleMixturePrior} on the
94 scales. Choosing a L{GammaPrior} results in a Student-t posterior, whereas an
95 L{InvGammaPrior} leads to a K-Distribution as posterior.
96 """
97
98 - def __init__(self, scales=numpy.array([1., 1.]), prior=None, d=3):
110
111 @property
113 return numpy.squeeze(self['scales'])
114
115 @scales.setter
117 if not isinstance(value, csb.core.string) and \
118 isinstance(value, (numpy.ndarray, list, tuple)):
119 self['scales'] = numpy.array(value)
120 else:
121 raise ValueError("numpy array expected")
122
123 @property
126
127 @prior.setter
133
135 from csb.numeric import log_sum_exp
136
137 dim = self._d
138 s = self.scales
139
140 log_p = numpy.squeeze(-numpy.multiply.outer(x * x, 0.5 * s)) + \
141 numpy.squeeze(dim * 0.5 * (log(s) - log(2 * numpy.pi)))
142
143 if self._prior is not None:
144 log_p += numpy.squeeze(self._prior.log_prob(s))
145 return log_sum_exp(log_p.T, 0)
146
147
148 - def random(self, shape=None):
161
162
class ARSPosteriorAlpha(csb.statistics.ars.LogProb):
    """
    Posterior distribution of the alpha parameter of the Gamma and
    Inverse Gamma priors; supports sampling by adaptive rejection
    sampling (L{ARS}).
    """

    def __init__(self, a, b, n):
        # sufficient statistics defining the posterior
        self.a = float(a)
        self.b = float(b)
        self.n = float(n)

    def __call__(self, x):
        """
        Return the log-probability and its first derivative at C{x}.
        """
        from scipy.special import gammaln

        # clip keeps gammaln away from 0 and overflow
        log_p = self.a * x \
                - self.n * gammaln(numpy.clip(x, 1e-308, 1e308)) \
                + self.b * log(x)
        d_log_p = self.a - self.n * psi(x) + self.b / x

        return log_p, d_log_p

    def initial_values(self, tol=1e-10):
        """
        Generate initial values (lower, upper, mode) by fixed point
        iteration solving for alpha.
        """
        n, a, b = self.n, self.a, self.b

        # closed-form starting guess when b is negligible
        if abs(b) < 1e-10:
            alpha = inv_psi(a / n)
        else:
            alpha = 1.

        residual = tol + 1.
        while abs(residual) > tol:
            # Newton iteration on z(alpha) = n * psi(alpha) - b / alpha - a;
            # clips guard against division blow-ups
            residual = n * psi(alpha) \
                       - b / numpy.clip(alpha, 1e-300, 1e300) - a
            alpha -= residual / (n * d_approx_psi(alpha)
                                 - b / (alpha ** 2 + 1e-300))
            alpha = numpy.clip(alpha, 1e-100, 1e300)

        lower = numpy.clip(alpha - 1 / (n + 1e-300), 1e-100, 1e300)
        upper = alpha + 1 / (n + 1e-300)
        return lower, upper, alpha
211
212 -class GammaPosteriorSampler(ScaleMixturePriorEstimator):
213
214 - def __init__(self):
215 super(GammaPosteriorSampler, self).__init__()
216 self.n_samples = 2
217
220
221 - def estimate(self, context, data):
222 """
223 Generate samples from the posterior of alpha and beta.
224
225 For beta the posterior is a gamma distribution and analytically
226 acessible.
227
228 The posterior of alpha can not be expressed analytically and is
229 aproximated using adaptive rejection sampling.
230 """
231 pdf = GammaPrior()
232
233
234
235 a = numpy.mean(data)
236 b = exp(numpy.mean(log(data)))
237 v = numpy.std(data) ** 2
238 n = len(data)
239
240 beta = a / v
241 alpha = beta * a
242 samples = []
243
244 for _i in range(self.n_samples):
245
246
247 beta = numpy.random.gamma(n * alpha + context._hyper_beta.alpha,
248 1 / (n * a + context._hyper_beta.beta))
249
250
251 logp = ARSPosteriorAlpha(n * log(beta * b)\
252 - context.hyper_alpha.beta,
253 context.hyper_alpha.alpha - 1., n)
254 ars = csb.statistics.ars.ARS(logp)
255 ars.initialize(logp.initial_values()[:2], z0=0.)
256 alpha = ars.sample()
257
258 if alpha is None:
259 raise ValueError("ARS failed")
260
261 samples.append((alpha, beta))
262
263 pdf.alpha, pdf.beta = samples[-1]
264
265 return pdf
266
267 -class GammaPosteriorMAP(ScaleMixturePriorEstimator):
268
269 - def __init__(self):
271
273 return GammaScaleMAP()
274
275 - def estimate(self, context, data):
276 """
277 Estimate alpha and beta from their posterior
278 """
279
280 pdf = GammaPrior()
281
282 s = data[0].mean()
283 y = data[1].mean() - log(s) - 1.
284
285 alpha = abs(inv_digamma_minus_log(numpy.clip(y,
286 - 1e308,
287 - 1e-300)))
288 beta = alpha / s
289
290
291 pdf.alpha, pdf.beta = alpha, beta
292 return pdf
293
294
295
296
297 -class InvGammaPosteriorSampler(ScaleMixturePriorEstimator):
298
299 - def __init__(self):
300 super(InvGammaPosteriorSampler, self).__init__()
301 self.n_samples = 2
302
305
306 - def estimate(self, context, data):
307 """
308 Generate samples from the posterior of alpha and beta.
309
310 For beta the posterior is a gamma distribution and analytically
311 acessible.
312
313 The posterior of alpha can not be expressed analytically and is
314 aproximated using adaptive rejection sampling.
315 """
316 pdf = GammaPrior()
317
318
319
320 h = harmonic_mean(numpy.clip(data, 1e-308, 1e308))
321 g = geometric_mean(numpy.clip(data, 1e-308, 1e308))
322
323 n = len(data)
324
325 samples = []
326
327 a = numpy.mean(1 / data)
328 v = numpy.std(1 / data) ** 2
329
330 beta = a / v
331 alpha = beta * a
332
333 for i in range(self.n_samples):
334
335
336
337 logp = ARSPosteriorAlpha(n * (log(beta) - log(g)) - context.hyper_alpha.beta,
338 context.hyper_alpha.alpha - 1., n)
339 ars = csb.statistics.ars.ARS(logp)
340 ars.initialize(logp.initial_values()[:2], z0=0.)
341
342 alpha = numpy.abs(ars.sample())
343
344 if alpha is None:
345 raise ValueError("Sampling failed")
346
347
348
349
350 beta = numpy.random.gamma(n * alpha + context.hyper_beta.alpha, \
351 1 / (n / h + context.hyper_beta.beta))
352
353 samples.append((alpha, beta))
354
355 pdf.alpha, pdf.beta = samples[-1]
356 return pdf
357
358
359 -class InvGammaPosteriorMAP(ScaleMixturePriorEstimator):
360
361 - def __init__(self):
363
366
367 - def estimate(self, context, data):
368 """
369 Generate samples from the posterior of alpha and beta.
370
371 For beta the posterior is a gamma distribution and analytically
372 acessible.
373
374 The posterior of alpha can not be expressed analytically and is
375 aproximated using adaptive rejection sampling.
376 """
377 pdf = GammaPrior()
378
379
380 y = log(data).mean() - log((data ** -1).mean())
381
382 alpha = inv_digamma_minus_log(numpy.clip(y,
383 - 1e308,
384 - 1e-300))
385 alpha = abs(alpha)
386
387 beta = numpy.clip(alpha /
388 (data ** (-1)).mean(),
389 1e-100, 1e100)
390
391 pdf.alpha, pdf.beta = alpha, beta
392 return pdf
393
397 """
398 Sample the scales given the data
399 """
400
420
423 """
424 MAP estimator of the scales
425 """
426
447
451 """
452 Sample the scales given the data
453 """
454
476
480 """
481 MAP estimator of the scales
482 """
504
505
506
507 -class GammaPrior(ScaleMixturePrior, Gamma):
508 """
509 Gamma prior on mixture weights including a weak gamma prior on its parameter.
510 """
511
512 - def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.),
513 hyper_beta=(2., 1.)):
520
521 @typedproperty(AbstractDensity)
524
525 @typedproperty(AbstractDensity)
528
530
531 a, b = self['alpha'], self['beta']
532
533 l_a = self._hyper_alpha(a)
534 l_b = self._hyper_beta(b)
535
536 return super(GammaPrior, self).log_prob(x) + l_a + l_b
537
541 """
542 Inverse Gamma prior on mixture weights including a weak gamma
543 prior on its parameter.
544 """
545
546 - def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.),
547 hyper_beta=(2., 1.)):
554
555 @typedproperty(AbstractDensity)
558
559 @typedproperty(AbstractDensity)
562
564
565 a, b = self['alpha'], self['beta']
566
567 l_a = self._hyper_alpha(a)
568 l_b = self._hyper_beta(b)
569
570 return super(InvGammaPrior, self).log_prob(x) + l_a + l_b
571