1
2
3
4
5
6
7
8
"""Unit tests for PyMVPA stats helpers -- those requiring scipy"""

# Pull in the scipy-free stats tests' environment (unittest, N, datasets,
# null-dist classes, sweepargs, cfg, ...) -- this file extends that suite
from test_stats import *
# scipy is a hard requirement for everything below -- fail early if absent
externals.exists('scipy', raiseException=True)

from scipy import signal
from mvpa.misc.stats import chisquare
# NOTE(review): the class header was lost in this garbled dump; name
# reconstructed from the PyMVPA test suite -- confirm against upstream
class StatsTestsScipy(unittest.TestCase):
    """Unittests for various statistics which use scipy"""
22 """Test chi-square distribution"""
23
24 tbl = N.array([[5, 5], [5, 5]])
25 chi, p = chisquare(tbl)
26 self.failUnless( chi == 0.0 )
27 self.failUnless( p == 1.0 )
28
29
30 tbl = N.array([[4, 0], [0, 4]])
31 chi, p = chisquare(tbl)
32 self.failUnless(chi == 8.0)
33 self.failUnless(p < 0.05)
34
35
37 """Test 'any' tail statistics estimation"""
38 if not externals.exists('scipy'):
39 return
40
41
42 from mvpa.measures.corrcoef import CorrCoef
43 ds = datasets['uni2small']
44
45 null = MCNullDist(permutations=10, tail='any')
46 null.fit(CorrCoef(), ds)
47
48
49
50 self.failUnless(null.p([-100, 0, 0, 0, 0, 0])[0] == 0)
51 self.failUnless(null.p([100, 0, 0, 0, 0, 0])[0] == 0)
52
53
54 null.fit(CorrCoef(), ds.selectFeatures([0]))
55 self.failUnless(null.p(-100) == 0)
56 self.failUnless(null.p(100) == 0)
57
58
59 @sweepargs(nd=nulldist_sweep)
61 """Test estimation of measures statistics"""
62 if not externals.exists('scipy'):
63
64 return
65
66 ds = datasets['uni2medium']
67
68 m = OneWayAnova(null_dist=nd, enable_states=['null_t'])
69 score = m(ds)
70
71 score_nonbogus = N.mean(score[ds.nonbogus_features])
72 score_bogus = N.mean(score[ds.bogus_features])
73
74 self.failUnless(score_bogus < score_nonbogus)
75
76 null_prob_nonbogus = m.null_prob[ds.nonbogus_features]
77 null_prob_bogus = m.null_prob[ds.bogus_features]
78
79 self.failUnless((null_prob_nonbogus < 0.05).all(),
80 msg="Nonbogus features should have a very unlikely value. Got %s"
81 % null_prob_nonbogus)
82
83
84 self.failUnless(N.mean(N.abs(null_prob_bogus)) >
85 N.mean(N.abs(null_prob_nonbogus)))
86
87 if isinstance(nd, MCNullDist):
88
89 return
90
91 if cfg.getboolean('tests', 'labile', default='yes'):
92
93
94 self.failUnless((N.abs(m.null_t[ds.nonbogus_features]) >= 5).all(),
95 msg="Nonbogus features should have high t-score. Got %s"
96 % (m.null_t[ds.nonbogus_features]))
97
98 bogus_min = min(N.abs(m.null_t[ds.bogus_features]))
99 self.failUnless(bogus_min < 4,
100 msg="Some bogus features should have low t-score of %g."
101 "Got (t,p,sens):%s"
102 % (bogus_min,
103 zip(m.null_t[ds.bogus_features],
104 m.null_prob[ds.bogus_features],
105 score[ds.bogus_features])))
106
107
109 """Basic testing of the sign in p and t scores
110 """
111 from mvpa.measures.base import FeaturewiseDatasetMeasure
112
113 class BogusMeasure(FeaturewiseDatasetMeasure):
114 """Just put high positive into first 2 features, and high
115 negative into 2nd two
116 """
117 def _call(self, dataset):
118 """just a little helper... pylint shut up!"""
119 res = N.random.normal(size=(dataset.nfeatures,))
120 res[0] = res[1] = 100
121 res[2] = res[3] = -100
122 return res
123
124 nd = FixedNullDist(scipy.stats.norm(0, 0.1), tail='any')
125 m = BogusMeasure(null_dist=nd, enable_states=['null_t'])
126 ds = datasets['uni2small']
127 score = m(ds)
128 t, p = m.null_t, m.null_prob
129 self.failUnless((p>=0).all())
130 self.failUnless((t[:2] > 0).all())
131 self.failUnless((t[2:4] < 0).all())
132
133
135 """Some really basic testing for matchDistribution
136 """
137 from mvpa.clfs.stats import matchDistribution, rv_semifrozen
138
139 ds = datasets['uni2medium']
140 data = ds.samples[:, ds.bogus_features[0]]
141
142
143
144
145 floc = rv_semifrozen(scipy.stats.norm, loc=0).fit(data)
146 self.failUnless(floc[0] == 0)
147
148 fscale = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data)
149 self.failUnless(fscale[1] == 1)
150
151 flocscale = rv_semifrozen(scipy.stats.norm, loc=0, scale=1.0).fit(data)
152 self.failUnless(flocscale[1] == 1 and flocscale[0] == 0)
153
154 full = scipy.stats.norm.fit(data)
155 for res in [floc, fscale, flocscale, full]:
156 self.failUnless(len(res) == 2)
157
158 data_mean = N.mean(data)
159 for loc in [None, data_mean]:
160 for test in ['p-roc', 'kstest']:
161
162 matched = matchDistribution(
163 data=data,
164 distributions = ['scipy',
165 ('norm',
166 {'name': 'norm_fixed',
167 'loc': 0.2,
168 'scale': 0.3})],
169 test=test, loc=loc, p=0.05)
170
171 names = [m[2] for m in matched]
172 if test == 'p-roc':
173 if cfg.getboolean('tests', 'labile', default='yes'):
174
175 self.failUnless('norm' in names)
176 self.failUnless('norm_fixed' in names)
177 inorm = names.index('norm_fixed')
178
179
180 self.failUnless(inorm <= 30)
181
183 """Test either rdist distribution performs nicely
184 """
185 try:
186
187 scipy.stats.rdist(1.32, 0, 1).pdf(-1.0+N.finfo(float).eps)
188 except Exception, e:
189 self.fail('Failed to compute rdist.pdf due to numeric'
190 ' loss of precision. Exception was %s' % e)
191
192 try:
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210 scipy.stats.rdist(1.32, 0, 1).cdf(-1.0+N.finfo(float).eps)
211 except IndexError, e:
212 self.fail('Failed due to bug which leads to InvalidIndex if only'
213 ' scalar is provided to cdf')
214 except Exception, e:
215 self.fail('Failed to compute rdist.cdf due to numeric'
216 ' loss of precision. Exception was %s' % e)
217
218 v = scipy.stats.rdist(10000, 0, 1).cdf([-0.1])
219 self.failUnless(v>=0, v<=1)
220
221
223 ds = datasets['uni2large']
224
225 fwm = OneWayAnova()
226 f = fwm(ds)
227
228 f_sp = f_oneway(ds['labels', [1]].samples,
229 ds['labels', [0]].samples)
230
231
232 assert_array_almost_equal(f, f_sp[0])
233
234
235
237 """Test GLM
238 """
239
240
241 hrf_x = N.linspace(0, 25, 250)
242 hrf = doubleGammaHRF(hrf_x) - singleGammaHRF(hrf_x, 0.8, 1, 0.05)
243
244
245 samples = 1800
246 fast_er_onsets = N.array([10, 200, 250, 500, 600, 900, 920, 1400])
247 fast_er = N.zeros(samples)
248 fast_er[fast_er_onsets] = 1
249
250
251 model_hr = N.convolve(fast_er, hrf)[:samples]
252
253
254 tr = 2.0
255 model_lr = signal.resample(model_hr,
256 int(samples / tr / 10),
257 window='ham')
258
259
260
261 baseline = 800.0
262 wsignal = baseline + 2 * model_lr + \
263 N.random.randn(int(samples / tr / 10)) * 0.2
264 nsignal = baseline + N.random.randn(int(samples / tr / 10)) * 0.5
265
266
267 X = N.array([model_lr, N.repeat(1, len(model_lr))]).T
268
269
270 data = Dataset(samples=N.array((wsignal, nsignal, nsignal)).T, labels=1)
271
272
273 glm = GLM(X, combiner=None)
274 betas = glm(data)
275
276
277 self.failUnless(betas.shape == (data.nfeatures, X.shape[1]))
278
279 self.failUnless(N.absolute(betas[:, 1] - baseline < 10).all(),
280 msg="baseline betas should be huge and around 800")
281
282 self.failUnless(betas[0][0] > betas[1, 0],
283 msg="feature (with signal) beta should be larger than for noise")
284
285 if cfg.getboolean('tests', 'labile', default='yes'):
286 self.failUnless(N.absolute(betas[1, 0]) < 0.5)
287 self.failUnless(N.absolute(betas[0, 0]) > 1.0)
288
289
290
291 glm = GLM(X, voi='zstat', combiner=None)
292 zstats = glm(data)
293
294 self.failUnless(zstats.shape == betas.shape)
295
296 self.failUnless((zstats[:, 1] > 1000).all(),
297 msg='constant zstats should be huge')
298
299 if cfg.getboolean('tests', 'labile', default='yes'):
300 self.failUnless(N.absolute(betas[0, 0]) > betas[1][0],
301 msg='with signal should have higher zstats')
302
303
305 """Test if binomial distribution works ok
306
307 after possibly a monkey patch
308 """
309 bdist = scipy.stats.binom(100, 0.5)
310 self.failUnless(bdist.ppf(1.0) == 100)
311 self.failUnless(bdist.ppf(0.9) <= 60)
312 self.failUnless(bdist.ppf(0.5) == 50)
313 self.failUnless(bdist.ppf(0) == -1)
314
319
320
if __name__ == '__main__':
    # delegate to the common PyMVPA test runner
    import runner