Package mvpa :: Package tests :: Module test_stats_sp
[hide private]
[frames] | no frames]

Source Code for Module mvpa.tests.test_stats_sp

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Unit tests for PyMVPA stats helpers -- those requiring scipy""" 
 10   
 11  from test_stats import * 
 12  externals.exists('scipy', raiseException=True) 
 13   
 14  from scipy import signal 
 15  from mvpa.misc.stats import chisquare 
16 17 18 -class StatsTestsScipy(unittest.TestCase):
19 """Unittests for various statistics which use scipy""" 20
21 - def testChiSquare(self):
22 """Test chi-square distribution""" 23 # test equal distribution 24 tbl = N.array([[5, 5], [5, 5]]) 25 chi, p = chisquare(tbl) 26 self.failUnless( chi == 0.0 ) 27 self.failUnless( p == 1.0 ) 28 29 # test non-equal distribution 30 tbl = N.array([[4, 0], [0, 4]]) 31 chi, p = chisquare(tbl) 32 self.failUnless(chi == 8.0) 33 self.failUnless(p < 0.05)
34 35
36 - def testNullDistProbAny(self):
37 """Test 'any' tail statistics estimation""" 38 if not externals.exists('scipy'): 39 return 40 41 # test 'any' mode 42 from mvpa.measures.corrcoef import CorrCoef 43 ds = datasets['uni2small'] 44 45 null = MCNullDist(permutations=10, tail='any') 46 null.fit(CorrCoef(), ds) 47 48 # 100 and -100 should both have zero probability on their respective 49 # tails 50 self.failUnless(null.p([-100, 0, 0, 0, 0, 0])[0] == 0) 51 self.failUnless(null.p([100, 0, 0, 0, 0, 0])[0] == 0) 52 53 # same test with just scalar measure/feature 54 null.fit(CorrCoef(), ds.selectFeatures([0])) 55 self.failUnless(null.p(-100) == 0) 56 self.failUnless(null.p(100) == 0)
57 58 59 @sweepargs(nd=nulldist_sweep)
60 - def testDatasetMeasureProb(self, nd):
61 """Test estimation of measures statistics""" 62 if not externals.exists('scipy'): 63 # due to null_t requirement 64 return 65 66 ds = datasets['uni2medium'] 67 68 m = OneWayAnova(null_dist=nd, enable_states=['null_t']) 69 score = m(ds) 70 71 score_nonbogus = N.mean(score[ds.nonbogus_features]) 72 score_bogus = N.mean(score[ds.bogus_features]) 73 # plausability check 74 self.failUnless(score_bogus < score_nonbogus) 75 76 null_prob_nonbogus = m.null_prob[ds.nonbogus_features] 77 null_prob_bogus = m.null_prob[ds.bogus_features] 78 79 self.failUnless((null_prob_nonbogus < 0.05).all(), 80 msg="Nonbogus features should have a very unlikely value. Got %s" 81 % null_prob_nonbogus) 82 83 # the others should be a lot larger 84 self.failUnless(N.mean(N.abs(null_prob_bogus)) > 85 N.mean(N.abs(null_prob_nonbogus))) 86 87 if isinstance(nd, MCNullDist): 88 # MCs are not stable with just 10 samples... so lets skip them 89 return 90 91 if cfg.getboolean('tests', 'labile', default='yes'): 92 # Failed on c94ec26eb593687f25d8c27e5cfdc5917e352a69 93 # with MVPA_SEED=833393575 94 self.failUnless((N.abs(m.null_t[ds.nonbogus_features]) >= 5).all(), 95 msg="Nonbogus features should have high t-score. Got %s" 96 % (m.null_t[ds.nonbogus_features])) 97 98 bogus_min = min(N.abs(m.null_t[ds.bogus_features])) 99 self.failUnless(bogus_min < 4, 100 msg="Some bogus features should have low t-score of %g." 101 "Got (t,p,sens):%s" 102 % (bogus_min, 103 zip(m.null_t[ds.bogus_features], 104 m.null_prob[ds.bogus_features], 105 score[ds.bogus_features])))
106 107
108 - def testNegativeT(self):
109 """Basic testing of the sign in p and t scores 110 """ 111 from mvpa.measures.base import FeaturewiseDatasetMeasure 112 113 class BogusMeasure(FeaturewiseDatasetMeasure): 114 """Just put high positive into first 2 features, and high 115 negative into 2nd two 116 """ 117 def _call(self, dataset): 118 """just a little helper... pylint shut up!""" 119 res = N.random.normal(size=(dataset.nfeatures,)) 120 res[0] = res[1] = 100 121 res[2] = res[3] = -100 122 return res
123 124 nd = FixedNullDist(scipy.stats.norm(0, 0.1), tail='any') 125 m = BogusMeasure(null_dist=nd, enable_states=['null_t']) 126 ds = datasets['uni2small'] 127 score = m(ds) 128 t, p = m.null_t, m.null_prob 129 self.failUnless((p>=0).all()) 130 self.failUnless((t[:2] > 0).all()) 131 self.failUnless((t[2:4] < 0).all()) 132 133
134 - def testMatchDistribution(self):
135 """Some really basic testing for matchDistribution 136 """ 137 from mvpa.clfs.stats import matchDistribution, rv_semifrozen 138 139 ds = datasets['uni2medium'] # large to get stable stats 140 data = ds.samples[:, ds.bogus_features[0]] 141 # choose bogus feature, which 142 # should have close to normal distribution 143 144 # Lets test ad-hoc rv_semifrozen 145 floc = rv_semifrozen(scipy.stats.norm, loc=0).fit(data) 146 self.failUnless(floc[0] == 0) 147 148 fscale = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data) 149 self.failUnless(fscale[1] == 1) 150 151 flocscale = rv_semifrozen(scipy.stats.norm, loc=0, scale=1.0).fit(data) 152 self.failUnless(flocscale[1] == 1 and flocscale[0] == 0) 153 154 full = scipy.stats.norm.fit(data) 155 for res in [floc, fscale, flocscale, full]: 156 self.failUnless(len(res) == 2) 157 158 data_mean = N.mean(data) 159 for loc in [None, data_mean]: 160 for test in ['p-roc', 'kstest']: 161 # some really basic testing 162 matched = matchDistribution( 163 data=data, 164 distributions = ['scipy', 165 ('norm', 166 {'name': 'norm_fixed', 167 'loc': 0.2, 168 'scale': 0.3})], 169 test=test, loc=loc, p=0.05) 170 # at least norm should be in there 171 names = [m[2] for m in matched] 172 if test == 'p-roc': 173 if cfg.getboolean('tests', 'labile', default='yes'): 174 # we can guarantee that only for norm_fixed 175 self.failUnless('norm' in names) 176 self.failUnless('norm_fixed' in names) 177 inorm = names.index('norm_fixed') 178 # and it should be at least in the first 179 # 30 best matching ;-) 180 self.failUnless(inorm <= 30)
181
182 - def testRDistStability(self):
183 """Test either rdist distribution performs nicely 184 """ 185 try: 186 # actually I haven't managed to cause this error 187 scipy.stats.rdist(1.32, 0, 1).pdf(-1.0+N.finfo(float).eps) 188 except Exception, e: 189 self.fail('Failed to compute rdist.pdf due to numeric' 190 ' loss of precision. Exception was %s' % e) 191 192 try: 193 # this one should fail with recent scipy with error 194 # ZeroDivisionError: 0.0 cannot be raised to a negative power 195 196 # XXX: There is 1 more bug in etch's scipy.stats or numpy 197 # (vectorize), so I have to put 2 elements in the 198 # queried x's, otherwise it 199 # would puke. But for now that fix is not here 200 # 201 # value = scipy.stats.rdist(1.32, 0, 1).cdf( 202 # [-1.0+N.finfo(float).eps, 0]) 203 # 204 # to cause it now just run this unittest only with 205 # nosetests -s test_stats:StatsTests.testRDistStability 206 207 # NB: very cool way to store the trace of the execution 208 #import pydb 209 #pydb.debugger(dbg_cmds=['bt', 'l', 's']*300 + ['c']) 210 scipy.stats.rdist(1.32, 0, 1).cdf(-1.0+N.finfo(float).eps) 211 except IndexError, e: 212 self.fail('Failed due to bug which leads to InvalidIndex if only' 213 ' scalar is provided to cdf') 214 except Exception, e: 215 self.fail('Failed to compute rdist.cdf due to numeric' 216 ' loss of precision. Exception was %s' % e) 217 218 v = scipy.stats.rdist(10000, 0, 1).cdf([-0.1]) 219 self.failUnless(v>=0, v<=1)
220 221
222 - def testAnovaCompliance(self):
223 ds = datasets['uni2large'] 224 225 fwm = OneWayAnova() 226 f = fwm(ds) 227 228 f_sp = f_oneway(ds['labels', [1]].samples, 229 ds['labels', [0]].samples) 230 231 # SciPy needs to compute the same F-scores 232 assert_array_almost_equal(f, f_sp[0])
233 234 235
236 - def testGLM(self):
237 """Test GLM 238 """ 239 # play fmri 240 # full-blown HRF with initial dip and undershoot ;-) 241 hrf_x = N.linspace(0, 25, 250) 242 hrf = doubleGammaHRF(hrf_x) - singleGammaHRF(hrf_x, 0.8, 1, 0.05) 243 244 # come up with an experimental design 245 samples = 1800 246 fast_er_onsets = N.array([10, 200, 250, 500, 600, 900, 920, 1400]) 247 fast_er = N.zeros(samples) 248 fast_er[fast_er_onsets] = 1 249 250 # high resolution model of the convolved regressor 251 model_hr = N.convolve(fast_er, hrf)[:samples] 252 253 # downsample the regressor to fMRI resolution 254 tr = 2.0 255 model_lr = signal.resample(model_hr, 256 int(samples / tr / 10), 257 window='ham') 258 259 # generate artifical fMRI data: two voxels one is noise, one has 260 # something 261 baseline = 800.0 262 wsignal = baseline + 2 * model_lr + \ 263 N.random.randn(int(samples / tr / 10)) * 0.2 264 nsignal = baseline + N.random.randn(int(samples / tr / 10)) * 0.5 265 266 # build design matrix: bold-regressor and constant 267 X = N.array([model_lr, N.repeat(1, len(model_lr))]).T 268 269 # two 'voxel' dataset 270 data = Dataset(samples=N.array((wsignal, nsignal, nsignal)).T, labels=1) 271 272 # check GLM betas 273 glm = GLM(X, combiner=None) 274 betas = glm(data) 275 276 # betas for each feature and each regressor 277 self.failUnless(betas.shape == (data.nfeatures, X.shape[1])) 278 279 self.failUnless(N.absolute(betas[:, 1] - baseline < 10).all(), 280 msg="baseline betas should be huge and around 800") 281 282 self.failUnless(betas[0][0] > betas[1, 0], 283 msg="feature (with signal) beta should be larger than for noise") 284 285 if cfg.getboolean('tests', 'labile', default='yes'): 286 self.failUnless(N.absolute(betas[1, 0]) < 0.5) 287 self.failUnless(N.absolute(betas[0, 0]) > 1.0) 288 289 290 # check GLM zscores 291 glm = GLM(X, voi='zstat', combiner=None) 292 zstats = glm(data) 293 294 self.failUnless(zstats.shape == betas.shape) 295 296 self.failUnless((zstats[:, 1] > 1000).all(), 297 msg='constant zstats should be huge') 298 299 if cfg.getboolean('tests', 'labile', default='yes'): 300 self.failUnless(N.absolute(betas[0, 0]) > betas[1][0], 301 msg='with signal should have higher zstats')
302 303
304 - def testBinomdistPPF(self):
305 """Test if binomial distribution works ok 306 307 after possibly a monkey patch 308 """ 309 bdist = scipy.stats.binom(100, 0.5) 310 self.failUnless(bdist.ppf(1.0) == 100) 311 self.failUnless(bdist.ppf(0.9) <= 60) 312 self.failUnless(bdist.ppf(0.5) == 50) 313 self.failUnless(bdist.ppf(0) == -1)
314
315 316 -def suite():
317 """Create the suite""" 318 return unittest.makeSuite(StatsTestsScipy)
319 320 321 if __name__ == '__main__': 322 import runner 323