Source Code for Module mvpa.tests.test_clf

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Unit tests for PyMVPA basic Classifiers"""

from mvpa.support.copy import deepcopy

from mvpa.datasets import Dataset
from mvpa.mappers.mask import MaskMapper
from mvpa.datasets.splitters import NFoldSplitter, OddEvenSplitter

from mvpa.misc.exceptions import UnknownStateError

from mvpa.clfs.base import Classifier
from mvpa.clfs.meta import CombinedClassifier, \
     BinaryClassifier, MulticlassClassifier, \
     SplitClassifier, MappedClassifier, FeatureSelectionClassifier, \
     TreeClassifier
from mvpa.clfs.transerror import TransferError
from mvpa.algorithms.cvtranserror import CrossValidatedTransferError

from tests_warehouse import *
from tests_warehouse_clfs import *


class ClassifiersTests(unittest.TestCase):

    def setUp(self):
        self.clf_sign = SameSignClassifier()
        self.clf_less1 = Less1Classifier()

        # simple binary dataset
        self.data_bin_1 = Dataset(
            samples=[[0,0],[-10,-1],[1,0.1],[1,-1],[-1,1]],
            labels=[1, 1, 1, -1, -1],   # labels
            chunks=[0, 1, 2, 2, 3])     # chunks

    def testDummy(self):
        clf = SameSignClassifier(enable_states=['training_confusion'])
        clf.train(self.data_bin_1)
        self.failUnlessRaises(UnknownStateError, clf.states.getvalue,
                              "predictions")
        """Should have no predictions after training. Predictions
        state should be explicitly disabled"""

        self.failUnlessRaises(UnknownStateError, clf.states.getvalue,
                              "trained_dataset")

        self.failUnlessEqual(clf.training_confusion.percentCorrect,
                             100,
                             msg="Dummy clf should train perfectly")
        self.failUnlessEqual(clf.predict(self.data_bin_1.samples),
                             list(self.data_bin_1.labels))

        self.failUnlessEqual(len(clf.predictions), self.data_bin_1.nsamples,
            msg="Trained classifier stores predictions by default")

        clf = SameSignClassifier(enable_states=['trained_dataset'])
        clf.train(self.data_bin_1)
        self.failUnless((clf.trained_dataset.samples ==
                         self.data_bin_1.samples).all())
        self.failUnless((clf.trained_dataset.labels ==
                         self.data_bin_1.labels).all())

    def testBoosted(self):
        # XXXXXXX
        # silly test whether we get the same result with a boosted classifier
        # as with a single one
        bclf = CombinedClassifier(clfs=[self.clf_sign.clone(),
                                        self.clf_sign.clone()])

        self.failUnlessEqual(list(bclf.predict(self.data_bin_1.samples)),
                             list(self.data_bin_1.labels),
                             msg="Boosted classifier should work")
        self.failUnlessEqual(bclf.predict(self.data_bin_1.samples),
                             self.clf_sign.predict(self.data_bin_1.samples),
                             msg="Boosted classifier should give the same "
                                 "results as the regular one")

    def testBoostedStatePropagation(self):
        bclf = CombinedClassifier(clfs=[self.clf_sign.clone(),
                                        self.clf_sign.clone()],
                                  enable_states=['feature_ids'])

        # check states enabling propagation
        self.failUnlessEqual(self.clf_sign.states.isEnabled('feature_ids'), False)
        self.failUnlessEqual(bclf.clfs[0].states.isEnabled('feature_ids'), True)

        bclf2 = CombinedClassifier(clfs=[self.clf_sign.clone(),
                                         self.clf_sign.clone()],
                                   propagate_states=False,
                                   enable_states=['feature_ids'])

        self.failUnlessEqual(self.clf_sign.states.isEnabled('feature_ids'), False)
        self.failUnlessEqual(bclf2.clfs[0].states.isEnabled('feature_ids'), False)

    def testBinaryDecorator(self):
        ds = Dataset(samples=[ [0,0], [0,1], [1,100], [-1,0], [-1,-3], [ 0,-10] ],
                     labels=[ 'sp', 'sp', 'sp', 'dn', 'sn', 'dp'])
        testdata = [ [0,0], [10,10], [-10, -1], [0.1, -0.1], [-0.2, 0.2] ]
        # labels [s]ame/[d]ifferent (sign), and [p]ositive/[n]egative first element

        clf = SameSignClassifier()
        # let's create a classifier to discriminate only between same/different,
        # which is the primary task of SameSignClassifier
        bclf1 = BinaryClassifier(clf=clf,
                                 poslabels=['sp', 'sn'],
                                 neglabels=['dp', 'dn'])

        orig_labels = ds.labels[:]
        bclf1.train(ds)

        self.failUnless(bclf1.predict(testdata) ==
                        [['sp', 'sn'], ['sp', 'sn'], ['sp', 'sn'],
                         ['dn', 'dp'], ['dn', 'dp']])

        self.failUnless((ds.labels == orig_labels).all(),
                        msg="BinaryClassifier should not alter labels")

    @sweepargs(clf=clfswh['binary'])
    def testClassifierGeneralization(self, clf):
        """Simple test whether classifiers can generalize ok on simple data
        """
        te = CrossValidatedTransferError(TransferError(clf), NFoldSplitter())
        cve = te(datasets['uni2medium'])
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(cve < 0.25,
                            msg="Got transfer error %g" % (cve))

    @sweepargs(clf=clfswh[:] + regrswh[:])
    def testSummary(self, clf):
        """Basic testing of the clf summary
        """
        summary1 = clf.summary()
        self.failUnless('not yet trained' in summary1)
        clf.train(datasets['uni2small'])
        summary = clf.summary()
        # It should get bigger ;)
        self.failUnless(len(summary) > len(summary1))
        self.failUnless('not yet trained' not in summary)

    # TODO: validate for regressions as well!!!
    def testSplitClassifier(self):
        ds = self.data_bin_1
        clf = SplitClassifier(clf=SameSignClassifier(),
                              splitter=NFoldSplitter(1),
                              enable_states=['confusion', 'training_confusion',
                                             'feature_ids'])
        clf.train(ds)                   # train the beast
        error = clf.confusion.error
        tr_error = clf.training_confusion.error

        clf2 = clf.clone()
        cv = CrossValidatedTransferError(
            TransferError(clf2),
            NFoldSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)
        tr_cverror = cv.training_confusion.error

        self.failUnlessEqual(error, cverror,
            msg="We should get the same error using split classifier as"
                " using CrossValidatedTransferError. Got %s and %s"
                % (error, cverror))

        self.failUnlessEqual(tr_error, tr_cverror,
            msg="We should get the same training error using split classifier as"
                " using CrossValidatedTransferError. Got %s and %s"
                % (tr_error, tr_cverror))

        self.failUnlessEqual(clf.confusion.percentCorrect,
                             100,
                             msg="Dummy clf should train perfectly")
        self.failUnlessEqual(len(clf.confusion.sets),
                             len(ds.uniquechunks),
                             msg="Should have 1 confusion per each split")
        self.failUnlessEqual(len(clf.clfs), len(ds.uniquechunks),
            msg="Should have number of classifiers equal # of epochs")
        self.failUnlessEqual(clf.predict(ds.samples), list(ds.labels),
            msg="Should classify correctly")

        # feature_ids must be a list of lists, and since no feature-selecting
        # classifier is used -- we expect all features to be utilized
        #  NOT ANYMORE -- for BoostedClassifier we have now the union of all
        #  used features across slave classifiers. That makes
        #  semantics clear. If you need to get deeper -- use the upcoming
        #  harvesting facility ;-)
        # self.failUnlessEqual(len(clf.feature_ids), len(ds.uniquechunks))
        # self.failUnless(N.array([len(ids)==ds.nfeatures
        #                          for ids in clf.feature_ids]).all())

        # Just check if we get it at all ;-)
        summary = clf.summary()

    @sweepargs(clf_=clfswh['binary', '!meta'])
    def testSplitClassifierExtended(self, clf_):
        clf2 = clf_.clone()
        ds = datasets['uni2medium']     # self.data_bin_1
        clf = SplitClassifier(clf=clf_, # SameSignClassifier(),
                              splitter=NFoldSplitter(1),
                              enable_states=['confusion', 'feature_ids'])
        clf.train(ds)                   # train the beast
        error = clf.confusion.error

        cv = CrossValidatedTransferError(
            TransferError(clf2),
            NFoldSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)

        self.failUnless(abs(error - cverror) < 0.01,
            msg="We should get the same error using split classifier as"
                " using CrossValidatedTransferError. Got %s and %s"
                % (error, cverror))

        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(error < 0.25,
                msg="clf should generalize more or less fine. "
                    "Got error %s" % error)
        self.failUnlessEqual(len(clf.confusion.sets), len(ds.uniquechunks),
            msg="Should have 1 confusion per each split")
        self.failUnlessEqual(len(clf.clfs), len(ds.uniquechunks),
            msg="Should have number of classifiers equal # of epochs")
        #self.failUnlessEqual(clf.predict(ds.samples), list(ds.labels),
        #                     msg="Should classify correctly")

    def testHarvesting(self):
        """Basic testing of harvesting based on SplitClassifier
        """
        ds = self.data_bin_1
        clf = SplitClassifier(clf=SameSignClassifier(),
                              splitter=NFoldSplitter(1),
                              enable_states=['confusion', 'training_confusion',
                                             'feature_ids'],
                              harvest_attribs=['clf.feature_ids',
                                               'clf.training_time'],
                              descr="DESCR")
        clf.train(ds)                   # train the beast
        # Number of harvested items should be equal to number of chunks
        self.failUnlessEqual(len(clf.harvested['clf.feature_ids']),
                             len(ds.uniquechunks))
        # if we can blame multiple inheritance and ClassWithCollections.__init__
        self.failUnlessEqual(clf.descr, "DESCR")

    def testMappedClassifier(self):
        samples = N.array([ [0,0,-1], [1,0,1], [-1,-1, 1], [-1,0,1], [1, -1, 1] ])
        testdata3 = Dataset(samples=samples, labels=1)
        res110 = [1, 1, 1, -1, -1]
        res101 = [-1, 1, -1, -1, 1]
        res011 = [-1, 1, -1, 1, -1]

        clf110 = MappedClassifier(clf=self.clf_sign, mapper=MaskMapper(N.array([1,1,0])))
        clf101 = MappedClassifier(clf=self.clf_sign, mapper=MaskMapper(N.array([1,0,1])))
        clf011 = MappedClassifier(clf=self.clf_sign, mapper=MaskMapper(N.array([0,1,1])))

        self.failUnlessEqual(clf110.predict(samples), res110)
        self.failUnlessEqual(clf101.predict(samples), res101)
        self.failUnlessEqual(clf011.predict(samples), res011)

    def testFeatureSelectionClassifier(self):
        from test_rfe import SillySensitivityAnalyzer
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector

        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()
        # should give lowest weight to the feature with highest index
        sens_ana_rev = SillySensitivityAnalyzer(mult=-1)

        # corresponding feature selections
        feat_sel = SensitivityBasedFeatureSelection(sens_ana,
            FixedNElementTailSelector(1, mode='discard'))

        feat_sel_rev = SensitivityBasedFeatureSelection(sens_ana_rev,
            FixedNElementTailSelector(1))

        samples = N.array([ [0,0,-1], [1,0,1], [-1,-1, 1], [-1,0,1], [1, -1, 1] ])

        testdata3 = Dataset(samples=samples, labels=1)
        # dummy train data so the proper mapper gets created
        traindata = Dataset(samples=N.array([ [0, 0,-1], [1,0,1] ]), labels=[1,2])

        # targets
        res110 = [1, 1, 1, -1, -1]
        res011 = [-1, 1, -1, 1, -1]

        # first classifier -- 0th feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel,
                    enable_states=['feature_ids'])

        self.clf_sign.states._changeTemporarily(enable_states=['values'])
        clf011.train(traindata)

        self.failUnlessEqual(clf011.predict(testdata3.samples), res011)
        # just a silly test whether we get values assigned in the 'ProxyClassifier'
        self.failUnless(len(clf011.values) == len(res110),
                        msg="We need to pass values into ProxyClassifier")
        self.clf_sign.states._resetEnabledTemporarily()

        self.failUnlessEqual(len(clf011.feature_ids), 2)
        "Feature selection classifier had to be trained on 2 features"

        # second classifier -- the last feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel_rev)
        clf011.train(traindata)
        self.failUnlessEqual(clf011.predict(testdata3.samples), res110)

    def testFeatureSelectionClassifierWithRegression(self):
        from test_rfe import SillySensitivityAnalyzer
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector
        if sample_clf_reg is None:
            # no regression classifier was found, so there is nothing to test
            return
        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()

        # corresponding feature selection
        feat_sel = SensitivityBasedFeatureSelection(sens_ana,
            FixedNElementTailSelector(1, mode='discard'))

        # now test with a regression-based classifier. The problem is
        # that it is determining predictions twice from values and
        # then setting the values from the results, which the second
        # time is set to predictions.  The final outcome is that the
        # values are actually predictions...
        dat = Dataset(samples=N.random.randn(4, 10), labels=[-1, -1, 1, 1])
        clf_reg = FeatureSelectionClassifier(sample_clf_reg, feat_sel)
        clf_reg.train(dat)
        res = clf_reg.predict(dat.samples)
        self.failIf((N.array(clf_reg.values) - clf_reg.predictions).sum() == 0,
                    msg="Values were set to the predictions.")

    def testTreeClassifier(self):
        """Basic tests for TreeClassifier
        """
        ds = datasets['uni4small']
        clfs = clfswh['binary']         # pool of classifiers
        # Let's permute so each time we try some different combination
        # of the classifiers
        clfs = [clfs[i] for i in N.random.permutation(len(clfs))]
        # Test conflicting definition
        tclf = TreeClassifier(clfs[0], {
            'L0+2' : (('L0', 'L2'), clfs[1]),
            'L2+3' : ((2, 3), clfs[2])})
        self.failUnlessRaises(ValueError, tclf.train, ds)
        """Should raise an exception since label 2 is in both groups"""

        # Test insufficient definition
        tclf = TreeClassifier(clfs[0], {
            'L0+5' : (('L0', 'L5'), clfs[1]),
            'L2+3' : ((2, 3), clfs[2])})
        self.failUnlessRaises(ValueError, tclf.train, ds)
        """Should raise an exception since there is no group for L1"""

        # proper definition now
        tclf = TreeClassifier(clfs[0], {
            'L0+1' : (('L0', 1), clfs[1]),
            'L2+3' : ((2, 3), clfs[2])})

        # Let's test a train/test cycle using CVTE
        cv = CrossValidatedTransferError(
            TransferError(tclf),
            OddEvenSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)
        try:
            rtclf = repr(tclf)
        except:
            self.fail(msg="Could not obtain repr for TreeClassifier")

        # Test accessibility of .clfs
        self.failUnless(tclf.clfs['L0+1'] is clfs[1])
        self.failUnless(tclf.clfs['L2+3'] is clfs[2])

        cvtrc = cv.training_confusion
        cvtc = cv.confusion
        if cfg.getboolean('tests', 'labile', default='yes'):
            # just a dummy check to make sure everything is working
            self.failUnless(cvtrc != cvtc)
            self.failUnless(cverror < 0.3)

        # TODO: whenever implemented
        tclf = TreeClassifier(clfs[0], {
            'L0'     : (('L0',), clfs[1]),
            'L1+2+3' : ((1, 2, 3), clfs[2])})
        # TEST ME


    @sweepargs(clf=clfswh[:])
    def testValues(self, clf):
        if isinstance(clf, MulticlassClassifier):
            # TODO: handle those values correctly
            return
        ds = datasets['uni2small']
        clf.states._changeTemporarily(enable_states = ['values'])
        cv = CrossValidatedTransferError(
            TransferError(clf),
            OddEvenSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)
        #print clf.descr, clf.values[0]
        # basic test whether we get 1 set of values per each sample
        self.failUnlessEqual(len(clf.values), ds.nsamples/2)

        clf.states._resetEnabledTemporarily()

    @sweepargs(clf=clfswh['linear', 'svm', 'libsvm', '!meta'])
    def testMulticlassClassifier(self, clf):
        oldC = None
        # XXX somewhat ugly way to force a non-dataspecific C value.
        # Otherwise the multiclass libsvm builtin and our MulticlassClassifier
        # would differ in results
        if clf.params.isKnown('C') and clf.C < 0:
            oldC = clf.C
            clf.C = 1.0                 # reset C to be 1

        svm, svm2 = clf, clf.clone()
        svm2.states.enable(['training_confusion'])

        mclf = MulticlassClassifier(clf=svm,
                                    enable_states=['training_confusion'])

        svm2.train(datasets['uni2small_train'])
        mclf.train(datasets['uni2small_train'])
        s1 = str(mclf.training_confusion)
        s2 = str(svm2.training_confusion)
        self.failUnlessEqual(s1, s2,
            msg="Multiclass clf should provide same results as built-in "
                "libsvm's %s. Got %s and %s" % (svm2, s1, s2))

        svm2.untrain()

        self.failUnless(svm2.trained == False,
            msg="Untrained SVM should be untrained")

        self.failUnless(N.array([x.trained for x in mclf.clfs]).all(),
            msg="Trained Boosted classifier should have all primary classifiers trained")
        self.failUnless(mclf.trained,
            msg="Trained Boosted classifier should be marked as trained")

        mclf.untrain()

        self.failUnless(not mclf.trained,
            msg="Untrained Boosted classifier should not be trained")
        self.failUnless(not N.array([x.trained for x in mclf.clfs]).any(),
            msg="Untrained Boosted classifier should have no primary classifiers trained")

        if oldC is not None:
            clf.C = oldC

    # XXX meta should also work but TODO
    @sweepargs(clf=clfswh['svm', '!meta'])
    def testSVMs(self, clf):
        knows_probabilities = 'probabilities' in clf.states.names and clf.params.probability
        enable_states = ['values']
        if knows_probabilities:
            enable_states += ['probabilities']

        clf.states._changeTemporarily(enable_states = enable_states)
        for traindata, testdata in [
            (datasets['uni2small_train'], datasets['uni2small_test']) ]:
            clf.train(traindata)
            predicts = clf.predict(testdata.samples)
            # values should be different from predictions for the SVMs we have
            self.failUnless( (predicts != clf.values).any() )

            if knows_probabilities and clf.states.isSet('probabilities'):
                # XXX test more thoroughly what we are getting here ;-)
                self.failUnlessEqual( len(clf.probabilities), len(testdata.samples) )
        clf.states._resetEnabledTemporarily()

    @sweepargs(clf=clfswh['retrainable'])
    def testRetrainables(self, clf):
        # we need a copy since we will tune its internals later on
        clf = clf.clone()
        clf.states._changeTemporarily(enable_states = ['values'],
                                      # ensure that it does do predictions
                                      # while training
                                      disable_states=['training_confusion'])
        clf_re = clf.clone()
        # TODO: .retrainable must have a callback to call smth like
        # _setRetrainable
        clf_re._setRetrainable(True)

        # need to have high snr so we don't 'cope' with problematic
        # datasets since otherwise unittests would fail.
        dsargs = {'perlabel':50, 'nlabels':2, 'nfeatures':5, 'nchunks':1,
                  'nonbogus_features':[2,4], 'snr': 5.0}

        ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        # NB datasets will be changed by the end of testing, so if
        # we are to change to use generic datasets -- make sure to copy
        # them here
        dstrain = deepcopy(datasets['uni2large_train'])
        dstest = deepcopy(datasets['uni2large_test'])

        clf.untrain()
        clf_re.untrain()
        trerr, trerr_re = TransferError(clf), TransferError(clf_re)

        # Just check for correctness of retraining
        err_1 = trerr(dstest, dstrain)
        self.failUnless(err_1 < 0.3,
            msg="We should test here on an easy dataset. Got error of %s" % err_1)
        values_1 = clf.values[:]
        # sometimes retraining gets into deeper optimization ;-)
        eps = 0.05
        corrcoef_eps = 0.85             # just to get no failures... usually > 0.95


        def batch_test(retrain=True, retest=True, closer=True):
            err = trerr(dstest, dstrain)
            err_re = trerr_re(dstest, dstrain)
            corr = N.corrcoef(clf.values, clf_re.values)[0,1]
            corr_old = N.corrcoef(values_1, clf_re.values)[0,1]
            if __debug__:
                debug('TEST', "Retraining stats: errors %g %g corr %g "
                      "with old error %g corr %g" %
                      (err, err_re, corr, err_1, corr_old))
            self.failUnless(clf_re.states.retrained == retrain,
                            ("Must fully train",
                             "Must retrain instead of full training")[retrain])
            self.failUnless(clf_re.states.repredicted == retest,
                            ("Must fully test",
                             "Must retest instead of full testing")[retest])
            self.failUnless(corr > corrcoef_eps,
                msg="Result must be close to the one without retraining."
                    " Got corrcoef=%s" % (corr))
            if closer:
                self.failUnless(corr >= corr_old,
                    msg="Result must be closer to the current one without retraining"
                        " than to the old one. Got corrcoef=%s" % (corr_old))

        # Check sequential retraining/retesting
        for i in xrange(3):
            flag = bool(i != 0)
            # ok - on the 1st call we should train/test, then retrain/retest
            # and we can't compare for closeness to the old result since
            # we are working on the same data/classifier
            batch_test(retrain=flag, retest=flag, closer=False)

        # should retrain nicely if we change a parameter
        if 'C' in clf.params.names:
            clf.params.C *= 0.1
            clf_re.params.C *= 0.1
            batch_test()
        elif 'sigma_noise' in clf.params.names:
            clf.params.sigma_noise *= 100
            clf_re.params.sigma_noise *= 100
            batch_test()
        else:
            raise RuntimeError, \
                  'Please implement testing while changing some of the ' \
                  'params for clf %s' % clf

        # should retrain nicely if we change a kernel parameter
        if hasattr(clf, 'kernel_params') and len(clf.kernel_params.names):
            clf.kernel_params.gamma = 0.1
            clf_re.kernel_params.gamma = 0.1
            # retest is false since the kernel got recomputed, thus
            # we can't expect to use the same kernel
            batch_test(retest=not('gamma' in clf.kernel_params.names))

        # should retrain nicely if we change labels
        oldlabels = dstrain.labels[:]
        dstrain.permuteLabels(status=True, assure_permute=True)
        self.failUnless((oldlabels != dstrain.labels).any(),
            msg="We should succeed at permuting -- now got the same labels")
        batch_test()

        # Change labels in testing
        oldlabels = dstest.labels[:]
        dstest.permuteLabels(status=True, assure_permute=True)
        self.failUnless((oldlabels != dstest.labels).any(),
            msg="We should succeed at permuting -- now got the same labels")
        batch_test()

        # should re-train if we change data
        # reuse trained SVM and its 'final' optimization point
        if not clf.__class__.__name__ in ['GPR']: # on GPR everything depends on the data ;-)
            oldsamples = dstrain.samples.copy()
            dstrain.samples[:] += dstrain.samples*0.05
            self.failUnless((oldsamples != dstrain.samples).any())
            batch_test(retest=False)
        clf.states._resetEnabledTemporarily()

        # test retrain()
        # TODO XXX -- check validity
        clf_re.retrain(dstrain)
        self.failUnless(clf_re.states.retrained)
        clf_re.retrain(dstrain, labels=True)
        self.failUnless(clf_re.states.retrained)
        clf_re.retrain(dstrain, traindataset=True)
        self.failUnless(clf_re.states.retrained)

        # test repredict()
        clf_re.repredict(dstest.samples)
        self.failUnless(clf_re.states.repredicted)
        self.failUnlessRaises(RuntimeError, clf_re.repredict,
                              dstest.samples, labels=True)
        """For now, retesting with anything changed makes no sense"""
        clf_re._setRetrainable(False)

    def testGenericTests(self):
        """Test all classifiers for conformant behavior
        """
        for clf_, traindata in \
                [(clfswh['binary'], datasets['dumb2']),
                 (clfswh['multiclass'], datasets['dumb'])]:
            traindata_copy = deepcopy(traindata) # full copy of dataset
            for clf in clf_:
                clf.train(traindata)
                self.failUnless(
                    (traindata.samples == traindata_copy.samples).all(),
                    "Training of a classifier shouldn't change the original dataset")

            # TODO: enforce uniform return from predict??
            #predicted = clf.predict(traindata.samples)
            #self.failUnless(isinstance(predicted, N.ndarray))

            # Just a simple test that all of them are syntaxed correctly
            self.failUnless(str(clf) != "")
            self.failUnless(repr(clf) != "")

    # TODO: unify str and repr for all classifiers

    # XXX TODO: should work on smlr, knn, ridgereg, lars as well! but now
    # they fail to train
    @sweepargs(clf=clfswh['!smlr', '!knn', '!lars', '!meta', '!ridge'])
    def testCorrectDimensionsOrder(self, clf):
        """To check if known/present Classifiers are working properly
        with samples being the first dimension. Started to worry about
        possible problems while looking at sg where samples are the 2nd
        dimension
        """
        # specially crafted dataset -- if dimensions are flipped over
        # the same storage, the problem becomes inseparable. Like in this case
        # the incorrect order of dimensions leads to equal samples [0, 1, 0]
        traindatas = [
            Dataset(samples=N.array([ [0, 0, 1.0],
                                      [1, 0, 0] ]), labels=[-1, 1]),
            Dataset(samples=N.array([ [0, 0.0],
                                      [1, 1] ]), labels=[-1, 1])]

        clf.states._changeTemporarily(enable_states = ['training_confusion'])
        for traindata in traindatas:
            clf.train(traindata)
            self.failUnlessEqual(clf.training_confusion.percentCorrect, 100.0,
                "Classifier %s must have 100%% correct learning on %s. Has %f" %
                (`clf`, traindata.samples, clf.training_confusion.percentCorrect))

            # and we must be able to predict every original sample thus
            for i in xrange(traindata.nsamples):
                sample = traindata.samples[i,:]
                predicted = clf.predict([sample])
                self.failUnlessEqual([predicted], traindata.labels[i],
                    "We must be able to predict sample %s using " % sample +
                    "classifier %s" % `clf`)
        clf.states._resetEnabledTemporarily()


def suite():
    return unittest.makeSuite(ClassifiersTests)


if __name__ == '__main__':
    import runner