1
2
3
4
5
6
7
8
9 """Collection of classifiers to ease the exploration.
10 """
11
12 __docformat__ = 'restructuredtext'
13
14 from sets import Set
15 import operator
16
17
18 from mvpa.clfs.meta import FeatureSelectionClassifier, SplitClassifier, \
19 MulticlassClassifier
20 from mvpa.clfs.smlr import SMLR
21 from mvpa.clfs.knn import kNN
22 from mvpa.clfs.kernel import KernelLinear, KernelSquaredExponential
23
24
25 from mvpa.base import externals, cfg
26 from mvpa.measures.anova import OneWayAnova
27 from mvpa.misc.transformers import Absolute
28 from mvpa.clfs.smlr import SMLRWeights
29 from mvpa.featsel.helpers import FractionTailSelector, \
30 FixedNElementTailSelector, RangeElementSelector
31
32 from mvpa.featsel.base import SensitivityBasedFeatureSelection
33
# Exhaustive list of tags a classifier may carry in its `_clf_internals`.
# The Warehouse validates every registered classifier and every query
# against this list, so a typo in a tag fails early with ValueError.
_KNOWN_INTERNALS = [ 'knn', 'binary', 'svm', 'linear',
    'smlr', 'does_feature_selection', 'has_sensitivity',
    'multiclass', 'non-linear', 'kernel-based', 'lars',
    'regression', 'libsvm', 'sg', 'meta', 'retrainable', 'gpr',
    'notrain2predict', 'ridge', 'blr', 'gnpp', 'enet', 'glmnet']
41 """Class to keep known instantiated classifiers
42
43 Should provide easy ways to select classifiers of needed kind:
44 clfswh['linear', 'svm'] should return all linear SVMs
45 clfswh['linear', 'multiclass'] should return all linear classifiers
46 capable of doing multiclass classification
47 """
48
49 - def __init__(self, known_tags=None, matches=None):
50 """Initialize warehouse
51
52 :Parameters:
53 known_tags : list of basestring
54 List of known tags
55 matches : dict
56 Optional dictionary of additional matches. E.g. since any
57 regression can be used as a binary classifier,
58 matches={'binary':['regression']}, would allow to provide
59 regressions also if 'binary' was requested
60 """
61 self._known_tags = Set(known_tags)
62 self.__items = []
63 self.__keys = Set()
64 if matches is None:
65 matches = {}
66 self.__matches = matches
67
69 if isinstance(args[0], tuple):
70 args = args[0]
71
72
73 if args == (slice(None),):
74 args = []
75
76
77 dargs = Set([str(x).lstrip('!') for x in args]).difference(
78 self._known_tags)
79
80 if len(dargs)>0:
81 raise ValueError, "Unknown internals %s requested. Known are %s" % \
82 (list(dargs), list(self._known_tags))
83
84
85 result = []
86
87 for item in self.__items:
88 good = True
89
90 for arg in args:
91
92 if arg.startswith('!'):
93 if (arg[1:] in item._clf_internals):
94 good = False
95 break
96 else:
97 continue
98
99 found = False
100 for arg in [arg] + self.__matches.get(arg, []):
101 if (arg in item._clf_internals):
102 found = True
103 break
104 good = found
105 if not good:
106 break
107 if good:
108 result.append(item)
109 return result
110
112 if operator.isSequenceType(item):
113 for item_ in item:
114 self.__iadd__(item_)
115 else:
116 if not hasattr(item, '_clf_internals'):
117 raise ValueError, "Cannot register %s " % item + \
118 "which has no _clf_internals defined"
119 if len(item._clf_internals) == 0:
120 raise ValueError, "Cannot register %s " % item + \
121 "which has empty _clf_internals"
122 clf_internals = Set(item._clf_internals)
123 if clf_internals.issubset(self._known_tags):
124 self.__items.append(item)
125 self.__keys |= clf_internals
126 else:
127 raise ValueError, 'Unknown clf internal(s) %s' % \
128 clf_internals.difference(self._known_tags)
129 return self
130
131 @property
133 """Known internal tags of the classifiers
134 """
135 return self.__keys
136
138 """Listing (description + internals) of registered items
139 """
140 return [(x.descr, x._clf_internals) for x in self.__items]
141
142 @property
144 """Registered items
145 """
146 return self.__items
147
clfswh = Warehouse(known_tags=_KNOWN_INTERNALS)   # classifiers
regrswh = Warehouse(known_tags=_KNOWN_INTERNALS)  # regressions

# SMLR is implemented within the project itself, so it is always
# available; register two regularization strengths.
clfswh += [ SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
            SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
            ]

# Pairwise (one-vs-one) multiclass wrapper around the first
# registered SMLR instance.
clfswh += \
     [ MulticlassClassifier(clfswh['smlr'][0],
                            descr='Pairs+maxvote multiclass on ' + \
                            clfswh['smlr'][0].descr) ]
168
if externals.exists('libsvm'):
    # SVMs backed by the libsvm bindings
    from mvpa.clfs import libsvmc as libsvm
    # libsvm contributes its own implementation tags to the warehouse
    clfswh._known_tags.union_update(libsvm.SVM._KNOWN_IMPLEMENTATIONS.keys())
    # Linear C-SVMs at a few values of C (judging from the descriptions,
    # a negative C is interpreted as a multiple of the default C) plus a
    # nu-SVM variant
    clfswh += [libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1),
               libsvm.SVM(
                   C=-10.0, descr="libsvm.LinSVM(C=10*def)", probability=1),
               libsvm.SVM(
                   C=1.0, descr="libsvm.LinSVM(C=1)", probability=1),
               libsvm.SVM(svm_impl='NU_SVC',
                          descr="libsvm.LinNuSVM(nu=def)", probability=1)
               ]
    # Non-linear kernels: RBF (C- and nu-flavors) and polynomial
    clfswh += [libsvm.SVM(kernel_type='RBF', descr="libsvm.RbfSVM()"),
               libsvm.SVM(kernel_type='RBF', svm_impl='NU_SVC',
                          descr="libsvm.RbfNuSVM(nu=def)"),
               libsvm.SVM(kernel_type='poly',
                          descr='libsvm.PolySVM()', probability=1),
               ]

    # Support-vector regressions from libsvm
    regrswh._known_tags.union_update(['EPSILON_SVR', 'NU_SVR'])
    regrswh += [libsvm.SVM(svm_impl='EPSILON_SVR', descr='libsvm epsilon-SVR',
                           regression=True),
                libsvm.SVM(svm_impl='NU_SVR', descr='libsvm nu-SVR',
                           regression=True)]
196
if externals.exists('shogun'):
    # SVMs backed by the shogun toolbox
    from mvpa.clfs import sg
    clfswh._known_tags.union_update(sg.SVM._KNOWN_IMPLEMENTATIONS)

    # Shogun implementations excluded from the warehouse
    bad_classifiers = [
        'mpd',
        'gpbt',
        'gmnp',
        'svrlight',
        'krr',
        ]
    if not externals.exists('sg_fixedcachesize'):
        # 'gnpp' is only usable when the fixed-cache-size capability is
        # present
        bad_classifiers.append('gnpp')

    for impl in sg.SVM._KNOWN_IMPLEMENTATIONS:
        # Skip implementations listed as problematic above
        if impl in bad_classifiers:
            continue
        # Linear SVMs per implementation, at several values of C
        # (negative C presumably scales the default, per the descrs)
        clfswh += [
            sg.SVM(
                descr="sg.LinSVM(C=def)/%s" % impl, svm_impl=impl),
            sg.SVM(
                C=-10.0, descr="sg.LinSVM(C=10*def)/%s" % impl, svm_impl=impl),
            sg.SVM(
                C=1.0, descr="sg.LinSVM(C=1)/%s" % impl, svm_impl=impl),
            ]
        # RBF-kernel variant per implementation
        clfswh += [
            sg.SVM(kernel_type='RBF',
                   descr="sg.RbfSVM()/%s" % impl, svm_impl=impl),
            ]

    # Shogun-based regressions
    for impl in ['libsvr', 'krr']:
        regrswh._known_tags.union_update([impl])
        regrswh += [ sg.SVM(svm_impl=impl, descr='sg.LinSVMR()/%s' % impl,
                            regression=True),
                     ]
252
if len(clfswh['svm', 'linear']) > 0:
    # If at least one linear SVM backend was registered, pull in the
    # generic SVM classes (e.g. LinearCSVMC, RbfCSVMC used below)
    from mvpa.clfs.svm import *
256
257
if externals.exists('lars'):
    import mvpa.clfs.lars as lars
    from mvpa.clfs.lars import LARS
    for model in lars.known_models:
        # Register each known LARS model as a classifier ...
        lars_clf = LARS(descr="LARS(%s)" % model, model_type=model)
        clfswh += lars_clf

        # ... and as an explicit regression
        lars_regr = LARS(descr="_LARS(%s, regression=True)" % model,
                         regression=True, model_type=model)
        regrswh += lars_regr
270
271
272
273
274
275
276
277
278
279
280
281
if externals.exists('glmnet'):
    # GLMNET comes in classification (C) and regression (R) flavors
    from mvpa.clfs.glmnet import GLMNET_C, GLMNET_R
    clfswh += GLMNET_C(descr="GLMNET_C()")
    regrswh += GLMNET_R(descr="GLMNET_R()")
286
287
# k-nearest-neighbour classifiers
clfswh += kNN(k=5, descr="kNN(k=5)")
clfswh += kNN(k=5, voting='majority', descr="kNN(k=5, voting='majority')")

# kNN on the features with non-zero SMLR(lm=1) weights
clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           SMLRWeights(SMLR(lm=1.0, implementation="C")),
           RangeElementSelector(mode='select')),
        descr="kNN on SMLR(lm=1) non-0")

# kNN on the 5% of features with the highest one-way ANOVA score
clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="kNN on 5%(ANOVA)")

# kNN on the 50 highest-scoring ANOVA features
clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="kNN on 50(ANOVA)")
314
315
316
# GPR and BLR require scipy
if externals.exists('scipy'):
    from mvpa.clfs.gpr import GPR

    # Gaussian process regression with two kernel choices
    clfswh += GPR(kernel=KernelLinear(), descr="GPR(kernel='linear')")
    clfswh += GPR(kernel=KernelSquaredExponential(),
                  descr="GPR(kernel='sqexp')")

    # Bayesian linear regression
    from mvpa.clfs.blr import BLR
    clfswh += BLR(descr="BLR()")
327
328
329
330
if len(clfswh['linear', 'svm']) > 0:
    # Pick a linear SVM from the backend named in the config option
    # 'svm.backend' (default 'libsvm') to serve as the base classifier
    # for the feature-selection meta-classifiers below
    linearSVMC = clfswh['linear', 'svm',
                        cfg.get('svm', 'backend', default='libsvm').lower()
                        ][0]

    # Linear SVM on the features with non-zero SMLR(lm=0.1) weights
    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               SMLRWeights(SMLR(lm=0.1, implementation="C")),
               RangeElementSelector(mode='select')),
            descr="LinSVM on SMLR(lm=0.1) non-0")

    # Same, with a stronger-regularized selecting SMLR (lm=1)
    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               SMLRWeights(SMLR(lm=1.0, implementation="C")),
               RangeElementSelector(mode='select')),
            descr="LinSVM on SMLR(lm=1) non-0")

    # Non-linear (RBF) SVM on the SMLR(lm=1)-selected features
    clfswh += \
        FeatureSelectionClassifier(
            RbfCSVMC(),
            SensitivityBasedFeatureSelection(
               SMLRWeights(SMLR(lm=1.0, implementation="C")),
               RangeElementSelector(mode='select')),
            descr="RbfSVM on SMLR(lm=1) non-0")

    # Linear SVM on the 5% of features with the highest ANOVA score
    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               OneWayAnova(),
               FractionTailSelector(0.05, mode='select', tail='upper')),
            descr="LinSVM on 5%(ANOVA)")

    # Linear SVM on the 50 highest-scoring ANOVA features
    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               OneWayAnova(),
               FixedNElementTailSelector(50, mode='select', tail='upper')),
            descr="LinSVM on 50(ANOVA)")

    # Linear SVM re-trained on the 5% of features with the largest
    # absolute SVM sensitivities
    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               linearSVMC.getSensitivityAnalyzer(transformer=Absolute),
               FractionTailSelector(0.05, mode='select', tail='upper')),
            descr="LinSVM on 5%(SVM)")

    # ... and on the 50 features with the largest absolute sensitivities
    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
               linearSVMC.getSensitivityAnalyzer(transformer=Absolute),
               FixedNElementTailSelector(50, mode='select', tail='upper')),
            descr="LinSVM on 50(SVM)")

    # SplitClassifier wrapping the linear SVM -- judging by the name,
    # intended as the base for RFE-style classifiers.
    # NOTE(review): not registered in the warehouse and not referenced
    # elsewhere in this module -- confirm it is used downstream
    rfesvm_split = SplitClassifier(linearSVMC)

    # Plain linear SVM, presumably for the same (RFE) purpose --
    # likewise unused within the visible part of this module
    rfesvm = LinearCSVMC()
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489