"""Simple functors that transform something."""

_DEV_DOC = """
Follow the convention that functions start with lower case, and classes
with uppercase letter.
"""

__docformat__ = 'restructuredtext'


import numpy as N

from mvpa.base import externals, warning
from mvpa.misc.state import StateVariable, ClassWithCollections

if __debug__:
    from mvpa.base import debug
27
29 """
30 Returns the elementwise absolute of any argument.
31
32 :Parameter:
33 x: scalar | sequence
34
35 """
36 return N.absolute(x)
37
38
40 """Returns elementwise '1 - x' of any argument."""
41 return 1 - x
42
43
45 """Return whatever it was called with."""
46 return x
47
48
50 """Mean computed along the first axis."""
51 return N.mean(x, axis=0)
52
54 """Sum computed over first axis of whether the values are not
55 equal to zero."""
56 return (N.asarray(x)!=0).sum(axis=0)
57
58
60 """Mean across 2nd axis
61
62 Use cases:
63 - to combine multiple sensitivities to get sense about
64 mean sensitivity across splits
65 """
66 return N.mean(x, axis=1)
67
68
70 """Sum of absolute values along the 2nd axis
71
72 Use cases:
73 - to combine multiple sensitivities to get sense about
74 what features are most influential
75 """
76 return N.abs(x).sum(axis=1)
77
78
80 """Max of absolute values along the 2nd axis
81 """
82 return N.abs(x).max(axis=1)
83
84
86 """Just what the name suggests."""
87 return N.mean(x)
88
89
def L2Normed(x, norm=1.0, reverse=False):
    """Norm the values so that regular vector norm becomes `norm`.

    More verbose: Norm that the sum of the squared elements of the
    returned vector becomes `norm`.

    NOTE(review): `reverse` is currently unused; kept for interface
    compatibility with callers.
    """
    xnorm = N.linalg.norm(x)
    return x * (norm / xnorm)
98
def L1Normed(x, norm=1.0, reverse=False):
    """Norm the values so that L_1 norm (sum|x|) becomes `norm`.

    NOTE(review): `reverse` is currently unused; kept for interface
    compatibility with callers.
    """
    xnorm = N.sum(N.abs(x))
    return x * (norm / xnorm)
103
104
106 """Rank-order by value. Highest gets 0"""
107
108
109 nelements = len(x)
110 indexes = N.arange(nelements)
111 t_indexes = indexes
112 if not reverse:
113 t_indexes = indexes[::-1]
114 tosort = zip(x, indexes)
115 tosort.sort()
116 ztosort = zip(tosort, t_indexes)
117 rankorder = N.empty(nelements, dtype=int)
118 rankorder[ [x[0][1] for x in ztosort] ] = \
119 [x[1] for x in ztosort]
120 return rankorder
121
122
126
127
129 """Helper to apply transformer over specific axis
130 """
131
132 - def __init__(self, transformer, axis=None):
133 """Initialize transformer wrapper with an axis.
134
135 :Parameters:
136 transformer
137 A callable to be used
138 axis : None or int
139 If None -- apply transformer across all the data. If some
140 int -- over that axis
141 """
142 self.transformer = transformer
143
144 if not (axis is None or isinstance(axis, int)):
145 raise ValueError, "axis must be specified by integer"
146 self.axis = axis
147
148
149 - def __call__(self, x, *args, **kwargs):
150 transformer = self.transformer
151 axis = self.axis
152 if axis is None:
153 return transformer(x, *args, **kwargs)
154
155 x = N.asanyarray(x)
156 shape = x.shape
157 if axis >= len(shape):
158 raise ValueError, "Axis given in constructor %d is higher " \
159 "than dimensionality of the data of shape %s" % (axis, shape)
160
161
162
163
164
165
166
167 shape_sweep = shape[:axis] + shape[axis+1:]
168 shrinker = None
169 """Either transformer reduces the dimensionality of the data"""
170
171 for index_sweep in N.ndindex(shape_sweep):
172 if axis > 0:
173 index = index_sweep[:axis]
174 else:
175 index = ()
176 index = index + (slice(None),) + index_sweep[axis:]
177 x_sel = x[index]
178 x_t = transformer(x_sel, *args, **kwargs)
179 if shrinker is None:
180 if N.isscalar(x_t) or x_t.shape == shape_sweep:
181 results = N.empty(shape_sweep, dtype=x.dtype)
182 shrinker = True
183 elif x_t.shape == x_sel.shape:
184 results = N.empty(x.shape, dtype=x.dtype)
185 shrinker = False
186 else:
187 raise RuntimeError, 'Not handled by OverAxis kind of transformer'
188
189 if shrinker:
190 results[index_sweep] = x_t
191 else:
192 results[index] = x_t
193
194 return results
195
196
198 """Converts values into p-values under vague and non-scientific assumptions
199 """
200
201 nulldist_number = StateVariable(enabled=True,
202 doc="Number of features within the estimated null-distribution")
203
204 positives_recovered = StateVariable(enabled=True,
205 doc="Number of features considered to be positives and which were recovered")
206
207
208 - def __init__(self, sd=0, distribution='rdist', fpp=None, nbins=400, **kwargs):
209 """L2-Norm the values, convert them to p-values of a given distribution.
210
211 :Parameters:
212 sd : int
213 Samples dimension (if len(x.shape)>1) on which to operate
214 distribution : string
215 Which distribution to use. Known are: 'rdist' (later normal should
216 be there as well)
217 fpp : float
218 At what p-value (both tails) if not None, to control for false
219 positives. It would iteratively prune the tails (tentative real positives)
220 until empirical p-value becomes less or equal to numerical.
221 nbins : int
222 Number of bins for the iterative pruning of positives
223
224 WARNING: Highly experimental/slow/etc: no theoretical grounds have been
225 presented in any paper, nor proven
226 """
227 externals.exists('scipy', raiseException=True)
228 ClassWithCollections.__init__(self, **kwargs)
229
230 self.sd = sd
231 if not (distribution in ['rdist']):
232 raise ValueError, "Actually only rdist supported at the moment" \
233 " got %s" % distribution
234 self.distribution = distribution
235 self.fpp = fpp
236 self.nbins = nbins
237
238
240 from mvpa.support.stats import scipy
241 import scipy.stats as stats
242
243
244 distribution = self.distribution
245 sd = self.sd
246 fpp = self.fpp
247 nbins = self.nbins
248
249 x = N.asanyarray(x)
250 shape_orig = x.shape
251 ndims = len(shape_orig)
252
253
254 hist_kwargs = ({}, {'new': False})[externals.versions['numpy'] > (1,1)]
255
256
257 if ndims > 2:
258 raise NotImplementedError, \
259 "TODO: add support for more than 2 dimensions"
260 elif ndims == 1:
261 x, sd = x[:, N.newaxis], 0
262
263
264 if sd == 0: x = x.T
265
266
267 pvalues = N.zeros(x.shape)
268 nulldist_number, positives_recovered = [], []
269
270
271 nd = x.shape[1]
272 if __debug__:
273 if nd < x.shape[0]:
274 warning("Number of features in DistPValue lower than number of"
275 " items -- may be incorrect sd=%d was provided" % sd)
276 dist = stats.rdist(nd-1, 0, 1)
277 for i, xx in enumerate(x):
278 xx /= N.linalg.norm(xx)
279
280 if fpp is not None:
281
282 Nxx, xxx, pN_emp_prev = len(xx), xx, 1.0
283 Nxxx = Nxx
284 indexes = N.arange(Nxx)
285 """What features belong to Null-distribution"""
286 while True:
287 Nhist = N.histogram(xxx, bins=nbins, normed=False,
288 **hist_kwargs)
289 pdf = Nhist[0].astype(float)/Nxxx
290 bins = Nhist[1]
291 bins_halfstep = (bins[1] - bins[2])/2.0
292
293
294
295 dist_cdf = dist.cdf(bins)
296
297
298
299
300
301
302
303 cdf = N.zeros(nbins, dtype=float)
304
305 dist_prevv = cdf_prevv = 0.0
306 for j in range(nbins):
307 cdf_prevv = cdf[j] = cdf_prevv + pdf[j]
308
309
310
311 p = (0.5 - N.abs(dist_cdf - 0.5) < fpp/2.0)
312
313
314
315
316 pN_emp = N.sum(pdf[p])
317
318 if __debug__:
319 debug('TRAN_', "empirical p=%.3f for theoretical p=%.3f"
320 % (pN_emp, fpp))
321
322 if pN_emp <= fpp:
323
324 break
325
326 if pN_emp > pN_emp_prev:
327 if __debug__:
328 debug('TRAN_', "Diverging -- thus keeping last result "
329 "with p=%.3f" % pN_emp_prev)
330
331 indexes, xxx, dist = indexes_prev, xxx_prev, dist_prev
332 break
333
334 pN_emp_prev = pN_emp
335
336 keep = N.logical_and(xxx > bins[0],
337 xxx < bins[-1])
338 if __debug__:
339 debug('TRAN_', "Keeping %d out of %d elements" %
340 (N.sum(keep), Nxxx))
341
342
343 indexes_prev, xxx_prev, dist_prev = indexes, xxx, dist
344
345
346
347 xxx, indexes = xxx[keep], indexes[keep]
348
349 xxx = xxx / N.linalg.norm(xxx)
350 Nxxx = len(xxx)
351 dist = stats.rdist(Nxxx-1, 0, 1)
352
353 Nindexes = len(indexes)
354 Nrecovered = Nxx - Nindexes
355
356 nulldist_number += [Nindexes]
357 positives_recovered += [Nrecovered]
358
359 if __debug__:
360 if distribution == 'rdist':
361 assert(dist.args[0], Nindexes-1)
362 debug('TRAN', "Positives recovery finished with %d out of %d "
363 "entries in Null-distribution, thus %d positives "
364 "were recovered" % (Nindexes, Nxx, Nrecovered))
365
366
367
368 pvalues[i, :] = dist.cdf(xx)
369
370
371 result = pvalues
372
373
374
375 self.nulldist_number = nulldist_number
376 self.positives_recovered = positives_recovered
377
378
379 if sd == 0:
380 result = result.T
381
382 return result
383