1
2
3
4
5
6
7
8
9 """Support function -- little helpers in everyday life"""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as N
14 import re, os
15
16
17 from distutils.version import Version
18 from types import StringType, TupleType, ListType
19
20 from mvpa.base import warning
21 from mvpa.support.copy import copy, deepcopy
22 from operator import isSequenceType
23
24 if __debug__:
25 from mvpa.base import debug
26
27
29 """Use path to file1 as the path to file2 is no absolute
30 path is given for file2
31
32 :Parameters:
33 force : bool
34 if True, force it even if the file2 starts with /
35 """
36 if not file2.startswith(os.path.sep) or force:
37
38 return os.path.join(os.path.dirname(file1), file2.lstrip(os.path.sep))
39 else:
40 return file2
41
42
80
81
83 """Generates a list of lists containing all combinations of
84 elements of data of length 'n' without repetitions.
85
86 data: list
87 n: integer
88
89 This function is adapted from a Java version posted in some forum on
90 the web as an answer to the question 'How can I generate all possible
91 combinations of length n?'. Unfortunately I cannot remember which
92 forum it was.
93
94 NOTE: implementation is broken for n>2
95 """
96
97 if n > 2:
98 raise ValueError, "_getUniqueLengthNCombinations_lt3 " \
99 "is broken for n>2, hence should not be used directly."
100
101
102 combos = []
103
104
105
106 def take(data, occupied, depth, taken):
107 for i, d in enumerate(data):
108
109 if occupied[i] == False:
110
111 if depth < n-1:
112
113 occupied[i] = True
114
115 take(data, occupied, depth+1, taken + [d])
116
117
118
119
120
121 else:
122
123 combos.append(taken + [d])
124
125
126 occupied = [False] * len(data)
127
128 take(data, occupied, 0, [])
129
130
131 return combos
132
134 """Generator of unique combinations form a list L of objects in
135 groups of size n.
136
137 # XXX EO: I guess they are already sorted.
138 # XXX EO: It seems to work well for n>20 :)
139
140 :Parameters:
141 L : list
142 list of unique ids
143 n : int
144 grouping size
145
146 Adopted from Li Daobing
147 http://code.activestate.com/recipes/190465/
148 (MIT license, according to activestate.com's policy)
149 """
150 if n==0: yield []
151 else:
152 for i in xrange(len(L)-n+1):
153 for cc in xuniqueCombinations(L[i+1:],n-1):
154 yield [L[i]]+cc
155
157 """Find all subsets of data
158
159 :Parameters:
160 L : list
161 list of unique ids
162 n : None or int
163 If None, all possible subsets are returned. If n is specified (int),
164 then only the ones of the length n are returned
165 sort : bool
166 Either to sort the resultant sequence
167
168 Adopted from Alex Martelli:
169 http://mail.python.org/pipermail/python-list/2001-January/067815.html
170 """
171 N = len(L)
172 if N > 20 and n == 1:
173 warning("getUniqueLengthNCombinations_binary should not be used for "
174 "large N")
175 result = []
176 for X in range(2**N):
177 x = [ L[i] for i in range(N) if X & (1L<<i) ]
178 if n is None or len(x) == n:
179
180 result.append(x)
181 result.sort()
182
183
184
185
186 return result
187
188
190 """Find all subsets of data
191
192 :Parameters:
193 L : list
194 list of unique ids
195 n : None or int
196 If None, all possible subsets are returned. If n is specified (int),
197 then only the ones of the length n are returned
198
199 TODO: work out single stable implementation -- probably just by fixing
200 _getUniqueLengthNCombinations_lt3
201 """
202 if n == 1:
203 return _getUniqueLengthNCombinations_lt3(L, n)
204 elif n==None:
205 return _getUniqueLengthNCombinations_binary(L, n, sort=True)
206 else:
207
208
209
210
211
212
213 return list(xuniqueCombinations(L, n))
214
215
217 """Find all subsets of data
218
219 :Parameters:
220 L : list
221 list of unique ids
222 n : None or int
223 If None, all possible subsets are returned. If n is specified (int),
224 then only the ones of the length n are returned
225 sort : bool
226 Either to sort the resultant sequence
227
228 Adopted from Alex Martelli:
229 http://mail.python.org/pipermail/python-list/2001-January/067815.html
230 """
231 N = len(L)
232 if N > 20 and n == 1:
233 warning("getUniqueLengthNCombinations_binary should not be used for "
234 "large N")
235 result = []
236 for X in range(2**N):
237 x = [ L[i] for i in range(N) if X & (1L<<i) ]
238 if n is None or len(x) == n:
239
240 result.append(x)
241 result.sort()
242
243
244
245
246 return result
247
248
250 """Find all subsets of data
251
252 :Parameters:
253 L : list
254 list of unique ids
255 n : None or int
256 If None, all possible subsets are returned. If n is specified (int),
257 then only the ones of the length n are returned
258
259 TODO: work out single stable implementation -- probably just by fixing
260 _getUniqueLengthNCombinations_lt3
261 """
262 if n == 1:
263 return _getUniqueLengthNCombinations_lt3(L, n)
264 else:
265 return _getUniqueLengthNCombinations_binary(L, n, sort=True)
266
267
269 """Given a `value` returns a string where each line is indented
270
271 Needed for a cleaner __repr__ output
272 `v` - arbitrary
273 """
274 return re.sub('\n', '\n ', str(v))
275
276
278 """Craft unique id+hash for an object
279 """
280 res = "%s" % id(val)
281 if isinstance(val, list):
282 val = tuple(val)
283 try:
284 res += ":%s" % hash(buffer(val))
285 except:
286 try:
287 res += ":%s" % hash(val)
288 except:
289 pass
290 pass
291 return res
292
294 """Check if listed items are in sorted order.
295
296 :Parameters:
297 `items`: iterable container
298
299 :return: `True` if were sorted. Otherwise `False` + Warning
300 """
301 items_sorted = deepcopy(items)
302 items_sorted.sort()
303 equality = items_sorted == items
304
305 if hasattr(equality, '__iter__'):
306 equality = N.all(equality)
307 return equality
308
309
311 """For given coord check if it is within a specified volume size.
312
313 Returns True/False. Assumes that volume coordinates start at 0.
314 No more generalization (arbitrary minimal coord) is done to save
315 on performance
316 """
317 for i in xrange(len(coord)):
318 if coord[i] < 0 or coord[i] >= shape[i]:
319 return False
320 return True
321
322
324 """Convert literal string into a tuple, if possible of ints
325
326 Tuple of integers constructed by splitting at '.' or interleaves
327 of numerics and alpha numbers
328 """
329 if isinstance(v, basestring):
330 v = v.split('.')
331 elif isinstance(v, tuple) or isinstance(v, list):
332
333 pass
334 else:
335 raise ValueError, "Do not know how to treat version '%s'" % str(v)
336
337
338 vres = []
339
340 regex = re.compile('(?P<numeric>[0-9]*)'
341 '(?P<alpha>[~+-]*[A-Za-z]*)(?P<suffix>.*)')
342 for x in v:
343 try:
344 vres += [int(x)]
345 except ValueError:
346
347 suffix = x
348 while suffix != '':
349 res = regex.search(suffix)
350 if res:
351 resd = res.groupdict()
352 if resd['numeric'] != '':
353 vres += [int(resd['numeric'])]
354 if resd['alpha'] != '':
355 vres += [resd['alpha']]
356 suffix = resd['suffix']
357 else:
358
359 resd += [suffix]
360 break
361 v = tuple(vres)
362
363 return v
364
366 """A bit evolved comparison of versions
367
368 The reason for not using python's distutil.version is that it
369 seems to have no clue about somewhat common conventions of using
370 '-dev' or 'dev' or 'rc' suffixes for upcoming releases (so major
371 version does contain upcoming release already).
372
373 So here is an ad-hoc and not as nice implementation
374 """
375
376 - def parse(self, vstring):
379
382
384 if isinstance(other, (StringType, TupleType, ListType)):
385 other = SmartVersion(other)
386 elif isinstance(other, SmartVersion):
387 pass
388 elif isinstance(other, Version):
389 other = SmartVersion(other.vstring)
390 else:
391 raise ValueError("Do not know how to treat version %s"
392 % str(other))
393
394
395 i = 0
396 s, o = self.version, other.version
397 regex_prerelease = re.compile('~|-?dev|-?rc|-?svn|-?pre', re.I)
398 for i in xrange(max(len(s), len(o))):
399 if i < len(s): si = s[i]
400 else: si = None
401 if i < len(o): oi = o[i]
402 else: oi = None
403
404 if si == oi:
405 continue
406
407 for x,y,mult in ((si, oi, 1), (oi, si, -1)):
408 if x is None:
409 if isinstance(y, int):
410 return -mult
411 if isinstance(y, StringType):
412 if (regex_prerelease.match(y)):
413 return mult
414
415 else:
416
417 return -mult
418 else:
419 raise RuntimeError, "Should not have got here with %s" \
420 % y
421 elif isinstance(x, int):
422 if not isinstance(y, int):
423 return mult
424 return mult*cmp(x, y)
425 elif isinstance(x, StringType):
426 if isinstance(y, StringType):
427 return mult*cmp(x,y)
428 return 0
429
431 """Return a list of break points.
432
433 :Parameters:
434 items : iterable
435 list of items, such as chunks
436 contiguous : bool
437 if `True` (default) then raise Value Error if items are not
438 contiguous, i.e. a label occur in multiple contiguous sets
439
440 :raises: ValueError
441
442 :return: list of indexes for every new set of items
443 """
444 prev = None
445 known = []
446 """List of items which was already seen"""
447 result = []
448 """Resultant list"""
449 for index in xrange(len(items)):
450 item = items[index]
451 if item in known:
452 if index > 0:
453 if prev != item:
454 if contiguous:
455 raise ValueError, \
456 "Item %s was already seen before" % str(item)
457 else:
458 result.append(index)
459 else:
460 known.append(item)
461 result.append(index)
462 prev = item
463 return result
464
465
466 -def RFEHistory2maps(history):
467 """Convert history generated by RFE into the array of binary maps
468
469 Example:
470 history2maps(N.array( [ 3,2,1,0 ] ))
471 results in
472 array([[ 1., 1., 1., 1.],
473 [ 1., 1., 1., 0.],
474 [ 1., 1., 0., 0.],
475 [ 1., 0., 0., 0.]])
476 """
477
478
479 history = N.array(history)
480 nfeatures, steps = len(history), max(history) - min(history) + 1
481 history_maps = N.zeros((steps, nfeatures))
482
483 for step in xrange(steps):
484 history_maps[step, history >= step] = 1
485
486 return history_maps
487
488
490 """Compute some overlap stats from a sequence of binary maps.
491
492 When called with a sequence of binary maps (e.g. lists or arrays) the
493 fraction of mask elements that are non-zero in a customizable proportion
494 of the maps is returned. By default this threshold is set to 1.0, i.e.
495 such an element has to be non-zero in *all* maps.
496
497 Three additional maps (same size as original) are computed:
498
499 * overlap_map: binary map which is non-zero for each overlapping element.
500 * spread_map: binary map which is non-zero for each element that is
501 non-zero in any map, but does not exceed the overlap
502 threshold.
503 * ovstats_map: map of float with the raw elementwise fraction of overlap.
504
505 All maps are available via class members.
506 """
507 - def __init__(self, overlap_threshold=1.0):
508 """Nothing to be seen here.
509 """
510 self.__overlap_threshold = overlap_threshold
511
512
513 self.overlap_map = None
514 self.spread_map = None
515 self.ovstats_map = None
516
517
519 """Returns fraction of overlapping elements.
520 """
521 ovstats = N.mean(maps, axis=0)
522
523 self.overlap_map = (ovstats >= self.__overlap_threshold )
524 self.spread_map = N.logical_and(ovstats > 0.0,
525 ovstats < self.__overlap_threshold)
526 self.ovstats_map = ovstats
527
528 return N.mean(ovstats >= self.__overlap_threshold)
529
530
532 """Simple class to define properties of an event.
533
534 The class is basically a dictionary. Any properties can
535 be passed as keyword arguments to the constructor, e.g.:
536
537 >>> ev = Event(onset=12, duration=2.45)
538
539 Conventions for keys:
540
541 `onset`
542 The onset of the event in some unit.
543 `duration`
544 The duration of the event in the same unit as `onset`.
545 `label`
546 E.g. the condition this event is part of.
547 `chunk`
548 Group this event is part of (if any), e.g. experimental run.
549 `features`
550 Any amount of additional features of the event. This might include
551 things like physiological measures, stimulus intensity. Must be a mutable
552 sequence (e.g. list), if present.
553 """
554 _MUSTHAVE = ['onset']
555
557
558 dict.__init__(self, **kwargs)
559
560
561 for k in Event._MUSTHAVE:
562 if not self.has_key(k):
563 raise ValueError, "Event must have '%s' defined." % k
564
565
567 """Convert `onset` and `duration` information into descrete timepoints.
568
569 :Parameters:
570 dt: float
571 Temporal distance between two timepoints in the same unit as `onset`
572 and `duration`.
573 storeoffset: bool
574 If True, the temporal offset between original `onset` and
575 descretized `onset` is stored as an additional item in `features`.
576
577 :Return:
578 A copy of the original `Event` with `onset` and optionally `duration`
579 replaced by their corresponding descrete timepoint. The new onset will
580 correspond to the timepoint just before or exactly at the original
581 onset. The new duration will be the number of timepoints covering the
582 event from the computed onset timepoint till the timepoint exactly at
583 the end, or just after the event.
584
585 Note again, that the new values are expressed as #timepoint and not
586 in their original unit!
587 """
588 dt = float(dt)
589 onset = self['onset']
590 out = deepcopy(self)
591
592
593 out['onset'] = int(N.floor(onset / dt))
594
595 if storeoffset:
596
597 offset = onset - (out['onset'] * dt)
598
599 if out.has_key('features'):
600 out['features'].append(offset)
601 else:
602 out['features'] = [offset]
603
604 if out.has_key('duration'):
605
606
607 out['duration'] = int(N.ceil((onset + out['duration']) / dt) \
608 - out['onset'])
609
610 return out
611
612
613
615 - def __init__(self, call, attribs=None, argfilter=None, expand_args=True,
616 copy_attribs=True):
617 """Initialize
618
619 :Parameters:
620 expand_args : bool
621 Either to expand the output of looper into a list of arguments for
622 call
623 attribs : list of basestr
624 What attributes of call to store and return later on?
625 copy_attribs : bool
626 Force copying values of attributes
627 """
628
629 self.call = call
630 """Call which gets called in the harvester."""
631
632 if attribs is None:
633 attribs = []
634 if not isSequenceType(attribs):
635 raise ValueError, "'attribs' have to specified as a sequence."
636
637 if not (argfilter is None or isSequenceType(argfilter)):
638 raise ValueError, "'argfilter' have to be a sequence or None."
639
640
641 self.argfilter = argfilter
642 self.expand_args = expand_args
643 self.copy_attribs = copy_attribs
644 self.attribs = attribs
645
646
647
649 """World domination helper: do whatever it is asked and accumulate results
650
651 XXX Thinks about:
652 - Might we need to deepcopy attributes values?
653 - Might we need to specify what attribs to copy and which just to bind?
654 """
655
656 - def __init__(self, source, calls, simplify_results=True):
657 """Initialize
658
659 :Parameters:
660 source
661 Generator which produce food for the calls.
662 calls : sequence of HarvesterCall instances
663 Calls which are processed in the loop. All calls are processed in
664 order of apperance in the sequence.
665 simplify_results: bool
666 Remove unecessary overhead in results if possible (nested lists
667 and dictionaries).
668 """
669 if not isSequenceType(calls):
670 raise ValueError, "'calls' have to specified as a sequence."
671
672 self.__source = source
673 """Generator which feeds the harvester"""
674
675 self.__calls = calls
676 """Calls which gets called with each generated source"""
677
678 self.__simplify_results = simplify_results
679
680
682 """
683 """
684
685
686 results = [dict([('result', [])] + [(a, []) for a in c.attribs]) \
687 for c in self.__calls]
688
689
690 for (i, X) in enumerate(self.__source(*args, **kwargs)):
691 for (c, call) in enumerate(self.__calls):
692
693 if i == 0 and call.expand_args and not isSequenceType(X):
694 raise RuntimeError, \
695 "Cannot expand non-sequence result from %s" % \
696 `self.__source`
697
698
699 if call.argfilter:
700 filtered_args = [X[f] for f in call.argfilter]
701 else:
702 filtered_args = X
703
704 if call.expand_args:
705 result = call.call(*filtered_args)
706 else:
707 result = call.call(filtered_args)
708
709
710
711
712
713
714
715 results[c]['result'].append(result)
716
717 for attrib in call.attribs:
718 attrv = call.call.__getattribute__(attrib)
719
720 if call.copy_attribs:
721 attrv = copy(attrv)
722
723 results[c][attrib].append(attrv)
724
725
726 if self.__simplify_results:
727
728 for (c, call) in enumerate(self.__calls):
729 if not len(call.attribs):
730 results[c] = results[c]['result']
731
732 if len(self.__calls) == 1:
733 results = results[0]
734
735 return results
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754