Package mvpa :: Package misc :: Module support
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.misc.support

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Support function -- little helpers in everyday life""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  import numpy as N 
 14  import re, os 
 15   
 16  # for SmartVersion 
 17  from distutils.version import Version 
 18  from types import StringType, TupleType, ListType 
 19   
 20  from mvpa.base import warning 
 21  from mvpa.support.copy import copy, deepcopy 
 22  from operator import isSequenceType 
 23   
 24  if __debug__: 
 25      from mvpa.base import debug 
 26   
 27   
def reuseAbsolutePath(file1, file2, force=False):
    """Reuse the directory of `file1` for `file2` if no absolute path is
    given for `file2`.

    :Parameters:
      force : bool
        if True, reuse the path of `file1` even if `file2` is absolute
    """
    file2_is_absolute = file2.startswith(os.path.sep)
    if file2_is_absolute and not force:
        # file2 already carries its own absolute location
        return file2
    # strip leading separators so os.path.join does not discard the prefix
    return os.path.join(os.path.dirname(file1), file2.lstrip(os.path.sep))
41 42
def transformWithBoxcar(data, startpoints, boxlength, offset=0, fx=N.mean):
    """Extract boxcar windows from an array and reduce each with `fx`.

    A boxcar is a window of `boxlength` elements along the first axis of
    `data`, beginning at a given startpoint (optionally shifted by
    `offset`).  A customizable function `fx` (default: averaging) is applied
    to each boxcar individually.

    :param data: array with an arbitrary number of dimensions
    :type data: array
    :param startpoints: boxcar startpoints as indices along the first axis
    :type startpoints: sequence
    :param boxlength: length of the boxcar window in #array elements
    :type boxlength: int
    :param offset: optional offset between the configured starting point and
      the actual beginning of the boxcar window
    :type offset: int
    :rtype: array (len(startpoints) x data.shape[1:])
    """
    if boxlength < 1:
        raise ValueError("Boxlength lower than 1 makes no sense.")

    # reject any window that would reach outside the data
    nsamples = len(data)
    for start in startpoints:
        begin = start + offset
        if begin < 0 or begin + boxlength > nsamples:
            raise ValueError('Illegal box: start: %i, offset: %i, length: %i'
                             % (start, offset, boxlength))

    # per-window index ranges along the first axis
    windows = [range(start + offset, start + offset + boxlength)
               for start in startpoints]

    # reduce every window with fx and stack the results
    return N.array([fx(data[N.array(win)], axis=0) for win in windows])
80 81
82 -def _getUniqueLengthNCombinations_lt3(data, n):
83 """Generates a list of lists containing all combinations of 84 elements of data of length 'n' without repetitions. 85 86 data: list 87 n: integer 88 89 This function is adapted from a Java version posted in some forum on 90 the web as an answer to the question 'How can I generate all possible 91 combinations of length n?'. Unfortunately I cannot remember which 92 forum it was. 93 94 NOTE: implementation is broken for n>2 95 """ 96 97 if n > 2: 98 raise ValueError, "_getUniqueLengthNCombinations_lt3 " \ 99 "is broken for n>2, hence should not be used directly." 100 101 # to be returned 102 combos = [] 103 104 # local function that will be called recursively to collect the 105 # combination elements 106 def take(data, occupied, depth, taken): 107 for i, d in enumerate(data): 108 # only do something if this element hasn't been touch yet 109 if occupied[i] == False: 110 # see whether will reached the desired length 111 if depth < n-1: 112 # flag the current element as touched 113 occupied[i] = True 114 # next level 115 take(data, occupied, depth+1, taken + [d]) 116 # if the current element would be set 'free', it would 117 # results in ALL combinations of elements (obeying order 118 # of elements) and not just in the unique sets of 119 # combinations (without order) 120 #occupied[i] = False 121 else: 122 # store the final combination 123 combos.append(taken + [d])
124 # some kind of bitset that stores the status of each element 125 # (contained in combination or not) 126 occupied = [False] * len(data) 127 # get the combinations 128 take(data, occupied, 0, []) 129 130 # return the result 131 return combos 132
def xuniqueCombinations(L, n):
    """Generator of unique combinations from a list `L` of objects in
    groups of size `n`.

    # XXX EO: I guess they are already sorted.
    # XXX EO: It seems to work well for n>20 :)

    :Parameters:
      L : list
        list of unique ids
      n : int
        grouping size

    Adopted from Li Daobing
    http://code.activestate.com/recipes/190465/
    (MIT license, according to activestate.com's policy)
    """
    if n == 0:
        yield []
        return
    # fix each possible head element, then recurse on the remaining tail
    for head in range(len(L) - n + 1):
        for tail in xuniqueCombinations(L[head + 1:], n - 1):
            yield [L[head]] + tail
155
156 -def _getUniqueLengthNCombinations_binary(L, n=None, sort=True):
157 """Find all subsets of data 158 159 :Parameters: 160 L : list 161 list of unique ids 162 n : None or int 163 If None, all possible subsets are returned. If n is specified (int), 164 then only the ones of the length n are returned 165 sort : bool 166 Either to sort the resultant sequence 167 168 Adopted from Alex Martelli: 169 http://mail.python.org/pipermail/python-list/2001-January/067815.html 170 """ 171 N = len(L) 172 if N > 20 and n == 1: 173 warning("getUniqueLengthNCombinations_binary should not be used for " 174 "large N") 175 result = [] 176 for X in range(2**N): 177 x = [ L[i] for i in range(N) if X & (1L<<i) ] 178 if n is None or len(x) == n: 179 # yield x # if we wanted to use it as a generator 180 result.append(x) 181 result.sort() 182 # if __debug__ and n is not None: 183 # # verify the result 184 # # would need scipy... screw it 185 # assert(len(result) == ...) 186 return result
187 188
def getUniqueLengthNCombinations(L, n=None, sort=True):
    """Find all subsets of data

    :Parameters:
      L : list
        list of unique ids
      n : None or int
        If None, all possible subsets are returned. If n is specified (int),
        then only the ones of the length n are returned
      sort : bool
        Either to sort the resultant sequence (forwarded to the binary
        implementation)

    TODO: work out single stable implementation -- probably just by fixing
    _getUniqueLengthNCombinations_lt3
    """
    if n == 1:
        # the recursive implementation is only valid for n <= 2
        return _getUniqueLengthNCombinations_lt3(L, n)
    elif n is None:
        # BUG FIX: forward the caller's sort preference instead of
        # hard-coding True; also use `is None` instead of `== None`
        return _getUniqueLengthNCombinations_binary(L, n, sort=sort)
    else:
        # XXX EO: Is it safe to remove "list" and use generator
        #         directly to save memory for large number of
        #         combinations? It seems OK according to tests but
        #         I'd like a second opinion.
        # XXX EO: what about inserting a warning if number of
        #         combinations > 10000? No one would run it...
        return list(xuniqueCombinations(L, n))
214 215
216 -def _getUniqueLengthNCombinations_binary(L, n=None, sort=True):
217 """Find all subsets of data 218 219 :Parameters: 220 L : list 221 list of unique ids 222 n : None or int 223 If None, all possible subsets are returned. If n is specified (int), 224 then only the ones of the length n are returned 225 sort : bool 226 Either to sort the resultant sequence 227 228 Adopted from Alex Martelli: 229 http://mail.python.org/pipermail/python-list/2001-January/067815.html 230 """ 231 N = len(L) 232 if N > 20 and n == 1: 233 warning("getUniqueLengthNCombinations_binary should not be used for " 234 "large N") 235 result = [] 236 for X in range(2**N): 237 x = [ L[i] for i in range(N) if X & (1L<<i) ] 238 if n is None or len(x) == n: 239 # yield x # if we wanted to use it as a generator 240 result.append(x) 241 result.sort() 242 # if __debug__ and n is not None: 243 # # verify the result 244 # # would need scipy... screw it 245 # assert(len(result) == ...) 246 return result
247 248
def getUniqueLengthNCombinations(L, n=None, sort=True):
    """Find all subsets of data

    :Parameters:
      L : list
        list of unique ids
      n : None or int
        If None, all possible subsets are returned. If n is specified (int),
        then only the ones of the length n are returned
      sort : bool
        Either to sort the resultant sequence (forwarded to the binary
        implementation)

    TODO: work out single stable implementation -- probably just by fixing
    _getUniqueLengthNCombinations_lt3
    """
    if n == 1:
        # the recursive implementation is only valid for n <= 2
        return _getUniqueLengthNCombinations_lt3(L, n)
    # BUG FIX: forward the caller's sort preference instead of hard-coding
    # True; the binary implementation handles both n is None and n > 1
    return _getUniqueLengthNCombinations_binary(L, n, sort=sort)
266 267
def indentDoc(v):
    """Given a `value` returns a string where each line is indented

    Needed for a cleaner __repr__ output
    `v` - arbitrary
    """
    # a plain str.replace suffices -- no regex semantics are needed here
    # NOTE(review): replacement string taken verbatim from the original;
    # confirm the intended indent width against the repository source
    return str(v).replace('\n', '\n ')
275 276
def idhash(val):
    """Craft unique id+hash for an object

    Result is "<id>" or "<id>:<hash>" when the object (or a raw-memory
    view of it) is hashable.  Best effort: hashing failures are silently
    ignored and only the id is returned.
    """
    res = "%s" % id(val)
    if isinstance(val, list):
        # lists are unhashable -- hash their tuple form instead
        val = tuple(val)
    try:
        # hash the raw memory when possible (catches in-place mutation)
        res += ":%s" % hash(buffer(val))
    except Exception:
        # BUG FIX: bare `except:` would also swallow KeyboardInterrupt
        # and SystemExit; narrowed to Exception while keeping the
        # deliberate best-effort behavior
        try:
            res += ":%s" % hash(val)
        except Exception:
            pass
    return res
292
def isSorted(items):
    """Check if listed items are in sorted order.

    :Parameters:
      `items`: iterable container

    :return: `True` if sorted, otherwise `False`.
    """
    # sort a deep copy in place so the caller's container is untouched
    reference = deepcopy(items)
    reference.sort()
    equality = reference == items
    # element-wise comparisons (e.g. numpy arrays) must be reduced to a
    # single boolean
    # XXX yarik forgotten analog to isiterable
    if hasattr(equality, '__iter__'):
        equality = N.all(equality)
    return equality
308 309
def isInVolume(coord, shape):
    """For given coord check if it is within a specified volume size.

    Returns True/False. Assumes that volume coordinates start at 0.
    No more generalization (arbitrary minimal coord) is done to save
    on performance
    """
    for axis, pos in enumerate(coord):
        if pos < 0 or pos >= shape[axis]:
            return False
    return True
321 322
def version_to_tuple(v):
    """Convert literal string into a tuple, if possible of ints

    Tuple of integers constructed by splitting at '.' or interleaves
    of numerics and alpha numbers

    :raises: ValueError if `v` is neither a string nor a tuple/list
    """
    # NOTE: `basestring` replaced by `str`; unicode handling differs on
    # Python 2 only for unicode inputs
    if isinstance(v, str):
        v = v.split('.')
    elif isinstance(v, (tuple, list)):
        # already a sequence of components
        pass
    else:
        raise ValueError("Do not know how to treat version '%s'" % str(v))

    # Try to convert items into ints
    vres = []

    regex = re.compile('(?P<numeric>[0-9]*)'
                       '(?P<alpha>[~+-]*[A-Za-z]*)(?P<suffix>.*)')
    for x in v:
        try:
            vres.append(int(x))
        except ValueError:
            # split the component into interleaved numeric/alpha runs
            suffix = x
            while suffix != '':
                res = regex.search(suffix)
                if res:
                    resd = res.groupdict()
                    if resd['numeric'] != '':
                        vres.append(int(resd['numeric']))
                    if resd['alpha'] != '':
                        vres.append(resd['alpha'])
                    if resd['suffix'] == suffix:
                        # BUG FIX: no progress was made (character matched
                        # by neither group, e.g. '!') -- the original
                        # looped forever here; keep the remainder as-is
                        vres.append(suffix)
                        break
                    suffix = resd['suffix']
                else:
                    # BUG FIX: was `resd += [suffix]` -- `resd` is a dict
                    # (TypeError), and undefined on the first iteration
                    vres.append(suffix)
                    break
    return tuple(vres)
364
class SmartVersion(Version):
    """A bit evolved comparison of versions

    The reason for not using python's distutil.version is that it
    seems to have no clue about somewhat common conventions of using
    '-dev' or 'dev' or 'rc' suffixes for upcoming releases (so major
    version does contain upcoming release already).

    So here is an ad-hoc and not as nice implementation
    """

    def parse(self, vstring):
        # Version.__init__ delegates here; store both the literal string
        # and its tuple form (ints interleaved with alpha chunks)
        self.vstring = vstring
        self.version = version_to_tuple(vstring)

    def __str__(self):
        return self.vstring

    def __cmp__(self, other):
        # Coerce `other` into a SmartVersion so both sides expose a
        # comparable `.version` tuple
        if isinstance(other, (StringType, TupleType, ListType)):
            other = SmartVersion(other)
        elif isinstance(other, SmartVersion):
            pass
        elif isinstance(other, Version):
            other = SmartVersion(other.vstring)
        else:
            raise ValueError("Do not know how to treat version %s"
                             % str(other))

        # Do ad-hoc comparison of strings
        i = 0
        s, o = self.version, other.version
        # suffixes that signal a pre-release (i.e. the version WITHOUT the
        # suffix is considered newer)
        regex_prerelease = re.compile('~|-?dev|-?rc|-?svn|-?pre', re.I)
        # walk both tuples in lockstep; None marks an exhausted side
        for i in xrange(max(len(s), len(o))):
            if i < len(s): si = s[i]
            else: si = None
            if i < len(o): oi = o[i]
            else: oi = None

            if si == oi:
                continue

            # symmetric handling: try (self, other) then (other, self),
            # with `mult` flipping the sign of the verdict accordingly
            for x, y, mult in ((si, oi, 1), (oi, si, -1)):
                if x is None:
                    if isinstance(y, int):
                        return -mult # we got '.1' suffix
                    if isinstance(y, StringType):
                        if (regex_prerelease.match(y)):
                            return mult # so we got something to signal
                                        # pre-release, so first one won
                        else:
                            # otherwise the other one wins
                            return -mult
                    else:
                        raise RuntimeError, "Should not have got here with %s" \
                              % y
                elif isinstance(x, int):
                    # ints beat strings at the same position
                    if not isinstance(y, int):
                        return mult
                    return mult*cmp(x, y) # both are ints
                elif isinstance(x, StringType):
                    if isinstance(y, StringType):
                        return mult*cmp(x, y)
        # all positions compared equal
        return 0
429
def getBreakPoints(items, contiguous=True):
    """Return a list of break points.

    :Parameters:
      items : iterable
        list of items, such as chunks
      contiguous : bool
        if `True` (default) then raise ValueError if items are not
        contiguous, i.e. a label occur in multiple contiguous sets

    :raises: ValueError

    :return: list of indexes for every new set of items
    """
    seen = []          # items encountered so far
    breakpoints = []   # indexes where a new set starts
    previous = None
    for idx, current in enumerate(items):
        if current not in seen:
            # first occurrence: remember it and mark a break point
            seen.append(current)
            breakpoints.append(idx)
        elif idx > 0 and previous != current:
            # item re-appears after a different one => non-contiguous set
            if contiguous:
                raise ValueError("Item %s was already seen before"
                                 % str(current))
            breakpoints.append(idx)
        previous = current
    return breakpoints
464 465
def RFEHistory2maps(history):
    """Convert history generated by RFE into the array of binary maps

    Example:
      history2maps(N.array( [ 3,2,1,0 ] ))
    results in
      array([[ 1.,  1.,  1.,  1.],
             [ 1.,  1.,  1.,  0.],
             [ 1.,  1.,  0.,  0.],
             [ 1.,  0.,  0.,  0.]])
    """
    # make sure we operate on an array
    history = N.array(history)
    nfeatures = len(history)
    nsteps = max(history) - min(history) + 1
    maps = N.zeros((nsteps, nfeatures))

    # a feature is "on" at a step if it survived at least that many steps
    for step in range(nsteps):
        maps[step, history >= step] = 1

    return maps
487 488
class MapOverlap(object):
    """Compute some overlap stats from a sequence of binary maps.

    When called with a sequence of binary maps (e.g. lists or arrays) the
    fraction of mask elements that are non-zero in a customizable proportion
    of the maps is returned. By default this threshold is set to 1.0, i.e.
    such an element has to be non-zero in *all* maps.

    Three additional maps (same size as original) are computed:

      * overlap_map: binary map which is non-zero for each overlapping element.
      * spread_map: binary map which is non-zero for each element that is
                    non-zero in any map, but does not exceed the overlap
                    threshold.
      * ovstats_map: map of float with the raw elementwise fraction of overlap.

    All maps are available via class members.
    """

    def __init__(self, overlap_threshold=1.0):
        """Nothing to be seen here.
        """
        self.__overlap_threshold = overlap_threshold

        # computed on each call; None until then (pylint happiness)
        self.overlap_map = None
        self.spread_map = None
        self.ovstats_map = None

    def __call__(self, maps):
        """Returns fraction of overlapping elements.
        """
        # elementwise fraction of maps in which each element is non-zero
        fractions = N.mean(maps, axis=0)
        threshold = self.__overlap_threshold

        self.ovstats_map = fractions
        self.overlap_map = (fractions >= threshold)
        self.spread_map = N.logical_and(fractions > 0.0,
                                        fractions < threshold)

        return N.mean(fractions >= threshold)
529 530
class Event(dict):
    """Simple class to define properties of an event.

    The class is basically a dictionary. Any properties can
    be passed as keyword arguments to the constructor, e.g.:

    >>> ev = Event(onset=12, duration=2.45)

    Conventions for keys:

    `onset`
      The onset of the event in some unit.
    `duration`
      The duration of the event in the same unit as `onset`.
    `label`
      E.g. the condition this event is part of.
    `chunk`
      Group this event is part of (if any), e.g. experimental run.
    `features`
      Any amount of additional features of the event. This might include
      things like physiological measures, stimulus intensity. Must be a
      mutable sequence (e.g. list), if present.
    """
    # keys that every Event must define
    _MUSTHAVE = ['onset']

    def __init__(self, **kwargs):
        """Store all keyword arguments and verify mandatory keys.

        :raises: ValueError if any key in `_MUSTHAVE` is missing
        """
        # store everything
        dict.__init__(self, **kwargs)

        # basic checks; `in` replaces deprecated dict.has_key()
        for k in Event._MUSTHAVE:
            if k not in self:
                raise ValueError("Event must have '%s' defined." % k)

    def asDescreteTime(self, dt, storeoffset=False):
        """Convert `onset` and `duration` information into discrete timepoints.

        :Parameters:
          dt: float
            Temporal distance between two timepoints in the same unit as
            `onset` and `duration`.
          storeoffset: bool
            If True, the temporal offset between original `onset` and
            discretized `onset` is stored as an additional item in `features`.

        :Return:
          A copy of the original `Event` with `onset` and optionally `duration`
          replaced by their corresponding discrete timepoint. The new onset
          corresponds to the timepoint just before or exactly at the original
          onset. The new duration is the number of timepoints covering the
          event from the computed onset timepoint till the timepoint exactly
          at the end, or just after the event.

          Note again, that the new values are expressed as #timepoint and not
          in their original unit!
        """
        dt = float(dt)
        onset = self['onset']
        out = deepcopy(self)

        # get the timepoint just prior (or equal to) the onset
        out['onset'] = int(N.floor(onset / dt))

        if storeoffset:
            # residual between the original and the discretized onset
            offset = onset - (out['onset'] * dt)

            if 'features' in out:
                out['features'].append(offset)
            else:
                out['features'] = [offset]

        if 'duration' in out:
            # number of timepoints covering the event, from the computed
            # onset to the timepoint just after the end of the event
            out['duration'] = int(N.ceil((onset + out['duration']) / dt)
                                  - out['onset'])

        return out
611 612 613
class HarvesterCall(object):
    def __init__(self, call, attribs=None, argfilter=None, expand_args=True,
                 copy_attribs=True):
        """Initialize

        :Parameters:
          call
            Callable which gets invoked by the harvester.
          attribs : list of basestr
            What attributes of call to store and return later on?
          argfilter : sequence or None
            Optional index filter applied to each generated argument set.
          expand_args : bool
            Either to expand the output of looper into a list of arguments
            for call
          copy_attribs : bool
            Force copying values of attributes
        """
        # call which gets called in the harvester
        self.call = call

        if attribs is None:
            attribs = []
        if not isSequenceType(attribs):
            raise ValueError("'attribs' have to specified as a sequence.")
        if not (argfilter is None or isSequenceType(argfilter)):
            raise ValueError("'argfilter' have to be a sequence or None.")

        # now give it to me...
        self.argfilter = argfilter
        self.expand_args = expand_args
        self.copy_attribs = copy_attribs
        self.attribs = attribs
645 646 647
class Harvester(object):
    """World domination helper: do whatever it is asked and accumulate results

    XXX Thinks about:
      - Might we need to deepcopy attributes values?
      - Might we need to specify what attribs to copy and which just to bind?
    """

    def __init__(self, source, calls, simplify_results=True):
        """Initialize

        :Parameters:
          source
            Generator which produce food for the calls.
          calls : sequence of HarvesterCall instances
            Calls which are processed in the loop. All calls are processed in
            order of apperance in the sequence.
          simplify_results: bool
            Remove unecessary overhead in results if possible (nested lists
            and dictionaries).
        """
        if not isSequenceType(calls):
            raise ValueError, "'calls' have to specified as a sequence."

        self.__source = source
        """Generator which feeds the harvester"""

        self.__calls = calls
        """Calls which gets called with each generated source"""

        self.__simplify_results = simplify_results


    def __call__(self, *args, **kwargs):
        """
        """
        # prepare complex result structure for all calls and their respective
        # attributes: calls x dict(attributes x loop iterations)
        results = [dict([('result', [])] + [(a, []) for a in c.attribs]) \
                   for c in self.__calls]

        # Lets do it!
        # one iteration per item produced by the source generator
        for (i, X) in enumerate(self.__source(*args, **kwargs)):
            for (c, call) in enumerate(self.__calls):
                # sanity check -- only performed on the very first iteration
                if i == 0 and call.expand_args and not isSequenceType(X):
                    raise RuntimeError, \
                          "Cannot expand non-sequence result from %s" % \
                          `self.__source`

                # apply argument filter (and reorder) if requested
                if call.argfilter:
                    filtered_args = [X[f] for f in call.argfilter]
                else:
                    filtered_args = X

                # either spread the filtered arguments over the call's
                # parameters or pass them through as a single argument
                if call.expand_args:
                    result = call.call(*filtered_args)
                else:
                    result = call.call(filtered_args)

                # # XXX pylint doesn't like `` for some reason
                # if __debug__:
                #     debug("LOOP", "Iteration %i on call %s. Got result %s" %
                #           (i, `self.__call`, `result`))

                results[c]['result'].append(result)

                # harvest the requested attributes off the call object,
                # optionally copying them to decouple from later mutation
                for attrib in call.attribs:
                    attrv = call.call.__getattribute__(attrib)

                    if call.copy_attribs:
                        attrv = copy(attrv)

                    results[c][attrib].append(attrv)

        # reduce results structure
        if self.__simplify_results:
            # get rid of dictionary if just the results are requested
            for (c, call) in enumerate(self.__calls):
                if not len(call.attribs):
                    results[c] = results[c]['result']

            # single call -- unwrap the outer list as well
            if len(self.__calls) == 1:
                results = results[0]

        return results
736 737 738 739 # XXX MH: this doesn't work in all cases, as you cannot have *args after a 740 # kwarg. 741 #def loop(looper, call, 742 # unroll=True, attribs=None, copy_attribs=True, *args, **kwargs): 743 # """XXX Loop twin brother 744 # 745 # Helper for those who just wants to do smth like 746 # loop(blah, bleh, grgr) 747 # instead of 748 # Loop(blah, bleh)(grgr) 749 # """ 750 # print looper, call, unroll, attribs, copy_attribs 751 # print args, kwargs 752 # return Loop(looper=looper, call=call, unroll=unroll, 753 # attribs=attribs, copy_attribs=copy_attribs)(*args, **kwargs) 754