Package csb :: Package bio :: Package structure
[frames] | no frames]

Source Code for Package csb.bio.structure

   1  """ 
   2  3D and secondary structure APIs. 
   3   
   4  This module defines some of the most fundamental abstractions in the library: 
   5  L{Structure}, L{Chain}, L{Residue} and L{Atom}. Instances of these objects may 
   6  exist independently and that is perfectly fine, but usually they are part of a 
   7  Composite aggregation. The root node in this Composite is a L{Structure} (or 
   8  L{Ensemble}). L{Structure}s are composed of L{Chain}s, and each L{Chain} is a 
   9  collection of L{Residue}s. The leaf node is L{Atom}.  
  10   
  11  All of these objects implement the base L{AbstractEntity} interface. Therefore, 
  12  every node in the Composite can be transformed: 
  13       
  14      >>> r, t = [rotation matrix], [translation vector] 
  15      >>> entity.transform(r, t) 
  16       
  17  and it knows its immediate children: 
  18   
  19      >>> entity.items 
  20      <iterator>    # over all immediate child entities 
  21       
  22  If you want to traverse the complete Composite tree, starting at arbitrary level, 
  23  and down to the lowest level, use one of the L{CompositeEntityIterator}s. Or just 
  24  call L{AbstractEntity.components}: 
  25   
  26      >>> entity.components() 
  27      <iterator>   # over all descendants, of any type, at any level 
  28      >>> entity.components(klass=Residue) 
  29      <iterator>   # over all Residue descendants 
  30       
  31  Some of the inner objects in this hierarchy behave just like dictionaries 
  32  (but are not): 
  33   
  34      >>> structure.chains['A']       # access chain A by ID 
  35      <Chain A: Protein> 
  36      >>> structure['A']              # the same 
  37      <Chain A: Protein> 
  38      >>> residue.atoms['CS']           
  39      <Atom: CA>                      # access an atom by its name 
  40      >>> residue.atoms['CS']           
  41      <Atom: CA>                      # the same 
  42           
  43  Others behave like list collections: 
  44   
  45      >>> chain.residues[10]               # 1-based access to the residues in the chain 
  46      <ProteinResidue [10]: PRO 10> 
  47      >>> chain[10]                        # 0-based, list-like access 
  48      <ProteinResidue [11]: GLY 11> 
  49       
  50  Step-wise building of L{Ensemble}s, L{Chain}s and L{Residue}s is supported through 
  51  a number of C{append} methods, for example: 
  52   
  53      >>> residue = ProteinResidue(401, ProteinAlphabet.ALA) 
  54      >>> s.chains['A'].residues.append(residue) 
  55       
  56  See L{EnsembleModelsCollection}, L{StructureChainsTable}, L{ChainResiduesCollection} 
  57  and L{ResidueAtomsTable} for more details. 
  58   
  59  Some other objects in this module of potential interest are the self-explanatory 
  60  L{SecondaryStructure} and L{TorsionAngles}.      
  61  """ 
  62   
  63  import os 
  64  import re 
  65  import copy 
  66  import math 
  67  import numpy 
  68   
  69  import csb.io 
  70  import csb.core 
  71  import csb.numeric 
  72  import csb.bio.utils 
  73   
  74  from abc import ABCMeta, abstractmethod, abstractproperty 
  75   
  76  from csb.bio.sequence import SequenceTypes, SequenceAlphabets, AlignmentTypes 
77 78 79 -class AngleUnits(csb.core.enum):
80 """ 81 Torsion angle unit types 82 """ 83 Degrees='deg'; Radians='rad'
84
85 -class SecStructures(csb.core.enum):
86 """ 87 Secondary structure types 88 """ 89 Helix='H'; Strand='E'; Coil='C'; Turn='T'; Bend='S'; 90 Helix3='G'; PiHelix='I'; BetaBridge='B'; Gap='-'
91
92 -class ChemElements(csb.core.enum):
93 """ 94 Periodic table elements 95 """ 96 H=1; He=2; Li=3; Be=4; B=5; C=6; N=7; O=8; F=9; Ne=10; Na=11; Mg=12; Al=13; Si=14; P=15; 97 S=16; Cl=17; Ar=18; K=19; Ca=20; Sc=21; Ti=22; V=23; Cr=24; Mn=25; Fe=26; Co=27; Ni=28; 98 Cu=29; Zn=30; Ga=31; Ge=32; As=33; Se=34; Br=35; Kr=36; Rb=37; Sr=38; Y=39; Zr=40; Nb=41; 99 Mo=42; Tc=43; Ru=44; Rh=45; Pd=46; Ag=47; Cd=48; In=49; Sn=50; Sb=51; Te=52; I=53; Xe=54; 100 Cs=55; Ba=56; Hf=72; Ta=73; W=74; Re=75; Os=76; Ir=77; Pt=78; Au=79; Hg=80; Tl=81; Pb=82; 101 Bi=83; Po=84; At=85; Rn=86; Fr=87; Ra=88; Rf=104; Db=105; Sg=106; Bh=107; Hs=108; Mt=109; 102 Ds=110; Rg=111; La=57; Ce=58; Pr=59; Nd=60; Pm=61; Sm=62; Eu=63; Gd=64; Tb=65; Dy=66; 103 Ho=67; Er=68; Tm=69; Yb=70; Lu=71; Ac=89; Th=90; Pa=91; U=92; Np=93; Pu=94; Am=95; Cm=96; 104 Bk=97; Cf=98; Es=99; Fm=100; Md=101; No=102; Lr=103; x=-1
105
106 107 -class Broken3DStructureError(ValueError):
108 pass
109
110 -class Missing3DStructureError(Broken3DStructureError):
111 pass
112
113 -class InvalidOperation(Exception):
114 pass
115
116 -class EntityNotFoundError(csb.core.ItemNotFoundError):
117 pass
118
119 -class ChainNotFoundError(EntityNotFoundError):
120 pass
121
122 -class AtomNotFoundError(EntityNotFoundError):
123 pass
124
125 -class EntityIndexError(csb.core.CollectionIndexError):
126 pass
127
128 -class DuplicateModelIDError(csb.core.DuplicateKeyError):
129 pass
130
131 -class DuplicateChainIDError(csb.core.DuplicateKeyError):
132 pass
133
134 -class DuplicateResidueIDError(csb.core.DuplicateKeyError):
135 pass
136
137 -class DuplicateAtomIDError(csb.core.DuplicateKeyError):
138 pass
139
140 -class AlignmentArgumentLengthError(ValueError):
141 pass
142
143 -class BrokenSecStructureError(ValueError):
144 pass
145
146 -class UnknownSecStructureError(BrokenSecStructureError):
147 pass
148
149 -class AbstractEntity(object):
150 """ 151 Base class for all protein structure entities. 152 153 This class defines uniform interface of all entities (e.g. L{Structure}, 154 L{Chain}, L{Residue}) according to the Composite pattern. 155 """ 156 157 __metaclass__ = ABCMeta 158 159 @abstractproperty
160 - def items(self):
161 """ 162 Iterator over all immediate children of the entity 163 @rtype: iterator of L{AbstractEntity} 164 """ 165 pass
166
167 - def components(self, klass=None):
168 """ 169 Return an iterator over all descendants of the entity. 170 171 @param klass: return entities of the specified L{AbstractEntity} subclass 172 only. If None, traverse the hierarchy down to the lowest level. 173 @param klass: class 174 """ 175 for entity in CompositeEntityIterator.create(self, klass): 176 if klass is None or isinstance(entity, klass): 177 yield entity
178
179 - def transform(self, rotation, translation):
180 """ 181 Apply in place RotationMatrix and translation Vector to all atoms. 182 183 @type rotation: numpy array 184 @type translation: numpy array 185 """ 186 for node in self.items: 187 node.transform(rotation, translation)
188
189 - def get_coordinates(self, what=None, skip=False):
190 """ 191 Extract the coordinates of the specified kind(s) of atoms and return 192 them as a list. 193 194 @param what: a list of atom kinds, e.g. ['N', 'CA', 'C'] 195 @type what: list or None 196 197 @return: a list of lists, each internal list corresponding to the coordinates 198 of a 3D vector 199 @rtype: list 200 201 @raise Broken3DStructureError: if a specific atom kind cannot be retrieved from a residue 202 """ 203 coords = [ ] 204 205 for residue in self.components(klass=Residue): 206 for atom_kind in (what or residue.atoms): 207 try: 208 coords.append(residue.atoms[atom_kind].vector) 209 except csb.core.ItemNotFoundError: 210 if skip: 211 continue 212 raise Broken3DStructureError('Could not retrieve {0} atom from the structure'.format(atom_kind)) 213 214 return numpy.array(coords)
215
216 -class CompositeEntityIterator(object):
217 """ 218 Iterates over composite L{AbstractEntity} hierarchies. 219 220 @param node: root entity to traverse 221 @type node: L{AbstractEntity} 222 """ 223
224 - def __init__(self, node):
225 226 if not isinstance(node, AbstractEntity): 227 raise TypeError(node) 228 229 self._node = node 230 self._stack = csb.core.Stack() 231 232 self._inspect(node)
233
234 - def __iter__(self):
235 return self
236
237 - def __next__(self):
238 return self.next()
239
240 - def next(self):
241 242 while True: 243 if self._stack.empty(): 244 raise StopIteration() 245 246 try: 247 current = self._stack.peek() 248 node = next(current) 249 self._inspect(node) 250 return node 251 252 except StopIteration: 253 self._stack.pop()
254
255 - def _inspect(self, node):
256 """ 257 Push C{node}'s children to the stack. 258 """ 259 self._stack.push(node.items)
260 261 @staticmethod
262 - def create(node, leaf=None):
263 """ 264 Create a new composite iterator. 265 266 @param leaf: if not None, return a L{ConfinedEntityIterator} 267 @type leaf: class 268 @rtype: L{CompositeEntityIterator} 269 """ 270 if leaf is None: 271 return CompositeEntityIterator(node) 272 else: 273 return ConfinedEntityIterator(node, leaf)
274
275 -class ConfinedEntityIterator(CompositeEntityIterator):
276 """ 277 Iterates over composite L{AbstractEntity} hierarchies, but terminates 278 the traversal of a branch once a specific node type is encountered. 279 280 @param node: root entity to traverse 281 @type node: L{AbstractEntity} 282 @param leaf: traverse the hierarchy down to the specified L{AbstractEntity} 283 @type leaf: class 284 """
285 - def __init__(self, node, leaf):
286 287 if not issubclass(leaf, AbstractEntity): 288 raise TypeError(leaf) 289 290 self._leaf = leaf 291 super(ConfinedEntityIterator, self).__init__(node)
292
293 - def _inspect(self, node):
294 295 if not isinstance(node, self._leaf): 296 self._stack.push(node.items)
297
298 -class Ensemble(csb.core.AbstractNIContainer, AbstractEntity):
299 """ 300 Represents an ensemble of multiple L{Structure} models. 301 Provides a list-like access to these models: 302 303 >>> ensemble[0] 304 <Structure Model 1: accn, x chains> 305 >>> ensemble.models[1] 306 <Structure Model 1: accn, x chains> 307 """ 308
309 - def __init__(self):
310 self._models = EnsembleModelsCollection()
311
312 - def __repr__(self):
313 return "<Ensemble: {0} models>".format(self.models.length)
314 315 @property
316 - def _children(self):
317 return self._models
318 319 @property
320 - def models(self):
321 """ 322 Access Ensembles's models by model ID 323 @rtype: L{EnsembleModelsCollection} 324 """ 325 return self._models
326 327 @property
328 - def items(self):
329 return iter(self._models)
330 331 @property
332 - def first_model(self):
333 """ 334 The first L{Structure} in the ensemble (if available) 335 @rtype: L{Structure} or None 336 """ 337 if len(self._models) > 0: 338 return self[0] 339 return None
340
341 - def to_pdb(self, output_file=None):
342 """ 343 Dump the ensemble in PDB format. 344 345 @param output_file: output file name or open stream 346 @type output_file: str or stream 347 """ 348 from csb.bio.io.wwpdb import PDBEnsembleFileBuilder 349 350 if self.models.length < 1: 351 raise InvalidOperation("Can't dump an empty ensemble") 352 353 temp = csb.io.MemoryStream() 354 355 builder = PDBEnsembleFileBuilder(temp) 356 builder.add_header(self.first_model) 357 358 for model in self.models: 359 builder.add_structure(model) 360 361 builder.finalize() 362 363 data = temp.getvalue() 364 temp.close() 365 366 if not output_file: 367 return data 368 else: 369 with csb.io.EntryWriter(output_file, close=False) as out: 370 out.write(data)
371
372 -class EnsembleModelsCollection(csb.core.CollectionContainer):
373
374 - def __init__(self):
375 376 super(EnsembleModelsCollection, self).__init__(type=Structure, start_index=1) 377 self._models = set()
378
379 - def append(self, structure):
380 """ 381 Add a new model 382 383 @param structure: model to append 384 @type structure: L{Structure} 385 """ 386 387 if not structure.model_id or not str(structure.model_id).strip(): 388 raise ValueError("Invalid model identifier: '{0.model_id}'".format(structure)) 389 if structure.model_id in self._models: 390 raise DuplicateModelIDError(structure.model_id) 391 else: 392 return super(EnsembleModelsCollection, self).append(structure)
393 394 @property
395 - def _exception(self):
396 return EntityIndexError
397
398 399 -class Structure(csb.core.AbstractNIContainer, AbstractEntity):
400 """ 401 Represents a single model of a PDB 3-Dimensional molecular structure. 402 Provides access to the L{Chain} objects, contained in the model: 403 404 >>> structure['A'] 405 <Chain A: Protein> 406 >>> structure.chains['A'] 407 <Chain A: Protein> 408 >>> structure.items 409 <iterator of Chain-s> 410 411 @param accession: accession number of the structure 412 @type accession: str 413 """
414 - def __init__(self, accession):
415 416 self._accession = None 417 self._chains = StructureChainsTable(self) 418 self._model_id = None 419 self._resolution = None 420 421 self.accession = accession
422
423 - def __repr__(self):
424 return "<Structure Model {0.model_id}: {0.accession}, {1} chains>".format(self, self.chains.length)
425 426 @property
427 - def _children(self):
428 return self._chains
429 430 @property
431 - def chains(self):
432 """ 433 Access chains by their chain identifiers 434 @rtype: L{StructureChainsTable} 435 """ 436 return self._chains
437 438 @property
439 - def items(self):
440 for chain in self._chains: 441 yield self._chains[chain]
442 443 @property
444 - def first_chain(self):
445 """ 446 The first L{Chain} in the structure (if available) 447 @rtype: L{Chain} or None 448 """ 449 if len(self._chains) > 0: 450 return next(self.items) 451 return None
452 453 @property
454 - def accession(self):
455 """ 456 Accession number 457 @rtype: str 458 """ 459 return self._accession
460 @accession.setter
461 - def accession(self, accession):
462 if accession is None: 463 raise ValueError(accession) 464 self._accession = str(accession).strip().lower() 465 for c in self.chains: 466 self.chains[c]._accession = self._accession
467 468 @property
469 - def model_id(self):
470 """ 471 Model ID 472 @rtype: int 473 """ 474 return self._model_id
475 @model_id.setter
476 - def model_id(self, value):
477 self._model_id = value
478 479 @property
480 - def resolution(self):
481 """ 482 Resolution in Angstroms 483 """ 484 return self._resolution
485 @resolution.setter
486 - def resolution(self, value):
487 if value is not None: 488 value = float(value) 489 self._resolution = value
490
491 - def to_fasta(self):
492 """ 493 Dump the structure in FASTA format. 494 495 @return: FASTA-formatted string with all chains in the structure 496 @rtype: str 497 498 @deprecated: this method will be removed soon. Use 499 L{csb.bio.sequence.ChainSequence.create} instead 500 """ 501 fasta = [] 502 503 for chain in self.items: 504 505 if chain.length > 0: 506 fasta.append('>{0}'.format(chain.header)) 507 fasta.append(chain.sequence) 508 509 return os.linesep.join(fasta)
510
511 - def to_pdb(self, output_file=None):
512 """ 513 Dump the whole structure in PDB format. 514 515 @param output_file: output file name or open stream 516 @type output_file: str or stream 517 """ 518 from csb.bio.io.wwpdb import PDBFileBuilder 519 520 temp = csb.io.MemoryStream() 521 builder = PDBFileBuilder(temp) 522 523 builder.add_header(self) 524 builder.add_structure(self) 525 builder.finalize() 526 527 data = temp.getvalue() 528 temp.close() 529 530 if not output_file: 531 return data 532 else: 533 with csb.io.EntryWriter(output_file, close=False) as out: 534 out.write(data)
535
536 -class StructureChainsTable(csb.core.DictionaryContainer):
537
538 - def __init__(self, structure=None, chains=None):
539 self.__container = structure 540 super(StructureChainsTable, self).__init__() 541 542 if chains is not None: 543 for chain in chains: 544 self.append(chain)
545
546 - def __repr__(self):
547 if len(self) > 0: 548 return "<StructureChains: {0}>".format(', '.join(self)) 549 else: 550 return "<StructureChains: empty>"
551 552 @property
553 - def _exception(self):
554 return ChainNotFoundError
555
556 - def append(self, chain):
557 """ 558 Add a new Chain to the structure. 559 560 @param chain: the new chain to be appended 561 @type chain: L{Chain} 562 563 @raise DuplicateChainIDError: if a chain with same ID is already defined 564 @raise InvalidOperation: if the chain is already associated with a structure 565 """ 566 567 if chain._structure and chain._structure is not self.__container: 568 raise InvalidOperation('This chain is already part of another structure') 569 if chain.id in self: 570 raise DuplicateChainIDError('A chain with ID {0} is already defined'.format(chain.id)) 571 572 super(StructureChainsTable, self).append(chain.id, chain) 573 574 if self.__container: 575 chain._accession = self.__container.accession 576 chain._structure = self.__container
577
578 - def remove(self, id):
579 """ 580 Remove a chain from the structure. 581 582 @param id: ID of the chain to be detached 583 @type id: str 584 """ 585 chain = self[id] 586 self._remove(id) 587 chain._structure = None
588
589 - def _update_chain_id(self, chain, new_id):
590 591 if chain.id not in self or self[chain.id] is not chain: 592 raise InvalidOperation(chain) 593 594 self._remove(chain.id) 595 596 if new_id in self: 597 raise DuplicateChainIDError('Chain ID {0} is already defined'.format(id)) 598 599 super(StructureChainsTable, self).append(new_id, chain)
600
601 -class Chain(csb.core.AbstractNIContainer, AbstractEntity):
602 """ 603 Represents a polymeric chain. Provides list-like and rank-based access to 604 the residues in the chain: 605 606 >>> chain[0] 607 <ProteinResidue [1]: SER None> 608 >>> chain.residues[1] 609 <ProteinResidue [1]: SER None> 610 611 You can also access residues by their PDB sequence number: 612 613 >>> chain.find(sequence_number=5, insertion_code='A') 614 <ProteinResidue [1]: SER 5A> 615 616 @param chain_id: ID of the new chain 617 @type chain_id: str 618 @param type: sequence type (a member of the L{SequenceTypes} enum) 619 @type type: L{csb.core.EnumItem} 620 @param name: name of the chain 621 @type name: str 622 @param residues: initialization list of L{Residue}-s 623 @type residues: list 624 @param accession: accession number of the chain 625 @type accession: str 626 @param molecule_id: MOL ID of the chain, if part of a polymer 627 628 """
629 - def __init__(self, chain_id, type=SequenceTypes.Protein, name='', 630 residues=None, accession=None, molecule_id=None):
631 632 self._id = str(chain_id).strip() 633 self._accession = None 634 self._type = None 635 self._residues = ChainResiduesCollection(self, residues) 636 self._secondary_structure = None 637 self._molecule_id = molecule_id 638 self._torsion_computed = False 639 self._name = str(name).strip() 640 641 self._structure = None 642 643 self.type = type 644 if accession is not None: 645 self.accession = accession
646 647 @staticmethod
648 - def from_sequence(sequence, id="_"):
649 """ 650 Create a new chain from an existing sequence. 651 652 @param sequence: source sequence 653 @type sequence: L{csb.bio.sequence.AbstractSequence} 654 655 @rtype: L{Chain} 656 """ 657 658 chain = Chain(id, type=sequence.type) 659 660 for ri in sequence.residues: 661 residue = Residue.create(sequence.type, ri.rank, ri.type, sequence_number=ri.rank) 662 chain.residues.append(residue) 663 664 return chain
665 666 @property
667 - def _children(self):
668 return self._residues
669
670 - def __repr__(self):
671 return "<Chain {0.id}: {0.type!r}>".format(self)
672
673 - def __len__(self):
674 return self._residues.length
675 676 @property
677 - def id(self):
678 """ 679 Chain's ID 680 @rtype: str 681 """ 682 return self._id
683 @id.setter
684 - def id(self, id):
685 if not isinstance(id, csb.core.string): 686 raise ValueError(id) 687 id = id.strip() 688 if self._structure: 689 self._structure.chains._update_chain_id(self, id) 690 self._id = id
691 692 @property
693 - def accession(self):
694 """ 695 Accession number 696 @rtype: str 697 """ 698 return self._accession
699 @accession.setter
700 - def accession(self, accession):
701 if self._structure: 702 raise InvalidOperation("Only the accession of the parent structure can be altered") 703 if accession is None: 704 raise ValueError(accession) 705 self._accession = str(accession).strip()
706 707 @property
708 - def type(self):
709 """ 710 Chain type - any member of L{SequenceTypes} 711 @rtype: enum item 712 """ 713 return self._type
714 @type.setter
715 - def type(self, type):
716 if type.enum is not SequenceTypes: 717 raise TypeError(type) 718 self._type = type
719 720 @property
721 - def residues(self):
722 """ 723 Rank-based access to Chain's L{Residue}s 724 @rtype: L{ChainResiduesCollection} 725 """ 726 return self._residues
727 728 @property
729 - def items(self):
730 return iter(self._residues)
731 732 @property
733 - def torsion(self):
734 """ 735 Torsion angles 736 @rtype: L{TorsionAnglesCollection} 737 """ 738 if not self._torsion_computed: 739 raise InvalidOperation('The correctness of the data is not guaranteed ' 740 'until chain.compute_torsion() is invoked.') 741 742 torsion = TorsionAnglesCollection() 743 744 for r in self.residues: 745 if r.torsion is None: 746 torsion.append(TorsionAngles(None, None, None)) 747 else: 748 torsion.append(r.torsion) 749 750 return torsion
751 752 @property
753 - def has_torsion(self):
754 """ 755 True if C{Chain.compute_torsion} had been invoked 756 @rtype: bool 757 """ 758 return self._torsion_computed
759 760 @property
761 - def length(self):
762 """ 763 Number of residues 764 @rtype: int 765 """ 766 return self._residues.length
767 768 @property
769 - def entry_id(self):
770 """ 771 Accession number + chain ID 772 @rtype: str 773 """ 774 if self._accession and self._id: 775 return self._accession + self._id 776 else: 777 return None
778 779 @property
780 - def name(self):
781 """ 782 Chain name 783 @rtype: str 784 """ 785 return self._name
786 @name.setter
787 - def name(self, value):
788 if value is not None: 789 value = str(value).strip() 790 self._name = value
791 792 @property
793 - def molecule_id(self):
794 """ 795 PDB MOL ID of this chain 796 @rtype: int 797 """ 798 return self._molecule_id
799 @molecule_id.setter
800 - def molecule_id(self, value):
801 self._molecule_id = value
802 803 @property
804 - def header(self):
805 """ 806 FASTA header in PDB format 807 @rtype: str 808 """ 809 header = "{0._accession}_{0._id} mol:{1} length:{0.length} {0.name}" 810 return header.format(self, str(self.type).lower())
811 812 @property
813 - def sequence(self):
814 """ 815 Chain sequence 816 @rtype: str 817 """ 818 sequence = [] 819 gap = str(self.alphabet.GAP) 820 821 for residue in self.residues: 822 if residue and residue.type: 823 sequence.append(str(residue.type)) 824 else: 825 sequence.append(gap) 826 827 return ''.join(sequence)
828 829 @property
830 - def alphabet(self):
831 """ 832 Sequence alphabet corresponding to the current chain type 833 @rtype: L{csb.core.enum} 834 """ 835 return SequenceAlphabets.get(self.type)
836 837 @property
838 - def secondary_structure(self):
839 """ 840 Secondary structure (if available) 841 @rtype: L{SecondaryStructure} 842 """ 843 return self._secondary_structure
844 @secondary_structure.setter
845 - def secondary_structure(self, ss):
846 if not isinstance(ss, SecondaryStructure): 847 raise TypeError(ss) 848 if len(ss) > 0: 849 if (ss[ss.last_index].end > self._residues.last_index): 850 raise ValueError('Secondary structure out of range') 851 self._secondary_structure = ss
852
853 - def clone(self):
854 """ 855 Make a deep copy of the chain. If this chain is part of a structure, 856 detach from it. 857 858 @return: a deep copy of self 859 @rtype: L{Chain} 860 """ 861 start, end = self.residues.start_index, self.residues.last_index 862 return self.subregion(start, end, clone=True)
863
864 - def subregion(self, start, end, clone=False):
865 """ 866 Extract a subchain defined by [start, end]. If clone is True, this 867 is a deep copy of the chain. Otherwise same as: 868 869 >>> chain.residues[start : end + 1] 870 871 but coordinates are checked and a Chain instance is returned. 872 873 @param start: start position of the sub-region 874 @type start: int 875 @param end: end position 876 @type end: int 877 @param clone: if True, a deep copy of the sub-region is returned, 878 otherwise - a shallow one 879 @type clone: bool 880 881 882 @return: a new chain, made from the residues of the extracted region 883 @rtype: L{Chain} 884 885 @raise IndexError: if start/end positions are out of range 886 """ 887 if start < self.residues.start_index or start > self.residues.last_index: 888 raise IndexError('The start position is out of range {0.start_index} .. {0.last_index}'.format(self.residues)) 889 if end < self.residues.start_index or end > self.residues.last_index: 890 raise IndexError('The end position is out of range {0.start_index} .. {0.last_index}'.format(self.residues)) 891 892 residues = self.residues[start : end + 1] 893 894 if clone: 895 residues = [r.clone() for r in residues] 896 897 chain = Chain(self.id, accession=self.accession, name=self.name, 898 type=self.type, residues=residues, molecule_id=self.molecule_id) 899 if chain.secondary_structure: 900 chain.secondary_structure = self.secondary_structure.subregion(start, end) 901 chain._torsion_computed = self._torsion_computed 902 903 return chain
904
905 - def find(self, sequence_number, insertion_code=None):
906 """ 907 Get a residue by its original Residue Sequence Number and Insertion Code. 908 909 @param sequence_number: PDB sequence number of the residue 910 @type sequence_number: str 911 @param insertion_code: PDB insertion code of the residue (if any) 912 @type insertion_code: str 913 914 @return: the residue object with such an ID 915 @rtype: L{Residue} 916 917 @raise csb.core.ItemNotFoundError: if no residue with that ID exists 918 """ 919 res_id = str(sequence_number).strip() 920 921 if insertion_code is not None: 922 insertion_code = str(insertion_code).strip() 923 res_id += insertion_code 924 925 return self.residues._get_residue(res_id)
926
927 - def compute_torsion(self):
928 """ 929 Iterate over all residues in the chain, compute and set their torsion property. 930 931 @raise Missing3DStructureError: when a 3D structure is absent 932 @raise Broken3DStructureError: when a given atom cannot be retrieved from any residue 933 """ 934 if self.type != SequenceTypes.Protein: 935 raise NotImplementedError() 936 937 for i, residue in enumerate(self.residues, start=self.residues.start_index): 938 939 prev_residue, next_residue = None, None 940 941 if i > self.residues.start_index: 942 prev_residue = self.residues[i - 1] 943 if i < self.residues.last_index: 944 next_residue = self.residues[i + 1] 945 946 residue.torsion = residue.compute_torsion(prev_residue, next_residue, strict=False) 947 948 self._torsion_computed = True
949
950 - def superimpose(self, other, what=['CA'], how=AlignmentTypes.Global):
951 """ 952 Find the optimal fit between C{self} and C{other}. Return L{SuperimposeInfo} 953 (RotationMatrix, translation Vector and RMSD), such that: 954 955 >>> other.transform(rotation_matrix, translation_vector) 956 957 will result in C{other}'s coordinates superimposed over C{self}. 958 959 @param other: the subject (movable) chain 960 @type other: L{Chain} 961 @param what: a list of atom kinds, e.g. ['CA'] 962 @type what: list 963 @param how: fitting method (global or local) - a member of the L{AlignmentTypes} enum 964 @type how: L{csb.core.EnumItem} 965 966 @return: superimposition info object, containing rotation matrix, translation 967 vector and computed RMSD 968 @rtype: L{SuperimposeInfo} 969 970 @raise AlignmentArgumentLengthError: when the lengths of the argument chains differ 971 """ 972 if self.length != other.length or self.length < 1: 973 raise AlignmentArgumentLengthError('Both chains must be of the same and positive length') 974 975 x = self.get_coordinates(what) 976 y = other.get_coordinates(what) 977 assert len(x) == len(y) 978 979 if how == AlignmentTypes.Global: 980 r, t = csb.bio.utils.fit(x, y) 981 else: 982 r, t = csb.bio.utils.fit_wellordered(x, y) 983 984 rmsd = csb.bio.utils.rmsd(x, y) 985 986 return SuperimposeInfo(r, t, rmsd=rmsd) 987
988 - def align(self, other, what=['CA'], how=AlignmentTypes.Global):
989 """ 990 Align C{other}'s alpha carbons over self in space and return L{SuperimposeInfo}. 991 Coordinates of C{other} are overwritten in place using the rotation matrix 992 and translation vector in L{SuperimposeInfo}. Alias for:: 993 994 R, t = self.superimpose(other, what=['CA']) 995 other.transform(R, t) 996 997 @param other: the subject (movable) chain 998 @type other: L{Chain} 999 @param what: a list of atom kinds, e.g. ['CA'] 1000 @type what: list 1001 @param how: fitting method (global or local) - a member of the L{AlignmentTypes} enum 1002 @type how: L{csb.core.EnumItem} 1003 1004 @return: superimposition info object, containing rotation matrix, translation 1005 vector and computed RMSD 1006 @rtype: L{SuperimposeInfo} 1007 """ 1008 result = self.superimpose(other, what=what, how=how) 1009 other.transform(result.rotation, result.translation) 1010 1011 return result 1012
1013 - def rmsd(self, other, what=['CA']):
1014 """ 1015 Compute the C-alpha RMSD against another chain (assuming equal length). 1016 Chains are superimposed with Least Squares Fit / Singular Value Decomposition. 1017 1018 @param other: the subject (movable) chain 1019 @type other: L{Chain} 1020 @param what: a list of atom kinds, e.g. ['CA'] 1021 @type what: list 1022 1023 @return: computed RMSD over the specified atom kinds 1024 @rtype: float 1025 """ 1026 1027 if self.length != other.length or self.length < 1: 1028 raise ValueError('Both chains must be of the same and positive length ' 1029 '(got {0} and {1})'.format(self.length, other.length)) 1030 1031 x = self.get_coordinates(what) 1032 y = other.get_coordinates(what) 1033 assert len(x) == len(y) 1034 1035 return csb.bio.utils.rmsd(x, y)
1036
1037 - def tm_superimpose(self, other, what=['CA'], how=AlignmentTypes.Global):
1038 """ 1039 Find the optimal fit between C{self} and C{other}. Return L{SuperimposeInfo} 1040 (RotationMatrix, translation Vector and TM-score), such that: 1041 1042 >>> other.transform(rotation_matrix, translation_vector) 1043 1044 will result in C{other}'s coordinates superimposed over C{self}. 1045 1046 @param other: the subject (movable) chain 1047 @type other: L{Chain} 1048 @param what: a list of atom kinds, e.g. ['CA'] 1049 @type what: list 1050 @param how: fitting method (global or local) - a member of the L{AlignmentTypes} enum 1051 @type how: L{csb.core.EnumItem} 1052 1053 @return: superimposition info object, containing rotation matrix, translation 1054 vector and computed TM-score 1055 @rtype: L{SuperimposeInfo} 1056 1057 @raise AlignmentArgumentLengthError: when the lengths of the argument chains differ 1058 """ 1059 1060 if self.length != other.length or self.length < 1: 1061 raise ValueError('Both chains must be of the same and positive length') 1062 1063 x = self.get_coordinates(what) 1064 y = other.get_coordinates(what) 1065 assert len(x) == len(y) 1066 1067 L_ini_min = 0 1068 if how == AlignmentTypes.Global: 1069 fit = csb.bio.utils.fit 1070 elif how == AlignmentTypes.Local: 1071 fit = csb.bio.utils.fit_wellordered 1072 else: 1073 # TMscore.f like search (slow) 1074 fit = csb.bio.utils.fit 1075 L_ini_min = 4 1076 1077 r, t, tm = csb.bio.utils.tm_superimpose(x, y, fit, None, None, L_ini_min) 1078 1079 return SuperimposeInfo(r,t, tm_score=tm) 1080
1081 - def tm_score(self, other, what=['CA']):
1082 """ 1083 Compute the C-alpha TM-Score against another chain (assuming equal chain length 1084 and optimal configuration - no fitting is done). 1085 1086 @param other: the subject (movable) chain 1087 @type other: L{Chain} 1088 @param what: a list of atom kinds, e.g. ['CA'] 1089 @type what: list 1090 1091 @return: computed TM-Score over the specified atom kinds 1092 @rtype: float 1093 """ 1094 1095 if self.length != other.length or self.length < 1: 1096 raise ValueError('Both chains must be of the same and positive length') 1097 1098 x = self.get_coordinates(what) 1099 y = other.get_coordinates(what) 1100 assert len(x) == len(y) 1101 1102 return csb.bio.utils.tm_score(x, y)
1103
1104 -class ChainResiduesCollection(csb.core.CollectionContainer):
1105
1106 - def __init__(self, chain, residues):
1107 super(ChainResiduesCollection, self).__init__(type=Residue, start_index=1) 1108 self.__container = chain 1109 self.__lookup = { } 1110 1111 if residues is not None: 1112 for residue in residues: 1113 self.append(residue)
1114
1115 - def __repr__(self):
1116 if len(self) > 0: 1117 return "<ChainResidues: {0} ... {1}>".format(self[self.start_index], self[self.last_index]) 1118 else: 1119 return "<ChainResidues: empty>"
1120 1121 @property
1122 - def _exception(self):
1123 return EntityIndexError
1124
1125 - def append(self, residue):
1126 """ 1127 Append a new residue to the chain. 1128 1129 @param residue: the new residue 1130 @type residue: L{Residue} 1131 1132 @raise DuplicateResidueIDError: if a residue with the same ID already exists 1133 """ 1134 if residue.id and residue.id in self.__lookup: 1135 raise DuplicateResidueIDError('A residue with ID {0} is already defined within the chain'.format(residue.id)) 1136 index = super(ChainResiduesCollection, self).append(residue) 1137 residue._container = self 1138 self.__container._torsion_computed = False 1139 self._add(residue) 1140 return index
1141
1142 - def _contains(self, id):
1143 return id in self.__lookup
1144
1145 - def _remove(self, id):
1146 if id in self.__lookup: 1147 del self.__lookup[id]
1148
1149 - def _add(self, residue):
1150 self.__lookup[residue.id] = residue
1151
1152 - def _get_residue(self, id):
1153 try: 1154 return self.__lookup[id] 1155 except KeyError: 1156 raise csb.core.ItemNotFoundError(id)
1157
1158 -class Residue(csb.core.AbstractNIContainer, AbstractEntity):
1159 """ 1160 Base class representing a single residue. Provides a dictionary-like 1161 access to the atoms contained in the residue: 1162 1163 >>> residue['CA'] 1164 <Atom [3048]: CA> 1165 >>> residue.atoms['CA'] 1166 <Atom [3048]: CA> 1167 >>> residue.items 1168 <iterator of Atom-s> 1169 1170 @param rank: rank of the residue with respect to the chain 1171 @type rank: int 1172 @param type: residue type - a member of any L{SequenceAlphabets} 1173 @type type: L{csb.core.EnumItem} 1174 @param sequence_number: PDB sequence number of the residue 1175 @type sequence_number: str 1176 @param insertion_code: PDB insertion code, if any 1177 @type insertion_code: str 1178 """
1179 - def __init__(self, rank, type, sequence_number=None, insertion_code=None):
1180 1181 self._type = None 1182 self._pdb_name = None 1183 self._rank = int(rank) 1184 self._atoms = ResidueAtomsTable(self) 1185 self._secondary_structure = None 1186 self._torsion = None 1187 self._sequence_number = None 1188 self._insertion_code = None 1189 self._container = None 1190 1191 self.type = type 1192 self.id = sequence_number, insertion_code 1193 self._pdb_name = repr(type)
1194 1195 @property
1196 - def _children(self):
1197 return self._atoms
1198
1199 - def __repr__(self):
1200 return '<{1} [{0.rank}]: {0.type!r} {0.id}>'.format(self, self.__class__.__name__)
1201 1202 @property
1203 - def type(self):
1204 """ 1205 Residue type - a member of any sequence alphabet 1206 @rtype: enum item 1207 """ 1208 return self._type
1209 @type.setter
1210 - def type(self, type):
1211 if type.enum not in (SequenceAlphabets.Protein, SequenceAlphabets.Nucleic, SequenceAlphabets.Unknown): 1212 raise TypeError(type) 1213 self._type = type
1214 1215 @property
1216 - def rank(self):
1217 """ 1218 Residue's position in the sequence (1-based) 1219 @rtype: int 1220 """ 1221 return self._rank
1222 1223 @property
1224 - def secondary_structure(self):
1225 """ 1226 Secondary structure element this residue is part of 1227 @rtype: L{SecondaryStructureElement} 1228 """ 1229 return self._secondary_structure
1230 @secondary_structure.setter
1231 - def secondary_structure(self, structure):
1232 if not isinstance(structure, SecondaryStructureElement): 1233 raise TypeError(structure) 1234 self._secondary_structure = structure
1235 1236 @property
1237 - def torsion(self):
1238 """ 1239 Torsion angles 1240 @rtype: L{TorsionAngles} 1241 """ 1242 return self._torsion
1243 @torsion.setter
1244 - def torsion(self, torsion):
1245 if not isinstance(torsion, TorsionAngles): 1246 raise TypeError(torsion) 1247 self._torsion = torsion
1248 1249 @property
1250 - def atoms(self):
1251 """ 1252 Access residue's atoms by atom name 1253 @rtype: L{ResidueAtomsTable} 1254 """ 1255 return self._atoms
1256 1257 @property
1258 - def items(self):
1259 for atom in self._atoms: 1260 yield self._atoms[atom]
1261 1262 @property
1263 - def sequence_number(self):
1264 """ 1265 PDB sequence number (if residue.has_structure is True) 1266 @rtype: int 1267 """ 1268 return self._sequence_number
1269 1270 @property
1271 - def insertion_code(self):
1272 """ 1273 PDB insertion code (if defined) 1274 @rtype: str 1275 """ 1276 return self._insertion_code
1277 1278 @property
1279 - def id(self):
1280 """ 1281 PDB sequence number [+ insertion code] 1282 @rtype: str 1283 """ 1284 if self._sequence_number is None: 1285 return None 1286 elif self._insertion_code is not None: 1287 return str(self._sequence_number) + self._insertion_code 1288 else: 1289 return str(self._sequence_number)
1290 @id.setter
1291 - def id(self, value):
1292 sequence_number, insertion_code = value 1293 old_id = self.id 1294 id = '' 1295 if sequence_number is not None: 1296 sequence_number = int(sequence_number) 1297 id = str(sequence_number) 1298 if insertion_code is not None: 1299 insertion_code = str(insertion_code).strip() 1300 id += insertion_code 1301 if sequence_number is None: 1302 raise InvalidOperation('sequence_number must be defined when an insertion_code is specified.') 1303 if old_id != id: 1304 if self._container: 1305 if self._container._contains(id): 1306 raise DuplicateResidueIDError('A residue with ID {0} is already defined within the chain'.format(id)) 1307 self._container._remove(old_id) 1308 self._sequence_number = sequence_number 1309 self._insertion_code = insertion_code 1310 if self._container: 1311 self._container._add(self)
1312 1313 @property
1314 - def has_structure(self):
1315 """ 1316 True if this residue has any atoms 1317 @rtype: bool 1318 """ 1319 return len(self.atoms) > 0
1320
1321 - def get_coordinates(self, what=None, skip=False):
1322 1323 coords = [] 1324 1325 if not self.has_structure: 1326 if skip: 1327 return numpy.array([]) 1328 raise Missing3DStructureError(self) 1329 1330 for atom_kind in (what or self.atoms): 1331 if atom_kind in self.atoms: 1332 coords.append(self.atoms[atom_kind].vector) 1333 else: 1334 if skip: 1335 continue 1336 raise Broken3DStructureError('Could not retrieve {0} atom'.format(atom_kind)) 1337 1338 return numpy.array(coords)
1339
1340 - def clone(self):
1341 1342 container = self._container 1343 self._container = None 1344 clone = copy.deepcopy(self) 1345 self._container = container 1346 1347 return clone
1348 1349 @staticmethod
1350 - def create(sequence_type, *a, **k):
1351 """ 1352 Residue factory method, which returns the proper L{Residue} instance based on 1353 the specified C{sequence_type}. All additional arguments are used to initialize 1354 the subclass by passing them automatically to the underlying constructor. 1355 1356 @param sequence_type: create a Residue of that SequenceType 1357 @type sequence_type: L{csb.core.EnumItem} 1358 1359 @return: a new residue of the proper subclass 1360 @rtype: L{Residue} subclass 1361 1362 @raise ValueError: if the sequence type is not known 1363 """ 1364 if sequence_type == SequenceTypes.Protein: 1365 return ProteinResidue(*a, **k) 1366 elif sequence_type == SequenceTypes.NucleicAcid: 1367 return NucleicResidue(*a, **k) 1368 elif sequence_type == SequenceTypes.Unknown: 1369 return UnknownResidue(*a, **k) 1370 else: 1371 raise ValueError(sequence_type)
1372
1373 -class ProteinResidue(Residue):
1374 """ 1375 Represents a single amino acid residue. 1376 1377 @param rank: rank of the residue with respect to the chain 1378 @type rank: int 1379 @param type: residue type - a member of 1380 L{csb.bio.sequence.SequenceAlphabets.Protein} 1381 @type type: L{csb.core.EnumItem} 1382 @param sequence_number: PDB sequence number of the residue 1383 @type sequence_number: str 1384 @param insertion_code: PDB insertion code, if any 1385 @type insertion_code: str 1386 """ 1387
1388 - def __init__(self, rank, type, sequence_number=None, insertion_code=None):
1389 1390 if isinstance(type, csb.core.string): 1391 try: 1392 if len(type) == 3: 1393 type = csb.core.Enum.parsename(SequenceAlphabets.Protein, type) 1394 else: 1395 type = csb.core.Enum.parse(SequenceAlphabets.Protein, type) 1396 except (csb.core.EnumMemberError, csb.core.EnumValueError): 1397 raise ValueError("'{0}' is not a valid amino acid".format(type)) 1398 elif type.enum is not SequenceAlphabets.Protein: 1399 raise TypeError(type) 1400 1401 super(ProteinResidue, self).__init__(rank, type, sequence_number, insertion_code)
1402
1403 - def compute_torsion(self, prev_residue, next_residue, strict=True):
1404 """ 1405 Compute the torsion angles of the current residue with neighboring residues 1406 C{prev_residue} and C{next_residue}. 1407 1408 @param prev_residue: the previous residue in the chain 1409 @type prev_residue: L{Residue} 1410 @param next_residue: the next residue in the chain 1411 @type next_residue: L{Residue} 1412 @param strict: if True, L{Broken3DStructureError} is raised if either C{prev_residue} 1413 or C{next_residue} has a broken structure, else the error is silently 1414 ignored and an empty L{TorsionAngles} instance is created 1415 @type strict: bool 1416 1417 @return: a L{TorsionAngles} object, holding the phi, psi and omega values 1418 @rtype: L{TorsionAngles} 1419 1420 @raise Broken3DStructureError: when a specific atom cannot be found 1421 """ 1422 if prev_residue is None and next_residue is None: 1423 raise ValueError('At least one neighboring residue is required to compute the torsion.') 1424 1425 angles = TorsionAngles(None, None, None, units=AngleUnits.Degrees) 1426 1427 for residue in (self, prev_residue, next_residue): 1428 if residue is not None and not residue.has_structure: 1429 if strict: 1430 raise Missing3DStructureError(repr(residue)) 1431 elif residue is self: 1432 return angles 1433 1434 try: 1435 n = self._atoms['N'].vector 1436 ca = self._atoms['CA'].vector 1437 c = self._atoms['C'].vector 1438 except csb.core.ItemNotFoundError as missing_atom: 1439 if strict: 1440 raise Broken3DStructureError('Could not retrieve {0} atom from the current residue {1!r}.'.format( 1441 missing_atom, self)) 1442 else: 1443 return angles 1444 1445 try: 1446 if prev_residue is not None and prev_residue.has_structure: 1447 prev_c = prev_residue._atoms['C'].vector 1448 angles.phi = csb.numeric.dihedral_angle(prev_c, n, ca, c) 1449 except csb.core.ItemNotFoundError as missing_prevatom: 1450 if strict: 1451 raise Broken3DStructureError('Could not retrieve {0} atom from the i-1 residue {1!r}.'.format( 1452 missing_prevatom, prev_residue)) 1453 try: 1454 if next_residue is not None and next_residue.has_structure: 1455 next_n = next_residue._atoms['N'].vector 1456 angles.psi = csb.numeric.dihedral_angle(n, ca, c, next_n) 1457 next_ca = next_residue._atoms['CA'].vector 1458 angles.omega = csb.numeric.dihedral_angle(ca, c, next_n, next_ca) 1459 except csb.core.ItemNotFoundError as missing_nextatom: 1460 if strict: 1461 raise Broken3DStructureError('Could not retrieve {0} atom from the i+1 residue {1!r}.'.format( 1462 missing_nextatom, next_residue)) 1463 1464 return angles
1465
1466 -class NucleicResidue(Residue):
1467 """ 1468 Represents a single nucleotide residue. 1469 1470 @param rank: rank of the residue with respect to the chain 1471 @type rank: int 1472 @param type: residue type - a member of 1473 L{csb.bio.sequence.SequenceAlphabets.Nucleic} 1474 @type type: L{csb.core.EnumItem} 1475 @param sequence_number: PDB sequence number of the residue 1476 @type sequence_number: str 1477 @param insertion_code: PDB insertion code, if any 1478 @type insertion_code: str 1479 """ 1480
1481 - def __init__(self, rank, type, sequence_number=None, insertion_code=None):
1482 1483 if isinstance(type, csb.core.string): 1484 try: 1485 if len(type) > 1: 1486 type = csb.core.Enum.parsename(SequenceAlphabets.Nucleic, type) 1487 else: 1488 type = csb.core.Enum.parse(SequenceAlphabets.Nucleic, type) 1489 except (csb.core.EnumMemberError, csb.core.EnumValueError): 1490 raise ValueError("'{0}' is not a valid nucleotide".format(type)) 1491 elif type.enum is not SequenceAlphabets.Nucleic: 1492 raise TypeError(type) 1493 1494 super(NucleicResidue, self).__init__(rank, type, sequence_number, insertion_code) 1495 self._pdb_name = str(type)
1496
1497 -class UnknownResidue(Residue):
1498
1499 - def __init__(self, rank, type, sequence_number=None, insertion_code=None):
1503
1504 -class ResidueAtomsTable(csb.core.DictionaryContainer):
1505 """ 1506 Represents a collection of atoms. Provides dictionary-like access, 1507 where PDB atom names are used for lookup. 1508 """
1509 - def __init__(self, residue, atoms=None):
1510 1511 self.__residue = residue 1512 super(ResidueAtomsTable, self).__init__() 1513 1514 if atoms is not None: 1515 for atom in atoms: 1516 self.append(atom)
1517
1518 - def __repr__(self):
1519 if len(self) > 0: 1520 return "<ResidueAtoms: {0}>".format(', '.join(self.keys())) 1521 else: 1522 return "<ResidueAtoms: empty>"
1523 1524 @property
1525 - def _exception(self):
1526 return AtomNotFoundError
1527
1528 - def append(self, atom):
1529 """ 1530 Append a new Atom to the catalog. 1531 1532 If the atom has an alternate position, a disordered proxy will be created instead and the 1533 atom will be appended to the L{DisorderedAtom}'s list of children. If a disordered atom 1534 with that name already exists, the atom will be appended to its children only. 1535 If an atom with the same name exists, but it was erroneously not marked as disordered, 1536 that terrible condition will be fixed too. 1537 1538 @param atom: the new atom to append 1539 @type atom: L{Atom} 1540 1541 @raise DuplicateAtomIDError: if an atom with the same sequence number and 1542 insertion code already exists in that residue 1543 """ 1544 if atom.residue and atom.residue is not self.__residue: 1545 raise InvalidOperation('This atom is part of another residue') 1546 if atom.alternate or (atom.name in self and isinstance(self[atom.name], DisorderedAtom)): 1547 if atom.name not in self: 1548 atom._residue = self.__residue 1549 dis_atom = DisorderedAtom(atom) 1550 super(ResidueAtomsTable, self).append(dis_atom.name, dis_atom) 1551 else: 1552 if not isinstance(self[atom.name], DisorderedAtom): 1553 buggy_atom = self[atom.name] 1554 assert buggy_atom.alternate in (None, False) 1555 buggy_atom.alternate = True 1556 self.update(atom.name, DisorderedAtom(buggy_atom)) 1557 if not atom.alternate: 1558 atom.alternate = True 1559 atom._residue = self.__residue 1560 self[atom.name].append(atom) 1561 else: 1562 if atom.name in self: 1563 raise DuplicateAtomIDError('Atom {0} is already defined for {1}'.format( 1564 atom.name, self.__residue)) 1565 else: 1566 super(ResidueAtomsTable, self).append(atom.name, atom) 1567 atom._residue = self.__residue
1568
1569 - def update(self, atom_name, atom):
1570 """ 1571 Update the atom with the specified name. 1572 1573 @param atom_name: update key 1574 @type atom_name: str 1575 @param atom: new value for this key 1576 @type atom: L{Atom} 1577 1578 @raise ValueError: if C{atom} has a different name than C{atom_name} 1579 """ 1580 if atom.name != atom_name: 1581 raise ValueError("Atom's name differs from the specified key.") 1582 if atom.residue is not self.__residue: 1583 atom._residue = self.__residue 1584 1585 super(ResidueAtomsTable, self)._update({atom_name: atom})
1586
1587 -class Atom(AbstractEntity):
1588 """ 1589 Represents a single atom in space. 1590 1591 @param serial_number: atom's UID 1592 @type serial_number: int 1593 @param name: atom's name 1594 @type name: str 1595 @param element: corresponding L{ChemElements} 1596 @type element: L{csb.core.EnumItem} 1597 @param vector: atom's coordinates 1598 @type vector: numpy array 1599 @param alternate: if True, means that this is a wobbling atom with multiple alternative 1600 locations 1601 @type alternate: bool 1602 """
1603 - def __init__(self, serial_number, name, element, vector, alternate=False):
1604 1605 self._serial_number = None 1606 self._name = None 1607 self._element = None 1608 self._residue = None 1609 self._vector = None 1610 self._alternate = False 1611 self._bfactor = None 1612 self._occupancy = None 1613 self._charge = None 1614 1615 if not isinstance(name, csb.core.string): 1616 raise TypeError(name) 1617 name_compact = name.strip() 1618 if len(name_compact) < 1: 1619 raise ValueError(name) 1620 self._name = name_compact 1621 self._full_name = name 1622 1623 if isinstance(element, csb.core.string): 1624 element = csb.core.Enum.parsename(ChemElements, element) 1625 elif element is None: 1626 pass 1627 elif element.enum is not ChemElements: 1628 raise TypeError(element) 1629 self._element = element 1630 1631 # pass type- and value-checking control to setters 1632 self.serial_number = serial_number 1633 self.vector = vector 1634 self.alternate = alternate
1635
1636 - def __repr__(self):
1637 return "<Atom [{0.serial_number}]: {0.name}>".format(self)
1638
1639 - def __lt__(self, other):
1640 return self.serial_number < other.serial_number
1641
1642 - def transform(self, rotation, translation):
1643 1644 vector = numpy.dot(self.vector, numpy.transpose(rotation)) + translation 1645 self.vector = vector
1646
1647 - def get_coordinates(self, what=None, skip=False):
1648 1649 if what is None: 1650 what = [self.name] 1651 1652 if self.name in what: 1653 return numpy.array([self.vector.copy()]) 1654 elif skip: 1655 return numpy.array([]) 1656 else: 1657 raise Missing3DStructureError()
1658
1659 - def clone(self):
1660 1661 residue = self._residue 1662 self._residue = None 1663 clone = copy.deepcopy(self) 1664 self._residue = residue 1665 1666 return clone
1667 1668 @property
1669 - def serial_number(self):
1670 """ 1671 PDB serial number 1672 @rtype: int 1673 """ 1674 return self._serial_number
1675 @serial_number.setter
1676 - def serial_number(self, number):
1677 if not isinstance(number, int) or number < 1: 1678 raise TypeError(number) 1679 self._serial_number = number
1680 1681 @property
1682 - def name(self):
1683 """ 1684 PDB atom name (trimmed) 1685 @rtype: str 1686 """ 1687 return self._name
1688 1689 @property
1690 - def element(self):
1691 """ 1692 Chemical element - a member of L{ChemElements} 1693 @rtype: enum item 1694 """ 1695 return self._element
1696 1697 @property
1698 - def residue(self):
1699 """ 1700 Residue instance that owns this atom (if available) 1701 @rtype: L{Residue} 1702 """ 1703 return self._residue
1704 @residue.setter
1705 - def residue(self, residue):
1706 if self._residue: 1707 raise InvalidOperation('This atom is already part of a residue.') 1708 if not isinstance(residue, Residue): 1709 raise TypeError(residue) 1710 self._residue = residue
1711 1712 @property
1713 - def vector(self):
1714 """ 1715 Atom's 3D coordinates (x, y, z) 1716 @rtype: numpy.array 1717 """ 1718 return self._vector
1719 @vector.setter
1720 - def vector(self, vector):
1721 if numpy.shape(vector) != (3,): 1722 raise ValueError("Three dimensional vector expected") 1723 self._vector = numpy.array(vector)
1724 1725 @property
1726 - def alternate(self):
1727 """ 1728 Alternative location flag 1729 @rtype: str 1730 """ 1731 return self._alternate
1732 @alternate.setter
1733 - def alternate(self, value):
1734 self._alternate = value
1735 1736 @property
1737 - def bfactor(self):
1738 """ 1739 Temperature factor 1740 @rtype: float 1741 """ 1742 return self._bfactor
1743 @bfactor.setter
1744 - def bfactor(self, value):
1745 self._bfactor = value
1746 1747 @property
1748 - def occupancy(self):
1749 """ 1750 Occupancy number 1751 @rtype: float 1752 """ 1753 return self._occupancy
1754 @occupancy.setter
1755 - def occupancy(self, value):
1756 self._occupancy = value
1757 1758 @property
1759 - def charge(self):
1760 """ 1761 Charge 1762 @rtype: int 1763 """ 1764 return self._charge
1765 @charge.setter
1766 - def charge(self, value):
1767 self._charge = value
1768 1769 @property
1770 - def items(self):
1771 return iter([])
1772
1773 -class DisorderedAtom(csb.core.CollectionContainer, Atom):
1774 """ 1775 A wobbling atom, which has alternative locations. Each alternative is represented 1776 as a 'normal' L{Atom}. The atom with a highest occupancy is selected as a representative, 1777 hence a DisorderedAtom behaves as a regular L{Atom} (proxy of the representative) as well 1778 as a collection of Atoms. 1779 1780 @param atom: the first atom to be appended to the collection of alternatives. It 1781 is automatically defined as a representative, until a new atom with 1782 higher occupancy is appended to the collection 1783 @type atom: L{Atom} 1784 """ 1785
1786 - def __init__(self, atom):
1787 1788 super(DisorderedAtom, self).__init__(type=Atom) 1789 1790 self.__rep = None 1791 self.__alt = {} 1792 1793 self.append(atom)
1794
1795 - def __getattr__(self, name):
1796 try: 1797 return object.__getattribute__(self, name) 1798 except AttributeError: 1799 subject = object.__getattribute__(self, '_DisorderedAtom__rep') 1800 return getattr(subject, name)
1801
1802 - def append(self, atom):
1803 """ 1804 Append a new atom to the collection of alternatives. 1805 1806 @param atom: the new alternative 1807 @type atom: L{Atom} 1808 """ 1809 self.__update_rep(atom) 1810 self.__alt[atom.alternate] = atom 1811 1812 super(DisorderedAtom, self).append(atom)
1813
1814 - def find(self, altloc):
1815 """ 1816 Retrieve a specific atom by its altloc identifier. 1817 1818 @param altloc: alternative location identifier 1819 @type altloc: str 1820 1821 @rtype: L{Atom} 1822 """ 1823 if altloc in self.__alt: 1824 return self.__alt[altloc] 1825 else: 1826 for atom in self: 1827 if atom.alternate == altloc: 1828 return Atom 1829 1830 raise EntityNotFoundError(altloc)
1831
1832 - def transform(self, rotation, translation):
1833 1834 for atom in self: 1835 atom.transform(rotation, translation)
1836
1837 - def __update_rep(self, atom):
1838 1839 if self.__rep is None or \ 1840 ((self.__rep.occupancy != atom.occupancy) and (self.__rep.occupancy < atom.occupancy)): 1841 1842 self.__rep = atom
1843
1844 - def __repr__(self):
1845 return "<DisorderedAtom: {0.length} alternative locations>".format(self)
1846
1847 -class SuperimposeInfo(object):
1848 """ 1849 Describes a structural alignment result. 1850 1851 @type rotation: Numpy Array 1852 @type translation: L{Vector} 1853 @type rmsd: float 1854 """
1855 - def __init__(self, rotation, translation, rmsd=None, tm_score=None):
1856 1857 self.rotation = rotation 1858 self.translation = translation 1859 self.rmsd = rmsd 1860 self.tm_score = tm_score
1861
1862 -class SecondaryStructureElement(object):
1863 """ 1864 Describes a Secondary Structure Element. 1865 1866 @param start: start position with reference to the chain 1867 @type start: float 1868 @param end: end position with reference to the chain 1869 @type end: float 1870 @param type: element type - a member of the L{SecStructures} enum 1871 @type type: csb.core.EnumItem 1872 @param score: secondary structure prediction confidence, if available 1873 @type score: int 1874 1875 @raise IndexError: if start/end positions are out of range 1876 """
1877 - def __init__(self, start, end, type, score=None):
1878 1879 if not (0 < start <= end): 1880 raise IndexError('Element coordinates are out of range: 1 <= start <= end.') 1881 1882 self._start = None 1883 self._end = None 1884 self._type = None 1885 self._score = None 1886 1887 self.start = start 1888 self.end = end 1889 self.type = type 1890 1891 if score is not None: 1892 self.score = score
1893
1894 - def __lt__(self, other):
1895 return self.start < other.start
1896
1897 - def __eq__(self, other):
1898 return (self.type == other.type 1899 and self.start == other.start 1900 and self.end == other.end)
1901
1902 - def __str__(self):
1903 return self.to_string()
1904
1905 - def __repr__(self):
1906 return "<{0.type!r}: {0.start}-{0.end}>".format(self)
1907 1908 @property
1909 - def start(self):
1910 """ 1911 Start position (1-based) 1912 @rtype: int 1913 """ 1914 return self._start
1915 @start.setter
1916 - def start(self, value):
1917 if value is not None: 1918 value = int(value) 1919 if value < 1: 1920 raise ValueError(value) 1921 self._start = value
1922 1923 @property
1924 - def end(self):
1925 """ 1926 End position (1-based) 1927 @rtype: int 1928 """ 1929 return self._end
1930 @end.setter
1931 - def end(self, value):
1932 if value is not None: 1933 value = int(value) 1934 if value < 1: 1935 raise ValueError(value) 1936 self._end = value
1937 1938 @property
1939 - def type(self):
1940 """ 1941 Secondary structure type - a member of L{SecStructures} 1942 @rtype: enum item 1943 """ 1944 return self._type
1945 @type.setter
1946 - def type(self, value):
1947 if isinstance(value, csb.core.string): 1948 value = csb.core.Enum.parse(SecStructures, value) 1949 if not value.enum is SecStructures: 1950 raise TypeError(value) 1951 self._type = value
1952 1953 @property
1954 - def length(self):
1955 """ 1956 Number of residues covered by this element 1957 @rtype: int 1958 """ 1959 return self.end - self.start + 1
1960 1961 @property
1962 - def score(self):
1963 """ 1964 Secondary structure confidence values for each residue 1965 @rtype: L{CollectionContainer} 1966 """ 1967 return self._score
1968 @score.setter
1969 - def score(self, scores):
1970 if not len(scores) == self.length: 1971 raise ValueError('There must be a score entry for each residue in the element.') 1972 self._score = csb.core.CollectionContainer( 1973 items=list(scores), type=int, start_index=self.start)
1974
1975 - def overlaps(self, other):
1976 """ 1977 Return True if C{self} overlaps with C{other}. 1978 1979 @type other: L{SecondaryStructureElement} 1980 @rtype: bool 1981 """ 1982 this = set(range(self.start, self.end + 1)) 1983 that = set(range(other.start, other.end + 1)) 1984 return not this.isdisjoint(that)
1985
1986 - def merge(self, other):
1987 """ 1988 Merge C{self} and C{other}. 1989 1990 @type other: L{SecondaryStructureElement} 1991 1992 @return: a new secondary structure element 1993 @rtype: L{SecondaryStructureElement} 1994 1995 @bug: confidence scores are lost 1996 """ 1997 if not self.overlaps(other): 1998 raise ValueError("Can't merge non-overlapping secondary structures") 1999 elif self.type != other.type: 2000 raise ValueError("Can't merge secondary structures of different type") 2001 2002 start = min(self.start, other.start) 2003 end = max(self.end, other.end) 2004 assert self.type == other.type 2005 2006 return SecondaryStructureElement(start, end, self.type)
2007
2008 - def to_string(self):
2009 """ 2010 Dump the element as a string. 2011 2012 @return: string representation of the element 2013 @rtype: str 2014 """ 2015 return str(self.type) * self.length
2016
2017 - def simplify(self):
2018 """ 2019 Convert to three-state secondary structure (Helix, Strand, Coil). 2020 """ 2021 if self.type in (SecStructures.Helix, SecStructures.Helix3, SecStructures.PiHelix): 2022 self.type = SecStructures.Helix 2023 elif self.type in (SecStructures.Strand, SecStructures.BetaBridge): 2024 self.type = SecStructures.Strand 2025 elif self.type in (SecStructures.Coil, SecStructures.Turn, SecStructures.Bend): 2026 self.type = SecStructures.Coil 2027 elif self.type == SecStructures.Gap or self.type is None: 2028 pass 2029 else: 2030 assert False, 'Unhandled SS type: ' + repr(self.type)
2031
2032 -class SecondaryStructure(csb.core.CollectionContainer):
2033 """ 2034 Describes the secondary structure of a chain. 2035 Provides an index-based access to the secondary structure elements of the chain. 2036 2037 @param string: a secondary structure string (e.g. a PSI-PRED output) 2038 @type string: str 2039 @param conf_string: secondary structure prediction confidence values, if available 2040 @type conf_string: str 2041 """
2042 - def __init__(self, string=None, conf_string=None):
2043 2044 super(SecondaryStructure, self).__init__(type=SecondaryStructureElement, start_index=1) 2045 2046 self._minstart = None 2047 self._maxend = None 2048 2049 if string is not None: 2050 for motif in SecondaryStructure.parse(string, conf_string): 2051 self.append(motif)
2052
2053 - def __str__(self):
2054 return self.to_string()
2055
2056 - def append(self, element):
2057 """ 2058 Add a new SecondaryStructureElement. Then sort all elements by 2059 their start position. 2060 """ 2061 super(SecondaryStructure, self).append(element) 2062 super(SecondaryStructure, self)._sort() 2063 2064 if self._minstart is None or element.start < self._minstart: 2065 self._minstart = element.start 2066 if self._maxend is None or element.end > self._maxend: 2067 self._maxend = element.end
2068 2069 @staticmethod
2070 - def parse(string, conf_string=None):
2071 """ 2072 Parse secondary structure from DSSP/PSI-PRED output string. 2073 2074 @param string: a secondary structure string (e.g. a PSI-PRED output) 2075 @type string: str 2076 @param conf_string: secondary structure prediction confidence values, if available 2077 @type conf_string: str 2078 2079 @return: a list of L{SecondaryStructureElement}s. 2080 @rtype: list 2081 2082 @raise ValueError: if the confidence string is not of the same length 2083 """ 2084 if not isinstance(string, csb.core.string): 2085 raise TypeError(string) 2086 2087 string = ''.join(re.split('\s+', string)) 2088 if conf_string is not None: 2089 conf_string = ''.join(re.split('\s+', conf_string)) 2090 if not len(string) == len(conf_string): 2091 raise ValueError('The confidence string has unexpected length.') 2092 motifs = [ ] 2093 2094 if not len(string) > 0: 2095 raise ValueError('Empty Secondary Structure string') 2096 2097 currel = string[0] 2098 start = 0 2099 2100 for i, char in enumerate(string + '.'): 2101 2102 if currel != char: 2103 try: 2104 type = csb.core.Enum.parse(SecStructures, currel) 2105 except csb.core.EnumValueError: 2106 raise UnknownSecStructureError(currel) 2107 confidence = None 2108 if conf_string is not None: 2109 confidence = list(conf_string[start : i]) 2110 confidence = list(map(int, confidence)) 2111 motif = SecondaryStructureElement(start + 1, i, type, confidence) 2112 motifs.append(motif) 2113 2114 currel = char 2115 start = i 2116 2117 return motifs
2118 2119 @property
2120 - def start(self):
2121 """ 2122 Start position of the leftmost element 2123 @rtype: int 2124 """ 2125 return self._minstart
2126 2127 @property
2128 - def end(self):
2129 """ 2130 End position of the rightmost element 2131 @rtype: int 2132 """ 2133 return self._maxend
2134
2135 - def clone(self):
2136 """ 2137 @return: a deep copy of the object 2138 """ 2139 return copy.deepcopy(self)
2140
2141 - def to_three_state(self):
2142 """ 2143 Convert to three-state secondary structure (Helix, Strand, Coil). 2144 """ 2145 for e in self: 2146 e.simplify()
2147
2148 - def to_string(self, chain_length=None):
2149 """ 2150 Get back the string representation of the secondary structure. 2151 2152 @return: a string of secondary structure elements 2153 @rtype: str 2154 2155 @bug: [CSB 0000003] If conflicting elements are found at a given rank, 2156 this position is represented as a coil. 2157 """ 2158 gap = str(SecStructures.Gap) 2159 coil = str(SecStructures.Coil) 2160 2161 if chain_length is None: 2162 chain_length = max(e.end for e in self) 2163 2164 ss = [] 2165 2166 for pos in range(1, chain_length + 1): 2167 elements = self.at(pos) 2168 if len(elements) > 0: 2169 if len(set(e.type for e in elements)) > 1: 2170 ss.append(coil) # [CSB 0000003] 2171 else: 2172 ss.append(elements[0].to_string()) 2173 else: 2174 ss.append(gap) 2175 2176 return ''.join(ss)
2177
2178 - def at(self, rank, type=None):
2179 """ 2180 @return: all secondary structure elements covering the specifid position 2181 @rtype: tuple of L{SecondaryStructureElement}s 2182 """ 2183 return self.scan(start=rank, end=rank, filter=type, loose=True, cut=True)
2184
2185 - def scan(self, start, end, filter=None, loose=True, cut=True):
2186 """ 2187 Get all secondary structure elements within the specified [start, end] region. 2188 2189 @param start: the start position of the region, 1-based, inclusive 2190 @type start: int 2191 @param end: the end position of the region, 1-based, inclusive 2192 @type end: int 2193 @param filter: return only elements of the specified L{SecStructures} kind 2194 @type filter: L{csb.core.EnumItem} 2195 @param loose: grab all fully or partially matching elements within the region. 2196 if False, return only the elements which strictly reside within 2197 the region 2198 @type loose: bool 2199 @param cut: if an element is partially overlapping with the start..end region, 2200 cut its start and/or end to make it fit into the region. If False, 2201 return the elements with their real lengths 2202 @type cut: bool 2203 2204 @return: a list of deep-copied L{SecondaryStructureElement}s, sorted by their 2205 start position 2206 @rtype: tuple of L{SecondaryStructureElement}s 2207 """ 2208 matches = [ ] 2209 2210 for m in self: 2211 if filter and m.type != filter: 2212 continue 2213 2214 if loose: 2215 if start <= m.start <= end or start <= m.end <= end or (m.start <= start and m.end >= end): 2216 partmatch = copy.deepcopy(m) 2217 if cut: 2218 if partmatch.start < start: 2219 partmatch.start = start 2220 if partmatch.end > end: 2221 partmatch.end = end 2222 if partmatch.score: 2223 partmatch.score = partmatch.score[start : end + 1] 2224 matches.append(partmatch) 2225 else: 2226 if m.start >= start and m.end <= end: 2227 matches.append(copy.deepcopy(m)) 2228 2229 matches.sort() 2230 return tuple(matches)
2231
2232 - def q3(self, reference, relaxed=True):
2233 """ 2234 Compute Q3 score. 2235 2236 @param reference: reference secondary structure 2237 @type reference: L{SecondaryStructure} 2238 @param relaxed: if True, treat gaps as coils 2239 @type relaxed: bool 2240 2241 @return: the percentage of C{reference} residues with identical 2242 3-state secondary structure. 2243 @rtype: float 2244 """ 2245 2246 this = self.clone() 2247 this.to_three_state() 2248 2249 ref = reference.clone() 2250 ref.to_three_state() 2251 2252 total = 0 2253 identical = 0 2254 2255 def at(ss, rank): 2256 elements = ss.at(rank) 2257 if len(elements) == 0: 2258 return None 2259 elif len(elements) > 1: 2260 raise ValueError('Flat secondary structure expected') 2261 else: 2262 return elements[0]
2263 2264 for rank in range(ref.start, ref.end + 1): 2265 q = at(this, rank) 2266 s = at(ref, rank) 2267 2268 if s: 2269 if relaxed or s.type != SecStructures.Gap: 2270 total += 1 2271 if q: 2272 if q.type == s.type: 2273 identical += 1 2274 elif relaxed: 2275 pair = set([q.type, s.type]) 2276 match = set([SecStructures.Gap, SecStructures.Coil]) 2277 if pair.issubset(match): 2278 identical += 1 2279 2280 if total == 0: 2281 return 0.0 2282 else: 2283 return identical * 100.0 / total
2284
2285 - def subregion(self, start, end):
2286 """ 2287 Same as C{ss.scan(...cut=True)}, but also shift the start-end positions 2288 of all motifs and return a L{SecondaryStructure} instance instead of a list. 2289 2290 @param start: start position of the subregion, with reference to the chain 2291 @type start: int 2292 @param end: start position of the subregion, with reference to the chain 2293 @type end: int 2294 2295 @return: a deep-copy sub-fragment of the original L{SecondaryStructure} 2296 @rtype: L{SecondaryStructure} 2297 """ 2298 sec_struct = SecondaryStructure() 2299 2300 for motif in self.scan(start, end, loose=True, cut=True): 2301 2302 motif.start = motif.start - start + 1 2303 motif.end = motif.end - start + 1 2304 if motif.score: 2305 motif.score = list(motif.score) # this will automatically fix the score indices in the setter 2306 sec_struct.append(motif) 2307 2308 return sec_struct
2309
2310 -class TorsionAnglesCollection(csb.core.CollectionContainer):
2311 """ 2312 Describes a collection of torsion angles. Provides 1-based list-like access. 2313 2314 @param items: an initialization list of L{TorsionAngles} 2315 @type items: list 2316 """
2317 - def __init__(self, items=None, start=1):
2321
2322 - def __repr__(self):
2323 if len(self) > 0: 2324 return "<TorsionAnglesList: {0} ... {1}>".format(self[self.start_index], self[self.last_index]) 2325 else: 2326 return "<TorsionAnglesList: empty>"
2327 2328 @property
2329 - def phi(self):
2330 """ 2331 List of all phi angles 2332 @rtype: list 2333 """ 2334 return [a.phi for a in self]
2335 2336 @property
2337 - def psi(self):
2338 """ 2339 List of all psi angles 2340 @rtype: list 2341 """ 2342 return [a.psi for a in self]
2343 2344 @property
2345 - def omega(self):
2346 """ 2347 List of all omega angles 2348 @rtype: list 2349 """ 2350 return [a.omega for a in self]
2351
2352 - def update(self, angles):
2353 self._update(angles)
2354
2355 - def rmsd(self, other):
2356 """ 2357 Calculate the Circular RSMD against another TorsionAnglesCollection. 2358 2359 @param other: subject (right-hand-term) 2360 @type other: L{TorsionAnglesCollection} 2361 2362 @return: RMSD based on torsion angles 2363 @rtype: float 2364 2365 @raise Broken3DStructureError: on discontinuous torsion angle collections 2366 (phi and psi values are still allowed to be absent at the termini) 2367 @raise ValueError: on mismatching torsion angles collection lengths 2368 """ 2369 if len(self) != len(other) or len(self) < 1: 2370 raise ValueError('Both collections must be of the same and positive length') 2371 2372 length = len(self) 2373 query, subject = [], [] 2374 2375 for n, (q, s) in enumerate(zip(self, other), start=1): 2376 2377 q = q.copy() 2378 q.to_radians() 2379 2380 s = s.copy() 2381 s.to_radians() 2382 2383 if q.phi is None or s.phi is None: 2384 if n == 1: 2385 q.phi = s.phi = 0.0 2386 else: 2387 raise Broken3DStructureError('Discontinuous torsion angles collection at {0}'.format(n)) 2388 2389 if q.psi is None or s.psi is None: 2390 if n == length: 2391 q.psi = s.psi = 0.0 2392 else: 2393 raise Broken3DStructureError('Discontinuous torsion angles collection at {0}'.format(n)) 2394 2395 query.append([q.phi, q.psi]) 2396 subject.append([s.phi, s.psi]) 2397 2398 return csb.bio.utils.torsion_rmsd(numpy.array(query), numpy.array(subject))
2399
2400 -class TorsionAngles(object):
2401 """ 2402 Describes a collection of phi, psi and omega backbone torsion angles. 2403 2404 It is assumed that the supplied values are either None, or fitting into 2405 the range of [-180, +180] for AngleUnites.Degrees and [0, 2pi] for Radians. 2406 2407 @param phi: phi angle value in C{units} 2408 @type phi: float 2409 @param psi: psi angle value in C{units} 2410 @type psi: float 2411 @param omega: omega angle value in C{units} 2412 @type omega: float 2413 @param units: any of L{AngleUnits}'s enum members 2414 @type units: L{csb.core.EnumItem} 2415 2416 @raise ValueError: on invalid/unknown units 2417 """ 2418
2419 - def __init__(self, phi, psi, omega, units=AngleUnits.Degrees):
2420 2421 try: 2422 if isinstance(units, csb.core.string): 2423 units = csb.core.Enum.parse(AngleUnits, units, ignore_case=True) 2424 else: 2425 if units.enum is not AngleUnits: 2426 raise TypeError(units) 2427 2428 except ValueError: 2429 raise ValueError('Unknown angle unit type {0}'.format(units)) 2430 2431 self._units = units 2432 2433 self._phi = None 2434 self._psi = None 2435 self._omega = None 2436 2437 self.phi = phi 2438 self.psi = psi 2439 self.omega = omega
2440
2441 - def __repr__(self):
2442 return "<TorsionAngles: phi={0.phi}, psi={0.psi}, omega={0.omega}>".format(self)
2443
2444 - def __nonzero__(self):
2445 return self.__bool__()
2446
2447 - def __bool__(self):
2448 return self.phi is not None \ 2449 or self.psi is not None \ 2450 or self.omega is not None
2451 2452 @property
2453 - def units(self):
2454 """ 2455 Current torsion angle units - a member of L{AngleUnits} 2456 @rtype: enum item 2457 """ 2458 return self._units
2459 2460 @property
2461 - def phi(self):
2462 return self._phi
2463 @phi.setter
2464 - def phi(self, phi):
2465 TorsionAngles.check_angle(phi, self._units) 2466 self._phi = phi
2467 2468 @property
2469 - def psi(self):
2470 return self._psi
2471 @psi.setter
2472 - def psi(self, psi):
2473 TorsionAngles.check_angle(psi, self._units) 2474 self._psi = psi
2475 2476 @property
2477 - def omega(self):
2478 return self._omega
2479 @omega.setter
2480 - def omega(self, omega):
2481 TorsionAngles.check_angle(omega, self._units) 2482 self._omega = omega
2483
2484 - def copy(self):
2485 """ 2486 @return: a deep copy of C{self} 2487 """ 2488 return TorsionAngles(self.phi, self.psi, self.omega, self.units)
2489
2490 - def to_degrees(self):
2491 """ 2492 Set angle measurement units to degrees. 2493 Convert the angles in this TorsionAngles instance to degrees. 2494 """ 2495 2496 if self._units != AngleUnits.Degrees: 2497 2498 phi = TorsionAngles.deg(self._phi) 2499 psi = TorsionAngles.deg(self._psi) 2500 omega = TorsionAngles.deg(self._omega) 2501 2502 # if no ValueError is raised by TorsionAngles.check_angle in TorsionAngles.deg: 2503 # (we assign directly to the instance variables to avoid check_angle being invoked again in setters) 2504 self._phi, self._psi, self._omega = phi, psi, omega 2505 self._units = AngleUnits.Degrees
2506 2507
2508 - def to_radians(self):
2509 """ 2510 Set angle measurement units to radians. 2511 Convert the angles in this TorsionAngles instance to radians. 2512 """ 2513 2514 if self._units != AngleUnits.Radians: 2515 2516 phi = TorsionAngles.rad(self._phi) 2517 psi = TorsionAngles.rad(self._psi) 2518 omega = TorsionAngles.rad(self._omega) 2519 2520 # if no ValueError is raised by TorsionAngles.check_angle in TorsionAngles.rad: 2521 # (we assign directly to the instance variables to avoid check_angle being invoked again in setters) 2522 self._phi, self._psi, self._omega = phi, psi, omega 2523 self._units = AngleUnits.Radians
2524 2525 @staticmethod
2526 - def check_angle(angle, units):
2527 """ 2528 Check the value of a torsion angle expressed in the specified units. 2529 """ 2530 if angle is None: 2531 return 2532 elif units == AngleUnits.Degrees: 2533 if not (-180 <= angle <= 180): 2534 raise ValueError('Torsion angle {0} is out of range -180..180'.format(angle)) 2535 elif units == AngleUnits.Radians: 2536 if not (0 <= angle <= (2 * math.pi)): 2537 raise ValueError('Torsion angle {0} is out of range 0..2pi'.format(angle)) 2538 else: 2539 raise ValueError('Unknown angle unit type {0}'.format(units))
2540 2541 @staticmethod
2542 - def rad(angle):
2543 """ 2544 Convert a torsion angle value, expressed in degrees, to radians. 2545 Negative angles are converted to their positive counterparts: rad(ang + 360deg). 2546 2547 Return the calculated value in the range of [0, 2pi] radians. 2548 """ 2549 TorsionAngles.check_angle(angle, AngleUnits.Degrees) 2550 2551 if angle is not None: 2552 if angle < 0: 2553 angle += 360. 2554 angle = math.radians(angle) 2555 return angle
2556 2557 @staticmethod
2558 - def deg(angle):
2559 """ 2560 Convert a torsion angle value, expressed in radians, to degrees. 2561 Negative angles are not accepted, it is assumed that negative torsion angles have been 2562 converted to their ang+2pi counterparts beforehand. 2563 2564 Return the calculated value in the range of [-180, +180] degrees. 2565 """ 2566 TorsionAngles.check_angle(angle, AngleUnits.Radians) 2567 2568 if angle is not None: 2569 if angle > math.pi: 2570 angle = -((2. * math.pi) - angle) 2571 angle = math.degrees(angle) 2572 2573 return angle
2574