BALL
1.4.1
|
00001 // -*- Mode: C++; tab-width: 2; -*- 00002 // vi: set ts=2: 00003 // 00004 00005 #ifndef BALL_FORMAT_NMRSTARFILE_H 00006 #define BALL_FORMAT_NMRSTARFILE_H 00007 00008 #ifndef BALL_FORMAT_CIFFILE_H 00009 # include <BALL/FORMAT/CIFFile.h> 00010 #endif 00011 00012 #ifndef BALL_COMMON_LIMITS_H 00013 # include <BALL/COMMON/limits.h> 00014 #endif 00015 00016 #ifndef BALL_KERNEL_PROTEIN_H 00017 # include <BALL/KERNEL/protein.h> 00018 #endif 00019 00020 #ifndef BALL_STRUCTURE_PEPTIDES_H 00021 # include <BALL/STRUCTURE/peptides.h> 00022 #endif 00023 00024 #include <vector> 00025 00026 namespace BALL 00027 { 00117 class BALL_EXPORT NMRStarFile 00118 : public CIFFile 00119 { 00120 public: 00121 00122 // constant variables to denote unknown values 00123 static const float FLOAT_VALUE_NA; 00124 static const int INT_VALUE_NA; 00125 00126 static const Position POSITION_VALUE_NA; 00127 00133 00139 class BALL_EXPORT SampleCondition 00140 { 00141 public: 00142 SampleCondition(); 00143 00144 // saveframe name -- referenced in the sample_condition_label 00145 // of the saveframe "assigned_chemical_shifts" 00146 String name; 00147 // Accoring to the NMRStarFile 2.1 documentation 00148 // the first entry MUST BE "_Variable_type". 00149 // This is why we are allowed to map per type :-) 00150 vector<String> types; 00151 StringHashMap<float> values; 00152 StringHashMap<float> errors; 00153 StringHashMap<String> units; 00154 00155 bool hasType(String type) {return values.has(type);} 00156 std::ostream& operator >> (std::ostream& s); 00157 }; 00158 00162 class BALL_EXPORT Sample 00163 { 00164 public: 00165 00170 class BALL_EXPORT Component 00171 { 00172 public: 00173 Component(); 00174 void clear(); 00175 00176 String label; 00177 float concentration_value; 00178 String value_unit; 00179 float concentration_min; 00180 float concentration_max; 00181 String isotopic_labeling; 00182 00183 std::ostream& operator >> (std::ostream& s); 00184 }; 00185 00186 Sample(); 00187 void clear(); 00188 00189 String label; 00190 String type; 00191 String details; 00192 vector <Component> components; 00193 00194 std::ostream& operator >> (std::ostream& s); 00195 }; 00196 00202 class BALL_EXPORT ShiftReferenceElement 00203 { 00204 public: 00205 ShiftReferenceElement(); 00206 00207 String mol_common_name; 00208 String atom_type; 00209 Position isotope_number; 00210 String atom_group; 00211 String shift_units; 00212 float shift_value; 00213 String reference_method; 00214 String reference_type; 00215 float indirect_shift_ratio; 00216 00217 std::ostream& operator >> (std::ostream& s); 00218 }; 00219 00225 class BALL_EXPORT ShiftReferenceSet 00226 { 00227 public: 00228 ShiftReferenceSet(); 00229 00230 // The saveframe's name = set name 00231 // can be referenced in the saveframe "assigned_chemical_shifts" 00232 String name; 00233 std::vector<ShiftReferenceElement> elements; 00234 00235 std::ostream& operator >> (std::ostream& s); 00236 }; 00237 00243 class BALL_EXPORT NMRAtomData 00244 { 00245 public: 00246 NMRAtomData(); 00247 00248 Position atom_ID; 00249 Position residue_seq_code; 00250 String residue_label; 00251 String atom_name; 00252 char atom_type; 00253 float shift_value; 00254 float error_value; 00255 Position ambiguity_code; 00256 00257 bool operator == (const NMRAtomData& atom) const; 00258 std::ostream& operator >> (std::ostream& s); 00259 }; 00260 00261 00271 class BALL_EXPORT NMRAtomDataSet // _Saveframe_category assigned_chemical_shifts 00272 00273 { 00274 public: 00275 NMRAtomDataSet(NMRStarFile* parent); 00276 00277 String name; 00278 String label; 00279 std::vector<NMRAtomData> atom_data; 00280 String condition; 00281 String reference; 00282 std::vector<String> samples; 00283 00284 std::ostream& operator >> (std::ostream& s); 00285 00286 protected: 00287 NMRStarFile* parent_; 00288 }; 00289 00295 class BALL_EXPORT EntryInformation 00296 { 00297 public: 00298 EntryInformation(); 00299 ~EntryInformation(); 00300 00301 std::ostream& operator >> (std::ostream& s); 00302 void clear(); 00303 00304 String entry_type; 00305 String BMRB_accession_code; 00306 String NMR_STAR_version; 00307 String experimental_method; 00308 String submission_date; 00309 00310 }; 00311 00316 class BALL_EXPORT MonomericPolymer 00317 { 00318 public: 00320 class BALL_EXPORT HomologDB 00321 { 00322 public: 00323 HomologDB(); 00324 00325 std::ostream& operator >> (std::ostream& s); 00326 void clear(); 00327 00328 String name; 00329 String accession_code; 00330 String entry_mol_name; 00331 float seq_to_submitted_percentage; 00332 float subject_length; 00333 float seq_identity; 00334 float seq_positive; 00335 float homology_expectation_value; 00336 }; 00337 00338 00339 MonomericPolymer(); 00340 00341 String label_name; 00342 String type; 00343 String polymer_class; 00344 String common_name; 00345 String name_variant; 00346 float molecular_mass; 00347 String details; 00348 // polymer residue sequence information 00349 int number_of_residues; 00350 String residue_sequence; 00351 // we want to allow things like resid 137A, so we cannot use Index 00352 // key: index -- value: aminoacidname 00353 StringHashMap<String> residues_by_index; 00354 vector<HomologDB> homolog_database_entries; 00355 00356 std::ostream& operator >> (std::ostream& s); 00357 void clear(); 00358 }; 00359 00360 00365 class BALL_EXPORT MolecularSystem 00366 { 00367 // System related information 00368 public: 00369 class BALL_EXPORT RelatedDB 00370 { 00371 public: 00372 RelatedDB(); 00373 00374 std::ostream& operator >> (std::ostream& s); 00375 void clear(); 00376 00377 String name; 00378 String accession_code; 00379 String entry_mol_name; 00380 String relation_type; 00381 String details; 00382 }; 00383 00384 00385 // Central class for convenience 00386 class BALL_EXPORT ChemicalUnit 00387 { 00388 public: 00389 ChemicalUnit(); 00390 std::ostream& operator >> (std::ostream& s); 00391 void clear(); 00392 00393 String component_name; 00394 String label; 00395 MonomericPolymer* monomeric_polymer; 00396 NMRAtomDataSet* shifts; 00397 }; 00398 00399 00400 MolecularSystem(); 00401 ~MolecularSystem(); 00402 00403 ChemicalUnit const& getChemicalUnit(Position i) const { return chemical_units[i]; } 00404 ChemicalUnit& getChemicalUnit(Position i) { return chemical_units[i]; } 00405 00406 Size getNumberOfChemicalUnits() const {return chemical_units.size(); } 00407 00408 // Name of the molecular system 00409 String system_name; 00410 String abbreviation_common; 00411 vector<ChemicalUnit> chemical_units; 00412 String system_physical_state; 00413 String system_oligomer_state; 00414 String system_paramagnetic; 00415 String system_thiol_state; 00417 float system_molecular_weight; 00418 // related entries in various DB's 00419 vector<RelatedDB> related_database_entries; 00420 00422 00423 std::ostream& operator >> (std::ostream& s); 00424 void clear(); 00425 }; 00426 00427 00432 class BALL_EXPORT NMRSpectrometer 00433 { 00434 public: 00435 String name; 00436 String manufacturer; 00437 String model; 00438 float field_strength; 00439 00440 std::ostream& operator >> (std::ostream& s); 00441 }; 00442 00443 00449 class BALL_EXPORT BALLToBMRBMapper 00450 { 00451 public: 00452 00459 //<saveframe_id, atom_id_in_nmr_atom_data_set> 00460 typedef std::pair<Position, Position> BMRBIndex; 00461 typedef std::map<Atom const* , BMRBIndex> BALLToBMRBMapping; 00462 typedef std::map<const NMRAtomData*, Atom const*> BMRBToBALLMapping; 00463 //TODO: Dont use pointer but something more sophisticated! 00464 00466 00470 BALLToBMRBMapper(); 00471 00478 BALLToBMRBMapper(Chain const& chain, const NMRStarFile& nmr_data, const String& chemical_unit); 00479 00481 virtual ~BALLToBMRBMapper() {} 00482 00484 00487 00489 const Chain* getChain() const {return chain_;} 00490 00492 void setChain(Chain const& chain) { chain_ = &chain; 00493 num_mismatches_ = -1; 00494 num_gaps_ = -1;} 00495 00497 void setNMRStarFile(NMRStarFile const& nmrfile) {nmr_data_ = &nmrfile; 00498 num_mismatches_ = -1; 00499 num_gaps_ = -1;} 00500 00502 const NMRStarFile* getNMRStarFile() const {return nmr_data_;} 00503 00505 void setNMRAtomDataSet(NMRAtomDataSet const& nmr_atom_data_set){nmr_atom_data_set_= &nmr_atom_data_set;} 00506 00508 bool setNMRAtomDataSetByName(String const& chemical_unit_name); 00509 00510 // Get the NMRAtomDataSet 00511 const NMRAtomDataSet* getNMRAtomDataSet() const{return nmr_atom_data_set_;} 00512 00514 BALLToBMRBMapping& getBALLToBMRBMapping() {return ball_to_bmrb_map_;} 00515 00517 const BALLToBMRBMapping& getBALLToBMRBMapping() const {return ball_to_bmrb_map_;} 00518 00520 BMRBToBALLMapping& getBMRBToBALLMapping() {return bmrb_to_ball_map_;} 00521 00523 const BMRBToBALLMapping& getBMRBToBALLMapping() const {return bmrb_to_ball_map_;} 00524 00526 int getNumberOfMismatches(){return num_mismatches_;} 00527 00529 int getNumberOfGaps(){return num_gaps_;} 00530 00532 bool isMapped(const NMRAtomData& nmr_atom) const; 00533 00539 const Atom* getBALLAtom(const NMRAtomData& nmr_atom) const; 00540 00545 bool isMapped(Atom const* atom) const; 00546 00548 BMRBIndex operator () (const Atom* atom); 00549 00556 bool createTrivialMapping(); 00557 00567 bool createMapping(const String& aligned_ball_sequence, 00568 const String& aligned_nmrstar_sequence); 00569 00572 void clear(); 00573 00575 00576 protected: 00577 00578 Peptides::NameConverter name_converter_; 00579 00583 00585 BALLToBMRBMapping ball_to_bmrb_map_; 00586 00588 BMRBToBALLMapping bmrb_to_ball_map_; 00589 00590 // NOTE: do *not* attempt to delete these pointers! 00591 const Chain* chain_; 00592 const NMRStarFile* nmr_data_; 00593 const NMRAtomDataSet* nmr_atom_data_set_; 00594 Position nmr_atom_data_set_index_; 00595 int num_mismatches_; 00596 int num_gaps_; 00597 bool valid_; 00599 00600 private: 00601 const Atom* findNMRAtom_(const NMRAtomData& atom) const; 00602 00603 }; 00604 00606 00609 00612 NMRStarFile(); 00613 00618 NMRStarFile(const String& file_name, File::OpenMode open_mode = std::ios::in); 00619 00621 ~NMRStarFile(); 00623 00624 00628 00632 bool read(); 00633 00634 /* Read an NMRStarFile and assign the shifts to the 00635 given AtomContainer using a trivial standard mapping. 00636 If the AtomContainer is a system, the first chain in chosen. 00637 00638 @param ac AtomContainer to which the NMRStarfile's shift should be assigned. 00639 @return bool - <tt>true</tt> if reading the file was successful 00640 */ 00641 //TODO to be able to use this function, further functions getMapping() and assign() are needed. 00642 bool read(AtomContainer& ac); 00643 00650 bool assignShifts(BALLToBMRBMapper& ball_to_bmrb_mapping); 00651 00663 bool assignShifts(AtomContainer& ac, 00664 const String& chemical_unit, 00665 const String& aligned_ball_sequence, 00666 const String& aligned_nmrstar_sequence); 00667 00670 Size getNumberOfAtoms() const; 00671 00674 Size getNumberOfShiftsAssigned() const {return number_of_assigned_shifts_;}; 00675 00678 const std::vector<NMRAtomDataSet>& getNMRData() const; 00679 00682 const EntryInformation& getEntryInformation() const {return entry_information_;}; 00683 00686 const MolecularSystem& getMolecularInformation() const {return molecular_system_;}; 00687 00690 MolecularSystem& getMolecularInformation() {return molecular_system_;}; 00691 00692 00695 const MolecularSystem::ChemicalUnit& getChemicalUnitByLabel(String const& label) const; 00696 00699 MolecularSystem::ChemicalUnit& getChemicalUnitByLabel(String const& label); 00700 00701 00704 bool hasSampleCondition(String name); 00705 00708 bool hasSampleCondition(String name) const; 00709 00712 SampleCondition& getSampleConditionByName(String name); 00713 00716 const SampleCondition& getSampleConditionByName(String name) const; 00717 00719 SampleCondition& getSampleCondition(Position i) {return sample_conditions_[i];}; 00720 00722 const SampleCondition& getSampleCondition(Position i) const {return sample_conditions_[i];}; 00723 00725 Size getNumberOfSampleConditions() const {return sample_conditions_.size();}; 00726 00728 const std::vector<SampleCondition>& getSampleConditions() const {return sample_conditions_;}; 00729 00731 std::vector<SampleCondition>& getSampleConditions() {return sample_conditions_;}; 00732 00733 // addSampleCondition TODO!! 00734 00736 std::vector<Sample> getSamples() const {return samples_;}; 00737 //const std::vector<Sample>& getSamples() const {return samples_;}; 00738 00740 Size getNumberOfSamples() const {return samples_.size();}; 00741 00743 bool hasSample(String label) const; 00744 00748 Sample getSample(Position i) const; 00749 00753 Sample getSample(String label) const; 00754 00756 std::vector<ShiftReferenceSet>& getShiftReferenceSets() {return shift_references_;}; 00758 const std::vector<ShiftReferenceSet>& getShiftReferenceSets() const {return shift_references_;}; 00759 00761 Size getNumberOfShiftReferenceSets() const {return shift_references_.size();}; 00762 00764 bool hasShiftReferenceSet(String name); 00765 00767 ShiftReferenceSet& getShiftReferenceSet(Position i) {return shift_references_[i];}; 00769 const ShiftReferenceSet& getShiftReferenceSet(Position i) const {return shift_references_[i];}; 00770 00772 const ShiftReferenceSet& getShiftReferenceSetByName(String name) const; 00774 ShiftReferenceSet& getShiftReferenceSetByName(String name); 00775 00776 00778 std::vector<NMRSpectrometer>& getNMRSpectrometers() {return nmr_spectrometers_;}; 00780 const std::vector<NMRSpectrometer>& getNMRSpectrometers() const {return nmr_spectrometers_;}; 00781 00783 Size getNumberOfNMRSpectrometers() const {return nmr_spectrometers_.size();}; 00784 00786 NMRSpectrometer& getNMRSpectrometer(Position i); 00788 const NMRSpectrometer& getNMRSpectrometer(Position i) const; 00789 00791 NMRSpectrometer& getNMRSpectrometerByName(String name); 00793 const NMRSpectrometer& getNMRSpectrometerByName(String name) const; 00794 00796 String getNMRSpectrometerManufacturer(Position i) const; 00797 00799 float getNMRSpectrometerFieldStrength(Position i) const; 00800 00801 00805 NMRStarFile::MonomericPolymer& getMonomericPolymer(Position i); 00806 00810 const NMRStarFile::MonomericPolymer& getMonomericPolymer(Position i) const; 00811 00815 NMRStarFile::MonomericPolymer& getMonomericPolymer(const String& name); 00816 00820 const NMRStarFile::MonomericPolymer& getMonomericPolymer(const String& name) const; 00821 00823 Size getNumberOfMonomericPolymers() const {return monomeric_polymers_.size();}; 00824 00826 vector<MonomericPolymer> getMonomericPolymers() const {return monomeric_polymers_;}; 00827 00829 bool hasMonomericPolymer(String name) const; 00830 00836 bool isMonomericPolymer(String chemical_unit_label); 00837 00841 //TODO: Store changes/additions as Saveframes also in CIFFile 00842 void addMonomericPolymer(MonomericPolymer mp); 00843 00844 00853 String getResidueSequence(Position i=0) const; 00854 00856 bool hasHshifts() const {return has_H_shifts_;}; 00857 00859 bool hasCshifts() const {return has_C_shifts_;}; 00860 00862 bool hasNshifts() const {return has_N_shifts_;}; 00863 00865 00866 00870 00874 bool operator == (const NMRStarFile& f); 00875 00879 bool operator != (const NMRStarFile& f); 00880 00883 void clear(); 00884 00886 00887 private: 00888 00889 /*_ @name NMRStar file specific Help-Methods 00890 */ 00891 //_@{ 00892 00894 void readEntryInformation_(); 00895 00897 void readMolSystem_(); 00898 00900 void readMonomericPolymers_(); 00901 00903 void readSampleConditions_(); 00904 00906 void readShiftReferences_(); 00907 00909 void readShifts_(); 00910 00912 void readSamples_(); 00913 00915 void readNMRSpectrometer_(); 00916 00918 void findDependiencies_(); 00919 00921 void setSpecialCharacters_(String characters); 00922 00924 bool isValidSingleValue_(String value); 00925 00927 float valueToFloat_(String value); 00928 00930 int valueToInt_(String value); 00936 bool assignShifts_(BALLToBMRBMapper& pdb_to_bmrb_mapping); 00937 00938 //_@} 00939 /*_ @name NMRStar file specific attributes 00940 */ 00941 //_@{ 00942 00943 /*_ A flag indicating validity of this instance. A sole NMRStarFile 00944 instance cannot be valid, because it does not have any information. 00945 */ 00946 bool valid_; 00947 00949 Size number_of_shift_sets_; 00950 00952 Size number_of_assigned_shifts_; 00953 00955 EntryInformation entry_information_; 00956 00958 MolecularSystem molecular_system_; 00959 00961 std::vector<NMRAtomDataSet> atom_data_sets_; 00962 00964 std::vector<SampleCondition> sample_conditions_; 00965 00967 std::vector<Sample> samples_; 00968 00970 std::vector<ShiftReferenceSet> shift_references_; 00971 00973 std::vector<NMRSpectrometer> nmr_spectrometers_; 00974 00976 vector<MonomericPolymer> monomeric_polymers_; 00977 00979 bool has_H_shifts_; 00980 bool has_C_shifts_; 00981 bool has_N_shifts_; 00982 00983 // a dummy saveframe 00984 SaveFrame dummy_saveframe_; 00985 00986 // a dummy sample condition 00987 SampleCondition dummy_sample_condition_; 00988 00989 // a dummy sample 00990 Sample dummy_sample_; 00991 00992 // a dummy shift reference set 00993 ShiftReferenceSet dummy_shift_reference_set_; 00994 00995 // a dummy nmr spectrometer 00996 NMRSpectrometer dummy_NMR_spectrometer_; 00997 00998 // a dummy nmr spectrometer 00999 MonomericPolymer dummy_monomeric_polymer_; 01000 01002 String special_characters_; 01003 //_@} 01004 }; 01005 01007 } // Namespace BALL 01008 01009 #endif // BALL_FORMAT_NMRSTARFILE_H