BALL  1.4.1
NMRStarFile.h
Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 
00005 #ifndef BALL_FORMAT_NMRSTARFILE_H
00006 #define BALL_FORMAT_NMRSTARFILE_H
00007 
00008 #ifndef BALL_FORMAT_CIFFILE_H
00009 # include <BALL/FORMAT/CIFFile.h>
00010 #endif
00011 
00012 #ifndef BALL_COMMON_LIMITS_H
00013 # include <BALL/COMMON/limits.h>
00014 #endif
00015 
00016 #ifndef BALL_KERNEL_PROTEIN_H
00017 # include <BALL/KERNEL/protein.h>
00018 #endif
00019 
00020 #ifndef BALL_STRUCTURE_PEPTIDES_H
00021 # include <BALL/STRUCTURE/peptides.h>
00022 #endif
00023 
00024 #include <vector>
00025 
00026 namespace BALL
00027 {
00117   class BALL_EXPORT NMRStarFile
00118     : public CIFFile
00119   {
00120     public:
00121 
00122       // Constants used to denote unknown values, one per value type.
00123       static const float    FLOAT_VALUE_NA;
00124       static const int      INT_VALUE_NA;
00125       // (All three are only declared here; this header gives no initializers.)
00126       static const Position POSITION_VALUE_NA;
00127 
00133 
00139       class BALL_EXPORT SampleCondition // one named set of sample conditions, stored per variable type
00140       {
00141         public:
00142           SampleCondition();
00143 
00144           // saveframe name -- referenced in the sample_condition_label 
00145           // of  the saveframe "assigned_chemical_shifts"   
00146           String                name;
00147           // According to the NMRStarFile 2.1 documentation 
00148           // the first entry MUST BE "_Variable_type".
00149           // This is why we are allowed to map per type :-)
00150           vector<String>         types;
00151           StringHashMap<float>   values;
00152           StringHashMap<float>   errors;
00153           StringHashMap<String>  units;
00154           // Returns true if `values` holds an entry for the given type. Const, and takes the key by reference to avoid a String copy.
00155           bool hasType(const String& type) const {return values.has(type);}
00156           std::ostream& operator >> (std::ostream& s); // NOTE(review): presumably writes this record to s -- confirm in the implementation
00157       };
00158   
00162       class BALL_EXPORT Sample // record with a label, type, details and a list of Component entries
00163       {
00164         public:
00165         
00170           class BALL_EXPORT Component // element type of Sample::components
00171           {
00172             public:
00173               Component();
00174               void clear();
00175               // Plain public data fields of one component.
00176               String   label;
00177               float    concentration_value;
00178               String   value_unit; // NOTE(review): presumably the unit of the concentration fields -- confirm
00179               float    concentration_min;
00180               float    concentration_max;
00181               String   isotopic_labeling;
00182               // NOTE(review): member operator>> taking an ostream; presumably writes this component to s -- confirm in the implementation.
00183               std::ostream& operator >> (std::ostream& s);
00184           };
00185 
00186           Sample();
00187           void clear();
00188           // Plain public data fields of the sample.
00189           String label;
00190           String type;
00191           String details;
00192           vector <Component> components;
00193           // NOTE(review): presumably writes this sample to s -- confirm in the implementation.
00194           std::ostream& operator >> (std::ostream& s);
00195       };
00196 
00202       class BALL_EXPORT ShiftReferenceElement // element type of ShiftReferenceSet::elements
00203       {
00204         public:
00205           ShiftReferenceElement();
00206           // Plain public data fields; the constructor is defined outside this header.
00207           String      mol_common_name;
00208           String      atom_type;
00209           Position    isotope_number;
00210           String      atom_group;
00211           String      shift_units;
00212           float       shift_value;
00213           String      reference_method;
00214           String      reference_type;
00215           float       indirect_shift_ratio;
00216           // NOTE(review): member operator>> taking an ostream; presumably writes this element to s -- confirm in the implementation.
00217           std::ostream& operator >> (std::ostream& s);
00218       };
00219 
00225       class BALL_EXPORT ShiftReferenceSet // a named list of ShiftReferenceElement entries
00226       {
00227         public:
00228           ShiftReferenceSet();
00229         
00230           // The saveframe's name is used as the set name;
00231           // it can be referenced in the saveframe "assigned_chemical_shifts".
00232           String                             name;
00233           std::vector<ShiftReferenceElement> elements;
00234           // NOTE(review): member operator>> taking an ostream; presumably writes this set to s -- confirm in the implementation.
00235           std::ostream& operator >> (std::ostream& s);
00236       };
00237 
00243       class BALL_EXPORT NMRAtomData // element type of NMRAtomDataSet::atom_data
00244       {
00245         public:
00246           NMRAtomData();
00247           // Plain public data fields of one atom entry.
00248           Position   atom_ID;
00249           Position   residue_seq_code;
00250           String     residue_label;
00251           String     atom_name;
00252           char       atom_type; // single character; NOTE(review): presumably the element symbol -- confirm
00253           float      shift_value;
00254           float      error_value;
00255           Position   ambiguity_code;
00256           // Equality comparison is declared const; which fields it compares is defined in the implementation.
00257           bool operator == (const NMRAtomData& atom) const;
00258           std::ostream& operator >> (std::ostream& s); // NOTE(review): presumably writes this entry to s -- confirm
00259       };
00260 
00261 
00271       class BALL_EXPORT NMRAtomDataSet // _Saveframe_category  assigned_chemical_shifts
00272       // Holds a list of NMRAtomData plus the names it refers to, and a pointer back to an NMRStarFile.
00273       {
00274         public:
00275           NMRAtomDataSet(NMRStarFile* parent); // NOTE(review): single-argument and not explicit, so NMRStarFile* converts implicitly
00276 
00277           String                     name;
00278           String                     label;
00279           std::vector<NMRAtomData>   atom_data;
00280           String                     condition; // NOTE(review): presumably a SampleCondition name -- confirm
00281           String                     reference; // NOTE(review): presumably a ShiftReferenceSet name -- confirm
00282           std::vector<String>        samples;
00283           // NOTE(review): member operator>> taking an ostream; presumably writes this set to s -- confirm in the implementation.
00284           std::ostream& operator >> (std::ostream& s);
00285 
00286         protected:
00287           NMRStarFile*                    parent_; // raw pointer received by the constructor; NOTE(review): presumably non-owning -- confirm
00288       };
00289 
00295       class BALL_EXPORT EntryInformation // record of five string fields describing the entry
00296       {
00297         public:
00298           EntryInformation();
00299           ~EntryInformation();
00300           // NOTE(review): member operator>> taking an ostream; presumably writes this record to s -- confirm in the implementation.
00301           std::ostream& operator >> (std::ostream& s);
00302           void clear();
00303           // Plain public data fields, all stored as strings.
00304           String   entry_type;
00305           String   BMRB_accession_code;
00306           String   NMR_STAR_version;
00307           String   experimental_method;
00308           String   submission_date;
00309 
00310       };
00311 
00316       class BALL_EXPORT MonomericPolymer // record describing one polymer: names, mass, residue sequence, homolog entries
00317       {
00318         public:
00320           class BALL_EXPORT HomologDB // element type of MonomericPolymer::homolog_database_entries
00321           {
00322             public:
00323               HomologDB();
00324               // NOTE(review): member operator>> taking an ostream; presumably writes this entry to s -- confirm in the implementation.
00325               std::ostream& operator >> (std::ostream& s);
00326               void clear();
00327               // Plain public data fields of one homolog database entry.
00328               String  name;
00329               String  accession_code;
00330               String  entry_mol_name;
00331               float   seq_to_submitted_percentage;
00332               float   subject_length;
00333               float   seq_identity;
00334               float   seq_positive;
00335               float   homology_expectation_value;
00336           };
00337 
00338 
00339           MonomericPolymer();
00340           // Plain public data fields of the polymer.
00341           String   label_name;
00342           String   type;
00343           String   polymer_class;
00344           String   common_name;
00345           String   name_variant;
00346           float    molecular_mass;
00347           String   details;
00348           // polymer residue sequence information
00349           int      number_of_residues;
00350           String   residue_sequence;
00351           // We want to allow residue ids like 137A, so the key is a string and not an Index.
00352           // key: index -- value: amino acid name
00353           StringHashMap<String>   residues_by_index;
00354           vector<HomologDB>       homolog_database_entries;
00355           // NOTE(review): presumably writes this polymer to s -- confirm in the implementation.
00356           std::ostream& operator >> (std::ostream& s);
00357           void clear();
00358       };
00359 
00360 
00365       class BALL_EXPORT MolecularSystem // system-level record: names, state strings, chemical units, related DB entries
00366       {
00367         // System related information
00368         public:
00369           class BALL_EXPORT RelatedDB // element type of MolecularSystem::related_database_entries
00370           {
00371             public:
00372               RelatedDB();
00373               // NOTE(review): member operator>> taking an ostream; presumably writes this entry to s -- confirm in the implementation.
00374               std::ostream& operator >> (std::ostream& s);
00375               void clear();
00376               // Plain public data fields, all stored as strings.
00377               String    name;
00378               String    accession_code;
00379               String    entry_mol_name;
00380               String    relation_type;
00381               String    details;
00382           };
00383 
00384 
00385           // Central class for convenience: names one unit and points to its polymer and shift data.
00386           class BALL_EXPORT ChemicalUnit
00387           {
00388             public:
00389               ChemicalUnit();
00390               std::ostream& operator >> (std::ostream& s); // NOTE(review): presumably writes this unit to s -- confirm
00391               void clear();
00392 
00393               String            component_name;
00394               String            label;
00395               MonomericPolymer* monomeric_polymer; // raw pointer; NOTE(review): ownership is not visible here -- confirm
00396               NMRAtomDataSet*   shifts;            // raw pointer; NOTE(review): ownership is not visible here -- confirm
00397           };
00398 
00399 
00400           MolecularSystem();
00401           ~MolecularSystem();
00402           // Both overloads index chemical_units with operator[]; no range check on i is visible here.
00403           ChemicalUnit const&  getChemicalUnit(Position i) const { return chemical_units[i]; }
00404           ChemicalUnit&  getChemicalUnit(Position i) { return chemical_units[i]; }
00405           // Returns chemical_units.size().
00406           Size getNumberOfChemicalUnits() const {return chemical_units.size(); }
00407 
00408           // Name of the molecular system
00409           String                system_name;
00410           String                abbreviation_common;
00411           vector<ChemicalUnit>  chemical_units;
00412           String                system_physical_state;
00413           String                system_oligomer_state;
00414           String                system_paramagnetic;
00415           String                system_thiol_state;
00417           float                 system_molecular_weight;
00418           // related entries in various databases
00419           vector<RelatedDB>     related_database_entries;
00420 
00422           // NOTE(review): presumably writes this record to s -- confirm in the implementation.
00423           std::ostream& operator >> (std::ostream& s);
00424           void clear();
00425       };
00426 
00427 
00432       class BALL_EXPORT NMRSpectrometer // record with name, manufacturer, model and field strength of one spectrometer
00433       {
00434         public: // No constructor is declared, so default construction leaves field_strength uninitialized.
00435           String    name;
00436           String    manufacturer;
00437           String    model;
00438           float     field_strength; // NOTE(review): unit is not stated in this header -- confirm
00439           // NOTE(review): member operator>> taking an ostream; presumably writes this record to s -- confirm in the implementation.
00440           std::ostream& operator >> (std::ostream& s);
00441       };
00442 
00443 
00449       class BALL_EXPORT BALLToBMRBMapper // keeps two lookup maps between Atom pointers and NMR atom data (see typedefs)
00450       {
00451         public:
00452 
00459           //<saveframe_id, atom_id_in_nmr_atom_data_set>
00460           typedef std::pair<Position, Position>             BMRBIndex;
00461           typedef std::map<Atom const* , BMRBIndex>         BALLToBMRBMapping;
00462           typedef std::map<const NMRAtomData*, Atom const*> BMRBToBALLMapping;
00463           //TODO: Don't use pointers but something more sophisticated!
00464 
00466 
00470           BALLToBMRBMapper();
00471 
00478           BALLToBMRBMapper(Chain const& chain, const NMRStarFile& nmr_data, const String& chemical_unit);
00479 
00481           virtual ~BALLToBMRBMapper() {}
00482 
00484 
00487           // Returns the stored chain pointer.
00489           const Chain* getChain() const {return chain_;}
00490           // Stores the address of `chain` (the caller must keep it alive) and resets both counters to -1.
00492           void setChain(Chain const& chain) { chain_ = &chain;
00493                                               num_mismatches_ = -1;
00494                                               num_gaps_ = -1;}
00495           // Stores the address of `nmrfile` (the caller must keep it alive) and resets both counters to -1.
00497           void setNMRStarFile(NMRStarFile const& nmrfile) {nmr_data_ = &nmrfile;
00498                                                            num_mismatches_ = -1;
00499                                                            num_gaps_ = -1;}
00500           // Returns the stored file pointer.
00502           const NMRStarFile* getNMRStarFile() const {return nmr_data_;}
00503           // Stores the address of the given data set; the counters are not touched.
00505           void setNMRAtomDataSet(NMRAtomDataSet const& nmr_atom_data_set){nmr_atom_data_set_= &nmr_atom_data_set;}
00506 
00508           bool setNMRAtomDataSetByName(String const& chemical_unit_name);
00509 
00510           // Get the NMRAtomDataSet 
00511           const NMRAtomDataSet* getNMRAtomDataSet() const{return nmr_atom_data_set_;}
00512           // Mutable and read-only access to ball_to_bmrb_map_.
00514           BALLToBMRBMapping& getBALLToBMRBMapping() {return ball_to_bmrb_map_;}
00515 
00517           const BALLToBMRBMapping& getBALLToBMRBMapping() const {return ball_to_bmrb_map_;}
00518           // Mutable and read-only access to bmrb_to_ball_map_.
00520           BMRBToBALLMapping& getBMRBToBALLMapping() {return bmrb_to_ball_map_;}
00521 
00523           const BMRBToBALLMapping& getBMRBToBALLMapping() const {return bmrb_to_ball_map_;}
00524           // Returns num_mismatches_ (the setters above reset it to -1). Const so it works on a const mapper.
00526           int getNumberOfMismatches() const {return num_mismatches_;}
00527           // Returns num_gaps_ (the setters above reset it to -1). Const so it works on a const mapper.
00529           int getNumberOfGaps() const {return num_gaps_;}
00530 
00532           bool isMapped(const NMRAtomData& nmr_atom) const;
00533 
00539           const Atom* getBALLAtom(const NMRAtomData& nmr_atom) const;
00540 
00545           bool isMapped(Atom const* atom) const;
00546           // NOTE(review): declared non-const, unlike the other lookups -- confirm whether it modifies the mapper.
00548           BMRBIndex operator () (const Atom* atom);
00549 
00556           bool createTrivialMapping();
00557 
00567           bool createMapping(const String& aligned_ball_sequence,
00568                              const String& aligned_nmrstar_sequence);
00569 
00572           void clear();
00573 
00575 
00576         protected:
00577 
00578           Peptides::NameConverter name_converter_;
00579 
00583 
00585           BALLToBMRBMapping       ball_to_bmrb_map_;
00586 
00588           BMRBToBALLMapping       bmrb_to_ball_map_;
00589 
00590           // NOTE: do *not* attempt to delete these pointers!
00591           const Chain*            chain_;
00592           const NMRStarFile*      nmr_data_;
00593           const NMRAtomDataSet*   nmr_atom_data_set_;
00594           Position                nmr_atom_data_set_index_;
00595           int                     num_mismatches_;
00596           int                     num_gaps_;
00597           bool                    valid_;
00599 
00600         private:
00601           const Atom* findNMRAtom_(const NMRAtomData& atom) const;
00602 
00603       };
00604 
00606 
00609 
00612       NMRStarFile(); // default constructor
00613       // Constructor taking a file name; open_mode defaults to std::ios::in.
00618       NMRStarFile(const String& file_name, File::OpenMode open_mode = std::ios::in);
00619 
00621       ~NMRStarFile();
00623 
00624 
00628       // NOTE(review): presumably parses the file and fills the data members below -- confirm in the implementation.
00632       bool read();
00633 
00634       /*  Read an NMRStarFile and assign the shifts to the
00635           given AtomContainer using a trivial standard mapping.
00636           If the AtomContainer is a system, the first chain is chosen.
00637        
00638           @param  ac AtomContainer to which the NMRStarFile's shifts should be assigned.
00639           @return bool - <tt>true</tt> if reading the file was successful 
00640        */
00641       //TODO: to be able to use this function, the further functions getMapping() and assign() are needed.
00642       bool read(AtomContainer& ac);
00643       // Overload taking an already prepared mapper.
00650       bool assignShifts(BALLToBMRBMapper& ball_to_bmrb_mapping);
00651       // Overload taking an atom container, a chemical unit name and two aligned sequences.
00663       bool assignShifts(AtomContainer& ac,
00664                         const String& chemical_unit,
00665                         const String& aligned_ball_sequence,
00666                         const String& aligned_nmrstar_sequence);
00667 
00670       Size getNumberOfAtoms() const;
00671       // Returns number_of_assigned_shifts_.
00674       Size getNumberOfShiftsAssigned() const {return number_of_assigned_shifts_;};
00675 
00678       const std::vector<NMRAtomDataSet>& getNMRData() const;
00679       // Read-only access to entry_information_.
00682       const EntryInformation& getEntryInformation() const {return entry_information_;};
00683       // Read-only access to molecular_system_.
00686       const MolecularSystem& getMolecularInformation() const {return molecular_system_;};
00687       // Mutable access to molecular_system_.
00690       MolecularSystem& getMolecularInformation() {return molecular_system_;};
00691 
00692       // Lookup of a chemical unit by label, const and non-const; behavior for an unknown label is defined in the implementation.
00695       const MolecularSystem::ChemicalUnit& getChemicalUnitByLabel(String const& label) const;
00696 
00699       MolecularSystem::ChemicalUnit& getChemicalUnitByLabel(String const& label);
00700 
00701 
00704       bool hasSampleCondition(String name); // NOTE(review): non-const twin of the const overload below; name is passed by value
00705 
00708       bool hasSampleCondition(String name) const;
00709       // Lookup by name, mutable and read-only; behavior for an unknown name is defined in the implementation.
00712       SampleCondition& getSampleConditionByName(String name);
00713 
00716       const SampleCondition& getSampleConditionByName(String name) const;
00717       // Index access via operator[] on sample_conditions_; no range check on i is visible here.
00719       SampleCondition& getSampleCondition(Position i) {return sample_conditions_[i];};
00720 
00722       const SampleCondition& getSampleCondition(Position i) const {return sample_conditions_[i];};
00723       // Returns sample_conditions_.size().
00725       Size getNumberOfSampleConditions() const {return sample_conditions_.size();};
00726       // Read-only and mutable access to the whole sample_conditions_ vector.
00728       const std::vector<SampleCondition>& getSampleConditions() const {return sample_conditions_;};
00729 
00731       std::vector<SampleCondition>& getSampleConditions() {return sample_conditions_;};
00732 
00733       // TODO: addSampleCondition
00734       // Returns a copy of samples_ (by value); the const-reference variant is commented out below.
00736       std::vector<Sample> getSamples()  const {return samples_;};
00737       //const std::vector<Sample>& getSamples() const {return samples_;};
00738       // Returns samples_.size().
00740       Size getNumberOfSamples() const {return samples_.size();};
00741 
00743       bool hasSample(String label) const;
00744       // Both getSample overloads return a Sample by value.
00748       Sample getSample(Position i) const;
00749 
00753       Sample getSample(String label) const;
00754 
00756       std::vector<ShiftReferenceSet>& getShiftReferenceSets() {return shift_references_;}; // mutable access to shift_references_
00758       const std::vector<ShiftReferenceSet>& getShiftReferenceSets() const  {return shift_references_;}; // read-only access
00759       // Returns shift_references_.size().
00761       Size getNumberOfShiftReferenceSets() const {return shift_references_.size();};
00762       // NOTE(review): declared non-const and takes name by value, unlike hasSample()/hasMonomericPolymer().
00764       bool hasShiftReferenceSet(String name);
00765       // Index access via operator[] on shift_references_; no range check on i is visible here.
00767       ShiftReferenceSet& getShiftReferenceSet(Position i) {return shift_references_[i];};
00769       const ShiftReferenceSet& getShiftReferenceSet(Position i) const {return shift_references_[i];};
00770       // Lookup by name; behavior for an unknown name is defined in the implementation.
00772       const ShiftReferenceSet& getShiftReferenceSetByName(String name) const;
00774       ShiftReferenceSet& getShiftReferenceSetByName(String name);
00775 
00776       // Mutable and read-only access to nmr_spectrometers_.
00778       std::vector<NMRSpectrometer>& getNMRSpectrometers() {return nmr_spectrometers_;};
00780       const std::vector<NMRSpectrometer>& getNMRSpectrometers() const {return nmr_spectrometers_;};
00781       // Returns nmr_spectrometers_.size().
00783       Size getNumberOfNMRSpectrometers() const  {return nmr_spectrometers_.size();};
00784       // Index access, defined out of line; range handling is not visible here.
00786       NMRSpectrometer& getNMRSpectrometer(Position i);
00788       const NMRSpectrometer& getNMRSpectrometer(Position i) const;
00789       // Lookup by name; behavior for an unknown name is defined in the implementation.
00791       NMRSpectrometer& getNMRSpectrometerByName(String name);
00793       const NMRSpectrometer& getNMRSpectrometerByName(String name) const;
00794       // Convenience getters for single fields of the i-th spectrometer (returned by value).
00796       String getNMRSpectrometerManufacturer(Position i) const;
00797 
00799       float getNMRSpectrometerFieldStrength(Position i) const;
00800 
00801 
00805       NMRStarFile::MonomericPolymer& getMonomericPolymer(Position i); // access by index, mutable
00806 
00810       const NMRStarFile::MonomericPolymer& getMonomericPolymer(Position i) const; // access by index, read-only
00811 
00815       NMRStarFile::MonomericPolymer& getMonomericPolymer(const String& name); // access by name, mutable
00816 
00820       const NMRStarFile::MonomericPolymer& getMonomericPolymer(const String& name) const; // access by name, read-only
00821       // Returns monomeric_polymers_.size().
00823       Size getNumberOfMonomericPolymers() const {return monomeric_polymers_.size();};
00824       // Returns a copy of monomeric_polymers_ (by value).
00826       vector<MonomericPolymer> getMonomericPolymers() const {return monomeric_polymers_;};
00827 
00829       bool hasMonomericPolymer(String name) const;
00830       // NOTE(review): declared non-const -- confirm whether it modifies the file object.
00836       bool isMonomericPolymer(String chemical_unit_label);
00837 
00841       //TODO: Store changes/additions as SaveFrames in the CIFFile as well
00842       void addMonomericPolymer(MonomericPolymer mp);
00843 
00844       // The index i defaults to 0; NOTE(review): presumably an index into monomeric_polymers_ -- confirm.
00853       String getResidueSequence(Position i=0) const;
00854       // Returns the flag has_H_shifts_.
00856       bool hasHshifts() const {return has_H_shifts_;};
00857       // Returns the flag has_C_shifts_.
00859       bool hasCshifts() const {return has_C_shifts_;};
00860       // Returns the flag has_N_shifts_.
00862       bool hasNshifts() const {return has_N_shifts_;};
00863 
00865 
00866 
00870       // NOTE(review): both comparison operators are declared non-const, so they cannot be called on a const NMRStarFile.
00874       bool operator == (const NMRStarFile& f);
00875 
00879       bool operator != (const NMRStarFile& f);
00880 
00883       void clear();
00884 
00886 
00887     private:
00888 
00889       /*_ @name NMRStar file specific Help-Methods
00890        */
00891       //_@{
00892       // One private read*_() helper per kind of data; each takes no arguments and returns nothing.
00894       void readEntryInformation_();
00895 
00897       void readMolSystem_();
00898 
00900       void readMonomericPolymers_();
00901 
00903       void readSampleConditions_();
00904 
00906       void readShiftReferences_();
00907 
00909       void readShifts_();
00910 
00912       void readSamples_();
00913 
00915       void readNMRSpectrometer_();
00916       // (the identifier is spelled "Dependiencies" in the code)
00918       void findDependiencies_();
00919       // NOTE(review): presumably stores its argument in special_characters_ -- confirm in the implementation.
00921       void setSpecialCharacters_(String characters);
00922 
00924       bool isValidSingleValue_(String value);
00925       // String-to-number conversions; NOTE(review): presumably yield FLOAT_VALUE_NA / INT_VALUE_NA for unknown values -- confirm.
00927       float valueToFloat_(String value);
00928 
00930       int valueToInt_(String value);
00936       bool assignShifts_(BALLToBMRBMapper& pdb_to_bmrb_mapping); // private counterpart of the public assignShifts(BALLToBMRBMapper&)
00937 
00938       //_@}
00939       /*_ @name NMRStar file specific attributes
00940       */
00941       //_@{
00942 
00943       /*_ A flag indicating validity of this instance. A sole NMRStarFile
00944         instance cannot be valid, because it does not have any information.
00945       */
00946       bool valid_;
00947 
00949       Size number_of_shift_sets_;
00950       // Returned by getNumberOfShiftsAssigned().
00952       Size number_of_assigned_shifts_;
00953       // Returned by getEntryInformation().
00955       EntryInformation entry_information_;
00956       // Returned by getMolecularInformation().
00958       MolecularSystem molecular_system_;
00959 
00961       std::vector<NMRAtomDataSet> atom_data_sets_;
00962       // Exposed through getSampleCondition(s)().
00964       std::vector<SampleCondition> sample_conditions_;
00965       // Exposed through getSamples() / getNumberOfSamples().
00967       std::vector<Sample> samples_;
00968       // Exposed through getShiftReferenceSet(s)().
00970       std::vector<ShiftReferenceSet> shift_references_;
00971       // Exposed through getNMRSpectrometers().
00973       std::vector<NMRSpectrometer> nmr_spectrometers_;
00974       // Exposed through getMonomericPolymers() / getNumberOfMonomericPolymers().
00976       vector<MonomericPolymer> monomeric_polymers_;
00977       // Flags returned by hasHshifts(), hasCshifts() and hasNshifts().
00979       bool has_H_shifts_;
00980       bool has_C_shifts_;
00981       bool has_N_shifts_;
00982       // NOTE(review): the dummy_* members below are presumably returned by the reference-returning lookups on a miss -- confirm.
00983       // a dummy saveframe
00984       SaveFrame dummy_saveframe_;
00985 
00986       // a dummy sample condition
00987       SampleCondition dummy_sample_condition_;
00988 
00989       // a dummy sample
00990       Sample dummy_sample_;
00991 
00992       // a dummy shift reference set
00993       ShiftReferenceSet dummy_shift_reference_set_;
00994 
00995       // a dummy nmr spectrometer
00996       NMRSpectrometer dummy_NMR_spectrometer_;
00997 
00998       // a dummy monomeric polymer
00999       MonomericPolymer dummy_monomeric_polymer_;
01000 
01002       String special_characters_;
01003       //_@}
01004   };
01005 
01007 } // Namespace BALL
01008 
01009 #endif // BALL_FORMAT_NMRSTARFILE_H
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Defines