BALL  1.4.1
Model.h
Go to the documentation of this file.
00001 /* Model.h
00002  * 
00003  * Copyright (C) 2009 Marcel Schumann
00004  * 
00005  * This file is part of QuEasy -- A Toolbox for Automated QSAR Model
00006  * Construction and Validation.
00007  * QuEasy is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 3 of the License, or (at
00010  * your option) any later version.
00011  * 
00012  * QuEasy is distributed in the hope that it will be useful, but
00013  * WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * General Public License for more details.
00016  * 
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, see <http://www.gnu.org/licenses/>.
00019  */
00020 
00021 // -*- Mode: C++; tab-width: 2; -*-
00022 // vi: set ts=2:
00023 //
00024 //
00025 #ifndef MODEL
00026 #define MODEL
00027 
00028 #include <vector>
00029 #include <set>
00030 
00031 #include <BALL/MATHS/LINALG/matrix.h>
00032 
00033 #ifndef VALIDATION
00034 #include <BALL/QSAR/validation.h>
00035 #endif
00036 
00037 #ifndef QSARH
00038 #include <BALL/QSAR/QSARData.h>
00039 #endif
00040 
00041 #ifndef QSAR_EXCEPTION
00042 #include <BALL/QSAR/exception.h>
00043 #endif
00044 
00045 #include <BALL/MATHS/parsedFunction.h>
00046 
00047 namespace BALL 
00048 {
00049   
00050   namespace QSAR
00051   { 
00052     class BALL_EXPORT Model
00053     { 
              // Abstract base class for QSAR models. train(), predict(), saveToFile()
              // and readFromFile() are pure virtual, so only subclasses that implement
              // them can be instantiated.
              // NOTE(review): the number at the start of each line is the listing's own
              // line counter; gaps in it (e.g. 00054 -> 00060) presumably mark
              // documentation comments that were dropped from this listing -- confirm
              // against the original header.
00054       public:
              // Constructs a model from a QSARData object.
              // NOTE(review): presumably q becomes the data source referenced by
              // `data` -- confirm in the implementation.
00060         Model(const QSARData& q); 
00061 
              // Virtual destructor, so subclasses can be destroyed through a Model pointer.
00062         virtual ~Model();
00063         
              // Virtual assignment from another model. Returns void, so assignments
              // cannot be chained (a = b = c does not compile).
00066         virtual void operator=(const Model& m);
00068         
00069         
              // NOTE(review): presumably copies the data held by m into this model --
              // confirm in the implementation.
00074         void copyData(const Model& m);
00075         
              // NOTE(review): presumably copies only the descriptor ID set of m --
              // confirm in the implementation.
00077         void copyDescriptorIDs(const Model& m);
00078         
00079         
              // NOTE(review): presumably fills the protected training-data members
              // from `data` -- confirm in the implementation.
00083         void readTrainingData();
00084         
00085         
              // Pure virtual prediction entry point: takes the values of one substance
              // plus a transform flag and returns a Vector<double>.
              // NOTE(review): `transform` presumably selects whether the stored
              // descriptor/response transformations are applied -- confirm.
00092         virtual Vector<double> predict(const vector<double>& substance, bool transform) =0; 
00093         
              // NOTE(review): presumably empties descriptor_IDs_ -- confirm.
00095         void deleteDescriptorIDs();
00096       
              // Pure virtual: subclasses implement the actual model training here.
00098         virtual void train() =0;
00099         
00100         
              // Base implementation ignores both arguments and returns 0 (false).
              // The parameter names (k, no_steps) are only given inside the comments
              // of the signature; subclasses may override this.
00105         virtual bool optimizeParameters(int /*k*/, int /*no_steps*/){return 0;};
00106         
              // Non-virtual single-argument overload.
              // NOTE(review): presumably forwards to the two-argument version using
              // default_no_opt_steps_ -- confirm in the implementation.
00107         bool optimizeParameters(int k);
00108         
              // Base implementation returns -1.0; subclasses may override this.
              // NOTE(review): -1.0 presumably means "not available" -- confirm.
00109         virtual double calculateStdErr()
00110         {return -1.0;};
00111         
              // Base implementation does nothing with the given vector.
00113         virtual void setParameters(vector<double>& /*v*/){};
00114         
              // Virtual, const; returns the parameters by value. The base behaviour is
              // defined outside this header.
00115         virtual vector<double> getParameters() const;     
00116 
              // Returns a non-const pointer to a multiset of unsigned descriptor IDs.
              // NOTE(review): presumably the address of descriptor_IDs_ -- confirm.
00118         std::multiset<unsigned int>* getDescriptorIDs();
00119         
              // NOTE(review): presumably stores q in `data` -- confirm.
00120         void setDataSource(const QSARData* q);
00121         
              // Pure virtual: subclasses write the model to the named file.
00123         virtual void saveToFile(string filename) = 0;
00124         
              // Pure virtual: subclasses restore the model from the named file.
00126         virtual void readFromFile(string filename) = 0;
00127         
              // The four getters below return pointers to const objects.
              // NOTE(review): presumably these are the addresses of descriptor_matrix_,
              // substance_names_, descriptor_names_ and Y_ -- confirm.
00129         const Matrix<double>* getDescriptorMatrix();
00130         
00132         const vector<string>* getSubstanceNames();
00133         
00135         const vector<string>* getDescriptorNames();
00136         
00138         const Matrix<double>* getY();
00139         
              // Takes the new descriptor ID set by const reference.
              // NOTE(review): presumably replaces descriptor_IDs_ -- confirm.
00141         void setDescriptorIDs(const std::multiset<unsigned int>& sl);
00142         
              // Returns a pointer to a const string.
              // NOTE(review): presumably points at type_, which is declared as
              // String rather than string -- confirm.
00144         const string* getType();
00145         
              // Both functions return void and hand their result back through the
              // reference parameter return_value.
              // NOTE(review): valid ranges of the compound/feature/response indices
              // are not visible here -- confirm.
00147         void getUnnormalizedFeatureValue(int compound, int feature, double& return_value);
00148         
00150         void getUnnormalizedResponseValue(int compound, int response, double& return_value);
00152         
00153         
              // Public pointer to const QSARData. Ownership is not visible from this
              // header.
00158         const QSARData* data;
00159         
              // Public pointer to a Validation object. Ownership is not visible from
              // this header.
00161         Validation* model_val;
00163         
00164         
00165       protected:
00166         
              // NOTE(review): presumably the step count used by optimizeParameters(int)
              // -- confirm.
00170         int default_no_opt_steps_; 
00171       
              // Two overloads that differ only in the input container type
              // (vector<double> vs. Vector<double>); both return a Vector<double>.
00177         Vector<double> getSubstanceVector(const vector<double>& substance, bool transform);
00178         
00179         Vector<double> getSubstanceVector(const Vector<double>& substance, bool transform);
00180         
              // Takes the prediction by non-const reference.
              // NOTE(review): presumably undoes the response transformation in place
              // -- confirm.
00182         void backTransformPrediction(Vector<double>& pred);
00183         
              // Both arguments are non-const references.
              // NOTE(review): presumably adds lambda to the diagonal of matrix --
              // confirm in the implementation.
00185         void addLambda(Matrix<double>& matrix, double& lambda);
00186         
00188         void readDescriptorInformation();
00190         
00191         
              // File (de)serialisation helpers working on std::ifstream/std::ofstream.
              // NOTE(review): this header does not include <fstream> itself, and `uint`
              // is not a standard C++ type; both presumably arrive through one of the
              // BALL headers included above -- confirm.
00196         void readMatrix(Matrix<double>& mat, std::ifstream& in, uint lines, uint col);
00197         
00198         void readVector(Vector<double>& vec, std::ifstream& in, uint no_cells, bool column_vector);
00199         
00200         void readModelParametersFromFile(std::ifstream& in);
00201         void saveModelParametersToFile(std::ofstream& out);
00202         
00203         
              // The descriptor-information pair is virtual, so subclasses can replace
              // it; the response-transformation pair is not.
00205         virtual void saveDescriptorInformationToFile(std::ofstream& out);
00206         virtual void readDescriptorInformationFromFile(std::ifstream& in, int no_descriptors, bool transformation);
00207         
00208         void readResponseTransformationFromFile(std::ifstream& in, int no_y);
00209         void saveResponseTransformationToFile(std::ofstream& out);
00211         
00212         
              // Data members. NOTE(review): the matrix layouts (what rows and columns
              // stand for) are not visible in this header -- confirm before relying
              // on them.
00217         Matrix<double> descriptor_matrix_;
00218           
00220         vector<string> substance_names_;
00221   
00223         vector<string> descriptor_names_;
00224         
00227         Matrix<double> descriptor_transformations_;
00228         
00231         Matrix<double> y_transformations_;
00232         
00235         Matrix<double> Y_;
00236 
00238         String type_;
00239         
              // A multiset, so the same descriptor ID can be stored more than once.
00243         std::multiset<unsigned int> descriptor_IDs_;      
00245         
              // Friends get access to the protected members above. PCRModel, KPCRModel
              // and FeatureSelection are friends only when BALL_HAS_LAPACK is defined.
00246         friend class Validation;
00247         friend class RegressionValidation;
00248         friend class ClassificationValidation;
00249 #ifdef BALL_HAS_LAPACK
00250         friend class PCRModel;
00251         friend class KPCRModel;
00252         friend class FeatureSelection;
00253 #endif //BALL_HAS_LAPACK
00254     };
00255     
// Free function in namespace BALL::QSAR: takes a model file name (by value) and a
// QSARData reference and returns a raw Model pointer.
// NOTE(review): presumably the returned object is heap-allocated and owned by the
// caller; ownership is not visible from this declaration -- confirm.
00257     Model* createNewModelFromFile(String model_file, const QSARData& q);
00258   }
00259 }
00260 
00261 #endif // MODEL
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Defines