BALL  1.4.1
regressionValidation.h
Go to the documentation of this file.
00001 /* regressionValidation.h
00002  * 
00003  * Copyright (C) 2009 Marcel Schumann
00004  * 
00005  * This file is part of QuEasy -- A Toolbox for Automated QSAR Model
00006  * Construction and Validation.
00007  * QuEasy is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 3 of the License, or (at
00010  * your option) any later version.
00011  * 
00012  * QuEasy is distributed in the hope that it will be useful, but
00013  * WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * General Public License for more details.
00016  * 
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, see <http://www.gnu.org/licenses/>.
00019  */
00020 
00021 // -*- Mode: C++; tab-width: 2; -*-
00022 // vi: set ts=2:
00023 //
00024 //
00025 
00026 #ifndef REGVALIDATION
00027 #define REGVALIDATION
00028 
00029 #ifndef QSARDATA
00030 #include <BALL/QSAR/QSARData.h>
00031 #endif
00032 
00033 #ifndef VALIDATION
00034 #include <BALL/QSAR/validation.h>
00035 #endif
00036 
00037 #include <gsl/gsl_randist.h>
00038 #include <gsl/gsl_cdf.h>
00039 #include <iterator>
00040 
00041 
00042 #include <BALL/MATHS/LINALG/matrix.h>
00043 #include <BALL/MATHS/LINALG/vector.h>
00044 
00045 
00046 
00047 namespace BALL
00048 { 
00049   namespace QSAR
00050   {
00051     class RegressionModel;
          /**
           * Validation front end for regression models.
           *
           * Publicly derives from Validation and is constructed from a pointer to
           * the RegressionModel it works on. This header only declares the
           * interface; every note below that is worded "presumably" or marked
           * NOTE(review)/TODO is inferred from names and must be confirmed against
           * the implementation file before being relied upon.
           */
00053     class BALL_EXPORT RegressionValidation : public Validation
00054     { 
00055       public:
              /**
               * Creates a validation object for the given regression model.
               * @param m the regression model; presumably kept in regr_model_
               *          without taking ownership -- TODO confirm.
               * NOTE(review): the constructor is not `explicit`, so a
               * RegressionModel* converts implicitly to a RegressionValidation.
               */
00061         RegressionValidation(RegressionModel* m);
00062 
00063         ~RegressionValidation();
00065         
00066         
              /**
               * Runs a cross-validation.
               * @param k presumably the number of folds -- TODO confirm.
               * @param restore defaults to true; presumably controls whether the
               *        model's training state is restored afterwards (compare
               *        backupTrainingResults()/restoreTrainingResults()).
               */
00073         void crossValidation(int k, bool restore=true);
00074         
              /**
               * Overload of crossValidation() that additionally takes an output
               * container.
               * @param k see crossValidation(int, bool).
               * @param results pointer to a vector of matrices; presumably filled
               *        with per-run results -- content and nullability are not
               *        visible here, TODO confirm.
               * @param restore defaults to true; see crossValidation(int, bool).
               */
00079         void crossValidation(int k, vector<BALL::Matrix<double> >* results, bool restore=true);
00080         
00081         
              /**
               * Runs a bootstrap validation.
               * @param k presumably the number of bootstrap samples -- TODO confirm.
               * @param restore defaults to true; presumably as for crossValidation().
               */
00085         void bootstrap(int k, bool restore=true);
00086         
              /**
               * Overload of bootstrap() that additionally takes an output container.
               * @param k see bootstrap(int, bool).
               * @param results pointer to a vector of matrices, presumably filled
               *        with per-sample results -- TODO confirm.
               * @param restore defaults to true; see bootstrap(int, bool).
               */
00091         void bootstrap(int k, vector<BALL::Matrix<double> >* results, bool restore=true);
00092         
              /**
               * Same parameter list as bootstrap(int, vector*, bool).
               * NOTE(review): how this differs from bootstrap() cannot be told
               * from the declaration; check the implementation.
               */
00093         void bootstrap1(int k, vector<BALL::Matrix<double> >* results, bool restore=true);
00094         
              /**
               * Runs a y-randomization test and returns a reference to a result
               * matrix.
               * @param runs presumably the number of randomization runs.
               * @param k presumably forwarded to the validation done in each run.
               * @return const reference to a matrix; its layout and the lifetime of
               *         the referenced object are not visible here -- TODO confirm.
               */
00098         const BALL::Matrix<double>& yRandomizationTest(int runs, int k);
00099         
              /** Returns the Q2 statistic (presumably the value held in Q2_). */
00102         double getQ2();
00103         
              /** Returns the R2 statistic (presumably the value held in R2_). */
00106         double getR2();   
00107         
              /** Returns the regression F statistic (presumably F_regr_). */
00110         double getFregr();
00111             
              /** Returns the cross-validation F statistic (presumably F_cv_). */
00114         double getFcv();
00115         
              /**
               * Returns the cross-validation result value.
               * NOTE(review): which statistic this is presumably depends on
               * selectStat() via predQualFetcher_ -- confirm.
               */
00116         double getCVRes();
00117         
              /**
               * Returns the fit result value.
               * NOTE(review): which statistic this is presumably depends on
               * selectStat() via fitQualFetcher_ -- confirm.
               */
00118         double getFitRes();
00119         
              /** Returns the maximum error (presumably the value held in max_error_). */
00121         double getMaxError();
00122         
              /** Sets the cross-validation result value to d. */
00123         void setCVRes(double d);
00124     
              /** Sets the Q2 value to d (presumably writes Q2_). */
00126         void setQ2(double d);
00127         
              /**
               * Tests the model on input data.
               * @param transform defaults to false; presumably forwarded to
               *        testAllSubstances() -- its exact effect is not visible here.
               */
00128         void testInputData(bool transform=false);
00129         
              /**
               * Selects the statistic used for validation.
               * @param s integer selector; the meaning of individual values is not
               *        visible in this header -- TODO document from implementation.
               */
00133         void selectStat(int s);
00134         
              /**
               * Calculates standard errors of the model coefficients (presumably
               * stored in coefficient_stderr_).
               * @param k presumably the number of folds/samples to use.
               * @param b defaults to true; presumably switches between two
               *        resampling schemes -- TODO confirm which.
               */
00138         void calculateCoefficientStdErrors(int k, bool b=true);
00139       
              /**
               * Returns a pointer to the coefficient standard-error matrix
               * (presumably &coefficient_stderr_ -- confirm; callers should not
               * assume ownership).
               */
00141         const BALL::Matrix<double>* getCoefficientStdErrors();
00142         
              /**
               * Sets the coefficient standard errors from the given matrix.
               * @param stddev source matrix; whether it is copied or may be null is
               *        not visible here -- TODO confirm.
               */
00143         void setCoefficientStdErrors(const BALL::Matrix<double>* stddev);
00144         
              /** Saves validation results to the file named filename. */
00145         void saveToFile(string filename) const;
00146         
              /**
               * Saves the explicitly supplied results to the file named filename.
               * @param filename output file name.
               * @param r2 R2 value to write.
               * @param q2 Q2 value to write.
               * @param coefficient_stddev coefficient standard-deviation matrix to write.
               * @param yRand_results y-randomization result matrix to write.
               */
00147         void saveToFile(string filename, const double& r2, const double& q2, const Matrix<double>& coefficient_stddev, const Matrix<double>& yRand_results) const;
00148         
              /** Reads validation results from the file named filename. */
00149         void readFromFile(string filename);
00151         
00152   
00153       private:
00154         
              /**
               * Plain aggregate of matrices held in backup_data_. Presumably a
               * snapshot of model state taken by backupTrainingResults() and written
               * back by restoreTrainingResults() -- TODO confirm.
               */
00155         struct BackupData
00156         {
00157           Matrix<double> descriptor_matrix;
00158           Matrix<double> training_result;
00159           Matrix<double> Y;
00160           Matrix<double> K;
00161           Matrix<double> latent_variables;
00162           Matrix<double> loadings;
00163           Matrix<double> weights;
00164         };
00165         
00166         
              /**
               * Tests the model on all substances.
               * @param transform no default here (unlike testInputData()); its exact
               *        effect is not visible in this header.
               */
00171         void testAllSubstances(bool transform);
00172         
              /** Presumably saves the model's training results into backup_data_. */
00173         void backupTrainingResults();
00174         
              /** Presumably writes backup_data_ back into the model. */
00175         void restoreTrainingResults();
00177         
00178         
00182         
              /** Presumably the regression sum of squares -- confirm definition. */
00183         double ssR_;
00184         
              /** Presumably the error (residual) sum of squares -- confirm definition. */
00185         double ssE_;
00186         
              /** Presumably the total sum of squares of the response -- confirm. */
00188         double ssY_;
00189         
              /** Standard error value. */
00191         double std_err_;
00192   
              /** Q2 value (see getQ2()/setQ2()). */
00194         double Q2_;
00195   
              /** F value associated with cross-validation (see getFcv()). */
00197         double F_cv_;
00198         
              /** F value associated with regression (see getFregr()). */
00200         double F_regr_;
00201   
              /** R2 value (see getR2()). */
00202         double R2_;
00203         
              /** Maximum error value (see getMaxError()). */
00204         double max_error_;
00205         
              /** Quality value; NOTE(review): exact meaning not visible in this header. */
00207         double quality_;
00208         
              /**
               * Pointer to a no-argument member function of this class returning
               * double; presumably the getter used for the predictive-quality value.
               */
00209         double (RegressionValidation::* predQualFetcher_)();
00210         
              /**
               * Pointer to a no-argument member function of this class returning
               * double; presumably the getter used for the fit-quality value.
               */
00211         double (RegressionValidation::* fitQualFetcher_)();
00212         
              /** Presumably computes the quality-of-fit statistics -- confirm. */
00213         void calculateQOF();
00214         
              /** Matrix of coefficient standard errors (see getCoefficientStdErrors()). */
00216         BALL::Matrix<double> coefficient_stderr_;
00217         
              /** The regression model this object works on; raw, presumably non-owning pointer. */
00219         RegressionModel* regr_model_;
00220         
              /** Backup storage of type BackupData. */
00221         BackupData backup_data_;
00222         
              /**
               * Pointer to a no-argument void member function of this class;
               * presumably the quality-calculation routine currently selected.
               */
00223         void (RegressionValidation::* qualCalculation)();
00225         
00226     };
00227   }
00228 }
00229 
00230 
00231 
00232 #endif // REGVALIDATION
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Defines