// BALL 1.4.1
00001 /* featureSelection.h 00002 * 00003 * Copyright (C) 2009 Marcel Schumann 00004 * 00005 * This file is part of QuEasy -- A Toolbox for Automated QSAR Model 00006 * Construction and Validation. 00007 * QuEasy is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation; either version 3 of the License, or (at 00010 * your option) any later version. 00011 * 00012 * QuEasy is distributed in the hope that it will be useful, but 00013 * WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, see <http://www.gnu.org/licenses/>. 00019 */ 00020 00021 // -*- Mode: C++; tab-width: 2; -*- 00022 // vi: set ts=2: 00023 // 00024 // 00025 00026 #ifndef FEATURESEL 00027 #define FEATURESEL 00028 00029 #ifndef MODEL 00030 #include <BALL/QSAR/Model.h> 00031 #endif 00032 00033 #ifndef KMODEL 00034 #include <BALL/QSAR/kernelModel.h> 00035 #endif 00036 00037 #ifndef LMODEL 00038 #include <BALL/QSAR/linearModel.h> 00039 #endif 00040 00041 #include <set> 00042 00043 namespace BALL 00044 { 00045 namespace QSAR 00046 { 00047 00048 class BALL_EXPORT FeatureSelection 00049 { 00050 public: 00054 FeatureSelection(Model& m); 00055 00056 FeatureSelection(KernelModel& m); 00057 00058 ~FeatureSelection(); 00060 00061 00066 void setModel(Model& m); 00067 00068 void setModel(KernelModel& km); 00069 00074 void forwardSelection(int k=4, bool optPar=0); 00075 00080 void backwardSelection(int k=4, bool optPar=0); 00081 00082 void stepwiseSelection(int k=4, bool optPar=0); 00083 00088 void twinScan(int k, bool optPar=0); 00089 00094 void implicitSelection(LinearModel& lm, int act=1, double d=1); 00095 00098 void removeHighlyCorrelatedFeatures(double& 
cor_threshold); 00099 00100 00102 void removeLowResponseCorrelation(double& min_correlation); 00103 00104 00106 void removeEmptyDescriptors(); 00107 00108 void selectStat(int s); 00109 00112 void setQualityIncreaseCutoff(double& d); 00114 00115 00116 private: 00117 00121 void updateWeights(std::multiset<unsigned int>& oldDescIDs, std::multiset<unsigned int>& newDescIDs, Vector<double>& oldWeights); 00123 00124 00130 std::multiset<unsigned int>* findIrrelevantDescriptors(); 00131 00133 Model* model_; 00134 00136 Vector<double>* weights_; 00137 00139 void forward(bool stepwise, int k, bool optPar); 00140 00142 double quality_increase_cutoff_; 00144 }; 00145 } 00146 } 00147 00148 00149 #endif // FEATURESEL