[ VIGRA Homepage | Function Index | Class Index | Namespaces | File List | Main Page ]

vigra/random_forest/rf_sampling.hxx
00001 /************************************************************************/
00002 /*                                                                      */
00003 /*        Copyright 2008-2009 by  Ullrich Koethe and Rahul Nair         */
00004 /*                                                                      */
00005 /*    This file is part of the VIGRA computer vision library.           */
00006 /*    The VIGRA Website is                                              */
00007 /*        http://hci.iwr.uni-heidelberg.de/vigra/                       */
00008 /*    Please direct questions, bug reports, and contributions to        */
00009 /*        ullrich.koethe@iwr.uni-heidelberg.de    or                    */
00010 /*        vigra@informatik.uni-hamburg.de                               */
00011 /*                                                                      */
00012 /*    Permission is hereby granted, free of charge, to any person       */
00013 /*    obtaining a copy of this software and associated documentation    */
00014 /*    files (the "Software"), to deal in the Software without           */
00015 /*    restriction, including without limitation the rights to use,      */
00016 /*    copy, modify, merge, publish, distribute, sublicense, and/or      */
00017 /*    sell copies of the Software, and to permit persons to whom the    */
00018 /*    Software is furnished to do so, subject to the following          */
00019 /*    conditions:                                                       */
00020 /*                                                                      */
00021 /*    The above copyright notice and this permission notice shall be    */
00022 /*    included in all copies or substantial portions of the             */
00023 /*    Software.                                                         */
00024 /*                                                                      */
00025 /*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
00026 /*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
00027 /*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
00028 /*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
00029 /*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
00030 /*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
00031 /*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
00032 /*    OTHER DEALINGS IN THE SOFTWARE.                                   */
00033 /*                                                                      */
00034 /************************************************************************/
00035 #ifndef RN_SAMPLER_HXX
00036 #define RN_SAMPLER_HXX
00037 
00038 #include <vigra/array_vector.hxx>
00039 #include <vigra/random.hxx>
00040 #include <map>
00041 #include <math.h>
00042 #include <memory>
00043 namespace vigra
00044 {
00045 
00046 class SamplingOptions
00047 {
00048     public:
00049     typedef std::auto_ptr<vigra::ArrayVectorView<int> > strata_ptr_type;
00050     strata_ptr_type strata;
00051     bool sample_with_replacement;
00052     bool sample_stratified;
00053     bool sample_classes_individually;
00054     bool use_internal_mem;
00055     ArrayVectorView<Int32> mem;
00056 
00057 
00058     SamplingOptions(): sample_with_replacement(true),
00059                         sample_stratified(false), sample_classes_individually(false),
00060                         use_internal_mem(true)
00061     { this->strata = strata_ptr_type(new vigra::ArrayVectorView<int>());
00062     }
00063 
00064     SamplingOptions( SamplingOptions const& rhs): sample_with_replacement(rhs.sample_with_replacement),
00065                         sample_stratified(rhs.sample_stratified), sample_classes_individually(rhs.sample_classes_individually)
00066     {
00067         this->strata = strata_ptr_type(new vigra::ArrayVectorView<int>(*(rhs.strata)));
00068     }
00069 
00070     void operator=(SamplingOptions const& rhs)
00071     {
00072         this->sample_with_replacement = rhs.sample_with_replacement;
00073         this->sample_stratified = rhs.sample_stratified;
00074         this->sample_classes_individually = rhs.sample_classes_individually;
00075         this->strata = strata_ptr_type(new vigra::ArrayVectorView<int>(*(rhs.strata)));
00076         this->use_internal_mem = rhs.use_internal_mem;
00077         this->mem = rhs.mem;
00078     }
00079     SamplingOptions& sampleWithReplacement(bool in =true)
00080     {
00081         sample_with_replacement = in;
00082         return *this;
00083     }
00084 
00085     SamplingOptions& useExternalMemory(ArrayVectorView<Int32> & mem_)
00086     {
00087         this->use_internal_mem = false;
00088         this->mem = mem_;
00089         return *this;
00090     }
00091 
00092 
00093     SamplingOptions& sampleWithoutReplacement()
00094     {
00095         sample_with_replacement = false;
00096         return *this;
00097     }
00098 
00099 
00100     SamplingOptions& sampleStratified(vigra::ArrayVectorView<int> in)
00101     {
00102 
00103         strata = strata_ptr_type(new vigra::ArrayVectorView<int>(in));
00104         sample_stratified = true;
00105         sample_classes_individually = false;
00106         return *this;
00107     }
00108 
00109     SamplingOptions& sampleClassesIndividually(vigra::ArrayVectorView<int> in)
00110     {
00111         strata = strata_ptr_type(new vigra::ArrayVectorView<int>(in));
00112         sample_stratified = false;
00113         sample_classes_individually = true;
00114         return *this;
00115     }
00116 
00117     SamplingOptions& resetStrata()
00118     {
00119         strata.reset(new vigra::ArrayVectorView<int>());
00120         sample_stratified = false;
00121         sample_classes_individually = false;
00122         return *this;
00123     }
00124 };
00125 
00126 
00127 
00128 
00129 
00130 
00131 
00132 
00133 template<class Random =UniformIntRandomFunctor<RandomTT800> >
00134 class Sampler
00135 {
00136     public:
00137     typedef Int32                               IndexType;
00138     typedef vigra::ArrayVector     <IndexType>  IndexArrayType;
00139     typedef vigra::ArrayVectorView <IndexType>  IndexArrayViewType;
00140     typedef vigra::ArrayVector     <bool>       IsUsedArrayType;
00141     typedef vigra::ArrayVectorView <bool>       IsUsedArrayViewType;
00142 
00143     Random  randint;
00144 
00145     private:
00146         bool unused_indices_set;
00147         typedef std::map<IndexType, vigra::ArrayVector< IndexType> > StrataIndicesType;
00148         typedef std::map<IndexType, int> StrataSizesType;
00149         StrataIndicesType     strata_indices_;
00150         StrataSizesType       strata_sizes_;
00151         IndexArrayType        internal_used_indices_;
00152         IndexArrayViewType    used_indices_;
00153         IndexArrayType        unused_indices_;
00154         IsUsedArrayType         is_used_;
00155 
00156         void (Sampler::*sampling_func_ptr_)(void);
00157 
00158         void sample_stratified_w_rep()
00159         {
00160 
00161             is_used_.init(false);
00162 
00163             //Go thru each strata
00164             StrataIndicesType::iterator iter;
00165             int jj = 0;
00166             for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
00167             {
00168                 // do sampling with replacement in each strata and copy
00169                 // data.
00170                 int sze = iter->second.size();
00171                 for(int ii = 0; ii < (int)strata_sizes_[iter->first]; ++ii)
00172                 {
00173                     used_indices_[jj] = iter->second[randint(sze)];
00174                     is_used_[used_indices_[jj] ] = 1;
00175                     jj++;
00176                 }
00177 
00178             }
00179         }
00180         void sample_stratified_wo_rep()
00181         {
00182 
00183 
00184             // reset is_used
00185             is_used_.init(false);
00186 
00187             //Go thru each strata
00188             StrataIndicesType::iterator iter;
00189             int jj = 0;
00190             for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
00191             {
00192                 // do sampling without replacement in each strata and copy
00193                 // data.
00194                 for(int ii = 0; ii < (int)strata_sizes_[iter->first]; ++ii)
00195                 {
00196 
00197 
00198                     std::swap(iter->second[ii], iter->second[ii+ randint(iter->second.size() - ii)]);
00199                     used_indices_[jj] = iter->second[ii];
00200 
00201                     is_used_[used_indices_[jj] ] = 1;
00202                     jj++;
00203                 }
00204 
00205 
00206             }
00207         }
00208         void sample_w_rep()
00209         {
00210 
00211             is_used_.init(false);
00212             for(int ii = 0; ii < num_of_samples; ++ii)
00213             {
00214                 used_indices_[ii] = randint(max_index);
00215                 is_used_[used_indices_[ii] ] = true;
00216             }
00217         }
00218         void sample_wo_rep()
00219         {
00220 
00221             is_used_.init(false);
00222             for(int ii = 0; ii < num_of_samples; ++ii)
00223             {
00224                 std::swap(used_indices_[ii], used_indices_[ii+ randint(max_index - ii)]);
00225                 is_used_[used_indices_[ii] ] = true;
00226             }
00227         }
00228 
00229     public:
00230 
00231     SamplingOptions options_;
00232 
00233     IndexType const & operator[](int in)
00234     {
00235         return used_indices_[in];
00236     }
00237 
00238     IndexArrayViewType used_indices()
00239     {
00240         return used_indices_.subarray(0, num_of_samples);
00241     }
00242 
00243     IndexArrayViewType unused_indices()
00244     {
00245         if(unused_indices_set)
00246         {
00247             return unused_indices_;
00248         }
00249         else
00250         {
00251             unused_indices_.clear();
00252             for(int ii = 0; ii < (int)is_used().size(); ++ii)
00253             {
00254                 if(is_used_[ii])
00255                     unused_indices_.push_back(ii);
00256             }
00257             unused_indices_set = true;
00258         }
00259         return unused_indices_;
00260     }
00261 
00262     IndexArrayViewType used_indices_volatile()
00263     {
00264         return IndexArrayViewType(used_indices_);
00265     }
00266 
00267     IsUsedArrayViewType is_used()
00268     {
00269         return IsUsedArrayViewType(is_used_);
00270     }
00271 
00272     int num_of_samples;
00273     int max_index;
00274 
00275     inline void sample(  )
00276     {
00277         unused_indices_set = false;
00278         (this->*sampling_func_ptr_)();
00279     }
00280 
00281     int numOfSamples()
00282     {
00283         return num_of_samples;
00284     }
00285 
00286     int numOfSamples(int n)
00287     {
00288         num_of_samples = n;
00289         if(!options_.sample_with_replacement && ((*options_.strata).data() != 0) && options_.use_internal_mem)
00290         {
00291             internal_used_indices_.resize(num_of_samples);
00292             used_indices_ = internal_used_indices_;
00293         }
00294         else if (!options_.use_internal_mem)
00295             vigra_fail("Sampler::numOfSamples() : Sampler is using external memory - no resize possible");
00296         return num_of_samples;
00297     }
00298 
00299     void init(  int numOfSamples ,
00300                 int maxIndex     ,
00301                 SamplingOptions const & opt)
00302     {
00303         unused_indices_set = false;
00304         strata_indices_.clear();
00305         strata_sizes_.clear();
00306         max_index = maxIndex;
00307         num_of_samples = numOfSamples;
00308         options_ = opt;
00309         if(!options_.use_internal_mem)
00310         {
00311             used_indices_ = options_.mem;
00312         }
00313         //Allocate memory for the used/unused vector.
00314 
00315         is_used_.resize(max_index);
00316 
00317 
00318 
00319         if((*options_.strata).data() != 0)
00320         {
00321 
00322 
00323             // Set up the data struct used.
00324             for(int ii = 0; ii < maxIndex; ++ii)
00325             {
00326 
00327                 strata_indices_[(*(options_.strata))[ii]].push_back(ii);
00328             }
00329 
00330             // Set up the strata size
00331             if(options_.sample_classes_individually)
00332             {
00333                 StrataIndicesType::difference_type total_size = 0;
00334                 StrataIndicesType::iterator iter;
00335                 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
00336                 {
00337                     // Set the size of the strata to be sampled to the fixed proportion of num_of_samples
00338                     // This value is the same for all strata
00339                     strata_sizes_[iter->first] = (int)ceil(double(num_of_samples) / strata_indices_.size());
00340                     total_size += strata_sizes_[iter->first];
00341                 }
00342                 int cut_off = std::abs(int(total_size - num_of_samples));
00343                 if(cut_off != 0)
00344                 {
00345                     StrataIndicesType::iterator end_iter = strata_indices_.begin();
00346                     for(int ii = 0; ii < cut_off; ++ii)
00347                     {
00348                         ++end_iter;
00349                     }
00350                     for(iter = strata_indices_.begin(); iter != end_iter; ++iter)
00351                     {
00352                         strata_sizes_[iter->first] -= 1;
00353 
00354                     }
00355                 }
00356 
00357             }
00358 
00359             else
00360             {
00361                 StrataIndicesType::iterator iter;
00362                 unsigned int total_size = 0;
00363                 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
00364                 {
00365                     // Set the size of the strata to be sampled to be proportional of the size of the strata
00366                     // This value is different for each strata
00367                     strata_sizes_[iter->first] = (int)ceil((double(iter->second.size())/double(max_index)) * num_of_samples );
00368                     total_size += strata_sizes_[iter->first];
00369                 }
00370                 int cut_off = std::abs(int(total_size - num_of_samples));
00371                 if(cut_off != 0)
00372                 {
00373                     for(int ii = 0; ii < cut_off; ++ii)
00374                     {
00375                         StrataIndicesType::iterator curmax = strata_indices_.begin();
00376                         for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
00377                         {
00378                             if(strata_sizes_[iter->first] > strata_sizes_[curmax->first])
00379                                 curmax = iter;
00380                         }
00381                         strata_sizes_[curmax->first] -= 1;
00382                     }
00383                 }
00384             }
00385 
00386 
00387               // Set sampling function
00388             if(options_.sample_with_replacement)
00389             {
00390                 sampling_func_ptr_ = &Sampler::sample_stratified_w_rep;
00391             }
00392             else
00393             {
00394                 // Check whether there are enough samples to sample the needed strata size from the population
00395                 StrataSizesType::iterator iter;
00396                 for(iter = strata_sizes_.begin(); iter != strata_sizes_.end(); ++iter)
00397                 {
00398                     vigra_precondition(iter->second <= (int)strata_indices_[iter->first].size(),
00399                                                 "Not enough samples to sample classes individually //stratified and\
00400                                                     without replacement");
00401                 }
00402                 sampling_func_ptr_ = &Sampler::sample_stratified_wo_rep;
00403             }
00404             // Allocate memory for output
00405             if(options_.use_internal_mem)
00406             {
00407                 internal_used_indices_.resize(num_of_samples);
00408                 used_indices_ = internal_used_indices_;
00409             }
00410         }
00411         else // unstratified sampling
00412         {
00413 
00414               // Set sampling function
00415             if(options_.sample_with_replacement)
00416             {
00417                 if(options_.use_internal_mem)
00418                 {
00419                     internal_used_indices_.resize(num_of_samples);
00420                     used_indices_ = internal_used_indices_;
00421                 }
00422                 sampling_func_ptr_ = &Sampler::sample_w_rep;
00423             }
00424             else
00425             {
00426                 vigra_precondition(max_index >= num_of_samples,
00427                                         "Not enough samples to sample without replacement");
00428                 if(options_.use_internal_mem)
00429                 {
00430                     internal_used_indices_.resize(max_index);
00431                     used_indices_ = internal_used_indices_;
00432                     for(int ii = 0; ii < max_index; ++ii)
00433                     {
00434                         used_indices_[ii] = ii;
00435                     }
00436                 }
00437 
00438                 sampling_func_ptr_ = &Sampler::sample_wo_rep;
00439 
00440             }
00441         }
00442         if(options_.use_internal_mem)
00443         {
00444             used_indices_ = internal_used_indices_;
00445         }
00446     }
00447 
00448     inline Sampler(int numOfSamples,int maxIndex, SamplingOptions const & opt, Random & rnd)
00449     :
00450         randint(rnd)
00451     {
00452         init(numOfSamples,maxIndex, opt);
00453     }
00454 
00455     inline Sampler(int numOfSamples, int maxIndex, SamplingOptions const & opt)
00456     {
00457          init(numOfSamples,maxIndex, opt);
00458     }
00459 
00460 };
00461 
00462 template<class Random =RandomTT800 >
00463 class PoissonSampler
00464 {
00465 public:
00466     Random  randfloat;
00467     typedef Int32                               IndexType;
00468     typedef vigra::ArrayVector     <IndexType>  IndexArrayType;
00469     IndexArrayType        used_indices_;
00470     double lambda;
00471     int minIndex;
00472     int maxIndex;
00473     inline PoissonSampler(double lambda,IndexType minIndex,IndexType maxIndex)
00474         : randfloat()
00475     {
00476         this->lambda=lambda;
00477         this->minIndex=minIndex;
00478         this->maxIndex=maxIndex;
00479     }
00480 
00481     inline void sample(  )
00482     {
00483         used_indices_.clear();
00484         IndexType i;
00485         for(i=minIndex;i<maxIndex;++i)
00486         {
00487             //from http://en.wikipedia.org/wiki/Poisson_distribution
00488             int k=0;
00489             double p=1;
00490             double L=exp(-lambda);
00491             do
00492             {
00493                 ++k;
00494                 p*=randfloat.uniform53();
00495 
00496             }while(p>L);
00497             --k;
00498             //Insert i this many time
00499             while(k>0)
00500             {
00501                 used_indices_.push_back(i);
00502                 --k;
00503             }
00504         }
00505     }
00506 
00507     IndexType const & operator[](int in)
00508     {
00509         return used_indices_[in];
00510     }
00511     int numOfSamples()
00512     {
00513         return used_indices_.size();
00514     }
00515 
00516 };
00517 
00518 } /*end of namespace rn*/
00519 #endif /*RN_SAMPLER_HXX*/

© Ullrich Köthe (ullrich.koethe@iwr.uni-heidelberg.de)
Heidelberg Collaboratory for Image Processing, University of Heidelberg, Germany

html generated using doxygen and Python
vigra 1.7.0 (Thu Aug 25 2011)