[ VIGRA Homepage | Function Index | Class Index | Namespaces | File List | Main Page ]
00001 /************************************************************************/ 00002 /* */ 00003 /* Copyright 2008-2009 by Ullrich Koethe and Rahul Nair */ 00004 /* */ 00005 /* This file is part of the VIGRA computer vision library. */ 00006 /* The VIGRA Website is */ 00007 /* http://hci.iwr.uni-heidelberg.de/vigra/ */ 00008 /* Please direct questions, bug reports, and contributions to */ 00009 /* ullrich.koethe@iwr.uni-heidelberg.de or */ 00010 /* vigra@informatik.uni-hamburg.de */ 00011 /* */ 00012 /* Permission is hereby granted, free of charge, to any person */ 00013 /* obtaining a copy of this software and associated documentation */ 00014 /* files (the "Software"), to deal in the Software without */ 00015 /* restriction, including without limitation the rights to use, */ 00016 /* copy, modify, merge, publish, distribute, sublicense, and/or */ 00017 /* sell copies of the Software, and to permit persons to whom the */ 00018 /* Software is furnished to do so, subject to the following */ 00019 /* conditions: */ 00020 /* */ 00021 /* The above copyright notice and this permission notice shall be */ 00022 /* included in all copies or substantial portions of the */ 00023 /* Software. */ 00024 /* */ 00025 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */ 00026 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */ 00027 /* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */ 00028 /* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */ 00029 /* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */ 00030 /* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */ 00031 /* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */ 00032 /* OTHER DEALINGS IN THE SOFTWARE. */ 00033 /* */ 00034 /************************************************************************/ 00035 #ifndef RN_SAMPLER_HXX 00036 #define RN_SAMPLER_HXX 00037 00038 #include <vigra/array_vector.hxx> 00039 #include <vigra/random.hxx> 00040 #include <map> 00041 #include <math.h> 00042 #include <memory> 00043 namespace vigra 00044 { 00045 00046 class SamplingOptions 00047 { 00048 public: 00049 typedef std::auto_ptr<vigra::ArrayVectorView<int> > strata_ptr_type; 00050 strata_ptr_type strata; 00051 bool sample_with_replacement; 00052 bool sample_stratified; 00053 bool sample_classes_individually; 00054 bool use_internal_mem; 00055 ArrayVectorView<Int32> mem; 00056 00057 00058 SamplingOptions(): sample_with_replacement(true), 00059 sample_stratified(false), sample_classes_individually(false), 00060 use_internal_mem(true) 00061 { this->strata = strata_ptr_type(new vigra::ArrayVectorView<int>()); 00062 } 00063 00064 SamplingOptions( SamplingOptions const& rhs): sample_with_replacement(rhs.sample_with_replacement), 00065 sample_stratified(rhs.sample_stratified), sample_classes_individually(rhs.sample_classes_individually) 00066 { 00067 this->strata = strata_ptr_type(new vigra::ArrayVectorView<int>(*(rhs.strata))); 00068 } 00069 00070 void operator=(SamplingOptions const& rhs) 00071 { 00072 this->sample_with_replacement = rhs.sample_with_replacement; 00073 this->sample_stratified = rhs.sample_stratified; 00074 this->sample_classes_individually = rhs.sample_classes_individually; 00075 this->strata = strata_ptr_type(new vigra::ArrayVectorView<int>(*(rhs.strata))); 00076 this->use_internal_mem = rhs.use_internal_mem; 00077 this->mem = rhs.mem; 00078 } 00079 SamplingOptions& sampleWithReplacement(bool in =true) 00080 { 00081 sample_with_replacement = in; 00082 return *this; 00083 } 00084 00085 SamplingOptions& useExternalMemory(ArrayVectorView<Int32> & mem_) 00086 { 00087 this->use_internal_mem = false; 00088 this->mem = mem_; 00089 return *this; 00090 } 00091 00092 00093 SamplingOptions& sampleWithoutReplacement() 00094 { 00095 sample_with_replacement = false; 00096 return *this; 00097 } 00098 00099 00100 SamplingOptions& sampleStratified(vigra::ArrayVectorView<int> in) 00101 { 00102 00103 strata = strata_ptr_type(new vigra::ArrayVectorView<int>(in)); 00104 sample_stratified = true; 00105 sample_classes_individually = false; 00106 return *this; 00107 } 00108 00109 SamplingOptions& sampleClassesIndividually(vigra::ArrayVectorView<int> in) 00110 { 00111 strata = strata_ptr_type(new vigra::ArrayVectorView<int>(in)); 00112 sample_stratified = false; 00113 sample_classes_individually = true; 00114 return *this; 00115 } 00116 00117 SamplingOptions& resetStrata() 00118 { 00119 strata.reset(new vigra::ArrayVectorView<int>()); 00120 sample_stratified = false; 00121 sample_classes_individually = false; 00122 return *this; 00123 } 00124 }; 00125 00126 00127 00128 00129 00130 00131 00132 00133 template<class Random =UniformIntRandomFunctor<RandomTT800> > 00134 class Sampler 00135 { 00136 public: 00137 typedef Int32 IndexType; 00138 typedef vigra::ArrayVector <IndexType> IndexArrayType; 00139 typedef vigra::ArrayVectorView <IndexType> IndexArrayViewType; 00140 typedef vigra::ArrayVector <bool> IsUsedArrayType; 00141 typedef vigra::ArrayVectorView <bool> IsUsedArrayViewType; 00142 00143 Random randint; 00144 00145 private: 00146 bool unused_indices_set; 00147 typedef std::map<IndexType, vigra::ArrayVector< IndexType> > StrataIndicesType; 00148 typedef std::map<IndexType, int> StrataSizesType; 00149 StrataIndicesType strata_indices_; 00150 StrataSizesType strata_sizes_; 00151 IndexArrayType internal_used_indices_; 00152 IndexArrayViewType used_indices_; 00153 IndexArrayType unused_indices_; 00154 IsUsedArrayType is_used_; 00155 00156 void (Sampler::*sampling_func_ptr_)(void); 00157 00158 void sample_stratified_w_rep() 00159 { 00160 00161 is_used_.init(false); 00162 00163 //Go thru each strata 00164 StrataIndicesType::iterator iter; 00165 int jj = 0; 00166 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter) 00167 { 00168 // do sampling with replacement in each strata and copy 00169 // data. 00170 int sze = iter->second.size(); 00171 for(int ii = 0; ii < (int)strata_sizes_[iter->first]; ++ii) 00172 { 00173 used_indices_[jj] = iter->second[randint(sze)]; 00174 is_used_[used_indices_[jj] ] = 1; 00175 jj++; 00176 } 00177 00178 } 00179 } 00180 void sample_stratified_wo_rep() 00181 { 00182 00183 00184 // reset is_used 00185 is_used_.init(false); 00186 00187 //Go thru each strata 00188 StrataIndicesType::iterator iter; 00189 int jj = 0; 00190 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter) 00191 { 00192 // do sampling without replacement in each strata and copy 00193 // data. 00194 for(int ii = 0; ii < (int)strata_sizes_[iter->first]; ++ii) 00195 { 00196 00197 00198 std::swap(iter->second[ii], iter->second[ii+ randint(iter->second.size() - ii)]); 00199 used_indices_[jj] = iter->second[ii]; 00200 00201 is_used_[used_indices_[jj] ] = 1; 00202 jj++; 00203 } 00204 00205 00206 } 00207 } 00208 void sample_w_rep() 00209 { 00210 00211 is_used_.init(false); 00212 for(int ii = 0; ii < num_of_samples; ++ii) 00213 { 00214 used_indices_[ii] = randint(max_index); 00215 is_used_[used_indices_[ii] ] = true; 00216 } 00217 } 00218 void sample_wo_rep() 00219 { 00220 00221 is_used_.init(false); 00222 for(int ii = 0; ii < num_of_samples; ++ii) 00223 { 00224 std::swap(used_indices_[ii], used_indices_[ii+ randint(max_index - ii)]); 00225 is_used_[used_indices_[ii] ] = true; 00226 } 00227 } 00228 00229 public: 00230 00231 SamplingOptions options_; 00232 00233 IndexType const & operator[](int in) 00234 { 00235 return used_indices_[in]; 00236 } 00237 00238 IndexArrayViewType used_indices() 00239 { 00240 return used_indices_.subarray(0, num_of_samples); 00241 } 00242 00243 IndexArrayViewType unused_indices() 00244 { 00245 if(unused_indices_set) 00246 { 00247 return unused_indices_; 00248 } 00249 else 00250 { 00251 unused_indices_.clear(); 00252 for(int ii = 0; ii < (int)is_used().size(); ++ii) 00253 { 00254 if(is_used_[ii]) 00255 unused_indices_.push_back(ii); 00256 } 00257 unused_indices_set = true; 00258 } 00259 return unused_indices_; 00260 } 00261 00262 IndexArrayViewType used_indices_volatile() 00263 { 00264 return IndexArrayViewType(used_indices_); 00265 } 00266 00267 IsUsedArrayViewType is_used() 00268 { 00269 return IsUsedArrayViewType(is_used_); 00270 } 00271 00272 int num_of_samples; 00273 int max_index; 00274 00275 inline void sample( ) 00276 { 00277 unused_indices_set = false; 00278 (this->*sampling_func_ptr_)(); 00279 } 00280 00281 int numOfSamples() 00282 { 00283 return num_of_samples; 00284 } 00285 00286 int numOfSamples(int n) 00287 { 00288 num_of_samples = n; 00289 if(!options_.sample_with_replacement && ((*options_.strata).data() != 0) && options_.use_internal_mem) 00290 { 00291 internal_used_indices_.resize(num_of_samples); 00292 used_indices_ = internal_used_indices_; 00293 } 00294 else if (!options_.use_internal_mem) 00295 vigra_fail("Sampler::numOfSamples() : Sampler is using external memory - no resize possible"); 00296 return num_of_samples; 00297 } 00298 00299 void init( int numOfSamples , 00300 int maxIndex , 00301 SamplingOptions const & opt) 00302 { 00303 unused_indices_set = false; 00304 strata_indices_.clear(); 00305 strata_sizes_.clear(); 00306 max_index = maxIndex; 00307 num_of_samples = numOfSamples; 00308 options_ = opt; 00309 if(!options_.use_internal_mem) 00310 { 00311 used_indices_ = options_.mem; 00312 } 00313 //Allocate memory for the used/unused vector. 00314 00315 is_used_.resize(max_index); 00316 00317 00318 00319 if((*options_.strata).data() != 0) 00320 { 00321 00322 00323 // Set up the data struct used. 00324 for(int ii = 0; ii < maxIndex; ++ii) 00325 { 00326 00327 strata_indices_[(*(options_.strata))[ii]].push_back(ii); 00328 } 00329 00330 // Set up the strata size 00331 if(options_.sample_classes_individually) 00332 { 00333 StrataIndicesType::difference_type total_size = 0; 00334 StrataIndicesType::iterator iter; 00335 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter) 00336 { 00337 // Set the size of the strata to be sampled to the fixed proportion of num_of_samples 00338 // This value is the same for all strata 00339 strata_sizes_[iter->first] = (int)ceil(double(num_of_samples) / strata_indices_.size()); 00340 total_size += strata_sizes_[iter->first]; 00341 } 00342 int cut_off = std::abs(int(total_size - num_of_samples)); 00343 if(cut_off != 0) 00344 { 00345 StrataIndicesType::iterator end_iter = strata_indices_.begin(); 00346 for(int ii = 0; ii < cut_off; ++ii) 00347 { 00348 ++end_iter; 00349 } 00350 for(iter = strata_indices_.begin(); iter != end_iter; ++iter) 00351 { 00352 strata_sizes_[iter->first] -= 1; 00353 00354 } 00355 } 00356 00357 } 00358 00359 else 00360 { 00361 StrataIndicesType::iterator iter; 00362 unsigned int total_size = 0; 00363 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter) 00364 { 00365 // Set the size of the strata to be sampled to be proportional of the size of the strata 00366 // This value is different for each strata 00367 strata_sizes_[iter->first] = (int)ceil((double(iter->second.size())/double(max_index)) * num_of_samples ); 00368 total_size += strata_sizes_[iter->first]; 00369 } 00370 int cut_off = std::abs(int(total_size - num_of_samples)); 00371 if(cut_off != 0) 00372 { 00373 for(int ii = 0; ii < cut_off; ++ii) 00374 { 00375 StrataIndicesType::iterator curmax = strata_indices_.begin(); 00376 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter) 00377 { 00378 if(strata_sizes_[iter->first] > strata_sizes_[curmax->first]) 00379 curmax = iter; 00380 } 00381 strata_sizes_[curmax->first] -= 1; 00382 } 00383 } 00384 } 00385 00386 00387 // Set sampling function 00388 if(options_.sample_with_replacement) 00389 { 00390 sampling_func_ptr_ = &Sampler::sample_stratified_w_rep; 00391 } 00392 else 00393 { 00394 // Check whether there are enough samples to sample the needed strata size from the population 00395 StrataSizesType::iterator iter; 00396 for(iter = strata_sizes_.begin(); iter != strata_sizes_.end(); ++iter) 00397 { 00398 vigra_precondition(iter->second <= (int)strata_indices_[iter->first].size(), 00399 "Not enough samples to sample classes individually //stratified and\ 00400 without replacement"); 00401 } 00402 sampling_func_ptr_ = &Sampler::sample_stratified_wo_rep; 00403 } 00404 // Allocate memory for output 00405 if(options_.use_internal_mem) 00406 { 00407 internal_used_indices_.resize(num_of_samples); 00408 used_indices_ = internal_used_indices_; 00409 } 00410 } 00411 else // unstratified sampling 00412 { 00413 00414 // Set sampling function 00415 if(options_.sample_with_replacement) 00416 { 00417 if(options_.use_internal_mem) 00418 { 00419 internal_used_indices_.resize(num_of_samples); 00420 used_indices_ = internal_used_indices_; 00421 } 00422 sampling_func_ptr_ = &Sampler::sample_w_rep; 00423 } 00424 else 00425 { 00426 vigra_precondition(max_index >= num_of_samples, 00427 "Not enough samples to sample without replacement"); 00428 if(options_.use_internal_mem) 00429 { 00430 internal_used_indices_.resize(max_index); 00431 used_indices_ = internal_used_indices_; 00432 for(int ii = 0; ii < max_index; ++ii) 00433 { 00434 used_indices_[ii] = ii; 00435 } 00436 } 00437 00438 sampling_func_ptr_ = &Sampler::sample_wo_rep; 00439 00440 } 00441 } 00442 if(options_.use_internal_mem) 00443 { 00444 used_indices_ = internal_used_indices_; 00445 } 00446 } 00447 00448 inline Sampler(int numOfSamples,int maxIndex, SamplingOptions const & opt, Random & rnd) 00449 : 00450 randint(rnd) 00451 { 00452 init(numOfSamples,maxIndex, opt); 00453 } 00454 00455 inline Sampler(int numOfSamples, int maxIndex, SamplingOptions const & opt) 00456 { 00457 init(numOfSamples,maxIndex, opt); 00458 } 00459 00460 }; 00461 00462 template<class Random =RandomTT800 > 00463 class PoissonSampler 00464 { 00465 public: 00466 Random randfloat; 00467 typedef Int32 IndexType; 00468 typedef vigra::ArrayVector <IndexType> IndexArrayType; 00469 IndexArrayType used_indices_; 00470 double lambda; 00471 int minIndex; 00472 int maxIndex; 00473 inline PoissonSampler(double lambda,IndexType minIndex,IndexType maxIndex) 00474 : randfloat() 00475 { 00476 this->lambda=lambda; 00477 this->minIndex=minIndex; 00478 this->maxIndex=maxIndex; 00479 } 00480 00481 inline void sample( ) 00482 { 00483 used_indices_.clear(); 00484 IndexType i; 00485 for(i=minIndex;i<maxIndex;++i) 00486 { 00487 //from http://en.wikipedia.org/wiki/Poisson_distribution 00488 int k=0; 00489 double p=1; 00490 double L=exp(-lambda); 00491 do 00492 { 00493 ++k; 00494 p*=randfloat.uniform53(); 00495 00496 }while(p>L); 00497 --k; 00498 //Insert i this many time 00499 while(k>0) 00500 { 00501 used_indices_.push_back(i); 00502 --k; 00503 } 00504 } 00505 } 00506 00507 IndexType const & operator[](int in) 00508 { 00509 return used_indices_[in]; 00510 } 00511 int numOfSamples() 00512 { 00513 return used_indices_.size(); 00514 } 00515 00516 }; 00517 00518 } /*end of namespace rn*/ 00519 #endif /*RN_SAMPLER_HXX*/
© Ullrich Köthe (ullrich.koethe@iwr.uni-heidelberg.de) |
html generated using doxygen and Python
|