SHOGUN v0.9.0
|
00001 /* 00002 * this program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2006-2009 Christian Gehl 00008 * Written (W) 2006-2009 Soeren Sonnenburg 00009 * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/config.h" 00013 #include "lib/common.h" 00014 #include "lib/io.h" 00015 #include "lib/File.h" 00016 #include "lib/Time.h" 00017 #include "base/Parallel.h" 00018 #include "base/Parameter.h" 00019 00020 #include "distance/Distance.h" 00021 #include "features/Features.h" 00022 00023 #include <string.h> 00024 #include <unistd.h> 00025 00026 #ifndef WIN32 00027 #include <pthread.h> 00028 #endif 00029 00030 using namespace shogun; 00031 00032 CDistance::CDistance() : CSGObject() 00033 { 00034 init(); 00035 } 00036 00037 00038 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs) : CSGObject() 00039 { 00040 init(); 00041 init(p_lhs, p_rhs); 00042 } 00043 00044 CDistance::~CDistance() 00045 { 00046 delete[] precomputed_matrix; 00047 precomputed_matrix=NULL; 00048 00049 remove_lhs_and_rhs(); 00050 } 00051 00052 bool CDistance::init(CFeatures* l, CFeatures* r) 00053 { 00054 //make sure features were indeed supplied 00055 ASSERT(l); 00056 ASSERT(r); 00057 00058 //make sure features are compatible 00059 ASSERT(l->get_feature_class()==r->get_feature_class()); 00060 ASSERT(l->get_feature_type()==r->get_feature_type()); 00061 00062 //remove references to previous features 00063 remove_lhs_and_rhs(); 00064 00065 //increase reference counts 00066 SG_REF(l); 00067 SG_REF(r); 00068 00069 lhs=l; 00070 rhs=r; 00071 00072 delete[] precomputed_matrix ; 00073 precomputed_matrix=NULL ; 00074 00075 return true; 00076 } 00077 00078 void CDistance::load(CFile* loader) 00079 { 00080 SG_SET_LOCALE_C; 00081 SG_RESET_LOCALE; 00082 } 00083 00084 void CDistance::save(CFile* writer) 00085 { 00086 SG_SET_LOCALE_C; 00087 SG_RESET_LOCALE; 00088 } 00089 00090 void CDistance::remove_lhs_and_rhs() 00091 { 00092 SG_UNREF(rhs); 00093 rhs = NULL; 00094 00095 SG_UNREF(lhs); 00096 lhs = NULL; 00097 } 00098 00099 void CDistance::remove_lhs() 00100 { 00101 SG_UNREF(lhs); 00102 lhs = NULL; 00103 } 00104 00106 void CDistance::remove_rhs() 00107 { 00108 SG_UNREF(rhs); 00109 rhs = NULL; 00110 } 00111 00112 CFeatures* CDistance::replace_rhs(CFeatures* r) 00113 { 00114 //make sure features were indeed supplied 00115 ASSERT(r); 00116 00117 //make sure features are compatible 00118 ASSERT(lhs->get_feature_class()==r->get_feature_class()); 00119 ASSERT(lhs->get_feature_type()==r->get_feature_type()); 00120 00121 //remove references to previous rhs features 00122 CFeatures* tmp=rhs; 00123 00124 rhs=r; 00125 00126 delete[] precomputed_matrix ; 00127 precomputed_matrix=NULL ; 00128 00129 // return old features including reference count 00130 return tmp; 00131 } 00132 00133 void CDistance::do_precompute_matrix() 00134 { 00135 int32_t num_left=lhs->get_num_vectors(); 00136 int32_t num_right=rhs->get_num_vectors(); 00137 SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ; 00138 00139 ASSERT(num_left==num_right); 00140 ASSERT(lhs==rhs); 00141 int32_t num=num_left; 00142 00143 delete[] precomputed_matrix; 00144 precomputed_matrix=new float32_t[num*(num+1)/2]; 00145 00146 for (int32_t i=0; i<num; i++) 00147 { 00148 SG_PROGRESS(i*i,0,num*num); 00149 for (int32_t j=0; j<=i; j++) 00150 precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ; 00151 } 00152 00153 SG_PROGRESS(num*num,0,num*num); 00154 SG_DONE(); 00155 } 00156 00157 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n) 00158 { 00159 ASSERT(dst && m && n); 00160 00161 float64_t* result = NULL; 00162 CFeatures* f1 = lhs; 00163 CFeatures* f2 = rhs; 00164 00165 if (f1 && f2) 00166 { 00167 int32_t num_vec1=f1->get_num_vectors(); 00168 int32_t num_vec2=f2->get_num_vectors(); 00169 *m=num_vec1; 00170 *n=num_vec2; 00171 00172 int64_t total_num=num_vec1*num_vec2; 00173 int32_t num_done=0; 00174 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2); 00175 00176 result=(float64_t*) malloc(total_num*sizeof(float64_t)); 00177 ASSERT(result); 00178 00179 if ( (f1 == f2) && (num_vec1 == num_vec2) ) 00180 { 00181 for (int32_t i=0; i<num_vec1; i++) 00182 { 00183 for (int32_t j=i; j<num_vec1; j++) 00184 { 00185 float64_t v=distance(i,j); 00186 00187 result[i+j*num_vec1]=v; 00188 result[j+i*num_vec1]=v; 00189 00190 if (num_done%100000) 00191 SG_PROGRESS(num_done, 0, total_num-1); 00192 00193 if (i!=j) 00194 num_done+=2; 00195 else 00196 num_done+=1; 00197 } 00198 } 00199 } 00200 else 00201 { 00202 for (int32_t i=0; i<num_vec1; i++) 00203 { 00204 for (int32_t j=0; j<num_vec2; j++) 00205 { 00206 result[i+j*num_vec1]=distance(i,j) ; 00207 00208 if (num_done%100000) 00209 SG_PROGRESS(num_done, 0, total_num-1); 00210 00211 num_done++; 00212 } 00213 } 00214 } 00215 00216 SG_DONE(); 00217 } 00218 else 00219 SG_ERROR( "no features assigned to distance\n"); 00220 00221 *dst=result; 00222 } 00223 00224 float32_t* CDistance::get_distance_matrix_shortreal( 00225 int32_t &num_vec1, int32_t &num_vec2, float32_t* target) 00226 { 00227 float32_t* result = NULL; 00228 CFeatures* f1 = lhs; 00229 CFeatures* f2 = rhs; 00230 00231 if (f1 && f2) 00232 { 00233 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors())) 00234 SG_ERROR("distance matrix does not fit into target\n"); 00235 00236 num_vec1=f1->get_num_vectors(); 00237 num_vec2=f2->get_num_vectors(); 00238 int64_t total_num=num_vec1*num_vec2; 00239 int32_t num_done=0; 00240 00241 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2); 00242 00243 if (target) 00244 result=target; 00245 else 00246 result=new float32_t[total_num]; 00247 00248 if ( (f1 == f2) && (num_vec1 == num_vec2) ) 00249 { 00250 for (int32_t i=0; i<num_vec1; i++) 00251 { 00252 for (int32_t j=i; j<num_vec1; j++) 00253 { 00254 float64_t v=distance(i,j); 00255 00256 result[i+j*num_vec1]=v; 00257 result[j+i*num_vec1]=v; 00258 00259 if (num_done%100000) 00260 SG_PROGRESS(num_done, 0, total_num-1); 00261 00262 if (i!=j) 00263 num_done+=2; 00264 else 00265 num_done+=1; 00266 } 00267 } 00268 } 00269 else 00270 { 00271 for (int32_t i=0; i<num_vec1; i++) 00272 { 00273 for (int32_t j=0; j<num_vec2; j++) 00274 { 00275 result[i+j*num_vec1]=distance(i,j) ; 00276 00277 if (num_done%100000) 00278 SG_PROGRESS(num_done, 0, total_num-1); 00279 00280 num_done++; 00281 } 00282 } 00283 } 00284 00285 SG_DONE(); 00286 } 00287 else 00288 SG_ERROR( "no features assigned to distance\n"); 00289 00290 return result; 00291 } 00292 00293 float64_t* CDistance::get_distance_matrix_real( 00294 int32_t &num_vec1, int32_t &num_vec2, float64_t* target) 00295 { 00296 float64_t* result = NULL; 00297 CFeatures* f1 = lhs; 00298 CFeatures* f2 = rhs; 00299 00300 if (f1 && f2) 00301 { 00302 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors())) 00303 SG_ERROR("distance matrix does not fit into target\n"); 00304 00305 num_vec1=f1->get_num_vectors(); 00306 num_vec2=f2->get_num_vectors(); 00307 int64_t total_num=num_vec1*num_vec2; 00308 int32_t num_done=0; 00309 00310 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2); 00311 00312 if (target) 00313 result=target; 00314 else 00315 result=new float64_t[total_num]; 00316 00317 if ( (f1 == f2) && (num_vec1 == num_vec2) ) 00318 { 00319 for (int32_t i=0; i<num_vec1; i++) 00320 { 00321 for (int32_t j=i; j<num_vec1; j++) 00322 { 00323 float64_t v=distance(i,j); 00324 00325 result[i+j*num_vec1]=v; 00326 result[j+i*num_vec1]=v; 00327 00328 if (num_done%100000) 00329 SG_PROGRESS(num_done, 0, total_num-1); 00330 00331 if (i!=j) 00332 num_done+=2; 00333 else 00334 num_done+=1; 00335 } 00336 } 00337 } 00338 else 00339 { 00340 for (int32_t i=0; i<num_vec1; i++) 00341 { 00342 for (int32_t j=0; j<num_vec2; j++) 00343 { 00344 result[i+j*num_vec1]=distance(i,j) ; 00345 00346 if (num_done%100000) 00347 SG_PROGRESS(num_done, 0, total_num-1); 00348 00349 num_done++; 00350 } 00351 } 00352 } 00353 00354 SG_DONE(); 00355 } 00356 else 00357 SG_ERROR( "no features assigned to distance\n"); 00358 00359 return result; 00360 } 00361 00362 void CDistance::init() 00363 { 00364 precomputed_matrix = NULL; 00365 precompute_matrix = false; 00366 lhs = NULL; 00367 rhs = NULL; 00368 00369 m_parameters->add((CSGObject**) &lhs, "lhs", 00370 "Feature vectors to occur on left hand side."); 00371 m_parameters->add((CSGObject**) &rhs, "rhs", 00372 "Feature vectors to occur on right hand side."); 00373 }