00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013
00014 #include "lib/common.h"
00015 #include "lib/io.h"
00016 #include "lib/File.h"
00017 #include "lib/Time.h"
00018 #include "base/Parallel.h"
00019
00020 #include "distance/Distance.h"
00021 #include "features/Features.h"
00022
00023 #include <string.h>
00024 #include <unistd.h>
00025
00026 #ifndef WIN32
00027 #include <pthread.h>
00028 #endif
00029
00030 using namespace shogun;
00031
00032 CDistance::CDistance()
00033 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00034 lhs(NULL), rhs(NULL)
00035 {
00036 }
00037
00038
00039 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs)
00040 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00041 lhs(NULL), rhs(NULL)
00042 {
00043 init(p_lhs, p_rhs);
00044 }
00045
00046 CDistance::~CDistance()
00047 {
00048 delete[] precomputed_matrix;
00049 precomputed_matrix=NULL;
00050
00051 remove_lhs_and_rhs();
00052 }
00053
00054 bool CDistance::init(CFeatures* l, CFeatures* r)
00055 {
00056
00057 ASSERT(l);
00058 ASSERT(r);
00059
00060
00061 ASSERT(l->get_feature_class()==r->get_feature_class());
00062 ASSERT(l->get_feature_type()==r->get_feature_type());
00063
00064
00065 remove_lhs_and_rhs();
00066
00067
00068 SG_REF(l);
00069 if (l!=r)
00070 SG_REF(r);
00071
00072 lhs=l;
00073 rhs=r;
00074
00075 delete[] precomputed_matrix ;
00076 precomputed_matrix=NULL ;
00077
00078 return true;
00079 }
00080
00081 bool CDistance::load(char* fname)
00082 {
00083 return false;
00084 }
00085
00086 bool CDistance::save(char* fname)
00087 {
00088 int32_t i=0;
00089 int32_t num_left=lhs->get_num_vectors();
00090 int32_t num_right=rhs->get_num_vectors();
00091 int64_t num_total=num_left*num_right;
00092
00093 CFile f(fname, 'w', F_DREAL);
00094
00095 for (int32_t l=0; l< (int32_t) num_left && f.is_ok(); l++)
00096 {
00097 for (int32_t r=0; r< (int32_t) num_right && f.is_ok(); r++)
00098 {
00099 if (!(i % (num_total/10+1)))
00100 SG_PRINT( "%02d%%.", (int) (100.0*i/num_total));
00101 else if (!(i % (num_total/200+1)))
00102 SG_PRINT( ".");
00103
00104 float64_t k=distance(l,r);
00105 f.save_real_data(&k, 1);
00106
00107 i++;
00108 }
00109 }
00110
00111 if (f.is_ok())
00112 SG_INFO( "distance matrix of size %ld x %ld written \n", num_left, num_right);
00113
00114 return (f.is_ok());
00115 }
00116
00117 void CDistance::remove_lhs_and_rhs()
00118 {
00119 if (rhs!=lhs)
00120 SG_UNREF(rhs);
00121 rhs = NULL;
00122
00123 SG_UNREF(lhs);
00124 lhs = NULL;
00125 }
00126
00127 void CDistance::remove_lhs()
00128 {
00129 SG_UNREF(lhs);
00130 lhs = NULL;
00131 }
00132
00134 void CDistance::remove_rhs()
00135 {
00136 SG_UNREF(rhs);
00137 rhs = NULL;
00138 }
00139
00140 CFeatures* CDistance::replace_rhs(CFeatures* r)
00141 {
00142
00143 ASSERT(r);
00144
00145
00146 ASSERT(lhs->get_feature_class()==r->get_feature_class());
00147 ASSERT(lhs->get_feature_type()==r->get_feature_type());
00148
00149
00150 CFeatures* tmp=rhs;
00151
00152 rhs=r;
00153
00154 delete[] precomputed_matrix ;
00155 precomputed_matrix=NULL ;
00156
00157 return tmp;
00158 }
00159
00160 void CDistance::do_precompute_matrix()
00161 {
00162 int32_t num_left=lhs->get_num_vectors();
00163 int32_t num_right=rhs->get_num_vectors();
00164 SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00165
00166 ASSERT(num_left==num_right);
00167 ASSERT(lhs==rhs);
00168 int32_t num=num_left;
00169
00170 delete[] precomputed_matrix;
00171 precomputed_matrix=new float32_t[num*(num+1)/2];
00172
00173 for (int32_t i=0; i<num; i++)
00174 {
00175 SG_PROGRESS(i*i,0,num*num);
00176 for (int32_t j=0; j<=i; j++)
00177 precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00178 }
00179
00180 SG_PROGRESS(num*num,0,num*num);
00181 SG_DONE();
00182 }
00183
00184 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n)
00185 {
00186 ASSERT(dst && m && n);
00187
00188 float64_t* result = NULL;
00189 CFeatures* f1 = lhs;
00190 CFeatures* f2 = rhs;
00191
00192 if (f1 && f2)
00193 {
00194 int32_t num_vec1=f1->get_num_vectors();
00195 int32_t num_vec2=f2->get_num_vectors();
00196 *m=num_vec1;
00197 *n=num_vec2;
00198
00199 int64_t total_num=num_vec1*num_vec2;
00200 int32_t num_done=0;
00201 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00202
00203 result=(float64_t*) malloc(total_num*sizeof(float64_t));
00204 ASSERT(result);
00205
00206 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00207 {
00208 for (int32_t i=0; i<num_vec1; i++)
00209 {
00210 for (int32_t j=i; j<num_vec1; j++)
00211 {
00212 float64_t v=distance(i,j);
00213
00214 result[i+j*num_vec1]=v;
00215 result[j+i*num_vec1]=v;
00216
00217 if (num_done%100000)
00218 SG_PROGRESS(num_done, 0, total_num-1);
00219
00220 if (i!=j)
00221 num_done+=2;
00222 else
00223 num_done+=1;
00224 }
00225 }
00226 }
00227 else
00228 {
00229 for (int32_t i=0; i<num_vec1; i++)
00230 {
00231 for (int32_t j=0; j<num_vec2; j++)
00232 {
00233 result[i+j*num_vec1]=distance(i,j) ;
00234
00235 if (num_done%100000)
00236 SG_PROGRESS(num_done, 0, total_num-1);
00237
00238 num_done++;
00239 }
00240 }
00241 }
00242
00243 SG_DONE();
00244 }
00245 else
00246 SG_ERROR( "no features assigned to distance\n");
00247
00248 *dst=result;
00249 }
00250
00251 float32_t* CDistance::get_distance_matrix_shortreal(
00252 int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00253 {
00254 float32_t* result = NULL;
00255 CFeatures* f1 = lhs;
00256 CFeatures* f2 = rhs;
00257
00258 if (f1 && f2)
00259 {
00260 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00261 SG_ERROR("distance matrix does not fit into target\n");
00262
00263 num_vec1=f1->get_num_vectors();
00264 num_vec2=f2->get_num_vectors();
00265 int64_t total_num=num_vec1*num_vec2;
00266 int32_t num_done=0;
00267
00268 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00269
00270 if (target)
00271 result=target;
00272 else
00273 result=new float32_t[total_num];
00274
00275 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00276 {
00277 for (int32_t i=0; i<num_vec1; i++)
00278 {
00279 for (int32_t j=i; j<num_vec1; j++)
00280 {
00281 float64_t v=distance(i,j);
00282
00283 result[i+j*num_vec1]=v;
00284 result[j+i*num_vec1]=v;
00285
00286 if (num_done%100000)
00287 SG_PROGRESS(num_done, 0, total_num-1);
00288
00289 if (i!=j)
00290 num_done+=2;
00291 else
00292 num_done+=1;
00293 }
00294 }
00295 }
00296 else
00297 {
00298 for (int32_t i=0; i<num_vec1; i++)
00299 {
00300 for (int32_t j=0; j<num_vec2; j++)
00301 {
00302 result[i+j*num_vec1]=distance(i,j) ;
00303
00304 if (num_done%100000)
00305 SG_PROGRESS(num_done, 0, total_num-1);
00306
00307 num_done++;
00308 }
00309 }
00310 }
00311
00312 SG_DONE();
00313 }
00314 else
00315 SG_ERROR( "no features assigned to distance\n");
00316
00317 return result;
00318 }
00319
00320 float64_t* CDistance::get_distance_matrix_real(
00321 int32_t &num_vec1, int32_t &num_vec2, float64_t* target)
00322 {
00323 float64_t* result = NULL;
00324 CFeatures* f1 = lhs;
00325 CFeatures* f2 = rhs;
00326
00327 if (f1 && f2)
00328 {
00329 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00330 SG_ERROR("distance matrix does not fit into target\n");
00331
00332 num_vec1=f1->get_num_vectors();
00333 num_vec2=f2->get_num_vectors();
00334 int64_t total_num=num_vec1*num_vec2;
00335 int32_t num_done=0;
00336
00337 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00338
00339 if (target)
00340 result=target;
00341 else
00342 result=new float64_t[total_num];
00343
00344 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00345 {
00346 for (int32_t i=0; i<num_vec1; i++)
00347 {
00348 for (int32_t j=i; j<num_vec1; j++)
00349 {
00350 float64_t v=distance(i,j);
00351
00352 result[i+j*num_vec1]=v;
00353 result[j+i*num_vec1]=v;
00354
00355 if (num_done%100000)
00356 SG_PROGRESS(num_done, 0, total_num-1);
00357
00358 if (i!=j)
00359 num_done+=2;
00360 else
00361 num_done+=1;
00362 }
00363 }
00364 }
00365 else
00366 {
00367 for (int32_t i=0; i<num_vec1; i++)
00368 {
00369 for (int32_t j=0; j<num_vec2; j++)
00370 {
00371 result[i+j*num_vec1]=distance(i,j) ;
00372
00373 if (num_done%100000)
00374 SG_PROGRESS(num_done, 0, total_num-1);
00375
00376 num_done++;
00377 }
00378 }
00379 }
00380
00381 SG_DONE();
00382 }
00383 else
00384 SG_ERROR( "no features assigned to distance\n");
00385
00386 return result;
00387 }