Distance.cpp

Go to the documentation of this file.
00001 /*
00002  * this program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006-2009 Christian Gehl
00008  * Written (W) 2006-2009 Soeren Sonnenburg
00009  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/config.h"
00013 
00014 #include "lib/common.h"
00015 #include "lib/io.h"
00016 #include "lib/File.h"
00017 #include "lib/Time.h"
00018 #include "base/Parallel.h"
00019 
00020 #include "distance/Distance.h"
00021 #include "features/Features.h"
00022 
00023 #include <string.h>
00024 #include <unistd.h>
00025 
00026 #ifndef WIN32
00027 #include <pthread.h>
00028 #endif
00029 
00030 using namespace shogun;
00031 
00032 CDistance::CDistance()
00033 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00034     lhs(NULL), rhs(NULL)
00035 {
00036 }
00037 
00038         
00039 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs)
00040 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00041     lhs(NULL), rhs(NULL)
00042 {
00043     init(p_lhs, p_rhs);
00044 }
00045 
00046 CDistance::~CDistance()
00047 {
00048     delete[] precomputed_matrix;
00049     precomputed_matrix=NULL;
00050 
00051     remove_lhs_and_rhs();
00052 }
00053 
00054 bool CDistance::init(CFeatures* l, CFeatures* r)
00055 {
00056     //make sure features were indeed supplied
00057     ASSERT(l);
00058     ASSERT(r);
00059 
00060     //make sure features are compatible
00061     ASSERT(l->get_feature_class()==r->get_feature_class());
00062     ASSERT(l->get_feature_type()==r->get_feature_type());
00063 
00064     //remove references to previous features
00065     remove_lhs_and_rhs();
00066 
00067     //increase reference counts
00068     SG_REF(l);
00069     if (l!=r)
00070         SG_REF(r);
00071 
00072     lhs=l;
00073     rhs=r;
00074 
00075     delete[] precomputed_matrix ;
00076     precomputed_matrix=NULL ;
00077 
00078     return true;
00079 }
00080 
00081 bool CDistance::load(char* fname)
00082 {
00083     return false;
00084 }
00085 
00086 bool CDistance::save(char* fname)
00087 {
00088     int32_t i=0;
00089     int32_t num_left=lhs->get_num_vectors();
00090     int32_t num_right=rhs->get_num_vectors();
00091     int64_t num_total=num_left*num_right;
00092 
00093     CFile f(fname, 'w', F_DREAL);
00094 
00095     for (int32_t l=0; l< (int32_t) num_left && f.is_ok(); l++)
00096     {
00097         for (int32_t r=0; r< (int32_t) num_right && f.is_ok(); r++)
00098         {
00099             if (!(i % (num_total/10+1)))
00100                 SG_PRINT( "%02d%%.", (int) (100.0*i/num_total));
00101             else if (!(i % (num_total/200+1)))
00102                 SG_PRINT( ".");
00103 
00104             float64_t k=distance(l,r);
00105             f.save_real_data(&k, 1);
00106 
00107             i++;
00108         }
00109     }
00110 
00111     if (f.is_ok())
00112         SG_INFO( "distance matrix of size %ld x %ld written \n", num_left, num_right);
00113 
00114     return (f.is_ok());
00115 }
00116 
00117 void CDistance::remove_lhs_and_rhs()
00118 {
00119     if (rhs!=lhs)
00120         SG_UNREF(rhs);
00121     rhs = NULL;
00122 
00123     SG_UNREF(lhs);
00124     lhs = NULL;
00125 }
00126 
00127 void CDistance::remove_lhs()
00128 { 
00129     SG_UNREF(lhs);
00130     lhs = NULL;
00131 }
00132 
00134 void CDistance::remove_rhs()
00135 {
00136     SG_UNREF(rhs);
00137     rhs = NULL;
00138 }
00139 
00140 CFeatures* CDistance::replace_rhs(CFeatures* r)
00141 {
00142      //make sure features were indeed supplied
00143      ASSERT(r);
00144 
00145      //make sure features are compatible
00146      ASSERT(lhs->get_feature_class()==r->get_feature_class());
00147      ASSERT(lhs->get_feature_type()==r->get_feature_type());
00148 
00149      //remove references to previous rhs features
00150      CFeatures* tmp=rhs;
00151      
00152      rhs=r;
00153 
00154      delete[] precomputed_matrix ;
00155      precomputed_matrix=NULL ;
00156 
00157      return tmp;
00158 }
00159 
00160 void CDistance::do_precompute_matrix()
00161 {
00162     int32_t num_left=lhs->get_num_vectors();
00163     int32_t num_right=rhs->get_num_vectors();
00164     SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00165 
00166     ASSERT(num_left==num_right);
00167     ASSERT(lhs==rhs);
00168     int32_t num=num_left;
00169     
00170     delete[] precomputed_matrix;
00171     precomputed_matrix=new float32_t[num*(num+1)/2];
00172 
00173     for (int32_t i=0; i<num; i++)
00174     {
00175         SG_PROGRESS(i*i,0,num*num);
00176         for (int32_t j=0; j<=i; j++)
00177             precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00178     }
00179 
00180     SG_PROGRESS(num*num,0,num*num);
00181     SG_DONE();
00182 }
00183 
00184 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n)
00185 {
00186     ASSERT(dst && m && n);
00187 
00188     float64_t* result = NULL;
00189     CFeatures* f1 = lhs;
00190     CFeatures* f2 = rhs;
00191 
00192     if (f1 && f2)
00193     {
00194         int32_t num_vec1=f1->get_num_vectors();
00195         int32_t num_vec2=f2->get_num_vectors();
00196         *m=num_vec1;
00197         *n=num_vec2;
00198 
00199         int64_t total_num=num_vec1*num_vec2;
00200         int32_t num_done=0;
00201         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00202 
00203         result=(float64_t*) malloc(total_num*sizeof(float64_t));
00204         ASSERT(result);
00205 
00206         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00207         {
00208             for (int32_t i=0; i<num_vec1; i++)
00209             {
00210                 for (int32_t j=i; j<num_vec1; j++)
00211                 {
00212                     float64_t v=distance(i,j);
00213 
00214                     result[i+j*num_vec1]=v;
00215                     result[j+i*num_vec1]=v;
00216 
00217                     if (num_done%100000)
00218                         SG_PROGRESS(num_done, 0, total_num-1);
00219 
00220                     if (i!=j)
00221                         num_done+=2;
00222                     else
00223                         num_done+=1;
00224                 }
00225             }
00226         }
00227         else
00228         {
00229             for (int32_t i=0; i<num_vec1; i++)
00230             {
00231                 for (int32_t j=0; j<num_vec2; j++)
00232                 {
00233                     result[i+j*num_vec1]=distance(i,j) ;
00234 
00235                     if (num_done%100000)
00236                         SG_PROGRESS(num_done, 0, total_num-1);
00237 
00238                     num_done++;
00239                 }
00240             }
00241         }
00242 
00243         SG_DONE();
00244     }
00245     else
00246       SG_ERROR( "no features assigned to distance\n");
00247 
00248     *dst=result;
00249 }
00250 
00251 float32_t* CDistance::get_distance_matrix_shortreal(
00252     int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00253 {
00254     float32_t* result = NULL;
00255     CFeatures* f1 = lhs;
00256     CFeatures* f2 = rhs;
00257 
00258     if (f1 && f2)
00259     {
00260         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00261             SG_ERROR("distance matrix does not fit into target\n");
00262 
00263         num_vec1=f1->get_num_vectors();
00264         num_vec2=f2->get_num_vectors();
00265         int64_t total_num=num_vec1*num_vec2;
00266         int32_t num_done=0;
00267 
00268         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00269 
00270         if (target)
00271             result=target;
00272         else
00273             result=new float32_t[total_num];
00274 
00275         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00276         {
00277             for (int32_t i=0; i<num_vec1; i++)
00278             {
00279                 for (int32_t j=i; j<num_vec1; j++)
00280                 {
00281                     float64_t v=distance(i,j);
00282 
00283                     result[i+j*num_vec1]=v;
00284                     result[j+i*num_vec1]=v;
00285 
00286                     if (num_done%100000)
00287                         SG_PROGRESS(num_done, 0, total_num-1);
00288 
00289                     if (i!=j)
00290                         num_done+=2;
00291                     else
00292                         num_done+=1;
00293                 }
00294             }
00295         }
00296         else
00297         {
00298             for (int32_t i=0; i<num_vec1; i++)
00299             {
00300                 for (int32_t j=0; j<num_vec2; j++)
00301                 {
00302                     result[i+j*num_vec1]=distance(i,j) ;
00303 
00304                     if (num_done%100000)
00305                         SG_PROGRESS(num_done, 0, total_num-1);
00306 
00307                     num_done++;
00308                 }
00309             }
00310         }
00311 
00312         SG_DONE();
00313     }
00314     else
00315       SG_ERROR( "no features assigned to distance\n");
00316 
00317     return result;
00318 }
00319 
00320 float64_t* CDistance::get_distance_matrix_real(
00321     int32_t &num_vec1, int32_t &num_vec2, float64_t* target)
00322 {
00323     float64_t* result = NULL;
00324     CFeatures* f1 = lhs;
00325     CFeatures* f2 = rhs;
00326 
00327     if (f1 && f2)
00328     {
00329         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00330             SG_ERROR("distance matrix does not fit into target\n");
00331 
00332         num_vec1=f1->get_num_vectors();
00333         num_vec2=f2->get_num_vectors();
00334         int64_t total_num=num_vec1*num_vec2;
00335         int32_t num_done=0;
00336 
00337         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00338 
00339         if (target)
00340             result=target;
00341         else
00342             result=new float64_t[total_num];
00343 
00344         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00345         {
00346             for (int32_t i=0; i<num_vec1; i++)
00347             {
00348                 for (int32_t j=i; j<num_vec1; j++)
00349                 {
00350                     float64_t v=distance(i,j);
00351 
00352                     result[i+j*num_vec1]=v;
00353                     result[j+i*num_vec1]=v;
00354 
00355                     if (num_done%100000)
00356                         SG_PROGRESS(num_done, 0, total_num-1);
00357 
00358                     if (i!=j)
00359                         num_done+=2;
00360                     else
00361                         num_done+=1;
00362                 }
00363             }
00364         }
00365         else
00366         {
00367             for (int32_t i=0; i<num_vec1; i++)
00368             {
00369                 for (int32_t j=0; j<num_vec2; j++)
00370                 {
00371                     result[i+j*num_vec1]=distance(i,j) ;
00372 
00373                     if (num_done%100000)
00374                         SG_PROGRESS(num_done, 0, total_num-1);
00375 
00376                     num_done++;
00377                 }
00378             }
00379         }
00380 
00381         SG_DONE();
00382     }
00383     else
00384       SG_ERROR( "no features assigned to distance\n");
00385 
00386     return result;
00387 }

SHOGUN Machine Learning Toolbox - Documentation