SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2007-2009 Soeren Sonnenburg 00008 * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include "clustering/Hierarchical.h" 00012 #include "distance/Distance.h" 00013 #include "features/Labels.h" 00014 #include "features/Features.h" 00015 #include "lib/Mathematics.h" 00016 #include "base/Parallel.h" 00017 00018 #ifndef WIN32 00019 #include <pthread.h> 00020 #endif 00021 00022 using namespace shogun; 00023 00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00025 struct pair 00026 { 00028 int32_t idx1; 00030 int32_t idx2; 00031 }; 00032 #endif // DOXYGEN_SHOULD_SKIP_THIS 00033 00034 CHierarchical::CHierarchical() 00035 : CDistanceMachine(), merges(3), dimensions(0), assignment(NULL), 00036 table_size(0), pairs(NULL), merge_distance(NULL) 00037 { 00038 } 00039 00040 CHierarchical::CHierarchical(int32_t merges_, CDistance* d) 00041 : CDistanceMachine(), merges(merges_), dimensions(0), assignment(NULL), 00042 table_size(0), pairs(NULL), merge_distance(NULL) 00043 { 00044 set_distance(d); 00045 } 00046 00047 CHierarchical::~CHierarchical() 00048 { 00049 delete[] merge_distance; 00050 delete[] assignment; 00051 delete[] pairs; 00052 } 00053 00054 bool CHierarchical::train(CFeatures* data) 00055 { 00056 ASSERT(distance); 00057 00058 if (data) 00059 distance->init(data, data); 00060 00061 CFeatures* lhs=distance->get_lhs(); 00062 ASSERT(lhs); 00063 00064 int32_t num=lhs->get_num_vectors(); 00065 ASSERT(num>0); 00066 00067 const int32_t num_pairs=num*(num-1)/2; 00068 00069 delete[] merge_distance; 00070 merge_distance=new float64_t[num]; 00071 CMath::fill_vector(merge_distance, num, -1.0); 00072 00073 delete[] assignment; 00074 assignment=new int32_t[num]; 00075 CMath::range_fill_vector(assignment, num); 00076 00077 delete[] pairs; 00078 pairs=new int32_t[2*num]; 00079 CMath::fill_vector(pairs, 2*num, -1); 00080 00081 pair* index=new pair[num_pairs]; 00082 float64_t* distances=new float64_t[num_pairs]; 00083 00084 int32_t offs=0; 00085 for (int32_t i=0; i<num; i++) 00086 { 00087 for (int32_t j=i+1; j<num; j++) 00088 { 00089 distances[offs]=distance->distance(i,j); 00090 index[offs].idx1=i; 00091 index[offs].idx2=j; 00092 offs++; //offs=i*(i+1)/2+j 00093 } 00094 SG_PROGRESS(i, 0, num-1); 00095 } 00096 00097 CMath::qsort_index<float64_t,pair>(distances, index, (num-1)*num/2); 00098 //CMath::display_vector(distances, (num-1)*num/2, "dists"); 00099 00100 int32_t k=-1; 00101 int32_t l=0; 00102 for (; l<num && (num-l)>=merges && k<num_pairs-1; l++) 00103 { 00104 while (k<num_pairs-1) 00105 { 00106 k++; 00107 00108 int32_t i=index[k].idx1; 00109 int32_t j=index[k].idx2; 00110 int32_t c1=assignment[i]; 00111 int32_t c2=assignment[j]; 00112 00113 if (c1==c2) 00114 continue; 00115 00116 SG_PROGRESS(k, 0, num_pairs-1); 00117 00118 if (c1<c2) 00119 { 00120 pairs[2*l]=c1; 00121 pairs[2*l+1]=c2; 00122 } 00123 else 00124 { 00125 pairs[2*l]=c2; 00126 pairs[2*l+1]=c1; 00127 } 00128 merge_distance[l]=distances[k]; 00129 00130 int32_t c=num+l; 00131 for (int32_t m=0; m<num; m++) 00132 { 00133 if (assignment[m] == c1 || assignment[m] == c2) 00134 assignment[m] = c; 00135 } 00136 #ifdef DEBUG_HIERARCHICAL 00137 SG_PRINT("l=%04i i=%04i j=%04i c1=%+04d c2=%+04d c=%+04d dist=%6.6f\n", l,i,j, c1,c2,c, merge_distance[l]); 00138 #endif 00139 break; 00140 } 00141 } 00142 00143 assignment_size=num; 00144 table_size=l-1; 00145 ASSERT(table_size>0); 00146 delete[] distances; 00147 delete[] index; 00148 SG_UNREF(lhs) 00149 00150 return true; 00151 } 00152 00153 bool CHierarchical::load(FILE* srcfile) 00154 { 00155 SG_SET_LOCALE_C; 00156 SG_RESET_LOCALE; 00157 return false; 00158 } 00159 00160 bool CHierarchical::save(FILE* dstfile) 00161 { 00162 SG_SET_LOCALE_C; 00163 SG_RESET_LOCALE; 00164 return false; 00165 }