SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "features/Labels.h" 00013 #include "lib/common.h" 00014 #include "lib/File.h" 00015 #include "lib/io.h" 00016 #include "lib/Mathematics.h" 00017 #include "base/Parameter.h" 00018 00019 using namespace shogun; 00020 00021 CLabels::CLabels() 00022 : CSGObject() 00023 { 00024 init(0, 0); 00025 } 00026 00027 CLabels::CLabels(int32_t num_lab) 00028 : CSGObject() 00029 { 00030 init(num_lab, 0); 00031 00032 labels=new float64_t[num_lab]; 00033 for (int32_t i=0; i<num_lab; i++) 00034 labels[i]=0; 00035 } 00036 00037 CLabels::CLabels(float64_t* p_labels, int32_t len) 00038 : CSGObject() 00039 { 00040 init(0, 0); 00041 00042 set_labels(p_labels, len); 00043 00044 // We don't allocate the confidences matrix, unless it is 00045 // necessary. For problems with many classes and samples it might 00046 // get really big. 00047 m_num_classes=get_num_classes(); 00048 m_confidences=NULL; 00049 m_confidence_classes = 0; 00050 m_confidence_labels = 0; 00051 } 00052 00053 void CLabels::set_to_one() 00054 { 00055 ASSERT(labels); 00056 for (int32_t i=0; i<num_labels; i++) 00057 labels[i]=+1; 00058 } 00059 00060 CLabels::CLabels(float64_t* in_confidences, int32_t in_num_labels, 00061 int32_t in_num_classes) 00062 : CSGObject() 00063 { 00064 init(0, 0); 00065 00066 labels=new float64_t[in_num_labels]; 00067 for (int32_t i=0; i<in_num_labels; i++) 00068 labels[i]=0; 00069 00070 m_num_classes=in_num_classes; 00071 m_confidences=in_confidences; 00072 m_confidence_classes = in_num_classes; 00073 m_confidence_labels = in_num_labels; 00074 find_labels(); 00075 } 00076 00077 CLabels::CLabels(CFile* loader) 00078 : CSGObject() 00079 { 00080 init(0, 0); 00081 00082 load(loader); 00083 } 00084 00085 CLabels::~CLabels() 00086 { 00087 delete[] labels; 00088 delete[] m_confidences; 00089 00090 num_labels=0; 00091 m_num_classes=0; 00092 labels=NULL; 00093 m_confidences=NULL; 00094 m_confidence_classes = 0; 00095 m_confidence_labels = 0; 00096 } 00097 00098 void 00099 CLabels::init(int32_t num_labels_, int32_t num_classes) 00100 { 00101 m_parameters->add_vector(&labels, &num_labels, "labels", 00102 "The labels."); 00103 m_parameters->add_matrix(&m_confidences, &m_confidence_classes, 00104 &m_confidence_labels, "m_confidences", 00105 "Confidence matrix."); 00106 00107 labels = NULL; 00108 num_labels = num_labels_; 00109 m_confidences=NULL; 00110 m_confidence_classes = 0; 00111 m_confidence_labels = 0; 00112 m_num_classes=num_classes; 00113 } 00114 00115 void CLabels::set_labels(float64_t* p_labels, int32_t len) 00116 { 00117 ASSERT(len>0); 00118 num_labels=len; 00119 00120 delete[] labels; 00121 labels=CMath::clone_vector(p_labels, len); 00122 } 00123 00124 void CLabels::set_confidences(float64_t* in_confidences, int32_t in_num_labels, 00125 int32_t in_num_classes) 00126 { 00127 if (num_labels && (num_labels != in_num_labels)) 00128 { 00129 SG_ERROR("Shape of confidence matrix mismatch (number of " 00130 "labels = %d does not match %d\n", num_labels, in_num_labels); 00131 } 00132 00133 if (m_num_classes && (m_num_classes != in_num_classes)) 00134 { 00135 SG_ERROR("Shape of confidence matrix mismatch (number of " 00136 "num_classes = %d does not match %d\n", m_num_classes, in_num_classes); 00137 } 00138 00139 delete[] m_confidences; 00140 00141 num_labels=in_num_labels; 00142 m_num_classes=in_num_classes; 00143 m_confidences=in_confidences; 00144 m_confidence_classes = in_num_classes; 00145 m_confidence_labels = in_num_labels; 00146 find_labels(); 00147 } 00148 00149 float64_t* CLabels::get_confidences(int32_t& out_num_labels, int32_t& out_num_classes) 00150 { 00151 out_num_labels=num_labels; 00152 out_num_classes=m_num_classes; 00153 00154 if (!num_labels || !m_num_classes || !m_confidences) 00155 SG_ERROR("No labels / confidences set\n"); 00156 00157 float64_t* out_conf=new float64_t[num_labels*m_num_classes]; 00158 memcpy(out_conf, m_confidences, num_labels*m_num_classes*sizeof(float64_t)); 00159 return out_conf; 00160 } 00161 00162 void CLabels::get_confidences(float64_t** dst, int32_t* out_num_labels, int32_t* out_num_classes) 00163 { 00164 ASSERT(dst && out_num_labels && out_num_classes); 00165 00166 if (num_labels<=0 || m_num_classes<=0 || !m_confidences) 00167 SG_ERROR("No labels / confidences set\n"); 00168 00169 *dst=NULL; 00170 *out_num_labels=num_labels; 00171 *out_num_classes=m_num_classes; 00172 00173 float64_t* out_conf= (float64_t*) malloc((size_t) sizeof(float64_t)*num_labels*m_num_classes); 00174 memcpy(out_conf, m_confidences, num_labels*m_num_classes*sizeof(float64_t)); 00175 *dst=out_conf; 00176 } 00177 00178 float64_t* CLabels::get_sample_confidences(const int32_t& in_sample_index, 00179 int32_t& out_num_classes) 00180 { 00181 out_num_classes=m_num_classes; 00182 00183 if (!(in_sample_index>=0 && in_sample_index<num_labels && 00184 m_num_classes && m_confidences)) 00185 { 00186 SG_ERROR("No labels / confidences set\n"); 00187 } 00188 00189 float64_t* out_conf=new float64_t[m_num_classes]; 00190 for (int32_t n_class=0; n_class<m_num_classes; n_class++) 00191 { 00192 out_conf[n_class]=m_confidences[n_class+in_sample_index*m_num_classes]; 00193 } 00194 return out_conf; 00195 } 00196 00197 void CLabels::find_labels() 00198 { 00199 ASSERT(m_confidences); 00200 ASSERT(labels); 00201 00202 float64_t max_conf; 00203 int32_t index; 00204 for (int32_t n_samp=0; n_samp<num_labels; n_samp++) 00205 { 00206 max_conf=m_confidences[n_samp]; 00207 labels[n_samp]=0; 00208 for (int32_t n_class=1; n_class<m_num_classes; n_class++) 00209 { 00210 index=n_samp+n_class*m_num_classes; 00211 if (m_confidences[index]>max_conf) 00212 { 00213 max_conf=m_confidences[index]; 00214 labels[n_samp]=n_class; 00215 } 00216 } 00217 } 00218 } 00219 00220 bool CLabels::is_two_class_labeling() 00221 { 00222 ASSERT(labels); 00223 bool found_plus_one=false; 00224 bool found_minus_one=false; 00225 00226 for (int32_t i=0; i<num_labels; i++) 00227 { 00228 if (labels[i]==+1.0) 00229 found_plus_one=true; 00230 else if (labels[i]==-1.0) 00231 found_minus_one=true; 00232 else 00233 SG_ERROR("Not a two class labeling label[%d]=%f (only +1/-1 allowed)\n", i, labels[i]); 00234 } 00235 00236 if (!found_plus_one) 00237 SG_ERROR("Not a two class labeling - no positively labeled examples found\n"); 00238 if (!found_minus_one) 00239 SG_ERROR("Not a two class labeling - no negatively labeled examples found\n"); 00240 00241 return true; 00242 } 00243 00244 int32_t CLabels::get_num_classes() 00245 { 00246 int32_t n=-1; 00247 int32_t* lab=get_int_labels(n); 00248 00249 int32_t num_classes=0; 00250 for (int32_t i=0; i<n; i++) 00251 num_classes=CMath::max(num_classes,lab[i]); 00252 00253 delete[] lab; 00254 00255 return num_classes+1; 00256 } 00257 00258 float64_t* CLabels::get_labels(int32_t &len) 00259 { 00260 len=num_labels; 00261 00262 if (num_labels>0) 00263 { 00264 float64_t* _labels=new float64_t[num_labels] ; 00265 for (int32_t i=0; i<len; i++) 00266 _labels[i]=get_label(i) ; 00267 return _labels ; 00268 } 00269 else 00270 return NULL; 00271 } 00272 00273 void CLabels::get_labels(float64_t** p_labels, int32_t* len) 00274 { 00275 ASSERT(p_labels && len); 00276 *p_labels=NULL; 00277 *len=num_labels; 00278 00279 if (num_labels>0) 00280 { 00281 *p_labels=(float64_t*) malloc(sizeof(float64_t)*num_labels); 00282 00283 for (int32_t i=0; i<num_labels; i++) 00284 (*p_labels)[i]=get_label(i); 00285 } 00286 } 00287 00288 int32_t* CLabels::get_int_labels(int32_t &len) 00289 { 00290 len=num_labels; 00291 00292 if (num_labels>0) 00293 { 00294 int32_t* _labels=new int32_t[num_labels] ; 00295 for (int32_t i=0; i<len; i++) 00296 _labels[i]= (int32_t) get_label(i) ; 00297 return _labels ; 00298 } 00299 else 00300 return NULL; 00301 } 00302 00303 void CLabels::set_int_labels(int32_t * mylabels, int32_t len) 00304 { 00305 num_labels = len ; 00306 delete[] labels ; 00307 00308 labels = new float64_t[num_labels] ; 00309 for (int32_t i=0; i<num_labels; i++) 00310 set_int_label(i, mylabels[i]) ; 00311 } 00312 00313 void CLabels::load(CFile* loader) 00314 { 00315 SG_SET_LOCALE_C; 00316 delete[] labels; 00317 delete[] m_confidences; 00318 m_confidences = NULL; 00319 m_confidence_classes = 0; 00320 m_confidence_labels = 0; 00321 num_labels=0; 00322 ASSERT(loader); 00323 loader->get_real_vector(labels, num_labels); 00324 m_num_classes=get_num_classes(); 00325 SG_RESET_LOCALE; 00326 } 00327 00328 void CLabels::save(CFile* writer) 00329 { 00330 SG_SET_LOCALE_C; 00331 ASSERT(writer); 00332 ASSERT(labels && labels>0); 00333 writer->set_real_vector(labels, num_labels); 00334 SG_RESET_LOCALE; 00335 }