SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Gunnar Raetsch 00008 * Written (W) 2009 Soeren Sonnnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/common.h" 00013 #include "lib/Mathematics.h" 00014 #include "kernel/AUCKernel.h" 00015 #include "features/SimpleFeatures.h" 00016 #include "lib/io.h" 00017 00018 using namespace shogun; 00019 00020 void 00021 CAUCKernel::init(void) 00022 { 00023 m_parameters->add((CSGObject**) &subkernel, "subkernel", 00024 "The subkernel."); 00025 } 00026 00027 CAUCKernel::CAUCKernel(void) 00028 : CDotKernel(0), subkernel(NULL) 00029 { 00030 init(); 00031 } 00032 00033 CAUCKernel::CAUCKernel(int32_t size, CKernel* s) 00034 : CDotKernel(size), subkernel(s) 00035 { 00036 init(); 00037 SG_REF(subkernel); 00038 } 00039 00040 CAUCKernel::~CAUCKernel() 00041 { 00042 SG_UNREF(subkernel); 00043 cleanup(); 00044 } 00045 00046 CLabels* CAUCKernel::setup_auc_maximization(CLabels* labels) 00047 { 00048 SG_INFO( "setting up AUC maximization\n") ; 00049 ASSERT(labels); 00050 ASSERT(labels->is_two_class_labeling()); 00051 00052 // get the original labels 00053 int32_t num=0; 00054 ASSERT(labels); 00055 int32_t* int_labels=labels->get_int_labels(num); 00056 ASSERT(subkernel->get_num_vec_rhs()==num); 00057 00058 // count positive and negative 00059 int32_t num_pos=0; 00060 int32_t num_neg=0; 00061 00062 for (int32_t i=0; i<num; i++) 00063 { 00064 if (int_labels[i]==1) 00065 num_pos++; 00066 else 00067 num_neg++; 00068 } 00069 00070 // create AUC features and labels (alternate labels) 00071 int32_t num_auc = num_pos*num_neg; 00072 SG_INFO("num_pos: %i num_neg: %i num_auc: %i\n", num_pos, num_neg, num_auc); 00073 00074 uint16_t* features_auc = new uint16_t[num_auc*2]; 00075 int32_t* labels_auc = new int32_t[num_auc]; 00076 int32_t n=0 ; 00077 00078 for (int32_t i=0; i<num; i++) 00079 { 00080 if (int_labels[i]!=1) 00081 continue; 00082 00083 for (int32_t j=0; j<num; j++) 00084 { 00085 if (int_labels[j]!=-1) 00086 continue; 00087 00088 // create about as many positively as negatively labeled examples 00089 if (n%2==0) 00090 { 00091 features_auc[n*2]=i; 00092 features_auc[n*2+1]=j; 00093 labels_auc[n]=1; 00094 } 00095 else 00096 { 00097 features_auc[n*2]=j; 00098 features_auc[n*2+1]=i; 00099 labels_auc[n]=-1; 00100 } 00101 00102 n++; 00103 ASSERT(n<=num_auc); 00104 } 00105 } 00106 00107 // create label object and attach it to svm 00108 CLabels* lab_auc = new CLabels(num_auc); 00109 lab_auc->set_int_labels(labels_auc, num_auc); 00110 SG_REF(lab_auc); 00111 00112 // create feature object 00113 CSimpleFeatures<uint16_t>* f = new CSimpleFeatures<uint16_t>(0); 00114 f->set_feature_matrix(features_auc, 2, num_auc); 00115 00116 // create AUC kernel and attach the features 00117 init(f,f); 00118 00119 delete[] int_labels; 00120 delete[] labels_auc; 00121 00122 return lab_auc; 00123 } 00124 00125 00126 bool CAUCKernel::init(CFeatures* l, CFeatures* r) 00127 { 00128 CDotKernel::init(l, r); 00129 init_normalizer(); 00130 return true; 00131 } 00132 00133 float64_t CAUCKernel::compute(int32_t idx_a, int32_t idx_b) 00134 { 00135 int32_t alen, blen; 00136 bool afree, bfree; 00137 00138 uint16_t* avec=((CSimpleFeatures<uint16_t>*) lhs)->get_feature_vector(idx_a, alen, afree); 00139 uint16_t* bvec=((CSimpleFeatures<uint16_t>*) rhs)->get_feature_vector(idx_b, blen, bfree); 00140 00141 ASSERT(alen==2); 00142 ASSERT(blen==2); 00143 00144 ASSERT(subkernel && subkernel->has_features()); 00145 00146 float64_t k11,k12,k21,k22; 00147 int32_t idx_a1=avec[0], idx_a2=avec[1], idx_b1=bvec[0], idx_b2=bvec[1]; 00148 00149 k11 = subkernel->kernel(idx_a1,idx_b1); 00150 k12 = subkernel->kernel(idx_a1,idx_b2); 00151 k21 = subkernel->kernel(idx_a2,idx_b1); 00152 k22 = subkernel->kernel(idx_a2,idx_b2); 00153 00154 float64_t result = k11+k22-k21-k12; 00155 00156 ((CSimpleFeatures<uint16_t>*) lhs)->free_feature_vector(avec, idx_a, afree); 00157 ((CSimpleFeatures<uint16_t>*) rhs)->free_feature_vector(bvec, idx_b, bfree); 00158 00159 return result; 00160 }