#include "classifier/svm/SVMSGD.h"
#include "lib/Signal.h"

using namespace shogun;

#define HINGELOSS 1
#define SMOOTHHINGELOSS 2
#define SQUAREDHINGELOSS 3
#define LOGLOSS 10
#define LOGLOSSMARGIN 11

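// Select the training loss: one of the constants defined above.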
#define LOSS HINGELOSS

#define REGULARIZEBIAS 0

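// Value of the selected loss at margin z = y*f(x).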
inline
float64_t loss(float64_t z)
{
#if LOSS == LOGLOSS
    if (z >= 0)
        return log(1+exp(-z));
    else
        return -z + log(1+exp(z));
#elif LOSS == LOGLOSSMARGIN
    if (z >= 1)
        return log(1+exp(1-z));
    else
        return 1-z + log(1+exp(z-1));
#elif LOSS == SMOOTHHINGELOSS
    if (z < 0)
        return 0.5 - z;
    if (z < 1)
        return 0.5 * (1-z) * (1-z);
    return 0;
#elif LOSS == SQUAREDHINGELOSS
    if (z < 1)
        return 0.5 * (1 - z) * (1 - z);
    return 0;
#elif LOSS == HINGELOSS
    if (z < 1)
        return 1 - z;
    return 0;
#else
# error "Undefined loss"
#endif
}

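// Negative derivative -loss'(z) of the selected loss; scales the gradient step.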
inline
float64_t dloss(float64_t z)
{
#if LOSS == LOGLOSS
    if (z < 0)
        return 1 / (exp(z) + 1);
    float64_t ez = exp(-z);
    return ez / (ez + 1);
#elif LOSS == LOGLOSSMARGIN
    if (z < 1)
        return 1 / (exp(z-1) + 1);
    float64_t ez = exp(1-z);
    return ez / (ez + 1);
#elif LOSS == SMOOTHHINGELOSS
    if (z < 0)
        return 1;
    if (z < 1)
        return 1-z;
    return 0;
#elif LOSS == SQUAREDHINGELOSS
    if (z < 1)
        return (1 - z);
    return 0;
#else
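    // HINGELOSS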
    if (z < 1)
        return 1;
    return 0;
#endif
}

CSVMSGD::CSVMSGD(float64_t C)
: CLinearClassifier(), t(1), C1(C), C2(C),
    wscale(1), bscale(1), epochs(5), skip(1000), count(1000), use_bias(true),
    use_regularized_bias(false)
{
}

CSVMSGD::CSVMSGD(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearClassifier(), t(1), C1(C), C2(C), wscale(1), bscale(1),
    epochs(5), skip(1000), count(1000), use_bias(true),
    use_regularized_bias(false)
{
    w=NULL;
    set_features(traindat);
    set_labels(trainlab);
}

CSVMSGD::~CSVMSGD()
{
    delete[] w;
    w=NULL;
}

bool CSVMSGD::train(CFeatures* data)
{
    ASSERT(labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");
        set_features((CDotFeatures*) data);
    }

    ASSERT(features);
    ASSERT(labels->is_two_class_labeling());

    int32_t num_train_labels=labels->get_num_labels();
    w_dim=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    ASSERT(num_vec==num_train_labels);
    ASSERT(num_vec>0);

    delete[] w;
    w=new float64_t[w_dim];
    memset(w, 0, w_dim*sizeof(float64_t));
    bias=0;

    float64_t lambda= 1.0/(C1*num_vec);

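    // Shift t so that the first learning rate eta = 1/(lambda*t) equals a
    // reasonable eta0; this heuristic assumes feature norms on the order of 1.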
    float64_t maxw = 1.0 / sqrt(lambda);
    float64_t typw = sqrt(maxw);
    float64_t eta0 = typw / CMath::max(1.0,dloss(-typw));
    t = 1 / (eta0 * lambda);

    SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);

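    // Estimate sparsity, bscale and the regularization skip interval.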
    calibrate();

    SG_INFO("Training on %d vectors\n", num_vec);
    CSignal::clear_cancel();

    for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
    {
        count = skip;
        for (int32_t i=0; i<num_vec; i++)
        {
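            // Decreasing learning rate: eta_t = 1/(lambda*t).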
            float64_t eta = 1.0 / (lambda * t);
            float64_t y = labels->get_label(i);
            float64_t z = y * (features->dense_dot(i, w, w_dim) + bias);

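            // For the hinge-type losses (LOSS < LOGLOSS) the derivative
            // vanishes for z >= 1, so the update can be skipped entirely.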
#if LOSS < LOGLOSS
            if (z < 1)
#endif
            {
                float64_t etd = eta * dloss(z);
                features->add_to_dense_vec(etd * y / wscale, i, w, w_dim);

                if (use_bias)
                {
                    if (use_regularized_bias)
                        bias *= 1 - eta * lambda * bscale;
                    bias += etd * y * bscale;
                }
            }

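            // Apply the l2 shrinkage only every 'skip' iterations; fall back
            // to the exact power form when the linear approximation degrades.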
            if (--count <= 0)
            {
                float64_t r = 1 - eta * lambda * skip;
                if (r < 0.8)
                    r = pow(1 - eta * lambda, skip);
                CMath::scale_vector(r, w, w_dim);
                count = skip;
            }
            t++;
        }
    }

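    // Report the final weight norm and bias.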
    float64_t wnorm = sqrt(CMath::dot(w,w, w_dim));
    SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias);

    return true;
}

void CSVMSGD::calibrate()
{
    ASSERT(features);
    int32_t num_vec=features->get_num_vectors();
    int32_t c_dim=features->get_dim_feature_space();

    ASSERT(num_vec>0);
    ASSERT(c_dim>0);

    float64_t* c=new float64_t[c_dim];
    memset(c, 0, c_dim*sizeof(float64_t));

    SG_INFO("Estimating sparsity and bscale num_vec=%d num_feat=%d.\n", num_vec, c_dim);

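    // n: examples scanned, m: largest accumulated absolute feature value,
    // r: total number of non-zero features seen.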
    int32_t n = 0;
    float64_t m = 0;
    float64_t r = 0;

    for (int32_t j=0; j<num_vec && m<=1000; j++, n++)
    {
        r += features->get_nnz_features_for_vector(j);
        features->add_to_dense_vec(1, j, c, c_dim, true);

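        // Track the maximum accumulated value; only the dimensions touched
        // by vector j can change, but rescanning keeps the code simple.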
        m=CMath::max(c, c_dim);
    }

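    // Bias update scale: the largest accumulated absolute feature value,
    // averaged over the scanned examples.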
    bscale = m/n;

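    // Iterations between dense regularization updates, chosen from the
    // average sparsity so shrinkage stays a small fraction of the total cost.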
    skip = (int32_t) ((16 * n * c_dim) / r);
    SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale);

    delete[] c;
}