// SHOGUN v0.9.0
// -*- C++ -*-
// Main functions of the LaRank algorithm for solving Multiclass SVM
// Copyright (C) 2008- Antoine Bordes
// Shogun specific adjustments (w) 2009 Soeren Sonnenburg

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
//
/***********************************************************************
 *
 * LUSH Lisp Universal Shell
 * Copyright (C) 2002 Leon Bottou, Yann Le Cun, AT&T Corp, NECI.
 * Includes parts of TL3:
 * Copyright (C) 1987-1999 Leon Bottou and Neuristique.
 * Includes selected parts of SN3.2:
 * Copyright (C) 1991-2001 AT&T Corp.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
00038 * 00039 * You should have received a copy of the GNU General Public License 00040 * aint64_t with this program; if not, write to the Free Software 00041 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA 00042 * 00043 ***********************************************************************/ 00044 00045 /*********************************************************************** 00046 * $Id: kcache.c,v 1.9 2007/01/25 22:42:09 leonb Exp $ 00047 **********************************************************************/ 00048 00049 #include <vector> 00050 #include <algorithm> 00051 00052 #include "lib/io.h" 00053 #include "lib/Signal.h" 00054 #include "lib/Time.h" 00055 #include "lib/Mathematics.h" 00056 #include "classifier/svm/LaRank.h" 00057 #include "kernel/Kernel.h" 00058 00059 using namespace shogun; 00060 00061 namespace shogun 00062 { 00063 static void * xmalloc (int32_t n) 00064 { 00065 void *p = malloc (n); 00066 if (!p) 00067 SG_SERROR ("Function malloc() has returned zero\n"); 00068 return p; 00069 } 00070 00071 static void * xrealloc (void *ptr, int32_t n) 00072 { 00073 if (!ptr) 00074 ptr = malloc (n); 00075 else 00076 ptr = realloc (ptr, n); 00077 if (!ptr) 00078 SG_SERROR ("Function realloc has returned zero\n"); 00079 return ptr; 00080 } 00081 00082 static larank_kcache_t* larank_kcache_create (CKernel* kernelfunc) 00083 { 00084 larank_kcache_t *self; 00085 self = (larank_kcache_t *) xmalloc (sizeof (larank_kcache_t)); 00086 memset (self, 0, sizeof (larank_kcache_t)); 00087 self->l = 0; 00088 self->maxrowlen = 0; 00089 self->func = kernelfunc; 00090 self->prevbuddy = self; 00091 self->nextbuddy = self; 00092 self->cursize = sizeof (larank_kcache_t); 00093 self->maxsize = 256 * 1024 * 1024; 00094 self->qprev = (int32_t *) xmalloc (sizeof (int32_t)); 00095 self->qnext = (int32_t *) xmalloc (sizeof (int32_t)); 00096 self->rnext = self->qnext + 1; 00097 self->rprev = self->qprev + 1; 00098 self->rprev[-1] = -1; 00099 self->rnext[-1] = -1; 
00100 return self; 00101 } 00102 00103 static void xtruncate (larank_kcache_t * self, int32_t k, int32_t nlen) 00104 { 00105 int32_t olen = self->rsize[k]; 00106 if (nlen < olen) 00107 { 00108 float32_t *ndata; 00109 float32_t *odata = self->rdata[k]; 00110 if (nlen > 0) 00111 { 00112 ndata = (float32_t *) xmalloc (nlen * sizeof (float32_t)); 00113 memcpy (ndata, odata, nlen * sizeof (float32_t)); 00114 } 00115 else 00116 { 00117 ndata = 0; 00118 self->rnext[self->rprev[k]] = self->rnext[k]; 00119 self->rprev[self->rnext[k]] = self->rprev[k]; 00120 self->rnext[k] = self->rprev[k] = k; 00121 } 00122 free (odata); 00123 self->rdata[k] = ndata; 00124 self->rsize[k] = nlen; 00125 self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t); 00126 } 00127 } 00128 00129 static void xpurge (larank_kcache_t * self) 00130 { 00131 if (self->cursize > self->maxsize) 00132 { 00133 int32_t k = self->rprev[-1]; 00134 while (self->cursize > self->maxsize && k != self->rnext[-1]) 00135 { 00136 int32_t pk = self->rprev[k]; 00137 xtruncate (self, k, 0); 00138 k = pk; 00139 } 00140 } 00141 } 00142 00143 static void larank_kcache_set_maximum_size (larank_kcache_t * self, int64_t entries) 00144 { 00145 ASSERT (self); 00146 ASSERT (entries > 0); 00147 self->maxsize = entries; 00148 xpurge (self); 00149 } 00150 00151 static void larank_kcache_destroy (larank_kcache_t * self) 00152 { 00153 if (self) 00154 { 00155 int32_t i; 00156 larank_kcache_t *nb = self->nextbuddy; 00157 larank_kcache_t *pb = self->prevbuddy; 00158 pb->nextbuddy = nb; 00159 nb->prevbuddy = pb; 00160 /* delete */ 00161 if (self->i2r) 00162 free (self->i2r); 00163 if (self->r2i) 00164 free (self->r2i); 00165 if (self->rdata) 00166 for (i = 0; i < self->l; i++) 00167 if (self->rdata[i]) 00168 free (self->rdata[i]); 00169 if (self->rdata) 00170 free (self->rdata); 00171 if (self->rsize) 00172 free (self->rsize); 00173 if (self->rdiag) 00174 free (self->rdiag); 00175 if (self->qnext) 00176 free (self->qnext); 00177 if 
(self->qprev) 00178 free (self->qprev); 00179 memset (self, 0, sizeof (larank_kcache_t)); 00180 free (self); 00181 } 00182 } 00183 00184 static void xminsize (larank_kcache_t * self, int32_t n) 00185 { 00186 int32_t ol = self->l; 00187 if (n > ol) 00188 { 00189 int32_t i; 00190 int32_t nl = CMath::max (256, ol); 00191 while (nl < n) 00192 nl = nl + nl; 00193 self->i2r = (int32_t *) xrealloc (self->i2r, nl * sizeof (int32_t)); 00194 self->r2i = (int32_t *) xrealloc (self->r2i, nl * sizeof (int32_t)); 00195 self->rsize = (int32_t *) xrealloc (self->rsize, nl * sizeof (int32_t)); 00196 self->qnext = (int32_t *) xrealloc (self->qnext, (1 + nl) * sizeof (int32_t)); 00197 self->qprev = (int32_t *) xrealloc (self->qprev, (1 + nl) * sizeof (int32_t)); 00198 self->rdiag = (float32_t *) xrealloc (self->rdiag, nl * sizeof (float32_t)); 00199 self->rdata = (float32_t **) xrealloc (self->rdata, nl * sizeof (float32_t *)); 00200 self->rnext = self->qnext + 1; 00201 self->rprev = self->qprev + 1; 00202 for (i = ol; i < nl; i++) 00203 { 00204 self->i2r[i] = i; 00205 self->r2i[i] = i; 00206 self->rsize[i] = -1; 00207 self->rnext[i] = i; 00208 self->rprev[i] = i; 00209 self->rdata[i] = 0; 00210 } 00211 self->l = nl; 00212 } 00213 } 00214 00215 static int32_t* larank_kcache_r2i (larank_kcache_t * self, int32_t n) 00216 { 00217 xminsize (self, n); 00218 return self->r2i; 00219 } 00220 00221 static void xextend (larank_kcache_t * self, int32_t k, int32_t nlen) 00222 { 00223 int32_t olen = self->rsize[k]; 00224 if (nlen > olen) 00225 { 00226 float32_t *ndata = (float32_t *) xmalloc (nlen * sizeof (float32_t)); 00227 if (olen > 0) 00228 { 00229 float32_t *odata = self->rdata[k]; 00230 memcpy (ndata, odata, olen * sizeof (float32_t)); 00231 free (odata); 00232 } 00233 self->rdata[k] = ndata; 00234 self->rsize[k] = nlen; 00235 self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t); 00236 self->maxrowlen = CMath::max (self->maxrowlen, nlen); 00237 } 00238 } 00239 00240 static void 
xswap (larank_kcache_t * self, int32_t i1, int32_t i2, int32_t r1, int32_t r2) 00241 { 00242 /* swap row data */ 00243 if (r1 < self->maxrowlen || r2 < self->maxrowlen) 00244 { 00245 int32_t mrl = 0; 00246 int32_t k = self->rnext[-1]; 00247 while (k >= 0) 00248 { 00249 int32_t nk = self->rnext[k]; 00250 int32_t n = self->rsize[k]; 00251 int32_t rr = self->i2r[k]; 00252 float32_t *d = self->rdata[k]; 00253 if (r1 < n) 00254 { 00255 if (r2 < n) 00256 { 00257 float32_t t1 = d[r1]; 00258 float32_t t2 = d[r2]; 00259 d[r1] = t2; 00260 d[r2] = t1; 00261 } 00262 else if (rr == r2) 00263 { 00264 d[r1] = self->rdiag[k]; 00265 } 00266 else 00267 { 00268 int32_t arsize = self->rsize[i2]; 00269 if (rr < arsize && rr != r1) 00270 d[r1] = self->rdata[i2][rr]; 00271 else 00272 xtruncate (self, k, r1); 00273 } 00274 } 00275 else if (r2 < n) 00276 { 00277 if (rr == r1) 00278 { 00279 d[r2] = self->rdiag[k]; 00280 } 00281 else 00282 { 00283 int32_t arsize = self->rsize[i1]; 00284 if (rr < arsize && rr != r2) 00285 d[r2] = self->rdata[i1][rr]; 00286 else 00287 xtruncate (self, k, r2); 00288 } 00289 } 00290 mrl = CMath::max (mrl, self->rsize[k]); 00291 k = nk; 00292 } 00293 self->maxrowlen = mrl; 00294 } 00295 /* swap r2i and i2r */ 00296 self->r2i[r1] = i2; 00297 self->r2i[r2] = i1; 00298 self->i2r[i1] = r2; 00299 self->i2r[i2] = r1; 00300 } 00301 00302 static void larank_kcache_swap_rr (larank_kcache_t * self, int32_t r1, int32_t r2) 00303 { 00304 xminsize (self, 1 + CMath::max(r1, r2)); 00305 xswap (self, self->r2i[r1], self->r2i[r2], r1, r2); 00306 } 00307 00308 static void larank_kcache_swap_ri (larank_kcache_t * self, int32_t r1, int32_t i2) 00309 { 00310 xminsize (self, 1 + CMath::max (r1, i2)); 00311 xswap (self, self->r2i[r1], i2, r1, self->i2r[i2]); 00312 } 00313 00314 static float64_t xquery (larank_kcache_t * self, int32_t i, int32_t j) 00315 { 00316 /* search buddies */ 00317 larank_kcache_t *cache = self->nextbuddy; 00318 do 00319 { 00320 int32_t l = cache->l; 00321 if (i 
< l && j < l) 00322 { 00323 int32_t s = cache->rsize[i]; 00324 int32_t p = cache->i2r[j]; 00325 if (p < s) 00326 return cache->rdata[i][p]; 00327 if (i == j && s >= 0) 00328 return cache->rdiag[i]; 00329 p = cache->i2r[i]; 00330 s = cache->rsize[j]; 00331 if (p < s) 00332 return cache->rdata[j][p]; 00333 } 00334 cache = cache->nextbuddy; 00335 } 00336 while (cache != self); 00337 /* compute */ 00338 return self->func->kernel(i, j); 00339 } 00340 00341 00342 static float64_t larank_kcache_query (larank_kcache_t * self, int32_t i, int32_t j) 00343 { 00344 ASSERT (self); 00345 ASSERT (i >= 0); 00346 ASSERT (j >= 0); 00347 return xquery (self, i, j); 00348 } 00349 00350 00351 static void larank_kcache_set_buddy (larank_kcache_t * self, larank_kcache_t * buddy) 00352 { 00353 larank_kcache_t *p = self; 00354 larank_kcache_t *selflast = self->prevbuddy; 00355 larank_kcache_t *buddylast = buddy->prevbuddy; 00356 /* check functions are identical */ 00357 ASSERT (self->func == buddy->func); 00358 /* make sure we are not already buddies */ 00359 do 00360 { 00361 if (p == buddy) 00362 return; 00363 p = p->nextbuddy; 00364 } 00365 while (p != self); 00366 /* link */ 00367 selflast->nextbuddy = buddy; 00368 buddy->prevbuddy = selflast; 00369 buddylast->nextbuddy = self; 00370 self->prevbuddy = buddylast; 00371 } 00372 00373 00374 static float32_t* larank_kcache_query_row (larank_kcache_t * self, int32_t i, int32_t len) 00375 { 00376 ASSERT (i >= 0); 00377 if (i < self->l && len <= self->rsize[i]) 00378 { 00379 self->rnext[self->rprev[i]] = self->rnext[i]; 00380 self->rprev[self->rnext[i]] = self->rprev[i]; 00381 } 00382 else 00383 { 00384 int32_t olen, p; 00385 float32_t *d; 00386 if (i >= self->l || len >= self->l) 00387 xminsize (self, CMath::max (1 + i, len)); 00388 olen = self->rsize[i]; 00389 if (olen < len) 00390 { 00391 if (olen < 0) 00392 { 00393 self->rdiag[i] = self->func->kernel(i, i); 00394 olen = self->rsize[i] = 0; 00395 } 00396 xextend (self, i, len); 00397 d = 
self->rdata[i]; 00398 self->rsize[i] = olen; 00399 for (p = olen; p < len; p++) 00400 d[p] = larank_kcache_query (self, self->r2i[p], i); 00401 self->rsize[i] = len; 00402 } 00403 self->rnext[self->rprev[i]] = self->rnext[i]; 00404 self->rprev[self->rnext[i]] = self->rprev[i]; 00405 xpurge (self); 00406 } 00407 self->rprev[i] = -1; 00408 self->rnext[i] = self->rnext[-1]; 00409 self->rnext[self->rprev[i]] = i; 00410 self->rprev[self->rnext[i]] = i; 00411 return self->rdata[i]; 00412 } 00413 00414 } 00415 00416 00417 // Initializing an output class (basically creating a kernel cache for it) 00418 void LaRankOutput::initialize (CKernel* kfunc, int64_t cache) 00419 { 00420 kernel = larank_kcache_create (kfunc); 00421 larank_kcache_set_maximum_size (kernel, cache * 1024 * 1024); 00422 beta = new float32_t[1]; 00423 g = new float32_t[1]; 00424 *beta=0; 00425 *g=0; 00426 l = 0; 00427 } 00428 00429 // Destroying an output class (basically destroying the kernel cache) 00430 void LaRankOutput::destroy () 00431 { 00432 larank_kcache_destroy (kernel); 00433 kernel=NULL; 00434 delete[] beta; 00435 delete[] g; 00436 beta=NULL; 00437 g=NULL; 00438 } 00439 00440 // !Important! Computing the score of a given input vector for the actual output 00441 float64_t LaRankOutput::computeScore (int32_t x_id) 00442 { 00443 if (l == 0) 00444 return 0; 00445 else 00446 { 00447 float32_t *row = larank_kcache_query_row (kernel, x_id, l); 00448 return CMath::dot (beta, row, l); 00449 } 00450 } 00451 00452 // !Important! Computing the gradient of a given input vector for the actual output 00453 float64_t LaRankOutput::computeGradient (int32_t xi_id, int32_t yi, int32_t ythis) 00454 { 00455 return (yi == ythis ? 
1 : 0) - computeScore (xi_id); 00456 } 00457 00458 // Updating the solution in the actual output 00459 void LaRankOutput::update (int32_t x_id, float64_t lambda, float64_t gp) 00460 { 00461 int32_t *r2i = larank_kcache_r2i (kernel, l); 00462 int64_t xr = l + 1; 00463 for (int32_t r = 0; r < l; r++) 00464 if (r2i[r] == x_id) 00465 { 00466 xr = r; 00467 break; 00468 } 00469 00470 // updates the cache order and the beta coefficient 00471 if (xr < l) 00472 { 00473 beta[xr]+=lambda; 00474 } 00475 else 00476 { 00477 larank_kcache_swap_ri (kernel, l, x_id); 00478 CMath::resize(g, l, l+1); 00479 CMath::resize(beta, l, l+1); 00480 g[l]=gp; 00481 beta[l]=lambda; 00482 l++; 00483 } 00484 00485 // update stored gradients 00486 float32_t *row = larank_kcache_query_row (kernel, x_id, l); 00487 for (int32_t r = 0; r < l; r++) 00488 { 00489 float64_t oldg = g[r]; 00490 g[r]=oldg - lambda * row[r]; 00491 } 00492 } 00493 00494 // Linking the cahe of this output to the cache of an other "buddy" output 00495 // so that if a requested value is not found in this cache, you can ask your buddy if it has it. 
00496 void LaRankOutput::set_kernel_buddy (larank_kcache_t * bud) 00497 { 00498 larank_kcache_set_buddy (bud, kernel); 00499 } 00500 00501 // Removing useless support vectors (for which beta=0) 00502 int32_t LaRankOutput::cleanup () 00503 { 00504 int32_t count = 0; 00505 std::vector < int32_t >idx; 00506 for (int32_t x = 0; x < l; x++) 00507 { 00508 if ((beta[x] < FLT_EPSILON) && (beta[x] > -FLT_EPSILON)) 00509 { 00510 idx.push_back (x); 00511 count++; 00512 } 00513 } 00514 int32_t new_l = l - count; 00515 for (int32_t xx = 0; xx < count; xx++) 00516 { 00517 int32_t i = idx[xx] - xx; 00518 for (int32_t r = i; r < (l - 1); r++) 00519 { 00520 larank_kcache_swap_rr (kernel, r, int64_t(r) + 1); 00521 beta[r]=beta[r + 1]; 00522 g[r]=g[r + 1]; 00523 } 00524 } 00525 CMath::resize(beta, l, new_l+1); 00526 CMath::resize(g, l, new_l+1); 00527 beta[new_l]=0; 00528 g[new_l]=0; 00529 l = new_l; 00530 return count; 00531 } 00532 00533 // --- Below are information or "get" functions --- // 00534 // 00535 float64_t LaRankOutput::getW2 () 00536 { 00537 float64_t sum = 0; 00538 int32_t *r2i = larank_kcache_r2i (kernel, l + 1); 00539 for (int32_t r = 0; r < l; r++) 00540 { 00541 float32_t *row_r = larank_kcache_query_row (kernel, r2i[r], l); 00542 sum += beta[r] * CMath::dot (beta, row_r, l); 00543 } 00544 return sum; 00545 } 00546 00547 float64_t LaRankOutput::getKii (int32_t x_id) 00548 { 00549 return larank_kcache_query (kernel, x_id, x_id); 00550 } 00551 00552 // 00553 float64_t LaRankOutput::getBeta (int32_t x_id) 00554 { 00555 int32_t *r2i = larank_kcache_r2i (kernel, l); 00556 int32_t xr = -1; 00557 for (int32_t r = 0; r < l; r++) 00558 if (r2i[r] == x_id) 00559 { 00560 xr = r; 00561 break; 00562 } 00563 return (xr < 0 ? 
0 : beta[xr]); 00564 } 00565 00566 // 00567 float64_t LaRankOutput::getGradient (int32_t x_id) 00568 { 00569 int32_t *r2i = larank_kcache_r2i (kernel, l); 00570 int32_t xr = -1; 00571 for (int32_t r = 0; r < l; r++) 00572 if (r2i[r] == x_id) 00573 { 00574 xr = r; 00575 break; 00576 } 00577 return (xr < 0 ? 0 : g[xr]); 00578 } 00579 bool LaRankOutput::isSupportVector (int32_t x_id) const 00580 { 00581 int32_t *r2i = larank_kcache_r2i (kernel, l); 00582 int32_t xr = -1; 00583 for (int32_t r = 0; r < l; r++) 00584 if (r2i[r] == x_id) 00585 { 00586 xr = r; 00587 break; 00588 } 00589 return (xr >= 0); 00590 } 00591 00592 // 00593 int32_t LaRankOutput::getSV (float32_t* &sv) const 00594 { 00595 sv=new float32_t[l]; 00596 int32_t *r2i = larank_kcache_r2i (kernel, l); 00597 for (int32_t r = 0; r < l; r++) 00598 sv[r]=r2i[r]; 00599 return l; 00600 } 00601 00602 CLaRank::CLaRank (): CMultiClassSVM(ONE_VS_REST), 00603 nb_seen_examples (0), nb_removed (0), 00604 n_pro (0), n_rep (0), n_opt (0), 00605 w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0), 00606 batch_mode(true), step(0) 00607 { 00608 } 00609 00610 CLaRank::CLaRank (float64_t C, CKernel* k, CLabels* lab): 00611 CMultiClassSVM(ONE_VS_REST, C, k, lab), 00612 nb_seen_examples (0), nb_removed (0), 00613 n_pro (0), n_rep (0), n_opt (0), 00614 w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0), 00615 batch_mode(true), step(0) 00616 { 00617 } 00618 00619 CLaRank::~CLaRank () 00620 { 00621 destroy(); 00622 } 00623 00624 bool CLaRank::train(CFeatures* data) 00625 { 00626 tau = 0.0001; 00627 00628 ASSERT(kernel); 00629 ASSERT(labels && labels->get_num_labels()); 00630 00631 CSignal::clear_cancel(); 00632 00633 if (data) 00634 { 00635 if (data->get_num_vectors() != labels->get_num_labels()) 00636 { 00637 SG_ERROR("Numbert of vectors (%d) does not match number of labels (%d)\n", 00638 data->get_num_vectors(), labels->get_num_labels()); 00639 } 00640 kernel->init(data, data); 00641 } 00642 00643 ASSERT(kernel->get_num_vec_lhs() 
&& kernel->get_num_vec_rhs()); 00644 00645 nb_train=labels->get_num_labels(); 00646 cache = kernel->get_cache_size(); 00647 00648 int32_t n_it = 1; 00649 float64_t gap = DBL_MAX; 00650 00651 SG_INFO("Training on %d examples\n", nb_train); 00652 while (gap > C1 && (!CSignal::cancel_computations())) // stopping criteria 00653 { 00654 float64_t tr_err = 0; 00655 int32_t ind = step; 00656 for (int32_t i = 0; i < nb_train; i++) 00657 { 00658 int32_t y=labels->get_label(i); 00659 if (add (i, y) != y) // call the add function 00660 tr_err++; 00661 00662 if (ind && i / ind) 00663 { 00664 SG_DEBUG("Done: %d %% Train error (online): %f%%\n", 00665 (int32_t) (((float64_t) i) / nb_train * 100), (tr_err / ((float64_t) i + 1)) * 100); 00666 ind += step; 00667 } 00668 } 00669 00670 SG_DEBUG("End of iteration %d\n", n_it++); 00671 SG_DEBUG("Train error (online): %f%%\n", (tr_err / nb_train) * 100); 00672 gap = computeGap (); 00673 SG_ABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(DBL_MAX), -CMath::log10(C1), 6); 00674 00675 if (!batch_mode) // skip stopping criteria if online mode 00676 gap = 0; 00677 } 00678 SG_DONE(); 00679 00680 int32_t num_classes = outputs.size(); 00681 create_multiclass_svm(num_classes); 00682 SG_DEBUG("%d classes\n", num_classes); 00683 00684 // Used for saving a model file 00685 int32_t i=0; 00686 for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it) 00687 { 00688 const LaRankOutput* o=&(it->second); 00689 00690 larank_kcache_t* k=o->getKernel(); 00691 int32_t l=o->get_l(); 00692 float32_t* beta=o->getBetas(); 00693 int32_t *r2i = larank_kcache_r2i (k, l); 00694 00695 ASSERT(l>0); 00696 SG_DEBUG("svm[%d] has %d sv, b=%f\n", i, l, 0.0); 00697 00698 CSVM* svm=new CSVM(l); 00699 00700 for (int32_t j=0; j<l; j++) 00701 { 00702 svm->set_alpha(j, beta[j]); 00703 svm->set_support_vector(j, r2i[j]); 00704 } 00705 00706 svm->set_bias(0); 00707 set_svm(i, svm); 00708 i++; 00709 } 00710 destroy(); 00711 00712 return true; 00713 } 
00714 00715 // LEARNING FUNCTION: add new patterns and run optimization steps selected with adaptative schedule 00716 int32_t CLaRank::add (int32_t x_id, int32_t yi) 00717 { 00718 ++nb_seen_examples; 00719 // create a new output object if this one has never been seen before 00720 if (!getOutput (yi)) 00721 { 00722 outputs.insert (std::make_pair (yi, LaRankOutput ())); 00723 LaRankOutput *cur = getOutput (yi); 00724 cur->initialize (kernel, cache); 00725 if (outputs.size () == 1) 00726 y0 = outputs.begin ()->first; 00727 // link the cache of this new output to a buddy 00728 if (outputs.size () > 1) 00729 { 00730 LaRankOutput *out0 = getOutput (y0); 00731 cur->set_kernel_buddy (out0->getKernel ()); 00732 } 00733 } 00734 00735 LaRankPattern tpattern (x_id, yi); 00736 LaRankPattern & pattern = (patterns.isPattern (x_id)) ? patterns.getPattern (x_id) : tpattern; 00737 00738 // ProcessNew with the "fresh" pattern 00739 float64_t time1 = CTime::get_curtime(); 00740 process_return_t pro_ret = process (pattern, processNew); 00741 float64_t dual_increase = pro_ret.dual_increase; 00742 float64_t duration = (CTime::get_curtime() - time1); 00743 float64_t coeff = dual_increase / (0.00001 + duration); 00744 dual += dual_increase; 00745 n_pro++; 00746 w_pro = 0.05 * coeff + (1 - 0.05) * w_pro; 00747 00748 // ProcessOld & Optimize until ready for a new processnew 00749 // (Adaptative schedule here) 00750 for (;;) 00751 { 00752 float64_t w_sum = w_pro + w_rep + w_opt; 00753 float64_t prop_min = w_sum / 20; 00754 if (w_pro < prop_min) 00755 w_pro = prop_min; 00756 if (w_rep < prop_min) 00757 w_rep = prop_min; 00758 if (w_opt < prop_min) 00759 w_opt = prop_min; 00760 w_sum = w_pro + w_rep + w_opt; 00761 float64_t r = CMath::random(0.0, w_sum); 00762 if (r <= w_pro) 00763 { 00764 break; 00765 } 00766 else if ((r > w_pro) && (r <= w_pro + w_rep)) // ProcessOld here 00767 { 00768 float64_t ltime1 = CTime::get_curtime (); 00769 float64_t ldual_increase = reprocess (); 00770 float64_t 
lduration = (CTime::get_curtime () - ltime1); 00771 float64_t lcoeff = ldual_increase / (0.00001 + lduration); 00772 dual += ldual_increase; 00773 n_rep++; 00774 w_rep = 0.05 * lcoeff + (1 - 0.05) * w_rep; 00775 } 00776 else // Optimize here 00777 { 00778 float64_t ltime1 = CTime::get_curtime (); 00779 float64_t ldual_increase = optimize (); 00780 float64_t lduration = (CTime::get_curtime () - ltime1); 00781 float64_t lcoeff = ldual_increase / (0.00001 + lduration); 00782 dual += ldual_increase; 00783 n_opt++; 00784 w_opt = 0.05 * lcoeff + (1 - 0.05) * w_opt; 00785 } 00786 } 00787 if (nb_seen_examples % 100 == 0) // Cleanup useless Support Vectors/Patterns sometimes 00788 nb_removed += cleanup (); 00789 return pro_ret.ypred; 00790 } 00791 00792 // PREDICTION FUNCTION: main function in la_rank_classify 00793 int32_t CLaRank::predict (int32_t x_id) 00794 { 00795 int32_t res = -1; 00796 float64_t score_max = -DBL_MAX; 00797 for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it) 00798 { 00799 float64_t score = it->second.computeScore (x_id); 00800 if (score > score_max) 00801 { 00802 score_max = score; 00803 res = it->first; 00804 } 00805 } 00806 return res; 00807 } 00808 00809 void CLaRank::destroy () 00810 { 00811 for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it) 00812 it->second.destroy (); 00813 } 00814 00815 00816 // Compute Duality gap (costly but used in stopping criteria in batch mode) 00817 float64_t CLaRank::computeGap () 00818 { 00819 float64_t sum_sl = 0; 00820 float64_t sum_bi = 0; 00821 for (uint32_t i = 0; i < patterns.maxcount (); ++i) 00822 { 00823 const LaRankPattern & p = patterns[i]; 00824 if (!p.exists ()) 00825 continue; 00826 LaRankOutput *out = getOutput (p.y); 00827 if (!out) 00828 continue; 00829 sum_bi += out->getBeta (p.x_id); 00830 float64_t gi = out->computeGradient (p.x_id, p.y, p.y); 00831 float64_t gmin = DBL_MAX; 00832 for (outputhash_t::iterator it = outputs.begin (); it != 
outputs.end (); ++it) 00833 { 00834 if (it->first != p.y && it->second.isSupportVector (p.x_id)) 00835 { 00836 float64_t g = 00837 it->second.computeGradient (p.x_id, p.y, it->first); 00838 if (g < gmin) 00839 gmin = g; 00840 } 00841 } 00842 sum_sl += CMath::max (0.0, gi - gmin); 00843 } 00844 return CMath::max (0.0, computeW2 () + C1 * sum_sl - sum_bi); 00845 } 00846 00847 // Nuber of classes so far 00848 uint32_t CLaRank::getNumOutputs () const 00849 { 00850 return outputs.size (); 00851 } 00852 00853 // Number of Support Vectors 00854 int32_t CLaRank::getNSV () 00855 { 00856 int32_t res = 0; 00857 for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it) 00858 { 00859 float32_t* sv=NULL; 00860 res += it->second.getSV (sv); 00861 delete[] sv; 00862 } 00863 return res; 00864 } 00865 00866 // Norm of the parameters vector 00867 float64_t CLaRank::computeW2 () 00868 { 00869 float64_t res = 0; 00870 for (uint32_t i = 0; i < patterns.maxcount (); ++i) 00871 { 00872 const LaRankPattern & p = patterns[i]; 00873 if (!p.exists ()) 00874 continue; 00875 for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it) 00876 if (it->second.getBeta (p.x_id)) 00877 res += it->second.getBeta (p.x_id) * it->second.computeScore (p.x_id); 00878 } 00879 return res; 00880 } 00881 00882 // Compute Dual objective value 00883 float64_t CLaRank::getDual () 00884 { 00885 float64_t res = 0; 00886 for (uint32_t i = 0; i < patterns.maxcount (); ++i) 00887 { 00888 const LaRankPattern & p = patterns[i]; 00889 if (!p.exists ()) 00890 continue; 00891 LaRankOutput *out = getOutput (p.y); 00892 if (!out) 00893 continue; 00894 res += out->getBeta (p.x_id); 00895 } 00896 return res - computeW2 () / 2; 00897 } 00898 00899 LaRankOutput *CLaRank::getOutput (int32_t index) 00900 { 00901 outputhash_t::iterator it = outputs.find (index); 00902 return it == outputs.end ()? 
NULL : &it->second; 00903 } 00904 00905 // IMPORTANT Main SMO optimization step 00906 CLaRank::process_return_t CLaRank::process (const LaRankPattern & pattern, process_type ptype) 00907 { 00908 process_return_t pro_ret = process_return_t (0, 0); 00909 00910 /* 00911 ** compute gradient and sort 00912 */ 00913 std::vector < outputgradient_t > outputgradients; 00914 00915 outputgradients.reserve (getNumOutputs ()); 00916 00917 std::vector < outputgradient_t > outputscores; 00918 outputscores.reserve (getNumOutputs ()); 00919 00920 for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it) 00921 { 00922 if (ptype != processOptimize 00923 || it->second.isSupportVector (pattern.x_id)) 00924 { 00925 float64_t g = 00926 it->second.computeGradient (pattern.x_id, pattern.y, it->first); 00927 outputgradients.push_back (outputgradient_t (it->first, g)); 00928 if (it->first == pattern.y) 00929 outputscores.push_back (outputgradient_t (it->first, (1 - g))); 00930 else 00931 outputscores.push_back (outputgradient_t (it->first, -g)); 00932 } 00933 } 00934 00935 std::sort (outputgradients.begin (), outputgradients.end ()); 00936 00937 /* 00938 ** determine the prediction 00939 */ 00940 std::sort (outputscores.begin (), outputscores.end ()); 00941 pro_ret.ypred = outputscores[0].output; 00942 00943 /* 00944 ** Find yp (1st part of the pair) 00945 */ 00946 outputgradient_t ygp; 00947 LaRankOutput *outp = NULL; 00948 uint32_t p; 00949 for (p = 0; p < outputgradients.size (); ++p) 00950 { 00951 outputgradient_t & current = outputgradients[p]; 00952 LaRankOutput *output = getOutput (current.output); 00953 bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id); 00954 bool goodclass = current.output == pattern.y; 00955 if ((!support && goodclass) || 00956 (support && output->getBeta (pattern.x_id) < (goodclass ? 
C1 : 0))) 00957 { 00958 ygp = current; 00959 outp = output; 00960 break; 00961 } 00962 } 00963 if (p == outputgradients.size ()) 00964 return pro_ret; 00965 00966 /* 00967 ** Find ym (2nd part of the pair) 00968 */ 00969 outputgradient_t ygm; 00970 LaRankOutput *outm = NULL; 00971 int32_t m; 00972 for (m = outputgradients.size () - 1; m >= 0; --m) 00973 { 00974 outputgradient_t & current = outputgradients[m]; 00975 LaRankOutput *output = getOutput (current.output); 00976 bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id); 00977 bool goodclass = current.output == pattern.y; 00978 if (!goodclass || (support && output->getBeta (pattern.x_id) > 0)) 00979 { 00980 ygm = current; 00981 outm = output; 00982 break; 00983 } 00984 } 00985 if (m < 0) 00986 return pro_ret; 00987 00988 /* 00989 ** Throw or Insert pattern 00990 */ 00991 if ((ygp.gradient - ygm.gradient) < tau) 00992 return pro_ret; 00993 if (ptype == processNew) 00994 patterns.insert (pattern); 00995 00996 /* 00997 ** compute lambda and clip it 00998 */ 00999 float64_t kii = outp->getKii (pattern.x_id); 01000 float64_t lambda = (ygp.gradient - ygm.gradient) / (2 * kii); 01001 if (ptype == processOptimize || outp->isSupportVector (pattern.x_id)) 01002 { 01003 float64_t beta = outp->getBeta (pattern.x_id); 01004 if (ygp.output == pattern.y) 01005 lambda = CMath::min (lambda, C1 - beta); 01006 else 01007 lambda = CMath::min (lambda, fabs (beta)); 01008 } 01009 else 01010 lambda = CMath::min (lambda, C1); 01011 01012 /* 01013 ** update the solution 01014 */ 01015 outp->update (pattern.x_id, lambda, ygp.gradient); 01016 outm->update (pattern.x_id, -lambda, ygm.gradient); 01017 01018 pro_ret.dual_increase = lambda * ((ygp.gradient - ygm.gradient) - lambda * kii); 01019 return pro_ret; 01020 } 01021 01022 // ProcessOld 01023 float64_t CLaRank::reprocess () 01024 { 01025 if (patterns.size ()) 01026 { 01027 for (int32_t n = 0; n < 10; ++n) 01028 { 01029 process_return_t pro_ret = process 
(patterns.sample (), processOld); 01030 if (pro_ret.dual_increase) 01031 return pro_ret.dual_increase; 01032 } 01033 } 01034 return 0; 01035 } 01036 01037 // Optimize 01038 float64_t CLaRank::optimize () 01039 { 01040 float64_t dual_increase = 0; 01041 if (patterns.size ()) 01042 { 01043 for (int32_t n = 0; n < 10; ++n) 01044 { 01045 process_return_t pro_ret = 01046 process (patterns.sample(), processOptimize); 01047 dual_increase += pro_ret.dual_increase; 01048 } 01049 } 01050 return dual_increase; 01051 } 01052 01053 // remove patterns and return the number of patterns that were removed 01054 uint32_t CLaRank::cleanup () 01055 { 01056 /* 01057 for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it) 01058 it->second.cleanup (); 01059 01060 uint32_t res = 0; 01061 for (uint32_t i = 0; i < patterns.size (); ++i) 01062 { 01063 LaRankPattern & p = patterns[i]; 01064 if (p.exists () && !outputs[p.y].isSupportVector (p.x_id)) 01065 { 01066 patterns.remove (i); 01067 ++res; 01068 } 01069 } 01070 return res; 01071 */ 01072 return 0; 01073 }