SVM_libsvm.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin
00003  * All rights reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  *
00009  * 1. Redistributions of source code must retain the above copyright
00010  * notice, this list of conditions and the following disclaimer.
00011  *
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  * notice, this list of conditions and the following disclaimer in the
00014  * documentation and/or other materials provided with the distribution.
00015  *
00016  * 3. Neither name of copyright holders nor the names of its contributors
00017  * may be used to endorse or promote products derived from this software
00018  * without specific prior written permission.
00019  *
00020  *
00021  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
00025  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032  *
00033  * Shogun specific adjustments (w) 2006-2009 Soeren Sonnenburg
00034  */
00035 
00036 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00037 
00038 #include "lib/memory.h"
00039 #include "classifier/svm/SVM_libsvm.h"
00040 #include "kernel/Kernel.h"
00041 #include "lib/io.h"
00042 #include "lib/Signal.h"
00043 #include "lib/common.h"
00044 #include "lib/Mathematics.h"
00045 
00046 #include <stdio.h>
00047 #include <stdlib.h>
00048 #include <ctype.h>
00049 #include <float.h>
00050 #include <string.h>
00051 #include <stdarg.h>
00052 
00053 namespace shogun
00054 {
00055 
00056 typedef KERNELCACHE_ELEM Qfloat;
00057 typedef float64_t schar;
00058 
// Allocate a fresh T[n] into dst and copy/convert n elements from src.
// Element-wise assignment (instead of the historical memcpy of
// sizeof(T)*n bytes) is safe even when S and T are different types:
// memcpy over-read the source buffer whenever sizeof(T) > sizeof(S)
// and bit-reinterpreted the values whenever the representations differed.
// The caller owns the returned buffer and must delete[] it.
template <class S, class T> inline void clone(T*& dst, S* src, int32_t n)
{
    dst = new T[n];
    for (int32_t i=0; i<n; i++)
        dst[i] = static_cast<T>(src[i]);
}
00064 #define INF HUGE_VAL
00065 #define TAU 1e-12
00066 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00067 
00068 class QMatrix;
00069 class SVC_QMC;
00070 
00071 //
00072 // Kernel Cache
00073 //
00074 // l is the number of total data items
00075 // size is the cache size limit in bytes
00076 //
class Cache
{
public:
    Cache(int32_t l, int64_t size);
    ~Cache();

    // request data [0,len)
    // return some position p where [p,len) need to be filled
    // (p >= len if nothing needs to be filled)
    int32_t get_data(const int32_t index, Qfloat **data, int32_t len);
    void swap_index(int32_t i, int32_t j);  // future_option

private:
    int32_t l;      // total number of cacheable columns
    int64_t size;   // remaining capacity, counted in Qfloat elements (not bytes)
    struct head_t   // one slot per column; filled slots sit on a circular LRU list
    {
        head_t *prev, *next;    // a circular list
        Qfloat *data;           // cached column values, NULL when not cached
        int32_t len;        // data[0,len) is cached in this entry
    };

    head_t *head;       // array of l slots, indexed by column
    head_t lru_head;    // sentinel of the circular LRU list (lru_head.next is least recent)
    void lru_delete(head_t *h); // unlink a slot from the LRU list
    void lru_insert(head_t *h); // link a slot in as most recently used
};
00104 
// l_: number of columns to cache; size_: cache limit in bytes.
// Internally `size` is immediately converted to a budget in Qfloat
// elements, minus the cost of the header array.
Cache::Cache(int32_t l_, int64_t size_):l(l_),size(size_)
{
    head = (head_t *)calloc(l,sizeof(head_t));  // initialized to 0
    size /= sizeof(Qfloat);                     // from here on, size counts Qfloat elements
    size -= l * sizeof(head_t) / sizeof(Qfloat);    // charge the header array against the budget
    size = CMath::max(size, (int64_t) 2*l); // cache must be large enough for two columns
    lru_head.next = lru_head.prev = &lru_head;  // empty circular list: sentinel points at itself
}
00113 
00114 Cache::~Cache()
00115 {
00116     for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00117         free(h->data);
00118     free(head);
00119 }
00120 
00121 void Cache::lru_delete(head_t *h)
00122 {
00123     // delete from current location
00124     h->prev->next = h->next;
00125     h->next->prev = h->prev;
00126 }
00127 
00128 void Cache::lru_insert(head_t *h)
00129 {
00130     // insert to last position
00131     h->next = &lru_head;
00132     h->prev = lru_head.prev;
00133     h->prev->next = h;
00134     h->next->prev = h;
00135 }
00136 
// Return (via *data) the cache buffer for column `index`, sized to hold
// `len` entries, evicting least-recently-used columns to make room.
// The return value p is the count of entries already valid; the caller
// must compute positions [p,len) itself (p >= len means nothing to do).
int32_t Cache::get_data(const int32_t index, Qfloat **data, int32_t len)
{
    head_t *h = &head[index];
    if(h->len) lru_delete(h);   // take off the list while we resize
    int32_t more = len - h->len;    // number of entries still missing

    if(more > 0)
    {
        // free old space
        while(size < more)  // evict LRU entries until the budget covers `more`
        {
            head_t *old = lru_head.next;    // least recently used
            lru_delete(old);
            free(old->data);
            size += old->len;   // reclaim its budget
            old->data = 0;
            old->len = 0;
        }

        // allocate new space
        h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
        size -= more;
        // after this swap h->len==len (full size) while the local `len`
        // holds the old fill count -- exactly the position returned below
        CMath::swap(h->len,len);
    }

    lru_insert(h);  // mark as most recently used
    *data = h->data;
    return len;
}
00166 
// Exchange the cache slots of columns i and j, then fix the i-th/j-th
// entries *inside* every other cached column so the cache stays
// consistent with the permuted kernel matrix.
void Cache::swap_index(int32_t i, int32_t j)
{
    if(i==j) return;

    if(head[i].len) lru_delete(&head[i]);
    if(head[j].len) lru_delete(&head[j]);
    CMath::swap(head[i].data,head[j].data);
    CMath::swap(head[i].len,head[j].len);
    if(head[i].len) lru_insert(&head[i]);
    if(head[j].len) lru_insert(&head[j]);

    if(i>j) CMath::swap(i,j);   // ensure i<j for the containment tests below
    for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
    {
        if(h->len > i)
        {
            if(h->len > j)  // column caches both positions: swap them
                CMath::swap(h->data[i],h->data[j]);
            else
            {
                // column caches position i but not j -- it cannot be
                // patched consistently, so evict it:
                // give up
                lru_delete(h);
                free(h->data);
                size += h->len;
                h->data = 0;
                h->len = 0;
            }
        }
    }
}
00197 
00198 //
00199 // Kernel evaluation
00200 //
00201 // the static method k_function is for doing single kernel evaluation
00202 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00203 // the member function get_Q is for getting one column from the Q Matrix
00204 //
// Abstract interface to the (implicit) l x l matrix Q of the QP problem.
class QMatrix {
public:
    // return a pointer to the first `len` entries of column `column`
    virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0;
    // return the diagonal entries of Q
    virtual Qfloat *get_QD() const = 0;
    // permute rows/columns i and j (declared const although
    // implementations may mutate internal state -- see LibSVMKernel)
    virtual void swap_index(int32_t i, int32_t j) const = 0;
    virtual ~QMatrix() {}
};
00212 
// QMatrix implementation backed by a shogun CKernel: kernel values are
// looked up through the `index` fields of the stored svm_node pointers.
class LibSVMKernel: public QMatrix {
public:
    LibSVMKernel(int32_t l, svm_node * const * x, const svm_parameter& param);
    virtual ~LibSVMKernel();

    virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0;
    virtual Qfloat *get_QD() const = 0;
    virtual void swap_index(int32_t i, int32_t j) const // not so const...
    {
        CMath::swap(x[i],x[j]);
        if(x_square) CMath::swap(x_square[i],x_square[j]);
    }

    // evaluate k(x_i, x_j) through the wrapped CKernel
    inline float64_t kernel_function(int32_t i, int32_t j) const
    {
        return kernel->kernel(x[i]->index,x[j]->index);
    }

private:
    CKernel* kernel;        // borrowed from param.kernel; not freed here
    const svm_node **x;     // owned copy of the example-pointer array (deleted in dtor)
    float64_t *x_square;    // owned; set to NULL in the constructor

    // svm_parameter (copied for reference; unused by this wrapper itself)
    const int32_t kernel_type;
    const int32_t degree;
    const float64_t gamma;
    const float64_t coef0;
};
00242 
// l: number of examples; x_: example pointers (the pointer array is
// cloned, so the caller keeps ownership of its own array); param:
// supplies the CKernel and kernel settings.
LibSVMKernel::LibSVMKernel(int32_t l, svm_node * const * x_, const svm_parameter& param)
: kernel_type(param.kernel_type), degree(param.degree),
 gamma(param.gamma), coef0(param.coef0)
{
    clone(x,x_,l);  // shallow copy: duplicates only the pointer array
    x_square = 0;
    kernel=param.kernel;    // borrowed, not owned
}
00251 
00252 LibSVMKernel::~LibSVMKernel()
00253 {
00254     delete[] x;
00255     delete[] x_square;
00256 }
00257 
00258 // Generalized SMO+SVMlight algorithm
00259 // Solves:
00260 //
00261 //  min 0.5(\alpha^T Q \alpha) + b^T \alpha
00262 //
00263 //      y^T \alpha = \delta
00264 //      y_i = +1 or -1
00265 //      0 <= alpha_i <= Cp for y_i = 1
00266 //      0 <= alpha_i <= Cn for y_i = -1
00267 //
00268 // Given:
00269 //
00270 //  Q, p, y, Cp, Cn, and an initial feasible point \alpha
00271 //  l is the size of vectors and matrices
00272 //  eps is the stopping tolerance
00273 //
00274 // solution will be put in \alpha, objective value will be put in obj
00275 //
00276 class Solver {
00277 public:
00278     Solver() {};
00279     virtual ~Solver() {};
00280 
00281     struct SolutionInfo {
00282         float64_t obj;
00283         float64_t rho;
00284         float64_t upper_bound_p;
00285         float64_t upper_bound_n;
00286         float64_t r;    // for Solver_NU
00287     };
00288 
00289     void Solve(
00290         int32_t l, const QMatrix& Q, const float64_t *p_, const schar *y_,
00291         float64_t *alpha_, float64_t Cp, float64_t Cn, float64_t eps,
00292         SolutionInfo* si, int32_t shrinking);
00293 
00294 protected:
00295     int32_t active_size;
00296     schar *y;
00297     float64_t *G;       // gradient of objective function
00298     enum { LOWER_BOUND, UPPER_BOUND, FREE };
00299     char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
00300     float64_t *alpha;
00301     const QMatrix *Q;
00302     const Qfloat *QD;
00303     float64_t eps;
00304     float64_t Cp,Cn;
00305     float64_t *p;
00306     int32_t *active_set;
00307     float64_t *G_bar;       // gradient, if we treat free variables as 0
00308     int32_t l;
00309     bool unshrink;  // XXX
00310 
00311     float64_t get_C(int32_t i)
00312     {
00313         return (y[i] > 0)? Cp : Cn;
00314     }
00315     void update_alpha_status(int32_t i)
00316     {
00317         if(alpha[i] >= get_C(i))
00318             alpha_status[i] = UPPER_BOUND;
00319         else if(alpha[i] <= 0)
00320             alpha_status[i] = LOWER_BOUND;
00321         else alpha_status[i] = FREE;
00322     }
00323     bool is_upper_bound(int32_t i) { return alpha_status[i] == UPPER_BOUND; }
00324     bool is_lower_bound(int32_t i) { return alpha_status[i] == LOWER_BOUND; }
00325     bool is_free(int32_t i) { return alpha_status[i] == FREE; }
00326     void swap_index(int32_t i, int32_t j);
00327     void reconstruct_gradient();
00328     virtual int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
00329     virtual float64_t calculate_rho();
00330     virtual void do_shrinking();
00331 private:
00332     bool be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2);
00333 };
00334 
// Swap every per-example array at positions i and j (used by shrinking
// to move shrunk examples past the end of the active region).
void Solver::swap_index(int32_t i, int32_t j)
{
    Q->swap_index(i,j);
    CMath::swap(y[i],y[j]);
    CMath::swap(G[i],G[j]);
    CMath::swap(alpha_status[i],alpha_status[j]);
    CMath::swap(alpha[i],alpha[j]);
    CMath::swap(p[i],p[j]);
    CMath::swap(active_set[i],active_set[j]);
    CMath::swap(G_bar[i],G_bar[j]);
}
00346 
00347 void Solver::reconstruct_gradient()
00348 {
00349     // reconstruct inactive elements of G from G_bar and free variables
00350 
00351     if(active_size == l) return;
00352 
00353     int32_t i,j;
00354     int32_t nr_free = 0;
00355 
00356     for(j=active_size;j<l;j++)
00357         G[j] = G_bar[j] + p[j];
00358 
00359     for(j=0;j<active_size;j++)
00360         if(is_free(j))
00361             nr_free++;
00362 
00363     if (nr_free*l > 2*active_size*(l-active_size))
00364     {
00365         for(i=active_size;i<l;i++)
00366         {
00367             const Qfloat *Q_i = Q->get_Q(i,active_size);
00368             for(j=0;j<active_size;j++)
00369                 if(is_free(j))
00370                     G[i] += alpha[j] * Q_i[j];
00371         }
00372     }
00373     else
00374     {
00375         for(i=0;i<active_size;i++)
00376             if(is_free(i))
00377             {
00378                 const Qfloat *Q_i = Q->get_Q(i,l);
00379                 float64_t alpha_i = alpha[i];
00380                 for(j=active_size;j<l;j++)
00381                     G[j] += alpha_i * Q_i[j];
00382             }
00383     }
00384 }
00385 
// Run SMO on the QP described at the top of this class.  All inputs are
// cloned, so the caller's arrays are untouched except p_alpha, which
// receives the solution mapped back to original index order; results
// (rho, obj, bounds) land in *p_si.  `shrinking` enables periodic
// removal of bound variables from the active set.  Computation can be
// cancelled via CSignal.
void Solver::Solve(
    int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
    const schar *p_y, float64_t *p_alpha, float64_t p_Cp, float64_t p_Cn,
    float64_t p_eps, SolutionInfo* p_si, int32_t shrinking)
{
    this->l = p_l;
    this->Q = &p_Q;
    QD=Q->get_QD();
    clone(p, p_p,l);
    clone(y, p_y,l);
    clone(alpha,p_alpha,l);
    this->Cp = p_Cp;
    this->Cn = p_Cn;
    this->eps = p_eps;
    unshrink = false;

    // initialize alpha_status
    {
        alpha_status = new char[l];
        for(int32_t i=0;i<l;i++)
            update_alpha_status(i);
    }

    // initialize active set (for shrinking)
    {
        active_set = new int32_t[l];
        for(int32_t i=0;i<l;i++)
            active_set[i] = i;
        active_size = l;
    }

    // initialize gradient
    CSignal::clear_cancel();
    {
        G = new float64_t[l];
        G_bar = new float64_t[l];
        int32_t i;
        for(i=0;i<l;i++)
        {
            G[i] = p_p[i];
            G_bar[i] = 0;
        }
        SG_SINFO("Computing gradient for initial set of non-zero alphas\n");
        //CMath::display_vector(alpha, l, "alphas");
        // only non-zero alphas contribute; G_bar accumulates the
        // upper-bounded variables at their bound value
        for(i=0;i<l && !CSignal::cancel_computations();i++)
        {
            if(!is_lower_bound(i))
            {
                const Qfloat *Q_i = Q->get_Q(i,l);
                float64_t alpha_i = alpha[i];
                int32_t j;
                for(j=0;j<l;j++)
                    G[j] += alpha_i*Q_i[j];
                if(is_upper_bound(i))
                    for(j=0;j<l;j++)
                        G_bar[j] += get_C(i) * Q_i[j];
            }
            SG_SPROGRESS(i, 0, l);
        }
        SG_SDONE();
    }

    // optimization step

    int32_t iter = 0;
    // shrink (at most) every min(l,1000) iterations
    int32_t counter = CMath::min(l,1000)+1;

    while (!CSignal::cancel_computations())
    {
        // show progress and do shrinking

        if(--counter == 0)
        {
            counter = CMath::min(l,1000);
            if(shrinking) do_shrinking();
            //SG_SINFO(".");
        }

        int32_t i,j;
        float64_t gap;
        if(select_working_set(i,j, gap)!=0)
        {
            // active set looks optimal: verify against the full problem
            // reconstruct the whole gradient
            reconstruct_gradient();
            // reset active set size and check
            active_size = l;
            //SG_SINFO("*");
            if(select_working_set(i,j, gap)!=0)
                break;
            else
                counter = 1;    // do shrinking next iteration
        }

        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        ++iter;

        // update alpha[i] and alpha[j], handle bounds carefully

        const Qfloat *Q_i = Q->get_Q(i,active_size);
        const Qfloat *Q_j = Q->get_Q(j,active_size);

        float64_t C_i = get_C(i);
        float64_t C_j = get_C(j);

        float64_t old_alpha_i = alpha[i];
        float64_t old_alpha_j = alpha[j];

        if(y[i]!=y[j])
        {
            // opposite labels: alpha_i - alpha_j stays constant (= diff);
            // take the unconstrained step, then clip into the feasible box
            float64_t quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
            if (quad_coef <= 0)
                quad_coef = TAU;
            float64_t delta = (-G[i]-G[j])/quad_coef;
            float64_t diff = alpha[i] - alpha[j];
            alpha[i] += delta;
            alpha[j] += delta;

            if(diff > 0)
            {
                if(alpha[j] < 0)
                {
                    alpha[j] = 0;
                    alpha[i] = diff;
                }
            }
            else
            {
                if(alpha[i] < 0)
                {
                    alpha[i] = 0;
                    alpha[j] = -diff;
                }
            }
            if(diff > C_i - C_j)
            {
                if(alpha[i] > C_i)
                {
                    alpha[i] = C_i;
                    alpha[j] = C_i - diff;
                }
            }
            else
            {
                if(alpha[j] > C_j)
                {
                    alpha[j] = C_j;
                    alpha[i] = C_j + diff;
                }
            }
        }
        else
        {
            // equal labels: alpha_i + alpha_j stays constant (= sum)
            float64_t quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
            if (quad_coef <= 0)
                quad_coef = TAU;
            float64_t delta = (G[i]-G[j])/quad_coef;
            float64_t sum = alpha[i] + alpha[j];
            alpha[i] -= delta;
            alpha[j] += delta;

            if(sum > C_i)
            {
                if(alpha[i] > C_i)
                {
                    alpha[i] = C_i;
                    alpha[j] = sum - C_i;
                }
            }
            else
            {
                if(alpha[j] < 0)
                {
                    alpha[j] = 0;
                    alpha[i] = sum;
                }
            }
            if(sum > C_j)
            {
                if(alpha[j] > C_j)
                {
                    alpha[j] = C_j;
                    alpha[i] = sum - C_j;
                }
            }
            else
            {
                if(alpha[i] < 0)
                {
                    alpha[i] = 0;
                    alpha[j] = sum;
                }
            }
        }

        // update G

        float64_t delta_alpha_i = alpha[i] - old_alpha_i;
        float64_t delta_alpha_j = alpha[j] - old_alpha_j;

        for(int32_t k=0;k<active_size;k++)
        {
            G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
        }

        // update alpha_status and G_bar

        {
            // G_bar changes only when an example crosses the upper bound
            bool ui = is_upper_bound(i);
            bool uj = is_upper_bound(j);
            update_alpha_status(i);
            update_alpha_status(j);
            int32_t k;
            if(ui != is_upper_bound(i))
            {
                Q_i = Q->get_Q(i,l);
                if(ui)
                    for(k=0;k<l;k++)
                        G_bar[k] -= C_i * Q_i[k];
                else
                    for(k=0;k<l;k++)
                        G_bar[k] += C_i * Q_i[k];
            }

            if(uj != is_upper_bound(j))
            {
                Q_j = Q->get_Q(j,l);
                if(uj)
                    for(k=0;k<l;k++)
                        G_bar[k] -= C_j * Q_j[k];
                else
                    for(k=0;k<l;k++)
                        G_bar[k] += C_j * Q_j[k];
            }
        }

#ifdef MCSVM_DEBUG
        // calculate objective value
        {
            float64_t v = 0;
            for(i=0;i<l;i++)
                v += alpha[i] * (G[i] + p[i]);

            p_si->obj = v/2;

            float64_t primal=0;
            //float64_t gap=100000;
            SG_SPRINT("dual obj=%f primal obf=%f gap=%f\n", v/2, primal, gap);
        }
#endif
    }

    // calculate rho

    p_si->rho = calculate_rho();

    // calculate objective value
    {
        float64_t v = 0;
        int32_t i;
        for(i=0;i<l;i++)
            v += alpha[i] * (G[i] + p[i]);

        p_si->obj = v/2;
    }

    // put back the solution
    {
        // undo the shrinking permutation via active_set
        for(int32_t i=0;i<l;i++)
            p_alpha[active_set[i]] = alpha[i];
    }

    p_si->upper_bound_p = Cp;
    p_si->upper_bound_n = Cn;

    SG_SINFO("\noptimization finished, #iter = %d\n",iter);

    // release the working copies cloned at entry
    delete[] p;
    delete[] y;
    delete[] alpha;
    delete[] alpha_status;
    delete[] active_set;
    delete[] G;
    delete[] G_bar;
}
00671 
00672 // return 1 if already optimal, return 0 otherwise
// Second-order working-set selection (maximal violating pair with
// objective-decrease tie-breaking).  On success writes the pair into
// out_i/out_j and the current duality gap into `gap`.
// return 1 if already optimal, return 0 otherwise
int32_t Solver::select_working_set(
    int32_t &out_i, int32_t &out_j, float64_t &gap)
{
    // return i,j such that
    // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
    // j: minimizes the decrease of obj value
    //    (if quadratic coefficient <= 0, replace it with tau)
    //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

    float64_t Gmax = -INF;
    float64_t Gmax2 = -INF;
    int32_t Gmax_idx = -1;
    int32_t Gmin_idx = -1;
    float64_t obj_diff_min = INF;

    // first pass: pick i with the largest violation among I_up
    for(int32_t t=0;t<active_size;t++)
        if(y[t]==+1)
        {
            if(!is_upper_bound(t))
                if(-G[t] >= Gmax)
                {
                    Gmax = -G[t];
                    Gmax_idx = t;
                }
        }
        else
        {
            if(!is_lower_bound(t))
                if(G[t] >= Gmax)
                {
                    Gmax = G[t];
                    Gmax_idx = t;
                }
        }

    int32_t i = Gmax_idx;
    const Qfloat *Q_i = NULL;
    if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
        Q_i = Q->get_Q(i,active_size);

    // second pass: pick j in I_low minimizing the second-order
    // estimate of the objective decrease for the pair (i,j)
    for(int32_t j=0;j<active_size;j++)
    {
        if(y[j]==+1)
        {
            if (!is_lower_bound(j))
            {
                float64_t grad_diff=Gmax+G[j];
                if (G[j] >= Gmax2)
                    Gmax2 = G[j];
                if (grad_diff > 0)
                {
                    float64_t obj_diff;
                    float64_t quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
        else
        {
            if (!is_upper_bound(j))
            {
                float64_t grad_diff= Gmax-G[j];
                if (-G[j] >= Gmax2)
                    Gmax2 = -G[j];
                if (grad_diff > 0)
                {
                    float64_t obj_diff;
                    float64_t quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
    }

    // stop when the maximal violation is below the tolerance
    gap=Gmax+Gmax2;
    if(gap < eps)
        return 1;

    out_i = Gmax_idx;
    out_j = Gmin_idx;
    return 0;
}
00773 
00774 bool Solver::be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2)
00775 {
00776     if(is_upper_bound(i))
00777     {
00778         if(y[i]==+1)
00779             return(-G[i] > Gmax1);
00780         else
00781             return(-G[i] > Gmax2);
00782     }
00783     else if(is_lower_bound(i))
00784     {
00785         if(y[i]==+1)
00786             return(G[i] > Gmax2);
00787         else
00788             return(G[i] > Gmax1);
00789     }
00790     else
00791         return(false);
00792 }
00793 
00794 
// Shrink the active set: move examples that are stuck at a bound past
// active_size so later iterations skip them.  If the gap is already
// small, first unshrink once (reconstruct the full gradient) so the
// final iterations work on the whole problem.
void Solver::do_shrinking()
{
    int32_t i;
    float64_t Gmax1 = -INF;     // max { -y_i * grad(f)_i | i in I_up(\alpha) }
    float64_t Gmax2 = -INF;     // max { y_i * grad(f)_i | i in I_low(\alpha) }

    // find maximal violating pair first
    for(i=0;i<active_size;i++)
    {
        if(y[i]==+1)
        {
            if(!is_upper_bound(i))
            {
                if(-G[i] >= Gmax1)
                    Gmax1 = -G[i];
            }
            if(!is_lower_bound(i))
            {
                if(G[i] >= Gmax2)
                    Gmax2 = G[i];
            }
        }
        else
        {
            if(!is_upper_bound(i))
            {
                if(-G[i] >= Gmax2)
                    Gmax2 = -G[i];
            }
            if(!is_lower_bound(i))
            {
                if(G[i] >= Gmax1)
                    Gmax1 = G[i];
            }
        }
    }

    // close to convergence: reactivate everything once
    if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
    {
        unshrink = true;
        reconstruct_gradient();
        active_size = l;
    }

    // compact in place: swap each shrinkable example with the last
    // non-shrinkable one, decreasing active_size as we go
    for(i=0;i<active_size;i++)
        if (be_shrunk(i, Gmax1, Gmax2))
        {
            active_size--;
            while (active_size > i)
            {
                if (!be_shrunk(active_size, Gmax1, Gmax2))
                {
                    swap_index(i,active_size);
                    break;
                }
                active_size--;
            }
        }
}
00854 
// Compute the bias term rho: the average of y_i*G[i] over the free
// support vectors, or the midpoint of the feasible interval [lb,ub]
// derived from the bounded examples when no variable is free.
float64_t Solver::calculate_rho()
{
    float64_t r;
    int32_t nr_free = 0;
    float64_t ub = INF, lb = -INF, sum_free = 0;
    for(int32_t i=0;i<active_size;i++)
    {
        float64_t yG = y[i]*G[i];

        if(is_upper_bound(i))
        {
            if(y[i]==-1)
                ub = CMath::min(ub,yG);
            else
                lb = CMath::max(lb,yG);
        }
        else if(is_lower_bound(i))
        {
            if(y[i]==+1)
                ub = CMath::min(ub,yG);
            else
                lb = CMath::max(lb,yG);
        }
        else
        {
            // free variable: contributes to the exact average
            ++nr_free;
            sum_free += yG;
        }
    }

    if(nr_free>0)
        r = sum_free/nr_free;
    else
        r = (ub+lb)/2;

    return r;
}
00892 
00893 
00894 //
00895 //Solve with individually weighted examples
00896 //
// Solver variant with an individual cost bound C_i per example,
// supplied as an external array.
class WeightedSolver : public Solver
{

public:

    // cost_vec: per-example upper bounds, indexed like the training set;
    // borrowed, not owned -- must outlive this solver.
    WeightedSolver(float64_t* cost_vec)
    {

        this->Cs = cost_vec;

    }

    // NOTE(review): this relies on Solver::get_C being declared virtual
    // in the base class; if it is not, Solver::Solve binds the call
    // statically and these per-example costs are silently ignored --
    // verify the base declaration.
    // NOTE(review): Solver::swap_index does not permute Cs, so with
    // shrinking enabled the costs could get out of step with the
    // solver's internal ordering -- confirm shrinking is disabled for
    // weighted training.
    virtual float64_t get_C(int32_t i)
    {

        return Cs[i];
    }

protected:

  float64_t* Cs;    // per-example costs (not owned)

};
00920 
00921 
00922 //
00923 // Solver for nu-svm classification and regression
00924 //
00925 // additional constraint: e^T \alpha = constant
00926 //
class Solver_NU : public Solver
{
public:
    Solver_NU() {}
    // Same contract as Solver::Solve; additionally keeps the
    // SolutionInfo pointer so the NU-specific routines can fill in
    // extra fields (see SolutionInfo::r).
    void Solve(
        int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
        const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn,
        float64_t p_eps, SolutionInfo* p_si, int32_t shrinking)
    {
        this->si = p_si;    // stashed before delegating to the base solver
        Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,shrinking);
    }
private:
    SolutionInfo *si;
    // NU variant: the working pair must share the same label
    int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
    float64_t calculate_rho();
    bool be_shrunk(
        int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
        float64_t Gmax4);
    void do_shrinking();
};
00948 
00949 // return 1 if already optimal, return 0 otherwise
// NU working-set selection: like Solver::select_working_set, but the
// equality constraints per label class require i and j to carry the
// same label, so the two classes are scanned independently.
// return 1 if already optimal, return 0 otherwise
int32_t Solver_NU::select_working_set(
    int32_t &out_i, int32_t &out_j, float64_t &gap)
{
    // return i,j such that y_i = y_j and
    // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
    // j: minimizes the decrease of obj value
    //    (if quadratic coefficient <= 0, replace it with tau)
    //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

    float64_t Gmaxp = -INF;     // violation maxima for the +1 class
    float64_t Gmaxp2 = -INF;
    int32_t Gmaxp_idx = -1;

    float64_t Gmaxn = -INF;     // violation maxima for the -1 class
    float64_t Gmaxn2 = -INF;
    int32_t Gmaxn_idx = -1;

    int32_t Gmin_idx = -1;
    float64_t obj_diff_min = INF;

    // first pass: per-class maximal violator in I_up
    for(int32_t t=0;t<active_size;t++)
        if(y[t]==+1)
        {
            if(!is_upper_bound(t))
                if(-G[t] >= Gmaxp)
                {
                    Gmaxp = -G[t];
                    Gmaxp_idx = t;
                }
        }
        else
        {
            if(!is_lower_bound(t))
                if(G[t] >= Gmaxn)
                {
                    Gmaxn = G[t];
                    Gmaxn_idx = t;
                }
        }

    int32_t ip = Gmaxp_idx;
    int32_t in = Gmaxn_idx;
    const Qfloat *Q_ip = NULL;
    const Qfloat *Q_in = NULL;
    if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
        Q_ip = Q->get_Q(ip,active_size);
    if(in != -1)
        Q_in = Q->get_Q(in,active_size);

    // second pass: per-class second-order minimizer in I_low
    for(int32_t j=0;j<active_size;j++)
    {
        if(y[j]==+1)
        {
            if (!is_lower_bound(j))
            {
                float64_t grad_diff=Gmaxp+G[j];
                if (G[j] >= Gmaxp2)
                    Gmaxp2 = G[j];
                if (grad_diff > 0)
                {
                    float64_t obj_diff;
                    float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
        else
        {
            if (!is_upper_bound(j))
            {
                float64_t grad_diff=Gmaxn-G[j];
                if (-G[j] >= Gmaxn2)
                    Gmaxn2 = -G[j];
                if (grad_diff > 0)
                {
                    float64_t obj_diff;
                    float64_t quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
    }

    // converged when both classes' violations fall below the tolerance
    gap=CMath::max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2);
    if(gap < eps)
        return 1;

    // pair i from the same label class as the chosen j
    if (y[Gmin_idx] == +1)
        out_i = Gmaxp_idx;
    else
        out_i = Gmaxn_idx;
    out_j = Gmin_idx;

    return 0;
}
01063 
01064 bool Solver_NU::be_shrunk(
01065     int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
01066     float64_t Gmax4)
01067 {
01068     if(is_upper_bound(i))
01069     {
01070         if(y[i]==+1)
01071             return(-G[i] > Gmax1);
01072         else
01073             return(-G[i] > Gmax4);
01074     }
01075     else if(is_lower_bound(i))
01076     {
01077         if(y[i]==+1)
01078             return(G[i] > Gmax2);
01079         else
01080             return(G[i] > Gmax3);
01081     }
01082     else
01083         return(false);
01084 }
01085 
// Shrinking heuristic for the nu-solver. Four maxima are tracked (instead
// of the standard solver's two) because the +1 and -1 examples are subject
// to separate equality constraints and must be shrunk independently.
void Solver_NU::do_shrinking()
{
    float64_t Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
    float64_t Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
    float64_t Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
    float64_t Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }

    // find maximal violating pair first
    int32_t i;
    for(i=0;i<active_size;i++)
    {
        if(!is_upper_bound(i))
        {
            if(y[i]==+1)
            {
                if(-G[i] > Gmax1) Gmax1 = -G[i];
            }
            else    if(-G[i] > Gmax4) Gmax4 = -G[i];
        }
        if(!is_lower_bound(i))
        {
            if(y[i]==+1)
            {
                if(G[i] > Gmax2) Gmax2 = G[i];
            }
            else    if(G[i] > Gmax3) Gmax3 = G[i];
        }
    }

    // Once the KKT violation is within 10*eps, reactivate everything a
    // single time (unshrink flag): the gradient of the inactive part must
    // be reconstructed so the final iterations run on the full problem.
    if(unshrink == false && CMath::max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
    {
        unshrink = true;
        reconstruct_gradient();
        active_size = l;
    }

    // Compact the active set in place: each shrinkable variable at
    // position i is swapped with the last non-shrinkable variable from
    // the tail, decreasing active_size as we go.
    for(i=0;i<active_size;i++)
        if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
        {
            active_size--;
            while (active_size > i)
            {
                if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
                {
                    swap_index(i,active_size);
                    break;
                }
                active_size--;
            }
        }
}
01137 
01138 float64_t Solver_NU::calculate_rho()
01139 {
01140     int32_t nr_free1 = 0,nr_free2 = 0;
01141     float64_t ub1 = INF, ub2 = INF;
01142     float64_t lb1 = -INF, lb2 = -INF;
01143     float64_t sum_free1 = 0, sum_free2 = 0;
01144 
01145     for(int32_t i=0; i<active_size; i++)
01146     {
01147         if(y[i]==+1)
01148         {
01149             if(is_upper_bound(i))
01150                 lb1 = CMath::max(lb1,G[i]);
01151             else if(is_lower_bound(i))
01152                 ub1 = CMath::min(ub1,G[i]);
01153             else
01154             {
01155                 ++nr_free1;
01156                 sum_free1 += G[i];
01157             }
01158         }
01159         else
01160         {
01161             if(is_upper_bound(i))
01162                 lb2 = CMath::max(lb2,G[i]);
01163             else if(is_lower_bound(i))
01164                 ub2 = CMath::min(ub2,G[i]);
01165             else
01166             {
01167                 ++nr_free2;
01168                 sum_free2 += G[i];
01169             }
01170         }
01171     }
01172 
01173     float64_t r1,r2;
01174     if(nr_free1 > 0)
01175         r1 = sum_free1/nr_free1;
01176     else
01177         r1 = (ub1+lb1)/2;
01178 
01179     if(nr_free2 > 0)
01180         r2 = sum_free2/nr_free2;
01181     else
01182         r2 = (ub2+lb2)/2;
01183 
01184     si->r = (r1+r2)/2;
01185     return (r1-r2)/2;
01186 }
01187 
class SVC_QMC: public LibSVMKernel
{
public:
    /** Q matrix for the multi-class nu-SVM solver.
     *
     * Entries are class-dependent:
     *   Q_ij =  factor*(nr_class-1)*K(i,j)  if y_i == y_j
     *   Q_ij = -factor*K(i,j)               otherwise
     * Rows are served through an LRU cache.
     */
    SVC_QMC(const svm_problem& prob, const svm_parameter& param, const schar *y_, int32_t n_class, float64_t fac)
    :LibSVMKernel(prob.l, prob.x, param)
    {
        nr_class=n_class;
        factor=fac;
        clone(y,y_,prob.l);
        // param.cache_size is in MB; Cache expects bytes
        cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
        QD = new Qfloat[prob.l];
        for(int32_t i=0;i<prob.l;i++)
        {
            // diagonal entries always use the same-class scaling
            QD[i]= factor*(nr_class-1)*kernel_function(i,i);
        }
    }

    // Return a pointer to the first len entries of row i, computing only
    // the suffix the cache did not already hold.
    Qfloat *get_Q(int32_t i, int32_t len) const
    {
        Qfloat *data;
        int32_t start;
        if((start = cache->get_data(i,&data,len)) < len)
        {
            for(int32_t j=start;j<len;j++)
            {
                if (y[i]==y[j])
                    data[j] = factor*(nr_class-1)*kernel_function(i,j);
                else
                    data[j] = -factor*kernel_function(i,j);
            }
        }
        return data;
    }

    // Undo the class-dependent sign/scale of a Q entry: for both the
    // same-class and different-class case this recovers factor*K(i,j).
    inline Qfloat get_orig_Qij(Qfloat Q, int32_t i, int32_t j)
    {
        if (y[i]==y[j])
            return Q/(nr_class-1);
        else
            return -Q;
    }

    Qfloat *get_QD() const
    {
        return QD;
    }

    // Keep cache, kernel, labels and diagonal consistent when the solver
    // permutes variables (used by shrinking).
    void swap_index(int32_t i, int32_t j) const
    {
        cache->swap_index(i,j);
        LibSVMKernel::swap_index(i,j);
        CMath::swap(y[i],y[j]);
        CMath::swap(QD[i],QD[j]);
    }

    ~SVC_QMC()
    {
        delete[] y;
        delete cache;
        delete[] QD;
    }
private:
    float64_t factor;      // global scaling of the Q matrix
    float64_t nr_class;    // number of classes (stored as float; used in ratios)
    schar *y;              // class label per example (owned copy)
    Cache *cache;          // LRU row cache
    Qfloat *QD;            // precomputed diagonal of Q
};
01256 
01257 //
01258 // Solver for nu-svm classification and regression
01259 //
01260 // additional constraint: e^T \alpha = constant
01261 //
class Solver_NUMC : public Solver
{
public:
    /** @param n_class number of classes
     *  @param svm_nu nu parameter (fraction-of-SVs/margin-errors bound)
     */
    Solver_NUMC(int32_t n_class, float64_t svm_nu)
    {
        nr_class=n_class;
        nu=svm_nu;
    }

    // Same contract as Solver::Solve; additionally stores the
    // SolutionInfo pointer so compute_primal()/calculate_rho() can
    // report results through it.
    void Solve(
        int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
        const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn,
        float64_t p_eps, SolutionInfo* p_si, int32_t shrinking)
    {
        this->si = p_si;
        Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,shrinking);
    }
    // Compute primal objective, per-class biases and per-class w norms
    // from the dual solution (definition below in this file).
    float64_t compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases, float64_t* normwcw);

private:
    SolutionInfo *si;
    int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
    float64_t calculate_rho();
    bool be_shrunk(
        int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
        float64_t Gmax4);
    void do_shrinking();

private:
    int32_t nr_class;
    float64_t  nu;
};
01294 
01295 float64_t Solver_NUMC::compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases, float64_t* normwcw)
01296 {
01297     clone(y, p_y,l);
01298     clone(alpha,p_alpha,l);
01299 
01300     alpha_status = new char[l];
01301     for(int32_t i=0;i<l;i++)
01302         update_alpha_status(i);
01303 
01304     float64_t* class_count = new float64_t[nr_class];
01305     float64_t* outputs = new float64_t[l];
01306 
01307     for (int32_t i=0; i<nr_class; i++)
01308     {
01309         class_count[i]=0;
01310         biases[i+1]=0;
01311     }
01312 
01313     for (int32_t i=0; i<active_size; i++)
01314     {
01315         update_alpha_status(i);
01316         if(!is_upper_bound(i) && !is_lower_bound(i))
01317             class_count[(int32_t) y[i]]++;
01318     }
01319 
01320     //CMath::display_vector(class_count, nr_class, "class_count");
01321 
01322     float64_t mu=((float64_t) nr_class)/(nu*l);
01323     //SG_SPRINT("nr_class=%d, l=%d, active_size=%d, nu=%f, mu=%f\n", nr_class, l, active_size, nu, mu);
01324 
01325     float64_t rho=0;
01326     float64_t quad=0;
01327     float64_t* zero_counts  = new float64_t[nr_class];
01328     float64_t normwc_const = 0;
01329 
01330     for (int32_t i=0; i<nr_class; i++)
01331     {
01332         zero_counts[i]=-INF;
01333         normwcw[i]=0;
01334     }
01335 
01336     for (int32_t i=0; i<active_size; i++)
01337     {
01338         float64_t sum_free=0;
01339         float64_t sum_atbound=0;
01340         float64_t sum_zero_count=0;
01341 
01342         Qfloat* Q_i = Q->get_Q(i,active_size);
01343         outputs[i]=0;
01344 
01345         for (int j=0; j<active_size; j++)
01346         {
01347             quad+= alpha[i]*alpha[j]*Q_i[j];
01348             float64_t tmp= alpha[j]*Q_i[j]/mu;
01349 
01350             if(!is_upper_bound(i) && !is_lower_bound(i))
01351                 sum_free+=tmp;
01352             else
01353                 sum_atbound+=tmp;
01354 
01355             if (class_count[(int32_t) y[i]] == 0 && y[j]==y[i])
01356                 sum_zero_count+= tmp;
01357 
01358             SVC_QMC* QMC=(SVC_QMC*) Q;
01359             float64_t norm_tmp=alpha[i]*alpha[j]*QMC->get_orig_Qij(Q_i[j], i, j);
01360             if (y[i]==y[j])
01361                 normwcw[(int32_t) y[i]]+=norm_tmp;
01362 
01363             normwcw[(int32_t) y[i]]-=2.0/nr_class*norm_tmp;
01364             normwc_const+=norm_tmp;
01365         }
01366 
01367         if (class_count[(int32_t) y[i]] == 0)
01368         {
01369             if (zero_counts[(int32_t) y[i]]<sum_zero_count)
01370                 zero_counts[(int32_t) y[i]]=sum_zero_count;
01371         }
01372 
01373         biases[(int32_t) y[i]+1]-=sum_free;
01374         if (class_count[(int32_t) y[i]] != 0.0)
01375             rho+=sum_free/class_count[(int32_t) y[i]];
01376         outputs[i]+=sum_free+sum_atbound;
01377     }
01378 
01379     for (int32_t i=0; i<nr_class; i++)
01380     {
01381         if (class_count[i] == 0.0)
01382             rho+=zero_counts[i];
01383 
01384         normwcw[i]+=normwc_const/CMath::sq(nr_class);
01385         normwcw[i]=CMath::sqrt(normwcw[i]);
01386     }
01387 
01388     CMath::display_vector(normwcw, nr_class, "normwcw");
01389 
01390     rho/=nr_class;
01391 
01392     SG_SPRINT("rho=%f\n", rho);
01393 
01394     float64_t sumb=0;
01395     for (int32_t i=0; i<nr_class; i++)
01396     {
01397         if (class_count[i] != 0.0)
01398             biases[i+1]=biases[i+1]/class_count[i]+rho;
01399         else
01400             biases[i+1]+=rho-zero_counts[i];
01401 
01402         SG_SPRINT("biases=%f\n", biases[i+1]);
01403 
01404         sumb+=biases[i+1];
01405     }
01406     SG_SPRINT("sumb=%f\n", sumb);
01407 
01408     delete[] zero_counts;
01409 
01410     for (int32_t i=0; i<l; i++)
01411         outputs[i]+=biases[(int32_t) y[i]+1];
01412 
01413     biases[0]=rho;
01414 
01415     //CMath::display_vector(outputs, l, "outputs");
01416 
01417 
01418     float64_t xi=0;
01419     for (int32_t i=0; i<active_size; i++)
01420     {
01421         if (is_lower_bound(i))
01422             continue;
01423         xi+=rho-outputs[i];
01424     }
01425 
01426     //SG_SPRINT("xi=%f\n", xi);
01427 
01428     //SG_SPRINT("quad=%f Cp=%f xi*mu=%f\n", quad, nr_class*rho, xi*mu);
01429 
01430     float64_t primal=0.5*quad- nr_class*rho+xi*mu;
01431 
01432     //SG_SPRINT("primal=%10.10f\n", primal);
01433 
01434     delete[] y;
01435     delete[] alpha;
01436     delete[] alpha_status;
01437 
01438     return primal;
01439 }
01440 
01441 
01442 // return 1 if already optimal, return 0 otherwise
01443 int32_t Solver_NUMC::select_working_set(
01444     int32_t &out_i, int32_t &out_j, float64_t &gap)
01445 {
01446     // return i,j such that y_i = y_j and
01447     // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01448     // j: minimizes the decrease of obj value
01449     //    (if quadratic coefficient <= 0, replace it with tau)
01450     //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01451 
01452     int32_t retval=0;
01453     float64_t best_gap=0;
01454     int32_t best_out_i=-1;
01455     int32_t best_out_j=-1;
01456 
01457     float64_t* Gmaxp = new float64_t[nr_class];
01458     float64_t* Gmaxp2 = new float64_t[nr_class];
01459     int32_t* Gmaxp_idx = new int32_t[nr_class];
01460 
01461     int32_t* Gmin_idx = new int32_t[nr_class];
01462     float64_t* obj_diff_min = new float64_t[nr_class];
01463 
01464     for (int32_t i=0; i<nr_class; i++)
01465     {
01466         Gmaxp[i]=-INF;
01467         Gmaxp2[i]=-INF;
01468         Gmaxp_idx[i]=-1;
01469         Gmin_idx[i]=-1;
01470         obj_diff_min[i]=INF;
01471     }
01472 
01473     for(int32_t t=0;t<active_size;t++)
01474     {
01475         int32_t cidx=y[t];
01476         if(!is_upper_bound(t))
01477         {
01478             if(-G[t] >= Gmaxp[cidx])
01479             {
01480                 Gmaxp[cidx] = -G[t];
01481                 Gmaxp_idx[cidx] = t;
01482             }
01483         }
01484     }
01485 
01486     for(int32_t j=0;j<active_size;j++)
01487     {
01488         int32_t cidx=y[j];
01489         int32_t ip = Gmaxp_idx[cidx];
01490         const Qfloat *Q_ip = NULL;
01491         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01492             Q_ip = Q->get_Q(ip,active_size);
01493 
01494         if (!is_lower_bound(j))
01495         {
01496             float64_t grad_diff=Gmaxp[cidx]+G[j];
01497             if (G[j] >= Gmaxp2[cidx])
01498                 Gmaxp2[cidx] = G[j];
01499             if (grad_diff > 0)
01500             {
01501                 float64_t obj_diff;
01502                 float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
01503                 if (quad_coef > 0)
01504                     obj_diff = -(grad_diff*grad_diff)/quad_coef;
01505                 else
01506                     obj_diff = -(grad_diff*grad_diff)/TAU;
01507 
01508                 if (obj_diff <= obj_diff_min[cidx])
01509                 {
01510                     Gmin_idx[cidx]=j;
01511                     obj_diff_min[cidx] = obj_diff;
01512                 }
01513             }
01514         }
01515 
01516         gap=Gmaxp[cidx]+Gmaxp2[cidx];
01517         if (gap>=best_gap && Gmin_idx[cidx]>=0 &&
01518                 Gmaxp_idx[cidx]>=0 && Gmin_idx[cidx]<active_size)
01519         {
01520             out_i = Gmaxp_idx[cidx];
01521             out_j = Gmin_idx[cidx];
01522 
01523             best_gap=gap;
01524             best_out_i=out_i;
01525             best_out_j=out_j;
01526         }
01527     }
01528 
01529     gap=best_gap;
01530     out_i=best_out_i;
01531     out_j=best_out_j;
01532 
01533     SG_SDEBUG("i=%d j=%d best_gap=%f y_i=%f y_j=%f\n", out_i, out_j, gap, y[out_i], y[out_j]);
01534 
01535 
01536     if(gap < eps)
01537         retval=1;
01538 
01539     delete[] Gmaxp;
01540     delete[] Gmaxp2;
01541     delete[] Gmaxp_idx;
01542     delete[] Gmin_idx;
01543     delete[] obj_diff_min;
01544 
01545     return retval;
01546 }
01547 
// Shrinking is disabled for the multi-class nu solver: no variable is
// ever removed from the active set (do_shrinking below is a no-op too).
bool Solver_NUMC::be_shrunk(
    int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
    float64_t Gmax4)
{
    return false;
}
01554 
// Intentionally empty: the multi-class nu solver always works on the
// full active set (see be_shrunk above, which never selects a variable).
void Solver_NUMC::do_shrinking()
{
}
01558 
// rho is not computed here for the multi-class case; biases/rho are
// derived in compute_primal() instead, so the solver hook returns 0.
float64_t Solver_NUMC::calculate_rho()
{
    return 0;
}
01563 
01564 
01565 //
01566 // Q matrices for various formulations
01567 //
01568 class SVC_Q: public LibSVMKernel
01569 {
01570 public:
01571     SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01572     :LibSVMKernel(prob.l, prob.x, param)
01573     {
01574         clone(y,y_,prob.l);
01575         cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
01576         QD = new Qfloat[prob.l];
01577         for(int32_t i=0;i<prob.l;i++)
01578             QD[i]= (Qfloat)kernel_function(i,i);
01579     }
01580 
01581     Qfloat *get_Q(int32_t i, int32_t len) const
01582     {
01583         Qfloat *data;
01584         int32_t start;
01585         if((start = cache->get_data(i,&data,len)) < len)
01586         {
01587             for(int32_t j=start;j<len;j++)
01588                 data[j] = (Qfloat) y[i]*y[j]*kernel_function(i,j);
01589         }
01590         return data;
01591     }
01592 
01593     Qfloat *get_QD() const
01594     {
01595         return QD;
01596     }
01597 
01598     void swap_index(int32_t i, int32_t j) const
01599     {
01600         cache->swap_index(i,j);
01601         LibSVMKernel::swap_index(i,j);
01602         CMath::swap(y[i],y[j]);
01603         CMath::swap(QD[i],QD[j]);
01604     }
01605 
01606     ~SVC_Q()
01607     {
01608         delete[] y;
01609         delete cache;
01610         delete[] QD;
01611     }
01612 private:
01613     schar *y;
01614     Cache *cache;
01615     Qfloat *QD;
01616 };
01617 
01618 
01619 class ONE_CLASS_Q: public LibSVMKernel
01620 {
01621 public:
01622     ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01623     :LibSVMKernel(prob.l, prob.x, param)
01624     {
01625         cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
01626         QD = new Qfloat[prob.l];
01627         for(int32_t i=0;i<prob.l;i++)
01628             QD[i]= (Qfloat)kernel_function(i,i);
01629     }
01630 
01631     Qfloat *get_Q(int32_t i, int32_t len) const
01632     {
01633         Qfloat *data;
01634         int32_t start;
01635         if((start = cache->get_data(i,&data,len)) < len)
01636         {
01637             for(int32_t j=start;j<len;j++)
01638                 data[j] = (Qfloat) kernel_function(i,j);
01639         }
01640         return data;
01641     }
01642 
01643     Qfloat *get_QD() const
01644     {
01645         return QD;
01646     }
01647 
01648     void swap_index(int32_t i, int32_t j) const
01649     {
01650         cache->swap_index(i,j);
01651         LibSVMKernel::swap_index(i,j);
01652         CMath::swap(QD[i],QD[j]);
01653     }
01654 
01655     ~ONE_CLASS_Q()
01656     {
01657         delete cache;
01658         delete[] QD;
01659     }
01660 private:
01661     Cache *cache;
01662     Qfloat *QD;
01663 };
01664 
class SVR_Q: public LibSVMKernel
{
public:
    /** Q matrix for epsilon/nu-SVR.
     *
     * Regression is solved on 2*l variables: positions k and k+l refer
     * to the same training point with opposite sign (alpha vs alpha*),
     * so Q_ij = sign[i]*sign[j]*K(index[i], index[j]).
     */
    SVR_Q(const svm_problem& prob, const svm_parameter& param)
    :LibSVMKernel(prob.l, prob.x, param)
    {
        l = prob.l;
        // only l rows are cached: rows k and k+l differ just by sign
        cache = new Cache(l,(int64_t)(param.cache_size*(1l<<20)));
        QD = new Qfloat[2*l];
        sign = new schar[2*l];
        index = new int32_t[2*l];
        for(int32_t k=0;k<l;k++)
        {
            sign[k] = 1;
            sign[k+l] = -1;
            index[k] = k;
            index[k+l] = k;
            QD[k]= (Qfloat)kernel_function(k,k);
            QD[k+l]=QD[k];
        }
        // two reorder buffers used alternately; presumably so the rows of
        // both working-set variables remain valid at the same time --
        // TODO(review): confirm against the Solver's usage of get_Q
        buffer[0] = new Qfloat[2*l];
        buffer[1] = new Qfloat[2*l];
        next_buffer = 0;
    }

    // Only the permutation (sign/index/QD) is swapped; the cache stays
    // keyed by the original example order and is remapped in get_Q.
    void swap_index(int32_t i, int32_t j) const
    {
        CMath::swap(sign[i],sign[j]);
        CMath::swap(index[i],index[j]);
        CMath::swap(QD[i],QD[j]);
    }

    Qfloat *get_Q(int32_t i, int32_t len) const
    {
        // fetch (or compute) the plain kernel row of the original example
        Qfloat *data;
        int32_t real_i = index[i];
        if(cache->get_data(real_i,&data,l) < l)
        {
            for(int32_t j=0;j<l;j++)
                data[j] = (Qfloat)kernel_function(real_i,j);
        }

        // reorder and copy
        Qfloat *buf = buffer[next_buffer];
        next_buffer = 1 - next_buffer;
        schar si = sign[i];
        for(int32_t j=0;j<len;j++)
            buf[j] = si * sign[j] * data[index[j]];
        return buf;
    }

    Qfloat *get_QD() const
    {
        return QD;
    }

    ~SVR_Q()
    {
        delete cache;
        delete[] sign;
        delete[] index;
        delete[] buffer[0];
        delete[] buffer[1];
        delete[] QD;
    }

private:
    int32_t l;                   // number of original training examples
    Cache *cache;                // LRU cache over the l plain kernel rows
    schar *sign;                 // +1 for the first l variables, -1 for the rest
    int32_t *index;              // maps doubled variable -> original example
    mutable int32_t next_buffer; // ping-pong selector for buffer[]
    Qfloat *buffer[2];           // reorder buffers returned by get_Q
    Qfloat *QD;                  // diagonal of Q (duplicated for both halves)
};
01740 
01741 //
01742 // construct and solve various formulations
01743 //
01744 static void solve_c_svc(
01745     const svm_problem *prob, const svm_parameter* param,
01746     float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
01747 {
01748     int32_t l = prob->l;
01749     schar *y = new schar[l];
01750 
01751     int32_t i;
01752 
01753     for(i=0;i<l;i++)
01754     {
01755         alpha[i] = 0;
01756         if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01757     }
01758 
01759     Solver s;
01760     s.Solve(l, SVC_Q(*prob,*param,y), prob->pv, y,
01761         alpha, Cp, Cn, param->eps, si, param->shrinking);
01762 
01763     float64_t sum_alpha=0;
01764     for(i=0;i<l;i++)
01765         sum_alpha += alpha[i];
01766 
01767     if (Cp==Cn)
01768         SG_SINFO("nu = %f\n", sum_alpha/(param->C*prob->l));
01769 
01770     for(i=0;i<l;i++)
01771         alpha[i] *= y[i];
01772 
01773     delete[] y;
01774 }
01775 
01776 
01777 //two weighted datasets
01778 void solve_c_svc_weighted(
01779     const svm_problem *prob, const svm_parameter* param,
01780     float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
01781 {
01782     int l = prob->l;
01783     float64_t *minus_ones = new float64_t[l];
01784     schar *y = new schar[l];
01785 
01786     int i;
01787 
01788     for(i=0;i<l;i++)
01789     {
01790         alpha[i] = 0;
01791         minus_ones[i] = -1;
01792         if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01793     }
01794 
01795     WeightedSolver s = WeightedSolver(prob->C);
01796     s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01797         alpha, Cp, Cn, param->eps, si, param->shrinking);
01798 
01799     float64_t sum_alpha=0;
01800     for(i=0;i<l;i++)
01801         sum_alpha += alpha[i];
01802 
01803     //if (Cp==Cn)
01804     //  SG_SINFO("nu = %f\n", sum_alpha/(prob->C*prob->l));
01805 
01806     for(i=0;i<l;i++)
01807         alpha[i] *= y[i];
01808 
01809     delete[] minus_ones;
01810     delete[] y;
01811 }
01812 
01813 static void solve_nu_svc(
01814     const svm_problem *prob, const svm_parameter *param,
01815     float64_t *alpha, Solver::SolutionInfo* si)
01816 {
01817     int32_t i;
01818     int32_t l = prob->l;
01819     float64_t nu = param->nu;
01820 
01821     schar *y = new schar[l];
01822 
01823     for(i=0;i<l;i++)
01824         if(prob->y[i]>0)
01825             y[i] = +1;
01826         else
01827             y[i] = -1;
01828 
01829     float64_t sum_pos = nu*l/2;
01830     float64_t sum_neg = nu*l/2;
01831 
01832     for(i=0;i<l;i++)
01833         if(y[i] == +1)
01834         {
01835             alpha[i] = CMath::min(1.0,sum_pos);
01836             sum_pos -= alpha[i];
01837         }
01838         else
01839         {
01840             alpha[i] = CMath::min(1.0,sum_neg);
01841             sum_neg -= alpha[i];
01842         }
01843 
01844     float64_t *zeros = new float64_t[l];
01845 
01846     for(i=0;i<l;i++)
01847         zeros[i] = 0;
01848 
01849     Solver_NU s;
01850     s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01851         alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01852     float64_t r = si->r;
01853 
01854     SG_SINFO("C = %f\n",1/r);
01855 
01856     for(i=0;i<l;i++)
01857         alpha[i] *= y[i]/r;
01858 
01859     si->rho /= r;
01860     si->obj /= (r*r);
01861     si->upper_bound_p = 1/r;
01862     si->upper_bound_n = 1/r;
01863 
01864     delete[] y;
01865     delete[] zeros;
01866 }
01867 
01868 static void solve_nu_multiclass_svc(const svm_problem *prob,
01869         const svm_parameter *param, Solver::SolutionInfo* si, svm_model* model)
01870 {
01871     int32_t l = prob->l;
01872     float64_t nu = param->nu;
01873 
01874     float64_t *alpha = Malloc(float64_t, prob->l);
01875     schar *y = new schar[l];
01876 
01877     for(int32_t i=0;i<l;i++)
01878     {
01879         alpha[i] = 0;
01880         y[i]=prob->y[i];
01881     }
01882 
01883     int32_t nr_class=param->nr_class;
01884     float64_t* sum_class = new float64_t[nr_class];
01885 
01886     for (int32_t j=0; j<nr_class; j++)
01887         sum_class[j] = nu*l/nr_class;
01888 
01889     for(int32_t i=0;i<l;i++)
01890     {
01891         alpha[i] = CMath::min(1.0,sum_class[int32_t(y[i])]);
01892         sum_class[int32_t(y[i])] -= alpha[i];
01893     }
01894     delete[] sum_class;
01895 
01896 
01897     float64_t *zeros = new float64_t[l];
01898 
01899     for (int32_t i=0;i<l;i++)
01900         zeros[i] = 0;
01901 
01902     Solver_NUMC s(nr_class, nu);
01903     SVC_QMC Q(*prob,*param,y, nr_class, ((float64_t) nr_class)/CMath::sq(nu*l));
01904 
01905     s.Solve(l, Q, zeros, y,
01906         alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01907 
01908 
01909     int32_t* class_sv_count=new int32_t[nr_class];
01910 
01911     for (int32_t i=0; i<nr_class; i++)
01912         class_sv_count[i]=0;
01913 
01914     for (int32_t i=0; i<l; i++)
01915     {
01916         if (CMath::abs(alpha[i]) > 0)
01917             class_sv_count[(int32_t) y[i]]++;
01918     }
01919 
01920     model->l=l;
01921     // rho[0]= rho in mcsvm paper, rho[1]...rho[nr_class] is bias in mcsvm paper
01922     model->rho = Malloc(float64_t, nr_class+1);
01923     model->nr_class = nr_class;
01924     model->label = NULL;
01925     model->SV = Malloc(svm_node*,nr_class);
01926     model->nSV = Malloc(int32_t, nr_class);
01927     model->sv_coef = Malloc(float64_t *,nr_class);
01928     model->normwcw = Malloc(float64_t,nr_class);
01929 
01930     for (int32_t i=0; i<nr_class+1; i++)
01931         model->rho[i]=0;
01932 
01933     model->objective = si->obj;
01934 
01935     if (param->use_bias)
01936     {
01937         SG_SDEBUG("Computing biases and primal objective\n");
01938         float64_t primal = s.compute_primal(y, alpha, model->rho, model->normwcw);
01939         SG_SINFO("Primal = %10.10f\n", primal);
01940     }
01941 
01942     for (int32_t i=0; i<nr_class; i++)
01943     {
01944         model->nSV[i]=class_sv_count[i];
01945         model->SV[i] = Malloc(svm_node,class_sv_count[i]);
01946         model->sv_coef[i] = Malloc(float64_t,class_sv_count[i]);
01947         class_sv_count[i]=0;
01948     }
01949 
01950     for (int32_t i=0;i<prob->l;i++)
01951     {
01952         if(fabs(alpha[i]) > 0)
01953         {
01954             model->SV[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]].index = prob->x[i]->index;
01955             model->sv_coef[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]] = alpha[i];
01956             class_sv_count[(int32_t) y[i]]++;
01957         }
01958     }
01959 
01960     delete[] y;
01961     delete[] zeros;
01962     free(alpha);
01963 }
01964 
01965 static void solve_one_class(
01966     const svm_problem *prob, const svm_parameter *param,
01967     float64_t *alpha, Solver::SolutionInfo* si)
01968 {
01969     int32_t l = prob->l;
01970     float64_t *zeros = new float64_t[l];
01971     schar *ones = new schar[l];
01972     int32_t i;
01973 
01974     int32_t n = (int32_t)(param->nu*prob->l);   // # of alpha's at upper bound
01975 
01976     for(i=0;i<n;i++)
01977         alpha[i] = 1;
01978     if(n<prob->l)
01979         alpha[n] = param->nu * prob->l - n;
01980     for(i=n+1;i<l;i++)
01981         alpha[i] = 0;
01982 
01983     for(i=0;i<l;i++)
01984     {
01985         zeros[i] = 0;
01986         ones[i] = 1;
01987     }
01988 
01989     Solver s;
01990     s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01991         alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01992 
01993     delete[] zeros;
01994     delete[] ones;
01995 }
01996 
01997 static void solve_epsilon_svr(
01998     const svm_problem *prob, const svm_parameter *param,
01999     float64_t *alpha, Solver::SolutionInfo* si)
02000 {
02001     int32_t l = prob->l;
02002     float64_t *alpha2 = new float64_t[2*l];
02003     float64_t *linear_term = new float64_t[2*l];
02004     schar *y = new schar[2*l];
02005     int32_t i;
02006 
02007     for(i=0;i<l;i++)
02008     {
02009         alpha2[i] = 0;
02010         linear_term[i] = param->p - prob->y[i];
02011         y[i] = 1;
02012 
02013         alpha2[i+l] = 0;
02014         linear_term[i+l] = param->p + prob->y[i];
02015         y[i+l] = -1;
02016     }
02017 
02018     Solver s;
02019     s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
02020         alpha2, param->C, param->C, param->eps, si, param->shrinking);
02021 
02022     float64_t sum_alpha = 0;
02023     for(i=0;i<l;i++)
02024     {
02025         alpha[i] = alpha2[i] - alpha2[i+l];
02026         sum_alpha += fabs(alpha[i]);
02027     }
02028     SG_SINFO("nu = %f\n",sum_alpha/(param->C*l));
02029 
02030     delete[] alpha2;
02031     delete[] linear_term;
02032     delete[] y;
02033 }
02034 
02035 static void solve_nu_svr(
02036     const svm_problem *prob, const svm_parameter *param,
02037     float64_t *alpha, Solver::SolutionInfo* si)
02038 {
02039     int32_t l = prob->l;
02040     float64_t C = param->C;
02041     float64_t *alpha2 = new float64_t[2*l];
02042     float64_t *linear_term = new float64_t[2*l];
02043     schar *y = new schar[2*l];
02044     int32_t i;
02045 
02046     float64_t sum = C * param->nu * l / 2;
02047     for(i=0;i<l;i++)
02048     {
02049         alpha2[i] = alpha2[i+l] = CMath::min(sum,C);
02050         sum -= alpha2[i];
02051 
02052         linear_term[i] = - prob->y[i];
02053         y[i] = 1;
02054 
02055         linear_term[i+l] = prob->y[i];
02056         y[i+l] = -1;
02057     }
02058 
02059     Solver_NU s;
02060     s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
02061         alpha2, C, C, param->eps, si, param->shrinking);
02062 
02063     SG_SINFO("epsilon = %f\n",-si->r);
02064 
02065     for(i=0;i<l;i++)
02066         alpha[i] = alpha2[i] - alpha2[i+l];
02067 
02068     delete[] alpha2;
02069     delete[] linear_term;
02070     delete[] y;
02071 }
02072 
//
// decision_function
//
// Result of training a single binary (or one-class/regression) SVM:
// the dual coefficients, the bias term and the final objective value.
struct decision_function
{
    float64_t *alpha;     // dual coefficient per training sample; heap-allocated, caller frees
    float64_t rho;        // bias/threshold of the decision function
    float64_t objective;  // dual objective value at the solution
};
02082 
02083 decision_function svm_train_one(
02084     const svm_problem *prob, const svm_parameter *param,
02085     float64_t Cp, float64_t Cn)
02086 {
02087     float64_t *alpha = Malloc(float64_t, prob->l);
02088     Solver::SolutionInfo si;
02089     switch(param->svm_type)
02090     {
02091         case C_SVC:
02092             solve_c_svc(prob,param,alpha,&si,Cp,Cn);
02093             break;
02094         case NU_SVC:
02095             solve_nu_svc(prob,param,alpha,&si);
02096             break;
02097         case ONE_CLASS:
02098             solve_one_class(prob,param,alpha,&si);
02099             break;
02100         case EPSILON_SVR:
02101             solve_epsilon_svr(prob,param,alpha,&si);
02102             break;
02103         case NU_SVR:
02104             solve_nu_svr(prob,param,alpha,&si);
02105             break;
02106     }
02107 
02108     SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);
02109 
02110     // output SVs
02111     if (param->svm_type != ONE_CLASS)
02112     {
02113         int32_t nSV = 0;
02114         int32_t nBSV = 0;
02115         for(int32_t i=0;i<prob->l;i++)
02116         {
02117             if(fabs(alpha[i]) > 0)
02118             {
02119                 ++nSV;
02120                 if(prob->y[i] > 0)
02121                 {
02122                     if(fabs(alpha[i]) >= si.upper_bound_p)
02123                         ++nBSV;
02124                 }
02125                 else
02126                 {
02127                     if(fabs(alpha[i]) >= si.upper_bound_n)
02128                         ++nBSV;
02129                 }
02130             }
02131         }
02132         SG_SINFO("nSV = %d, nBSV = %d\n",nSV,nBSV);
02133     }
02134 
02135     decision_function f;
02136     f.alpha = alpha;
02137     f.rho = si.rho;
02138     f.objective=si.obj;
02139     return f;
02140 }
02141 
02142 //
02143 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
02144 // perm, length l, must be allocated before calling this subroutine
02145 void svm_group_classes(
02146     const svm_problem *prob, int32_t *nr_class_ret, int32_t **label_ret,
02147     int32_t **start_ret, int32_t **count_ret, int32_t *perm)
02148 {
02149     int32_t l = prob->l;
02150     int32_t max_nr_class = 16;
02151     int32_t nr_class = 0;
02152     int32_t *label = Malloc(int32_t, max_nr_class);
02153     int32_t *count = Malloc(int32_t, max_nr_class);
02154     int32_t *data_label = Malloc(int32_t, l);
02155     int32_t i;
02156 
02157     for(i=0;i<l;i++)
02158     {
02159         int32_t this_label=(int32_t) prob->y[i];
02160         int32_t j;
02161         for(j=0;j<nr_class;j++)
02162         {
02163             if(this_label == label[j])
02164             {
02165                 ++count[j];
02166                 break;
02167             }
02168         }
02169         data_label[i] = j;
02170         if(j == nr_class)
02171         {
02172             if(nr_class == max_nr_class)
02173             {
02174                 max_nr_class *= 2;
02175                 label=(int32_t *) realloc(label,max_nr_class*sizeof(int32_t));
02176                 count=(int32_t *) realloc(count,max_nr_class*sizeof(int32_t));
02177             }
02178             label[nr_class] = this_label;
02179             count[nr_class] = 1;
02180             ++nr_class;
02181         }
02182     }
02183 
02184     int32_t *start = Malloc(int32_t, nr_class);
02185     start[0] = 0;
02186     for(i=1;i<nr_class;i++)
02187         start[i] = start[i-1]+count[i-1];
02188     for(i=0;i<l;i++)
02189     {
02190         perm[start[data_label[i]]] = i;
02191         ++start[data_label[i]];
02192     }
02193     start[0] = 0;
02194     for(i=1;i<nr_class;i++)
02195         start[i] = start[i-1]+count[i-1];
02196 
02197     *nr_class_ret = nr_class;
02198     *label_ret = label;
02199     *start_ret = start;
02200     *count_ret = count;
02201     free(data_label);
02202 }
02203 
02204 //
02205 // Interface functions
02206 //
/** Train an SVM model on the given problem.
 *
 * Three regimes, selected by param->svm_type:
 *  - ONE_CLASS / EPSILON_SVR / NU_SVR: one decision function is trained
 *    and its nonzero-alpha samples become the support vectors.
 *  - NU_MULTICLASS_SVC: the dedicated multiclass solver fills the model.
 *  - otherwise (classification): one-vs-one — a binary machine is trained
 *    for each of the k*(k-1)/2 class pairs and the support vectors are
 *    merged into one model.
 *
 * @param prob  training problem (assumed already validated)
 * @param param SVM parameters
 * @return heap-allocated model; release with svm_destroy_model()
 */
svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
{
    svm_model *model = Malloc(svm_model,1);
    model->param = *param;
    // SV pointers below alias prob->x, so the model does not own them.
    model->free_sv = 0; // XXX

    if(param->svm_type == ONE_CLASS ||
       param->svm_type == EPSILON_SVR ||
       param->svm_type == NU_SVR)
    {
        SG_SINFO("training one class svm or doing epsilon sv regression\n");

        // regression or one-class-svm
        model->nr_class = 2;
        model->label = NULL;
        model->nSV = NULL;
        model->sv_coef = Malloc(float64_t *,1);
        decision_function f = svm_train_one(prob,param,0,0);
        model->rho = Malloc(float64_t, 1);
        model->rho[0] = f.rho;
        model->objective = f.objective;

        // Keep only samples with nonzero coefficients as support vectors.
        int32_t nSV = 0;
        int32_t i;
        for(i=0;i<prob->l;i++)
            if(fabs(f.alpha[i]) > 0) ++nSV;
        model->l = nSV;
        model->SV = Malloc(svm_node *,nSV);
        model->sv_coef[0] = Malloc(float64_t, nSV);
        int32_t j = 0;
        for(i=0;i<prob->l;i++)
            if(fabs(f.alpha[i]) > 0)
            {
                model->SV[j] = prob->x[i];
                model->sv_coef[0][j] = f.alpha[i];
                ++j;
            }

        free(f.alpha);
    }
    else if(param->svm_type == NU_MULTICLASS_SVC)
    {
        // The multiclass solver populates the model directly.
        Solver::SolutionInfo si;
        solve_nu_multiclass_svc(prob,param,&si,model);
        SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);
    }
    else
    {
        // classification
        int32_t l = prob->l;
        int32_t nr_class;
        int32_t *label = NULL;
        int32_t *start = NULL;
        int32_t *count = NULL;
        int32_t *perm = Malloc(int32_t, l);

        // group training data of the same class
        svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
        // Reorder samples, per-sample C and per-sample linear term (pv)
        // into class-grouped order via perm.
        svm_node **x = Malloc(svm_node *,l);
        float64_t *C = Malloc(float64_t,l);
        float64_t *pv = Malloc(float64_t,l);


        int32_t i;
        for(i=0;i<l;i++) {
            x[i] = prob->x[perm[i]];
            C[i] = prob->C[perm[i]];

            if (prob->pv)
            {
                pv[i] = prob->pv[perm[i]];
            }
            else
            {
                //no custom linear term is set
                pv[i] = -1.0;
            }

        }


        // calculate weighted C: start from param->C and scale the classes
        // listed in param->weight_label by their weight factors.
        float64_t *weighted_C = Malloc(float64_t, nr_class);
        for(i=0;i<nr_class;i++)
            weighted_C[i] = param->C;
        for(i=0;i<param->nr_weight;i++)
        {
            int32_t j;
            for(j=0;j<nr_class;j++)
                if(param->weight_label[i] == label[j])
                    break;
            if(j == nr_class)
                SG_SWARNING("warning: class label %d specified in weight is not found\n", param->weight_label[i]);
            else
                weighted_C[j] *= param->weight[i];
        }

        // train k*(k-1)/2 models

        // nonzero[i] records whether grouped sample i is a support vector
        // in at least one pairwise machine.
        bool *nonzero = Malloc(bool,l);
        for(i=0;i<l;i++)
            nonzero[i] = false;
        decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);

        int32_t p = 0;
        for(i=0;i<nr_class;i++)
            for(int32_t j=i+1;j<nr_class;j++)
            {
                // Build the two-class sub-problem for pair (i,j):
                // class i samples get label +1, class j samples -1.
                svm_problem sub_prob;
                int32_t si = start[i], sj = start[j];
                int32_t ci = count[i], cj = count[j];
                sub_prob.l = ci+cj;
                sub_prob.x = Malloc(svm_node *,sub_prob.l);
                sub_prob.y = Malloc(float64_t,sub_prob.l+1); //dirty hack to surpress valgrind err
                sub_prob.C = Malloc(float64_t,sub_prob.l+1);
                sub_prob.pv = Malloc(float64_t,sub_prob.l+1);

                int32_t k;
                for(k=0;k<ci;k++)
                {
                    sub_prob.x[k] = x[si+k];
                    sub_prob.y[k] = +1;
                    sub_prob.C[k] = C[si+k];
                    sub_prob.pv[k] = pv[si+k];

                }
                for(k=0;k<cj;k++)
                {
                    sub_prob.x[ci+k] = x[sj+k];
                    sub_prob.y[ci+k] = -1;
                    sub_prob.C[ci+k] = C[sj+k];
                    sub_prob.pv[ci+k] = pv[sj+k];
                }
                sub_prob.y[sub_prob.l]=-1; //dirty hack to surpress valgrind err
                sub_prob.C[sub_prob.l]=-1;
                sub_prob.pv[sub_prob.l]=-1;

                f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
                // Mark samples that became support vectors in this pair.
                for(k=0;k<ci;k++)
                    if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
                        nonzero[si+k] = true;
                for(k=0;k<cj;k++)
                    if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
                        nonzero[sj+k] = true;
                free(sub_prob.x);
                free(sub_prob.y);
                free(sub_prob.C);
                free(sub_prob.pv);
                ++p;
            }

        // build output

        // NOTE(review): only the first pairwise objective is stored here.
        model->objective = f[0].objective;
        model->nr_class = nr_class;

        model->label = Malloc(int32_t, nr_class);
        for(i=0;i<nr_class;i++)
            model->label[i] = label[i];

        model->rho = Malloc(float64_t, nr_class*(nr_class-1)/2);
        for(i=0;i<nr_class*(nr_class-1)/2;i++)
            model->rho[i] = f[i].rho;

        // Count support vectors per class and in total.
        int32_t total_sv = 0;
        int32_t *nz_count = Malloc(int32_t, nr_class);
        model->nSV = Malloc(int32_t, nr_class);
        for(i=0;i<nr_class;i++)
        {
            int32_t nSV = 0;
            for(int32_t j=0;j<count[i];j++)
                if(nonzero[start[i]+j])
                {
                    ++nSV;
                    ++total_sv;
                }
            model->nSV[i] = nSV;
            nz_count[i] = nSV;
        }

        SG_SINFO("Total nSV = %d\n",total_sv);

        model->l = total_sv;
        model->SV = Malloc(svm_node *,total_sv);
        p = 0;
        for(i=0;i<l;i++)
            if(nonzero[i]) model->SV[p++] = x[i];

        // nz_start[i]: offset of class i's support vectors inside the
        // compacted SV list.
        int32_t *nz_start = Malloc(int32_t, nr_class);
        nz_start[0] = 0;
        for(i=1;i<nr_class;i++)
            nz_start[i] = nz_start[i-1]+nz_count[i-1];

        model->sv_coef = Malloc(float64_t *,nr_class-1);
        for(i=0;i<nr_class-1;i++)
            model->sv_coef[i] = Malloc(float64_t, total_sv);

        // Scatter each pairwise machine's coefficients into the merged
        // sv_coef layout (see the comment inside the loop).
        p = 0;
        for(i=0;i<nr_class;i++)
            for(int32_t j=i+1;j<nr_class;j++)
            {
                // classifier (i,j): coefficients with
                // i are in sv_coef[j-1][nz_start[i]...],
                // j are in sv_coef[i][nz_start[j]...]

                int32_t si = start[i];
                int32_t sj = start[j];
                int32_t ci = count[i];
                int32_t cj = count[j];

                int32_t q = nz_start[i];
                int32_t k;
                for(k=0;k<ci;k++)
                    if(nonzero[si+k])
                        model->sv_coef[j-1][q++] = f[p].alpha[k];
                q = nz_start[j];
                for(k=0;k<cj;k++)
                    if(nonzero[sj+k])
                        model->sv_coef[i][q++] = f[p].alpha[ci+k];
                ++p;
            }

        free(label);
        free(count);
        free(perm);
        free(start);
        free(x);
        free(C);
        free(pv);
        free(weighted_C);
        free(nonzero);
        for(i=0;i<nr_class*(nr_class-1)/2;i++)
            free(f[i].alpha);
        free(f);
        free(nz_count);
        free(nz_start);
    }
    return model;
}
02446 
02447 void svm_destroy_model(svm_model* model)
02448 {
02449     if(model->free_sv && model->l > 0)
02450         free((void *)(model->SV[0]));
02451     for(int32_t i=0;i<model->nr_class-1;i++)
02452         free(model->sv_coef[i]);
02453     free(model->SV);
02454     free(model->sv_coef);
02455     free(model->rho);
02456     free(model->label);
02457     free(model->nSV);
02458     free(model);
02459 }
02460 
02461 void svm_destroy_param(svm_parameter* param)
02462 {
02463     free(param->weight_label);
02464     free(param->weight);
02465 }
02466 
02467 const char *svm_check_parameter(
02468     const svm_problem *prob, const svm_parameter *param)
02469 {
02470     // svm_type
02471 
02472     int32_t svm_type = param->svm_type;
02473     if(svm_type != C_SVC &&
02474        svm_type != NU_SVC &&
02475        svm_type != ONE_CLASS &&
02476        svm_type != EPSILON_SVR &&
02477        svm_type != NU_SVR &&
02478        svm_type != NU_MULTICLASS_SVC)
02479         return "unknown svm type";
02480 
02481     // kernel_type, degree
02482 
02483     int32_t kernel_type = param->kernel_type;
02484     if(kernel_type != LINEAR &&
02485        kernel_type != POLY &&
02486        kernel_type != RBF &&
02487        kernel_type != SIGMOID &&
02488        kernel_type != PRECOMPUTED)
02489         return "unknown kernel type";
02490 
02491     if(param->degree < 0)
02492         return "degree of polynomial kernel < 0";
02493 
02494     // cache_size,eps,C,nu,p,shrinking
02495 
02496     if(param->cache_size <= 0)
02497         return "cache_size <= 0";
02498 
02499     if(param->eps <= 0)
02500         return "eps <= 0";
02501 
02502     if(svm_type == C_SVC ||
02503        svm_type == EPSILON_SVR ||
02504        svm_type == NU_SVR)
02505         if(param->C <= 0)
02506             return "C <= 0";
02507 
02508     if(svm_type == NU_SVC ||
02509        svm_type == ONE_CLASS ||
02510        svm_type == NU_SVR)
02511         if(param->nu <= 0 || param->nu > 1)
02512             return "nu <= 0 or nu > 1";
02513 
02514     if(svm_type == EPSILON_SVR)
02515         if(param->p < 0)
02516             return "p < 0";
02517 
02518     if(param->shrinking != 0 &&
02519        param->shrinking != 1)
02520         return "shrinking != 0 and shrinking != 1";
02521 
02522 
02523     // check whether nu-svc is feasible
02524 
02525     if(svm_type == NU_SVC)
02526     {
02527         int32_t l = prob->l;
02528         int32_t max_nr_class = 16;
02529         int32_t nr_class = 0;
02530         int32_t *label = Malloc(int32_t, max_nr_class);
02531         int32_t *count = Malloc(int32_t, max_nr_class);
02532 
02533         int32_t i;
02534         for(i=0;i<l;i++)
02535         {
02536             int32_t this_label = (int32_t) prob->y[i];
02537             int32_t j;
02538             for(j=0;j<nr_class;j++)
02539                 if(this_label == label[j])
02540                 {
02541                     ++count[j];
02542                     break;
02543                 }
02544             if(j == nr_class)
02545             {
02546                 if(nr_class == max_nr_class)
02547                 {
02548                     max_nr_class *= 2;
02549                     label=(int32_t *) realloc(label,
02550                         max_nr_class*sizeof(int32_t));
02551                     count=(int32_t *) realloc(count,
02552                         max_nr_class*sizeof(int32_t));
02553                 }
02554                 label[nr_class] = this_label;
02555                 count[nr_class] = 1;
02556                 ++nr_class;
02557             }
02558         }
02559 
02560         for(i=0;i<nr_class;i++)
02561         {
02562             int32_t n1 = count[i];
02563             for(int32_t j=i+1;j<nr_class;j++)
02564             {
02565                 int32_t n2 = count[j];
02566                 if(param->nu*(n1+n2)/2 > CMath::min(n1,n2))
02567                 {
02568                     free(label);
02569                     free(count);
02570                     return "specified nu is infeasible";
02571                 }
02572             }
02573         }
02574         free(label);
02575         free(count);
02576     }
02577 
02578     return NULL;
02579 }
02580 }
02581 #endif // DOXYGEN_SHOULD_SKIP_THIS

SHOGUN Machine Learning Toolbox - Documentation