TOPFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "features/TOPFeatures.h"
00013 #include "lib/io.h"
00014 #include "lib/Mathematics.h"
00015 
00016 using namespace shogun;
00017 
00018 CTOPFeatures::CTOPFeatures(
00019     int32_t size, CHMM* p, CHMM* n, bool neglin, bool poslin)
00020 : CSimpleFeatures<float64_t>(size), neglinear(neglin), poslinear(poslin)
00021 {
00022     memset(&pos_relevant_indizes, 0, sizeof(pos_relevant_indizes));
00023     memset(&neg_relevant_indizes, 0, sizeof(neg_relevant_indizes));
00024     set_models(p,n);
00025 }
00026 
00027 CTOPFeatures::CTOPFeatures(const CTOPFeatures &orig)
00028 : CSimpleFeatures<float64_t>(orig), pos(orig.pos), neg(orig.neg), neglinear(orig.neglinear),
00029     poslinear(orig.poslinear)
00030 {
00031 }
00032 
00033 CTOPFeatures::~CTOPFeatures()
00034 {
00035     delete[] pos_relevant_indizes.idx_p;
00036     delete[] pos_relevant_indizes.idx_q;
00037     delete[] pos_relevant_indizes.idx_a_cols;
00038     delete[] pos_relevant_indizes.idx_a_rows;
00039     delete[] pos_relevant_indizes.idx_b_cols;
00040     delete[] pos_relevant_indizes.idx_b_rows;
00041 
00042     delete[] neg_relevant_indizes.idx_p;
00043     delete[] neg_relevant_indizes.idx_q;
00044     delete[] neg_relevant_indizes.idx_a_cols;
00045     delete[] neg_relevant_indizes.idx_a_rows;
00046     delete[] neg_relevant_indizes.idx_b_cols;
00047     delete[] neg_relevant_indizes.idx_b_rows;
00048 
00049     SG_UNREF(pos);
00050     SG_UNREF(neg);
00051 }
00052 
00053 void CTOPFeatures::set_models(CHMM* p, CHMM* n)
00054 {
00055     ASSERT(p && n);
00056     SG_REF(p);
00057     SG_REF(n);
00058 
00059     pos=p; 
00060     neg=n;
00061     set_num_vectors(0);
00062 
00063     delete[] feature_matrix  ;
00064     feature_matrix=NULL ;
00065 
00066 
00067     if (pos && pos->get_observations())
00068         set_num_vectors(pos->get_observations()->get_num_vectors());
00069 
00070     compute_relevant_indizes(p, &pos_relevant_indizes);
00071     compute_relevant_indizes(n, &neg_relevant_indizes);
00072     num_features=compute_num_features();
00073 
00074     SG_DEBUG( "pos_feat=[%i,%i,%i,%i],neg_feat=[%i,%i,%i,%i] -> %i features\n", pos->get_N(), pos->get_N(), pos->get_N()*pos->get_N(), pos->get_N()*pos->get_M(), neg->get_N(), neg->get_N(), neg->get_N()*neg->get_N(), neg->get_N()*neg->get_M(),num_features) ;
00075 }
00076 
00077 float64_t* CTOPFeatures::compute_feature_vector(
00078     int32_t num, int32_t &len, float64_t* target)
00079 {
00080     float64_t* featurevector=target;
00081 
00082     if (!featurevector) 
00083         featurevector=new float64_t[get_num_features()];
00084 
00085     if (!featurevector)
00086         return NULL;
00087 
00088     compute_feature_vector(featurevector, num, len);
00089 
00090     return featurevector;
00091 }
00092 
00093 void CTOPFeatures::compute_feature_vector(
00094     float64_t* featurevector, int32_t num, int32_t& len)
00095 {
00096     int32_t i,j,p=0,x=num;
00097     int32_t idx=0;
00098 
00099     float64_t posx=(poslinear) ?
00100         (pos->linear_model_probability(x)) : (pos->model_probability(x));
00101     float64_t negx=(neglinear) ?
00102         (neg->linear_model_probability(x)) : (neg->model_probability(x));
00103 
00104     len=get_num_features();
00105 
00106     featurevector[p++]=(posx-negx);
00107 
00108     //first do positive model
00109     if (poslinear)
00110     {
00111         for (i=0; i<pos->get_N(); i++)
00112         {
00113             for (j=0; j<pos->get_M(); j++)
00114                 featurevector[p++]=exp(pos->linear_model_derivative(i, j, x)-posx);
00115         }
00116     }
00117     else
00118     {
00119         for (idx=0; idx< pos_relevant_indizes.num_p; idx++)
00120             featurevector[p++]=exp(pos->model_derivative_p(pos_relevant_indizes.idx_p[idx], x)-posx);
00121 
00122         for (idx=0; idx< pos_relevant_indizes.num_q; idx++)
00123             featurevector[p++]=exp(pos->model_derivative_q(pos_relevant_indizes.idx_q[idx], x)-posx);
00124 
00125         for (idx=0; idx< pos_relevant_indizes.num_a; idx++)
00126                 featurevector[p++]=exp(pos->model_derivative_a(pos_relevant_indizes.idx_a_rows[idx], pos_relevant_indizes.idx_a_cols[idx], x)-posx);
00127 
00128         for (idx=0; idx< pos_relevant_indizes.num_b; idx++)
00129                 featurevector[p++]=exp(pos->model_derivative_b(pos_relevant_indizes.idx_b_rows[idx], pos_relevant_indizes.idx_b_cols[idx], x)-posx);
00130 
00131 
00132         //for (i=0; i<pos->get_N(); i++)
00133         //{
00134         //  featurevector[p++]=exp(pos->model_derivative_p(i, x)-posx);
00135         //  featurevector[p++]=exp(pos->model_derivative_q(i, x)-posx);
00136 
00137         //  for (j=0; j<pos->get_N(); j++)
00138         //      featurevector[p++]=exp(pos->model_derivative_a(i, j, x)-posx);
00139 
00140         //  for (j=0; j<pos->get_M(); j++)
00141         //      featurevector[p++]=exp(pos->model_derivative_b(i, j, x)-posx);
00142         //}
00143     }
00144 
00145     //then do negative
00146     if (neglinear)
00147     {
00148         for (i=0; i<neg->get_N(); i++)
00149         {
00150             for (j=0; j<neg->get_M(); j++)
00151                 featurevector[p++]= - exp(neg->linear_model_derivative(i, j, x)-negx);
00152         }
00153     }
00154     else
00155     {
00156         for (idx=0; idx< neg_relevant_indizes.num_p; idx++)
00157             featurevector[p++]= - exp(neg->model_derivative_p(neg_relevant_indizes.idx_p[idx], x)-negx);
00158 
00159         for (idx=0; idx< neg_relevant_indizes.num_q; idx++)
00160             featurevector[p++]= - exp(neg->model_derivative_q(neg_relevant_indizes.idx_q[idx], x)-negx);
00161 
00162         for (idx=0; idx< neg_relevant_indizes.num_a; idx++)
00163                 featurevector[p++]= - exp(neg->model_derivative_a(neg_relevant_indizes.idx_a_rows[idx], neg_relevant_indizes.idx_a_cols[idx], x)-negx);
00164 
00165         for (idx=0; idx< neg_relevant_indizes.num_b; idx++)
00166                 featurevector[p++]= - exp(neg->model_derivative_b(neg_relevant_indizes.idx_b_rows[idx], neg_relevant_indizes.idx_b_cols[idx], x)-negx);
00167 
00168         //for (i=0; i<neg->get_N(); i++)
00169         //{
00170         //  featurevector[p++]= - exp(neg->model_derivative_p(i, x)-negx);
00171         //  featurevector[p++]= - exp(neg->model_derivative_q(i, x)-negx);
00172 
00173         //  for (j=0; j<neg->get_N(); j++)
00174         //      featurevector[p++]= - exp(neg->model_derivative_a(i, j, x)-negx);
00175 
00176         //  for (j=0; j<neg->get_M(); j++)
00177         //      featurevector[p++]= - exp(neg->model_derivative_b(i, j, x)-negx);
00178         //}
00179     }
00180 }
00181 
00182 float64_t* CTOPFeatures::set_feature_matrix()
00183 {
00184     int32_t len=0;
00185 
00186     num_features=get_num_features();
00187     ASSERT(num_features);
00188     ASSERT(pos);
00189     ASSERT(pos->get_observations());
00190 
00191     num_vectors=pos->get_observations()->get_num_vectors();
00192     SG_INFO( "allocating top feature cache of size %.2fM\n", sizeof(float64_t)*num_features*num_vectors/1024.0/1024.0);
00193     delete[] feature_matrix;
00194     feature_matrix=new float64_t[num_features*num_vectors];
00195     if (!feature_matrix)
00196     {
00197       SG_ERROR( "allocation not successful!");
00198         return NULL ;
00199     } ;
00200 
00201     SG_INFO( "calculating top feature matrix\n");
00202 
00203     for (int32_t x=0; x<num_vectors; x++)
00204     {
00205         if (!(x % (num_vectors/10+1)))
00206             SG_DEBUG( "%02d%%.", (int) (100.0*x/num_vectors));
00207         else if (!(x % (num_vectors/200+1)))
00208             SG_DEBUG( ".");
00209 
00210         compute_feature_vector(&feature_matrix[x*num_features], x, len);
00211     }
00212 
00213     SG_DONE();
00214 
00215     num_vectors=get_num_vectors() ;
00216     num_features=get_num_features() ;
00217 
00218     return feature_matrix;
00219 }
00220 
00221 bool CTOPFeatures::compute_relevant_indizes(CHMM* hmm, T_HMM_INDIZES* hmm_idx)
00222 {
00223     int32_t i=0;
00224     int32_t j=0;
00225 
00226     hmm_idx->num_p=0;
00227     hmm_idx->num_q=0;
00228     hmm_idx->num_a=0;
00229     hmm_idx->num_b=0;
00230 
00231     for (i=0; i<hmm->get_N(); i++)
00232     {
00233         if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00234             hmm_idx->num_p++;
00235 
00236         if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00237             hmm_idx->num_q++;
00238 
00239         for (j=0; j<hmm->get_N(); j++)
00240         {
00241             if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00242                 hmm_idx->num_a++;
00243         }
00244 
00245         for (j=0; j<pos->get_M(); j++)
00246         {
00247             if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00248                 hmm_idx->num_b++;
00249         }
00250     }
00251 
00252     if (hmm_idx->num_p > 0)
00253     {
00254         hmm_idx->idx_p=new int32_t[hmm_idx->num_p];
00255         ASSERT(hmm_idx->idx_p);
00256     }
00257 
00258     if (hmm_idx->num_q > 0)
00259     {
00260         hmm_idx->idx_q=new int32_t[hmm_idx->num_q];
00261         ASSERT(hmm_idx->idx_q);
00262     }
00263 
00264     if (hmm_idx->num_a > 0)
00265     {
00266         hmm_idx->idx_a_rows=new int32_t[hmm_idx->num_a];
00267         hmm_idx->idx_a_cols=new int32_t[hmm_idx->num_a];
00268         ASSERT(hmm_idx->idx_a_rows);
00269         ASSERT(hmm_idx->idx_a_cols);
00270     }
00271 
00272     if (hmm_idx->num_b > 0)
00273     {
00274         hmm_idx->idx_b_rows=new int32_t[hmm_idx->num_b];
00275         hmm_idx->idx_b_cols=new int32_t[hmm_idx->num_b];
00276         ASSERT(hmm_idx->idx_b_rows);
00277         ASSERT(hmm_idx->idx_b_cols);
00278     }
00279 
00280 
00281     int32_t idx_p=0;
00282     int32_t idx_q=0;
00283     int32_t idx_a=0;
00284     int32_t idx_b=0;
00285 
00286     for (i=0; i<hmm->get_N(); i++)
00287     {
00288         if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00289         {
00290             ASSERT(idx_p < hmm_idx->num_p);
00291             hmm_idx->idx_p[idx_p++]=i;
00292         }
00293         
00294         if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00295         {
00296             ASSERT(idx_q < hmm_idx->num_q);
00297             hmm_idx->idx_q[idx_q++]=i;
00298         }
00299 
00300         for (j=0; j<hmm->get_N(); j++)
00301         {
00302             if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00303             {
00304                 ASSERT(idx_a < hmm_idx->num_a);
00305                 hmm_idx->idx_a_rows[idx_a]=i;
00306                 hmm_idx->idx_a_cols[idx_a++]=j;
00307             }
00308         }
00309 
00310         for (j=0; j<pos->get_M(); j++)
00311         {
00312             if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00313             {
00314                 ASSERT(idx_b < hmm_idx->num_b);
00315                 hmm_idx->idx_b_rows[idx_b]=i;
00316                 hmm_idx->idx_b_cols[idx_b++]=j;
00317             }
00318         }
00319     }
00320 
00321     return true;
00322 }
00323 
00324 int32_t CTOPFeatures::compute_num_features()
00325 {
00326     int32_t num=0;
00327 
00328     if (pos && neg)
00329     {
00330         num+=1; //zeroth- component
00331 
00332         if (poslinear)
00333             num+=pos->get_N()*pos->get_M();
00334         else
00335         {
00336             num+= pos_relevant_indizes.num_p + pos_relevant_indizes.num_q + pos_relevant_indizes.num_a + pos_relevant_indizes.num_b;
00337         }
00338 
00339         if (neglinear)
00340             num+=neg->get_N()*neg->get_M();
00341         else
00342         {
00343             num+= neg_relevant_indizes.num_p + neg_relevant_indizes.num_q + neg_relevant_indizes.num_a + neg_relevant_indizes.num_b;
00344         }
00345 
00346         //num+=1; //zeroth- component
00347         //num+= (poslinear) ? (pos->get_N()*pos->get_M()) : (pos->get_N()*(1+pos->get_N()+1+pos->get_M()));
00348         //num+= (neglinear) ? (neg->get_N()*neg->get_M()) : (neg->get_N()*(1+neg->get_N()+1+neg->get_M()));
00349     }
00350     return num;
00351 }

SHOGUN Machine Learning Toolbox - Documentation