SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Soeren Sonnenburg 00008 * Copyright (C) 2010 Berlin Institute of Technology 00009 */ 00010 #include "features/SparsePolyFeatures.h" 00011 #include "lib/Hash.h" 00012 00013 using namespace shogun; 00014 00015 CSparsePolyFeatures::CSparsePolyFeatures(void) 00016 { 00017 SG_UNSTABLE("CSparsePolyFeatures::CSparsePolyFeatures(void)", 00018 "\n"); 00019 00020 m_feat = NULL; 00021 m_degree = 0; 00022 m_normalize = false; 00023 m_input_dimensions = 0; 00024 m_output_dimensions = 0; 00025 m_normalization_values = NULL; 00026 mask = 0; 00027 m_hash_bits = 0; 00028 } 00029 00030 CSparsePolyFeatures::CSparsePolyFeatures(CSparseFeatures<float64_t>* feat, int32_t degree, bool normalize, int32_t hash_bits) 00031 : CDotFeatures(), m_normalization_values(NULL) 00032 { 00033 ASSERT(feat); 00034 00035 m_feat = feat; 00036 SG_REF(m_feat); 00037 m_degree=degree; 00038 m_normalize=normalize; 00039 m_hash_bits=hash_bits; 00040 mask=(uint32_t) (((uint64_t) 1)<<m_hash_bits)-1; 00041 m_output_dimensions=1<<m_hash_bits; 00042 m_input_dimensions=feat->get_num_features(); 00043 00044 if (m_normalize) 00045 store_normalization_values(); 00046 } 00047 00048 CSparsePolyFeatures::~CSparsePolyFeatures() 00049 { 00050 delete[] m_normalization_values; 00051 SG_UNREF(m_feat); 00052 } 00053 00054 float64_t CSparsePolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2) 00055 { 00056 ASSERT(df); 00057 ASSERT(df->get_feature_type() == get_feature_type()); 00058 ASSERT(df->get_feature_class() == get_feature_class()); 00059 00060 CSparsePolyFeatures* pf=(CSparsePolyFeatures*) df; 00061 00062 int32_t len1, len2; 00063 bool do_free1, do_free2; 00064 TSparseEntry<float64_t>* vec1 = m_feat->get_sparse_feature_vector(vec_idx1, len1, do_free1); 00065 TSparseEntry<float64_t>* vec2 = pf->m_feat->get_sparse_feature_vector(vec_idx2, len2, do_free2); 00066 00067 float64_t result=CSparseFeatures<float64_t>::sparse_dot(1, vec1, len1, vec2, len2); 00068 result=CMath::pow(result, m_degree); 00069 00070 m_feat->free_feature_vector(vec1, len1, do_free1); 00071 pf->m_feat->free_feature_vector(vec2, len2, do_free2); 00072 00073 return result; 00074 } 00075 00076 float64_t CSparsePolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00077 { 00078 if (vec2_len != m_output_dimensions) 00079 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions); 00080 00081 int32_t vlen; 00082 bool do_free; 00083 TSparseEntry<float64_t>* vec = m_feat->get_sparse_feature_vector(vec_idx1, vlen, do_free); 00084 00085 float64_t result=0; 00086 00087 if (vec) 00088 { 00089 if (m_degree==2) 00090 { 00091 /* (a+b)^2 = a^2 + 2ab +b^2 */ 00092 for (int32_t i=0; i<vlen; i++) 00093 { 00094 float64_t v1=vec[i].entry; 00095 uint32_t seed=CHash::MurmurHash2((uint8_t*) &(vec[i].feat_index), sizeof(int32_t), 0xDEADBEAF); 00096 00097 for (int32_t j=i; j<vlen; j++) 00098 { 00099 float64_t v2=vec[j].entry; 00100 uint32_t h=CHash::MurmurHash2((uint8_t*) &(vec[j].feat_index), sizeof(int32_t), seed) & mask; 00101 float64_t v; 00102 00103 if (i==j) 00104 v=v1*v1; 00105 else 00106 v=CMath::sqrt(2.0)*v1*v2; 00107 00108 result+=v*vec2[h]; 00109 } 00110 } 00111 } 00112 else if (m_degree==3) 00113 SG_NOTIMPLEMENTED; 00114 } 00115 00116 if (m_normalize) 00117 result/=m_normalization_values[vec_idx1]; 00118 00119 m_feat->free_feature_vector(vec, vlen, do_free); 00120 return result; 00121 } 00122 00123 void CSparsePolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val) 00124 { 00125 if (vec2_len != m_output_dimensions) 00126 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions); 00127 00128 int32_t vlen; 00129 bool do_free; 00130 TSparseEntry<float64_t>* vec = m_feat->get_sparse_feature_vector(vec_idx1, vlen, do_free); 00131 00132 float64_t norm_val=1.0; 00133 if (m_normalize) 00134 norm_val = m_normalization_values[vec_idx1]; 00135 alpha/=norm_val; 00136 00137 if (m_degree==2) 00138 { 00139 /* (a+b)^2 = a^2 + 2ab +b^2 */ 00140 for (int32_t i=0; i<vlen; i++) 00141 { 00142 float64_t v1=vec[i].entry; 00143 uint32_t seed=CHash::MurmurHash2((uint8_t*) &(vec[i].feat_index), sizeof(int32_t), 0xDEADBEAF); 00144 00145 for (int32_t j=i; j<vlen; j++) 00146 { 00147 float64_t v2=vec[j].entry; 00148 uint32_t h=CHash::MurmurHash2((uint8_t*) &(vec[j].feat_index), sizeof(int32_t), seed) & mask; 00149 float64_t v; 00150 00151 if (i==j) 00152 v=alpha*v1*v1; 00153 else 00154 v=alpha*CMath::sqrt(2.0)*v1*v2; 00155 00156 if (abs_val) 00157 vec2[h]+=CMath::abs(v); 00158 else 00159 vec2[h]+=v; 00160 } 00161 } 00162 } 00163 else if (m_degree==3) 00164 SG_NOTIMPLEMENTED; 00165 00166 m_feat->free_feature_vector(vec, vlen, do_free); 00167 } 00168 00169 void CSparsePolyFeatures::store_normalization_values() 00170 { 00171 delete[] m_normalization_values; 00172 00173 int32_t num_vec = this->get_num_vectors(); 00174 00175 m_normalization_values=new float64_t[num_vec]; 00176 for (int i=0; i<num_vec; i++) 00177 { 00178 float64_t val = CMath::sqrt(dot(i, this,i)); 00179 if (val==0) 00180 // trap division by zero 00181 m_normalization_values[i]=1.0; 00182 else 00183 m_normalization_values[i]=val; 00184 } 00185 00186 } 00187 00188 CFeatures* CSparsePolyFeatures::duplicate() const 00189 { 00190 return new CSparsePolyFeatures(*this); 00191 }