// SHOGUN v0.9.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009-2010 Soeren Sonnenburg 00008 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 * Copyright (C) 2010 Berlin Institute of Technology 00010 */ 00011 00012 #ifndef _IMPLICITSPECFEATURES_H___ 00013 #define _IMPLICITSPECFEATURES_H___ 00014 00015 #include "lib/common.h" 00016 #include "lib/io.h" 00017 #include "features/DotFeatures.h" 00018 #include "features/StringFeatures.h" 00019 00020 namespace shogun 00021 { 00022 00023 template <class ST> class CStringFeatures; 00024 00030 class CImplicitWeightedSpecFeatures : public CDotFeatures 00031 { 00032 public: 00034 CImplicitWeightedSpecFeatures(void); 00035 00041 CImplicitWeightedSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize=true); 00042 00044 CImplicitWeightedSpecFeatures(const CImplicitWeightedSpecFeatures & orig); 00045 00046 virtual ~CImplicitWeightedSpecFeatures(); 00047 00052 virtual CFeatures* duplicate() const; 00053 00061 inline virtual int32_t get_dim_feature_space() 00062 { 00063 return spec_size; 00064 } 00065 00073 virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2); 00074 00081 virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len); 00082 00091 virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val=false); 00092 00098 virtual inline int32_t get_nnz_features_for_vector(int32_t num) 00099 { 00100 int32_t vlen=-1; 00101 bool free_vec; 00102 uint16_t* vec1=strings->get_feature_vector(num, vlen, free_vec); 00103 strings->free_feature_vector(vec1, num, free_vec); 00104 int32_t nnz=0; 00105 for (int32_t i=1; i<=degree; i++) 00106 
nnz+=CMath::min(CMath::pow(alphabet_size,i), vlen); 00107 return nnz; 00108 } 00109 00114 inline virtual EFeatureType get_feature_type() 00115 { 00116 return F_UNKNOWN; 00117 } 00118 00123 inline virtual EFeatureClass get_feature_class() 00124 { 00125 return C_WEIGHTEDSPEC; 00126 } 00127 00132 inline virtual int32_t get_num_vectors() 00133 { 00134 return num_strings; 00135 } 00136 00141 inline virtual int32_t get_size() 00142 { 00143 return sizeof(float64_t); 00144 } 00145 00150 bool set_wd_weights(); 00151 00158 bool set_weights(float64_t* w, int32_t d); 00159 00161 struct wspec_feature_iterator 00162 { 00164 uint16_t* vec; 00166 int32_t vidx; 00168 int32_t vlen; 00170 bool vfree; 00171 00176 int32_t offs; 00177 int32_t d; 00178 int32_t j; 00179 uint8_t mask; 00180 float64_t alpha; 00182 }; 00183 00193 virtual void* get_feature_iterator(int32_t vector_index); 00194 00205 virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator); 00206 00212 virtual void free_feature_iterator(void* iterator); 00213 00215 inline virtual const char* get_name() const { return "ImplicitWeightedSpecFeatures"; } 00216 00217 protected: 00222 void compute_normalization_const(); 00223 00224 protected: 00226 CStringFeatures<uint16_t>* strings; 00227 00229 float64_t* normalization_factors; 00231 int32_t num_strings; 00233 int32_t alphabet_size; 00234 00236 int32_t degree; 00238 int32_t spec_size; 00239 00241 float64_t* spec_weights; 00242 }; 00243 } 00244 #endif // _IMPLICITSPECFEATURES_H___