// SHOGUN v0.9.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Soeren Sonnenburg 00008 * Copyright (C) 2010 Berlin Institute of Technology 00009 */ 00010 #ifndef __BINARY_FILE_H__ 00011 #define __BINARY_FILE_H__ 00012 00013 #include <shogun/lib/config.h> 00014 #include <shogun/lib/common.h> 00015 #include <shogun/base/SGObject.h> 00016 #include <shogun/lib/io.h> 00017 #include <shogun/lib/SimpleFile.h> 00018 00019 namespace shogun 00020 { 00027 class CBinaryFile: public CFile 00028 { 00029 public: 00031 CBinaryFile(void); 00032 00038 CBinaryFile(FILE* f, const char* name=NULL); 00039 00046 CBinaryFile(char* fname, char rw='r', const char* name=NULL); 00047 00049 virtual ~CBinaryFile(); 00050 00058 virtual void get_byte_vector(uint8_t*& vector, int32_t& len); 00059 virtual void get_char_vector(char*& vector, int32_t& len); 00060 virtual void get_int_vector(int32_t*& vector, int32_t& len); 00061 virtual void get_real_vector(float64_t*& vector, int32_t& len); 00062 virtual void get_shortreal_vector(float32_t*& vector, int32_t& len); 00063 virtual void get_short_vector(int16_t*& vector, int32_t& len); 00064 virtual void get_word_vector(uint16_t*& vector, int32_t& len); 00066 00075 virtual void get_byte_matrix( 00076 uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00077 virtual void get_int8_matrix( 00078 int8_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00079 virtual void get_char_matrix( 00080 char*& matrix, int32_t& num_feat, int32_t& num_vec); 00081 virtual void get_int_matrix( 00082 int32_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00083 virtual void get_uint_matrix( 00084 uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00085 virtual void get_long_matrix( 00086 int64_t*& matrix, int32_t& 
num_feat, int32_t& num_vec); 00087 virtual void get_ulong_matrix( 00088 uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00089 virtual void get_shortreal_matrix( 00090 float32_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00091 virtual void get_real_matrix( 00092 float64_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00093 virtual void get_longreal_matrix( 00094 floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00095 virtual void get_short_matrix( 00096 int16_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00097 virtual void get_word_matrix( 00098 uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00100 00109 virtual void get_byte_ndarray( 00110 uint8_t*& array, int32_t*& dims, int32_t& num_dims); 00111 virtual void get_char_ndarray( 00112 char*& array, int32_t*& dims, int32_t& num_dims); 00113 virtual void get_int_ndarray( 00114 int32_t*& array, int32_t*& dims, int32_t& num_dims); 00115 virtual void get_shortreal_ndarray( 00116 float32_t*& array, int32_t*& dims, int32_t& num_dims); 00117 virtual void get_real_ndarray( 00118 float64_t*& array, int32_t*& dims, int32_t& num_dims); 00119 virtual void get_short_ndarray( 00120 int16_t*& array, int32_t*& dims, int32_t& num_dims); 00121 virtual void get_word_ndarray( 00122 uint16_t*& array, int32_t*& dims, int32_t& num_dims); 00124 00133 virtual void get_bool_sparsematrix( 00134 TSparse<bool>*& matrix, int32_t& num_feat, int32_t& num_vec); 00135 virtual void get_byte_sparsematrix( 00136 TSparse<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00137 virtual void get_int8_sparsematrix( 00138 TSparse<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00139 virtual void get_char_sparsematrix( 00140 TSparse<char>*& matrix, int32_t& num_feat, int32_t& num_vec); 00141 virtual void get_int_sparsematrix( 00142 TSparse<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00143 virtual void get_uint_sparsematrix( 00144 TSparse<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 
00145 virtual void get_long_sparsematrix( 00146 TSparse<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00147 virtual void get_ulong_sparsematrix( 00148 TSparse<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00149 virtual void get_short_sparsematrix( 00150 TSparse<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00151 virtual void get_word_sparsematrix( 00152 TSparse<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00153 virtual void get_shortreal_sparsematrix( 00154 TSparse<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00155 virtual void get_real_sparsematrix( 00156 TSparse<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00157 virtual void get_longreal_sparsematrix( 00158 TSparse<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00160 00161 00170 virtual void get_byte_string_list( 00171 TString<uint8_t>*& strings, int32_t& num_str, 00172 int32_t& max_string_len); 00173 virtual void get_int8_string_list( 00174 TString<int8_t>*& strings, int32_t& num_str, 00175 int32_t& max_string_len); 00176 virtual void get_char_string_list( 00177 TString<char>*& strings, int32_t& num_str, 00178 int32_t& max_string_len); 00179 virtual void get_int_string_list( 00180 TString<int32_t>*& strings, int32_t& num_str, 00181 int32_t& max_string_len); 00182 virtual void get_uint_string_list( 00183 TString<uint32_t>*& strings, int32_t& num_str, 00184 int32_t& max_string_len); 00185 virtual void get_short_string_list( 00186 TString<int16_t>*& strings, int32_t& num_str, 00187 int32_t& max_string_len); 00188 virtual void get_word_string_list( 00189 TString<uint16_t>*& strings, int32_t& num_str, 00190 int32_t& max_string_len); 00191 virtual void get_long_string_list( 00192 TString<int64_t>*& strings, int32_t& num_str, 00193 int32_t& max_string_len); 00194 virtual void get_ulong_string_list( 00195 TString<uint64_t>*& strings, int32_t& num_str, 00196 int32_t& max_string_len); 00197 virtual void get_shortreal_string_list( 00198 
TString<float32_t>*& strings, int32_t& num_str, 00199 int32_t& max_string_len); 00200 virtual void get_real_string_list( 00201 TString<float64_t>*& strings, int32_t& num_str, 00202 int32_t& max_string_len); 00203 virtual void get_longreal_string_list( 00204 TString<floatmax_t>*& strings, int32_t& num_str, 00205 int32_t& max_string_len); 00207 00215 virtual void set_byte_vector(const uint8_t* vector, int32_t len); 00216 virtual void set_char_vector(const char* vector, int32_t len); 00217 virtual void set_int_vector(const int32_t* vector, int32_t len); 00218 virtual void set_shortreal_vector( const float32_t* vector, int32_t len); 00219 virtual void set_real_vector(const float64_t* vector, int32_t len); 00220 virtual void set_short_vector(const int16_t* vector, int32_t len); 00221 virtual void set_word_vector(const uint16_t* vector, int32_t len); 00223 00224 00232 virtual void set_byte_matrix( 00233 const uint8_t* matrix, int32_t num_feat, int32_t num_vec); 00234 virtual void set_int8_matrix( 00235 const int8_t* matrix, int32_t num_feat, int32_t num_vec); 00236 virtual void set_char_matrix( 00237 const char* matrix, int32_t num_feat, int32_t num_vec); 00238 virtual void set_int_matrix( 00239 const int32_t* matrix, int32_t num_feat, int32_t num_vec); 00240 virtual void set_uint_matrix( 00241 const uint32_t* matrix, int32_t num_feat, int32_t num_vec); 00242 virtual void set_long_matrix( 00243 const int64_t* matrix, int32_t num_feat, int32_t num_vec); 00244 virtual void set_ulong_matrix( 00245 const uint64_t* matrix, int32_t num_feat, int32_t num_vec); 00246 virtual void set_shortreal_matrix( 00247 const float32_t* matrix, int32_t num_feat, int32_t num_vec); 00248 virtual void set_real_matrix( 00249 const float64_t* matrix, int32_t num_feat, int32_t num_vec); 00250 virtual void set_longreal_matrix( 00251 const floatmax_t* matrix, int32_t num_feat, int32_t num_vec); 00252 virtual void set_short_matrix( 00253 const int16_t* matrix, int32_t num_feat, int32_t num_vec); 
00254 virtual void set_word_matrix( 00255 const uint16_t* matrix, int32_t num_feat, int32_t num_vec); 00257 00265 virtual void set_bool_sparsematrix( 00266 const TSparse<bool>* matrix, int32_t num_feat, int32_t num_vec); 00267 virtual void set_byte_sparsematrix( 00268 const TSparse<uint8_t>* matrix, int32_t num_feat, int32_t num_vec); 00269 virtual void set_int8_sparsematrix( 00270 const TSparse<int8_t>* matrix, int32_t num_feat, int32_t num_vec); 00271 virtual void set_char_sparsematrix( 00272 const TSparse<char>* matrix, int32_t num_feat, int32_t num_vec); 00273 virtual void set_int_sparsematrix( 00274 const TSparse<int32_t>* matrix, int32_t num_feat, int32_t num_vec); 00275 virtual void set_uint_sparsematrix( 00276 const TSparse<uint32_t>* matrix, int32_t num_feat, int32_t num_vec); 00277 virtual void set_long_sparsematrix( 00278 const TSparse<int64_t>* matrix, int32_t num_feat, int32_t num_vec); 00279 virtual void set_ulong_sparsematrix( 00280 const TSparse<uint64_t>* matrix, int32_t num_feat, int32_t num_vec); 00281 virtual void set_short_sparsematrix( 00282 const TSparse<int16_t>* matrix, int32_t num_feat, int32_t num_vec); 00283 virtual void set_word_sparsematrix( 00284 const TSparse<uint16_t>* matrix, int32_t num_feat, int32_t num_vec); 00285 virtual void set_shortreal_sparsematrix( 00286 const TSparse<float32_t>* matrix, int32_t num_feat, int32_t num_vec); 00287 virtual void set_real_sparsematrix( 00288 const TSparse<float64_t>* matrix, int32_t num_feat, int32_t num_vec); 00289 virtual void set_longreal_sparsematrix( 00290 const TSparse<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec); 00292 00293 00302 virtual void set_byte_string_list( 00303 const TString<uint8_t>* strings, int32_t num_str); 00304 virtual void set_int8_string_list( 00305 const TString<int8_t>* strings, int32_t num_str); 00306 virtual void set_char_string_list( 00307 const TString<char>* strings, int32_t num_str); 00308 virtual void set_int_string_list( 00309 const 
TString<int32_t>* strings, int32_t num_str); 00310 virtual void set_uint_string_list( 00311 const TString<uint32_t>* strings, int32_t num_str); 00312 virtual void set_short_string_list( 00313 const TString<int16_t>* strings, int32_t num_str); 00314 virtual void set_word_string_list( 00315 const TString<uint16_t>* strings, int32_t num_str); 00316 virtual void set_long_string_list( 00317 const TString<int64_t>* strings, int32_t num_str); 00318 virtual void set_ulong_string_list( 00319 const TString<uint64_t>* strings, int32_t num_str); 00320 virtual void set_shortreal_string_list( 00321 const TString<float32_t>* strings, int32_t num_str); 00322 virtual void set_real_string_list( 00323 const TString<float64_t>* strings, int32_t num_str); 00324 virtual void set_longreal_string_list( 00325 const TString<floatmax_t>* strings, int32_t num_str); 00327 00329 inline virtual const char* get_name() const { return "BinaryFile"; } 00330 00331 protected: 00336 void read_header(TSGDataType* dest); 00337 00342 void write_header(const TSGDataType* datatype); 00343 00349 int32_t parse_first_header(TSGDataType& type); 00350 00356 int32_t parse_next_header(TSGDataType& type); 00357 00358 private: 00365 template <class DT> DT* load_data(DT* target, int64_t& num) 00366 { 00367 CSimpleFile<DT> f(filename, file); 00368 return f.load(target, num); 00369 } 00370 00377 template <class DT> bool save_data(DT* src, int64_t num) 00378 { 00379 CSimpleFile<DT> f(filename, file); 00380 return f.save(src, num); 00381 } 00382 }; 00383 } 00384 #endif //__BINARY_FILE_H__