SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include "features/RealFileFeatures.h" 00012 #include "features/Features.h" 00013 #include "lib/io.h" 00014 00015 #include <stdio.h> 00016 #include <string.h> 00017 00018 using namespace shogun; 00019 00020 CRealFileFeatures::CRealFileFeatures(void) 00021 { 00022 SG_UNSTABLE("CRealFileFeatures::CRealFileFeatures(void)", "\n"); 00023 00024 working_file=NULL; 00025 working_filename=strdup(""); 00026 intlen=0; 00027 doublelen=0; 00028 endian=0; 00029 fourcc=0; 00030 preprocd=0; 00031 labels=NULL; 00032 status=false; 00033 } 00034 00035 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname) 00036 : CSimpleFeatures<float64_t>(size) 00037 { 00038 working_file=fopen(fname, "r"); 00039 working_filename=strdup(fname); 00040 ASSERT(working_file); 00041 intlen=0; 00042 doublelen=0; 00043 endian=0; 00044 fourcc=0; 00045 preprocd=0; 00046 labels=NULL; 00047 status=load_base_data(); 00048 } 00049 00050 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file) 00051 : CSimpleFeatures<float64_t>(size), working_file(file), working_filename(NULL) 00052 { 00053 ASSERT(working_file); 00054 intlen=0; 00055 doublelen=0; 00056 endian=0; 00057 fourcc=0; 00058 preprocd=0; 00059 labels=NULL; 00060 status=load_base_data(); 00061 } 00062 00063 CRealFileFeatures::~CRealFileFeatures() 00064 { 00065 delete[] feature_matrix; 00066 delete[] working_filename; 00067 delete[] labels; 00068 } 00069 00070 CRealFileFeatures::CRealFileFeatures(const CRealFileFeatures & orig) 00071 : CSimpleFeatures<float64_t>(orig), working_file(orig.working_file), status(orig.status) 00072 { 00073 if (orig.working_filename) 00074 working_filename=strdup(orig.working_filename); 00075 if (orig.labels && get_num_vectors()) 00076 { 00077 labels=new int32_t[get_num_vectors()]; 00078 memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors()); 00079 } 00080 } 00081 00082 float64_t* CRealFileFeatures::compute_feature_vector( 00083 int32_t num, int32_t &len, float64_t* target) 00084 { 00085 ASSERT(num<num_vectors); 00086 len=num_features; 00087 float64_t* featurevector=target; 00088 if (!featurevector) 00089 featurevector=new float64_t[num_features]; 00090 ASSERT(working_file); 00091 fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET); 00092 ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features); 00093 return featurevector; 00094 } 00095 00096 float64_t* CRealFileFeatures::load_feature_matrix() 00097 { 00098 ASSERT(working_file); 00099 fseek(working_file, filepos, SEEK_SET); 00100 delete[] feature_matrix; 00101 00102 SG_INFO( "allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0); 00103 free_feature_matrix(); 00104 feature_matrix=new float64_t[num_features*num_vectors]; 00105 00106 SG_INFO( "loading... be patient.\n"); 00107 00108 for (int32_t i=0; i<(int32_t) num_vectors; i++) 00109 { 00110 if (!(i % (num_vectors/10+1))) 00111 SG_PRINT( "%02d%%.", (int) (100.0*i/num_vectors)); 00112 else if (!(i % (num_vectors/200+1))) 00113 SG_PRINT( "."); 00114 00115 ASSERT(fread(&feature_matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features); 00116 } 00117 SG_DONE(); 00118 00119 return feature_matrix; 00120 } 00121 00122 int32_t CRealFileFeatures::get_label(int32_t idx) 00123 { 00124 ASSERT(idx<num_vectors); 00125 if (labels) 00126 return labels[idx]; 00127 return 0; 00128 } 00129 00130 bool CRealFileFeatures::load_base_data() 00131 { 00132 ASSERT(working_file); 00133 uint32_t num_vec=0; 00134 uint32_t num_feat=0; 00135 00136 ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1); 00137 ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1); 00138 ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1); 00139 ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1); 00140 ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1); 00141 ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1); 00142 ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1); 00143 SG_INFO( "detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd); 00144 filepos=ftell(working_file); 00145 set_num_vectors(num_vec); 00146 set_num_features(num_feat); 00147 fseek(working_file, filepos+num_features*num_vectors*doublelen, SEEK_SET); 00148 delete[] labels; 00149 labels=new int[num_vec]; 00150 ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec); 00151 return true; 00152 }