SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Soeren Sonnenburg 00008 * Copyright (C) 2010 Berlin Institute of Technology 00009 */ 00010 00011 #include "features/SparseFeatures.h" 00012 #include "lib/File.h" 00013 #include "lib/AsciiFile.h" 00014 #include "lib/Mathematics.h" 00015 #include <ctype.h> 00016 00017 using namespace shogun; 00018 00019 CAsciiFile::CAsciiFile(void) 00020 { 00021 SG_UNSTABLE("CAsciiFile::CAsciiFile(void)", "\n"); 00022 } 00023 00024 CAsciiFile::CAsciiFile(FILE* f, const char* name) : CFile(f, name) 00025 { 00026 } 00027 00028 CAsciiFile::CAsciiFile(char* fname, char rw, const char* name) : CFile(fname, rw, name) 00029 { 00030 } 00031 00032 CAsciiFile::~CAsciiFile() 00033 { 00034 } 00035 00036 #define GET_VECTOR(fname, mfname, sg_type) \ 00037 void CAsciiFile::fname(sg_type*& vec, int32_t& len) \ 00038 { \ 00039 vec=NULL; \ 00040 len=0; \ 00041 int32_t num_feat=0; \ 00042 int32_t num_vec=0; \ 00043 mfname(vec, num_feat, num_vec); \ 00044 if ((num_feat==1) || (num_vec==1)) \ 00045 { \ 00046 if (num_feat==1) \ 00047 len=num_vec; \ 00048 else \ 00049 len=num_feat; \ 00050 } \ 00051 else \ 00052 { \ 00053 delete[] vec; \ 00054 vec=NULL; \ 00055 len=0; \ 00056 SG_ERROR("Could not read vector from" \ 00057 " file %s (shape %dx%d found but " \ 00058 "vector expected).\n", filename, \ 00059 num_vec, num_feat); \ 00060 } \ 00061 } 00062 00063 GET_VECTOR(get_byte_vector, get_byte_matrix, uint8_t) 00064 GET_VECTOR(get_char_vector, get_char_matrix, char) 00065 GET_VECTOR(get_int_vector, get_int_matrix, int32_t) 00066 GET_VECTOR(get_shortreal_vector, get_shortreal_matrix, float32_t) 00067 GET_VECTOR(get_real_vector, get_real_matrix, float64_t) 00068 GET_VECTOR(get_short_vector, get_short_matrix, int16_t) 00069 GET_VECTOR(get_word_vector, get_word_matrix, uint16_t) 00070 #undef GET_VECTOR 00071 00072 #define GET_MATRIX(fname, conv, sg_type) \ 00073 void CAsciiFile::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \ 00074 { \ 00075 struct stat stats; \ 00076 if (stat(filename, &stats)!=0) \ 00077 SG_ERROR("Could not get file statistics.\n"); \ 00078 \ 00079 char* data=new char[stats.st_size+1]; \ 00080 memset(data, 0, sizeof(char)*(stats.st_size+1)); \ 00081 size_t nread=fread(data, sizeof(char), stats.st_size, file); \ 00082 if (nread<=0) \ 00083 SG_ERROR("Could not read data from %s.\n", filename); \ 00084 \ 00085 SG_DEBUG("data read from file:\n%s\n", data); \ 00086 \ 00087 /* determine num_feat and num_vec, populate dynamic array */ \ 00088 int32_t nf=0; \ 00089 num_feat=0; \ 00090 num_vec=0; \ 00091 char* ptr_item=NULL; \ 00092 char* ptr_data=data; \ 00093 DynArray<char*>* items=new DynArray<char*>(); \ 00094 \ 00095 while (*ptr_data) \ 00096 { \ 00097 if (*ptr_data=='\n') \ 00098 { \ 00099 if (ptr_item) \ 00100 nf++; \ 00101 \ 00102 if (num_feat!=0 && nf!=num_feat) \ 00103 SG_ERROR("Number of features mismatches (%d != %d) in vector" \ 00104 " %d in file %s.\n", num_feat, nf, num_vec, filename); \ 00105 \ 00106 append_item(items, ptr_data, ptr_item); \ 00107 num_feat=nf; \ 00108 num_vec++; \ 00109 nf=0; \ 00110 ptr_item=NULL; \ 00111 } \ 00112 else if (!isblank(*ptr_data) && !ptr_item) \ 00113 { \ 00114 ptr_item=ptr_data; \ 00115 } \ 00116 else if (isblank(*ptr_data) && ptr_item) \ 00117 { \ 00118 append_item(items, ptr_data, ptr_item); \ 00119 ptr_item=NULL; \ 00120 nf++; \ 00121 } \ 00122 \ 00123 ptr_data++; \ 00124 } \ 00125 \ 00126 SG_DEBUG("num feat: %d, num_vec %d\n", num_feat, num_vec); \ 00127 delete[] data; \ 00128 \ 00129 /* now copy data into matrix */ \ 00130 matrix=new sg_type[num_vec*num_feat]; \ 00131 for (int32_t i=0; i<num_vec; i++) \ 00132 { \ 00133 for (int32_t j=0; j<num_feat; j++) \ 00134 { \ 00135 char* item=items->get_element(i*num_feat+j); \ 00136 matrix[i*num_feat+j]=conv(item); \ 00137 delete[] item; \ 00138 } \ 00139 } \ 00140 delete items; \ 00141 } 00142 00143 GET_MATRIX(get_byte_matrix, atoi, uint8_t) 00144 GET_MATRIX(get_int8_matrix, atoi, int8_t) 00145 GET_MATRIX(get_char_matrix, atoi, char) 00146 GET_MATRIX(get_int_matrix, atoi, int32_t) 00147 GET_MATRIX(get_uint_matrix, atoi, uint32_t) 00148 GET_MATRIX(get_long_matrix, atoll, int64_t) 00149 GET_MATRIX(get_ulong_matrix, atoll, uint64_t) 00150 GET_MATRIX(get_shortreal_matrix, atof, float32_t) 00151 GET_MATRIX(get_real_matrix, atof, float64_t) 00152 GET_MATRIX(get_longreal_matrix, atof, floatmax_t) 00153 GET_MATRIX(get_short_matrix, atoi, int16_t) 00154 GET_MATRIX(get_word_matrix, atoi, uint16_t) 00155 #undef GET_MATRIX 00156 00157 void CAsciiFile::get_byte_ndarray(uint8_t*& array, int32_t*& dims, int32_t& num_dims) 00158 { 00159 } 00160 00161 void CAsciiFile::get_char_ndarray(char*& array, int32_t*& dims, int32_t& num_dims) 00162 { 00163 } 00164 00165 void CAsciiFile::get_int_ndarray(int32_t*& array, int32_t*& dims, int32_t& num_dims) 00166 { 00167 } 00168 00169 void CAsciiFile::get_shortreal_ndarray(float32_t*& array, int32_t*& dims, int32_t& num_dims) 00170 { 00171 } 00172 00173 void CAsciiFile::get_real_ndarray(float64_t*& array, int32_t*& dims, int32_t& num_dims) 00174 { 00175 } 00176 00177 void CAsciiFile::get_short_ndarray(int16_t*& array, int32_t*& dims, int32_t& num_dims) 00178 { 00179 } 00180 00181 void CAsciiFile::get_word_ndarray(uint16_t*& array, int32_t*& dims, int32_t& num_dims) 00182 { 00183 } 00184 00185 #define GET_SPARSEMATRIX(fname, conv, sg_type) \ 00186 void CAsciiFile::fname(TSparse<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \ 00187 { \ 00188 size_t blocksize=1024*1024; \ 00189 size_t required_blocksize=blocksize; \ 00190 uint8_t* dummy=new uint8_t[blocksize]; \ 00191 \ 00192 if (file) \ 00193 { \ 00194 num_vec=0; \ 00195 num_feat=0; \ 00196 \ 00197 SG_INFO("counting line numbers in file %s\n", filename); \ 00198 size_t sz=blocksize; \ 00199 size_t block_offs=0; \ 00200 size_t old_block_offs=0; \ 00201 fseek(file, 0, SEEK_END); \ 00202 size_t fsize=ftell(file); \ 00203 rewind(file); \ 00204 \ 00205 while (sz == blocksize) \ 00206 { \ 00207 sz=fread(dummy, sizeof(uint8_t), blocksize, file); \ 00208 bool contains_cr=false; \ 00209 for (size_t i=0; i<sz; i++) \ 00210 { \ 00211 block_offs++; \ 00212 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) \ 00213 { \ 00214 num_vec++; \ 00215 contains_cr=true; \ 00216 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs+1); \ 00217 old_block_offs=block_offs; \ 00218 } \ 00219 } \ 00220 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t"); \ 00221 } \ 00222 \ 00223 SG_INFO("found %d feature vectors\n", num_vec); \ 00224 delete[] dummy; \ 00225 blocksize=required_blocksize; \ 00226 dummy = new uint8_t[blocksize+1]; /*allow setting of '\0' at EOL*/ \ 00227 matrix=new TSparse<sg_type>[num_vec]; \ 00228 \ 00229 rewind(file); \ 00230 sz=blocksize; \ 00231 int32_t lines=0; \ 00232 while (sz == blocksize) \ 00233 { \ 00234 sz=fread(dummy, sizeof(uint8_t), blocksize, file); \ 00235 \ 00236 size_t old_sz=0; \ 00237 for (size_t i=0; i<sz; i++) \ 00238 { \ 00239 if (i==sz-1 && dummy[i]!='\n' && sz==blocksize) \ 00240 { \ 00241 size_t len=i-old_sz+1; \ 00242 uint8_t* data=&dummy[old_sz]; \ 00243 \ 00244 for (size_t j=0; j<len; j++) \ 00245 dummy[j]=data[j]; \ 00246 \ 00247 sz=fread(dummy+len, sizeof(uint8_t), blocksize-len, file); \ 00248 i=0; \ 00249 old_sz=0; \ 00250 sz+=len; \ 00251 } \ 00252 \ 00253 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) \ 00254 { \ 00255 \ 00256 size_t len=i-old_sz; \ 00257 uint8_t* data=&dummy[old_sz]; \ 00258 \ 00259 int32_t dims=0; \ 00260 for (size_t j=0; j<len; j++) \ 00261 { \ 00262 if (data[j]==':') \ 00263 dims++; \ 00264 } \ 00265 \ 00266 if (dims<=0) \ 00267 { \ 00268 SG_ERROR("Error in line %d - number of" \ 00269 " dimensions is %d line is %d characters" \ 00270 " long\n line_content:'%.*s'\n", lines, \ 00271 dims, len, len, (const char*) data); \ 00272 } \ 00273 \ 00274 TSparseEntry<sg_type>* feat=new TSparseEntry<sg_type>[dims]; \ 00275 \ 00276 /* skip label part */ \ 00277 size_t j=0; \ 00278 for (; j<len; j++) \ 00279 { \ 00280 if (data[j]==':') \ 00281 { \ 00282 j=-1; /* file without label*/ \ 00283 break; \ 00284 } \ 00285 \ 00286 if (data[j]==' ') \ 00287 { \ 00288 data[j]='\0'; \ 00289 \ 00290 /* skip label part */ \ 00291 break; \ 00292 } \ 00293 } \ 00294 \ 00295 int32_t d=0; \ 00296 j++; \ 00297 uint8_t* start=&data[j]; \ 00298 for (; j<len; j++) \ 00299 { \ 00300 if (data[j]==':') \ 00301 { \ 00302 data[j]='\0'; \ 00303 \ 00304 feat[d].feat_index=(int32_t) atoi((const char*) start)-1; \ 00305 num_feat=CMath::max(num_feat, feat[d].feat_index+1); \ 00306 \ 00307 j++; \ 00308 start=&data[j]; \ 00309 for (; j<len; j++) \ 00310 { \ 00311 if (data[j]==' ' || data[j]=='\n') \ 00312 { \ 00313 data[j]='\0'; \ 00314 feat[d].entry=(sg_type) conv((const char*) start); \ 00315 d++; \ 00316 break; \ 00317 } \ 00318 } \ 00319 \ 00320 if (j==len) \ 00321 { \ 00322 data[j]='\0'; \ 00323 feat[dims-1].entry=(sg_type) conv((const char*) start); \ 00324 } \ 00325 \ 00326 j++; \ 00327 start=&data[j]; \ 00328 } \ 00329 } \ 00330 \ 00331 matrix[lines].vec_index=lines; \ 00332 matrix[lines].num_feat_entries=dims; \ 00333 matrix[lines].features=feat; \ 00334 \ 00335 old_sz=i+1; \ 00336 lines++; \ 00337 SG_PROGRESS(lines, 0, num_vec, 1, "LOADING:\t"); \ 00338 } \ 00339 } \ 00340 } \ 00341 \ 00342 SG_INFO("file successfully read\n"); \ 00343 } \ 00344 \ 00345 delete[] dummy; \ 00346 } 00347 00348 GET_SPARSEMATRIX(get_bool_sparsematrix, atoi, bool) 00349 GET_SPARSEMATRIX(get_byte_sparsematrix, atoi, uint8_t) 00350 GET_SPARSEMATRIX(get_int8_sparsematrix, atoi, int8_t) 00351 GET_SPARSEMATRIX(get_char_sparsematrix, atoi, char) 00352 GET_SPARSEMATRIX(get_int_sparsematrix, atoi, int32_t) 00353 GET_SPARSEMATRIX(get_uint_sparsematrix, atoi, uint32_t) 00354 GET_SPARSEMATRIX(get_long_sparsematrix, atoll, int64_t) 00355 GET_SPARSEMATRIX(get_ulong_sparsematrix, atoll, uint64_t) 00356 GET_SPARSEMATRIX(get_shortreal_sparsematrix, atof, float32_t) 00357 GET_SPARSEMATRIX(get_real_sparsematrix, atof, float64_t) 00358 GET_SPARSEMATRIX(get_longreal_sparsematrix, atof, floatmax_t) 00359 GET_SPARSEMATRIX(get_short_sparsematrix, atoi, int16_t) 00360 GET_SPARSEMATRIX(get_word_sparsematrix, atoi, uint16_t) 00361 #undef GET_SPARSEMATRIX 00362 00363 00364 void CAsciiFile::get_byte_string_list(TString<uint8_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00365 { 00366 size_t blocksize=1024*1024; 00367 size_t required_blocksize=0; 00368 uint8_t* dummy=new uint8_t[blocksize]; 00369 uint8_t* overflow=NULL; 00370 int32_t overflow_len=0; 00371 00372 if (file) 00373 { 00374 num_str=0; 00375 max_string_len=0; 00376 00377 SG_INFO("counting line numbers in file %s\n", filename); 00378 size_t sz=blocksize; 00379 size_t block_offs=0; 00380 size_t old_block_offs=0; 00381 fseek(file, 0, SEEK_END); 00382 size_t fsize=ftell(file); 00383 rewind(file); 00384 00385 while (sz == blocksize) 00386 { 00387 sz=fread(dummy, sizeof(uint8_t), blocksize, file); 00388 bool contains_cr=false; 00389 for (size_t i=0; i<sz; i++) 00390 { 00391 block_offs++; 00392 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) 00393 { 00394 num_str++; 00395 contains_cr=true; 00396 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs); 00397 old_block_offs=block_offs; 00398 } 00399 } 00400 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t"); 00401 } 00402 00403 SG_INFO("found %d strings\n", num_str); 00404 SG_DEBUG("block_size=%d\n", required_blocksize); 00405 delete[] dummy; 00406 blocksize=required_blocksize; 00407 dummy=new uint8_t[blocksize]; 00408 overflow=new uint8_t[blocksize]; 00409 strings=new TString<uint8_t>[num_str]; 00410 00411 rewind(file); 00412 sz=blocksize; 00413 int32_t lines=0; 00414 size_t old_sz=0; 00415 while (sz == blocksize) 00416 { 00417 sz=fread(dummy, sizeof(uint8_t), blocksize, file); 00418 00419 old_sz=0; 00420 for (size_t i=0; i<sz; i++) 00421 { 00422 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) 00423 { 00424 int32_t len=i-old_sz; 00425 max_string_len=CMath::max(max_string_len, len+overflow_len); 00426 00427 strings[lines].length=len+overflow_len; 00428 strings[lines].string=new uint8_t[len+overflow_len]; 00429 00430 for (int32_t j=0; j<overflow_len; j++) 00431 strings[lines].string[j]=overflow[j]; 00432 for (int32_t j=0; j<len; j++) 00433 strings[lines].string[j+overflow_len]=dummy[old_sz+j]; 00434 00435 // clear overflow 00436 overflow_len=0; 00437 00438 //CMath::display_vector(strings[lines].string, len); 00439 old_sz=i+1; 00440 lines++; 00441 SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t"); 00442 } 00443 } 00444 00445 for (size_t i=old_sz; i<sz; i++) 00446 overflow[i-old_sz]=dummy[i]; 00447 00448 overflow_len=sz-old_sz; 00449 } 00450 SG_INFO("file successfully read\n"); 00451 SG_INFO("max_string_length=%d\n", max_string_len); 00452 SG_INFO("num_strings=%d\n", num_str); 00453 } 00454 00455 delete[] dummy; 00456 delete[] overflow; 00457 } 00458 00459 void CAsciiFile::get_int8_string_list(TString<int8_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00460 { 00461 size_t blocksize=1024*1024; 00462 size_t required_blocksize=0; 00463 int8_t* dummy=new int8_t[blocksize]; 00464 int8_t* overflow=NULL; 00465 int32_t overflow_len=0; 00466 00467 if (file) 00468 { 00469 num_str=0; 00470 max_string_len=0; 00471 00472 SG_INFO("counting line numbers in file %s\n", filename); 00473 size_t sz=blocksize; 00474 size_t block_offs=0; 00475 size_t old_block_offs=0; 00476 fseek(file, 0, SEEK_END); 00477 size_t fsize=ftell(file); 00478 rewind(file); 00479 00480 while (sz == blocksize) 00481 { 00482 sz=fread(dummy, sizeof(int8_t), blocksize, file); 00483 bool contains_cr=false; 00484 for (size_t i=0; i<sz; i++) 00485 { 00486 block_offs++; 00487 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) 00488 { 00489 num_str++; 00490 contains_cr=true; 00491 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs); 00492 old_block_offs=block_offs; 00493 } 00494 } 00495 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t"); 00496 } 00497 00498 SG_INFO("found %d strings\n", num_str); 00499 SG_DEBUG("block_size=%d\n", required_blocksize); 00500 delete[] dummy; 00501 blocksize=required_blocksize; 00502 dummy=new int8_t[blocksize]; 00503 overflow=new int8_t[blocksize]; 00504 strings=new TString<int8_t>[num_str]; 00505 00506 rewind(file); 00507 sz=blocksize; 00508 int32_t lines=0; 00509 size_t old_sz=0; 00510 while (sz == blocksize) 00511 { 00512 sz=fread(dummy, sizeof(int8_t), blocksize, file); 00513 00514 old_sz=0; 00515 for (size_t i=0; i<sz; i++) 00516 { 00517 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) 00518 { 00519 int32_t len=i-old_sz; 00520 max_string_len=CMath::max(max_string_len, len+overflow_len); 00521 00522 strings[lines].length=len+overflow_len; 00523 strings[lines].string=new int8_t[len+overflow_len]; 00524 00525 for (int32_t j=0; j<overflow_len; j++) 00526 strings[lines].string[j]=overflow[j]; 00527 for (int32_t j=0; j<len; j++) 00528 strings[lines].string[j+overflow_len]=dummy[old_sz+j]; 00529 00530 // clear overflow 00531 overflow_len=0; 00532 00533 //CMath::display_vector(strings[lines].string, len); 00534 old_sz=i+1; 00535 lines++; 00536 SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t"); 00537 } 00538 } 00539 00540 for (size_t i=old_sz; i<sz; i++) 00541 overflow[i-old_sz]=dummy[i]; 00542 00543 overflow_len=sz-old_sz; 00544 } 00545 SG_INFO("file successfully read\n"); 00546 SG_INFO("max_string_length=%d\n", max_string_len); 00547 SG_INFO("num_strings=%d\n", num_str); 00548 } 00549 00550 delete[] dummy; 00551 delete[] overflow; 00552 } 00553 00554 void CAsciiFile::get_char_string_list(TString<char>*& strings, int32_t& num_str, int32_t& max_string_len) 00555 { 00556 size_t blocksize=1024*1024; 00557 size_t required_blocksize=0; 00558 char* dummy=new char[blocksize]; 00559 char* overflow=NULL; 00560 int32_t overflow_len=0; 00561 00562 if (file) 00563 { 00564 num_str=0; 00565 max_string_len=0; 00566 00567 SG_INFO("counting line numbers in file %s\n", filename); 00568 size_t sz=blocksize; 00569 size_t block_offs=0; 00570 size_t old_block_offs=0; 00571 fseek(file, 0, SEEK_END); 00572 size_t fsize=ftell(file); 00573 rewind(file); 00574 00575 while (sz == blocksize) 00576 { 00577 sz=fread(dummy, sizeof(char), blocksize, file); 00578 bool contains_cr=false; 00579 for (size_t i=0; i<sz; i++) 00580 { 00581 block_offs++; 00582 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) 00583 { 00584 num_str++; 00585 contains_cr=true; 00586 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs); 00587 old_block_offs=block_offs; 00588 } 00589 } 00590 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t"); 00591 } 00592 00593 SG_INFO("found %d strings\n", num_str); 00594 SG_DEBUG("block_size=%d\n", required_blocksize); 00595 delete[] dummy; 00596 blocksize=required_blocksize; 00597 dummy=new char[blocksize]; 00598 overflow=new char[blocksize]; 00599 strings=new TString<char>[num_str]; 00600 00601 rewind(file); 00602 sz=blocksize; 00603 int32_t lines=0; 00604 size_t old_sz=0; 00605 while (sz == blocksize) 00606 { 00607 sz=fread(dummy, sizeof(char), blocksize, file); 00608 00609 old_sz=0; 00610 for (size_t i=0; i<sz; i++) 00611 { 00612 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) 00613 { 00614 int32_t len=i-old_sz; 00615 max_string_len=CMath::max(max_string_len, len+overflow_len); 00616 00617 strings[lines].length=len+overflow_len; 00618 strings[lines].string=new char[len+overflow_len]; 00619 00620 for (int32_t j=0; j<overflow_len; j++) 00621 strings[lines].string[j]=overflow[j]; 00622 for (int32_t j=0; j<len; j++) 00623 strings[lines].string[j+overflow_len]=dummy[old_sz+j]; 00624 00625 // clear overflow 00626 overflow_len=0; 00627 00628 //CMath::display_vector(strings[lines].string, len); 00629 old_sz=i+1; 00630 lines++; 00631 SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t"); 00632 } 00633 } 00634 00635 for (size_t i=old_sz; i<sz; i++) 00636 overflow[i-old_sz]=dummy[i]; 00637 00638 overflow_len=sz-old_sz; 00639 } 00640 SG_INFO("file successfully read\n"); 00641 SG_INFO("max_string_length=%d\n", max_string_len); 00642 SG_INFO("num_strings=%d\n", num_str); 00643 } 00644 00645 delete[] dummy; 00646 delete[] overflow; 00647 } 00648 00649 void CAsciiFile::get_int_string_list(TString<int32_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00650 { 00651 strings=NULL; 00652 num_str=0; 00653 max_string_len=0; 00654 } 00655 00656 void CAsciiFile::get_uint_string_list(TString<uint32_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00657 { 00658 strings=NULL; 00659 num_str=0; 00660 max_string_len=0; 00661 } 00662 00663 void CAsciiFile::get_short_string_list(TString<int16_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00664 { 00665 strings=NULL; 00666 num_str=0; 00667 max_string_len=0; 00668 } 00669 00670 void CAsciiFile::get_word_string_list(TString<uint16_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00671 { 00672 strings=NULL; 00673 num_str=0; 00674 max_string_len=0; 00675 } 00676 00677 void CAsciiFile::get_long_string_list(TString<int64_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00678 { 00679 strings=NULL; 00680 num_str=0; 00681 max_string_len=0; 00682 } 00683 00684 void CAsciiFile::get_ulong_string_list(TString<uint64_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00685 { 00686 strings=NULL; 00687 num_str=0; 00688 max_string_len=0; 00689 } 00690 00691 void CAsciiFile::get_shortreal_string_list(TString<float32_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00692 { 00693 strings=NULL; 00694 num_str=0; 00695 max_string_len=0; 00696 } 00697 00698 void CAsciiFile::get_real_string_list(TString<float64_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00699 { 00700 strings=NULL; 00701 num_str=0; 00702 max_string_len=0; 00703 } 00704 00705 void CAsciiFile::get_longreal_string_list(TString<floatmax_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00706 { 00707 strings=NULL; 00708 num_str=0; 00709 max_string_len=0; 00710 } 00711 00712 00715 #define SET_VECTOR(fname, mfname, sg_type) \ 00716 void CAsciiFile::fname(const sg_type* vec, int32_t len) \ 00717 { \ 00718 mfname(vec, len, 1); \ 00719 } 00720 SET_VECTOR(set_byte_vector, set_byte_matrix, uint8_t) 00721 SET_VECTOR(set_char_vector, set_char_matrix, char) 00722 SET_VECTOR(set_int_vector, set_int_matrix, int32_t) 00723 SET_VECTOR(set_shortreal_vector, set_shortreal_matrix, float32_t) 00724 SET_VECTOR(set_real_vector, set_real_matrix, float64_t) 00725 SET_VECTOR(set_short_vector, set_short_matrix, int16_t) 00726 SET_VECTOR(set_word_vector, set_word_matrix, uint16_t) 00727 #undef SET_VECTOR 00728 00729 #define SET_MATRIX(fname, sg_type, fprt_type, type_str) \ 00730 void CAsciiFile::fname(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \ 00731 { \ 00732 if (!(file && matrix)) \ 00733 SG_ERROR("File or matrix invalid.\n"); \ 00734 \ 00735 for (int32_t i=0; i<num_vec; i++) \ 00736 { \ 00737 for (int32_t j=0; j<num_feat; j++) \ 00738 { \ 00739 sg_type v=matrix[num_feat*i+j]; \ 00740 if (j==num_feat-1) \ 00741 fprintf(file, type_str "\n", (fprt_type) v); \ 00742 else \ 00743 fprintf(file, type_str " ", (fprt_type) v); \ 00744 } \ 00745 } \ 00746 } 00747 SET_MATRIX(set_char_matrix, char, char, "%c") 00748 SET_MATRIX(set_byte_matrix, uint8_t, uint8_t, "%u") 00749 SET_MATRIX(set_int8_matrix, int8_t, int8_t, "%d") 00750 SET_MATRIX(set_int_matrix, int32_t, int32_t, "%i") 00751 SET_MATRIX(set_uint_matrix, uint32_t, uint32_t, "%u") 00752 SET_MATRIX(set_long_matrix, int64_t, long long int, "%lli") 00753 SET_MATRIX(set_ulong_matrix, uint64_t, long long unsigned int, "%llu") 00754 SET_MATRIX(set_short_matrix, int16_t, int16_t, "%i") 00755 SET_MATRIX(set_word_matrix, uint16_t, uint16_t, "%u") 00756 SET_MATRIX(set_shortreal_matrix, float32_t, float32_t, "%f") 00757 SET_MATRIX(set_real_matrix, float64_t, float64_t, "%f") 00758 SET_MATRIX(set_longreal_matrix, floatmax_t, floatmax_t, "%Lf") 00759 #undef SET_MATRIX 00760 00761 #define SET_SPARSEMATRIX(fname, sg_type, fprt_type, type_str) \ 00762 void CAsciiFile::fname(const TSparse<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \ 00763 { \ 00764 if (!(file && matrix)) \ 00765 SG_ERROR("File or matrix invalid.\n"); \ 00766 \ 00767 for (int32_t i=0; i<num_vec; i++) \ 00768 { \ 00769 TSparseEntry<sg_type>* vec = matrix[i].features; \ 00770 int32_t len=matrix[i].num_feat_entries; \ 00771 \ 00772 for (int32_t j=0; j<len; j++) \ 00773 { \ 00774 if (j<len-1) \ 00775 { \ 00776 fprintf(file, "%d:" type_str " ", \ 00777 (int32_t) vec[j].feat_index+1, (fprt_type) vec[j].entry); \ 00778 } \ 00779 else \ 00780 { \ 00781 fprintf(file, "%d:" type_str "\n", \ 00782 (int32_t) vec[j].feat_index+1, (fprt_type) vec[j].entry); \ 00783 } \ 00784 } \ 00785 } \ 00786 } 00787 SET_SPARSEMATRIX(set_bool_sparsematrix, bool, uint8_t, "%u") 00788 SET_SPARSEMATRIX(set_char_sparsematrix, char, char, "%c") 00789 SET_SPARSEMATRIX(set_byte_sparsematrix, uint8_t, uint8_t, "%u") 00790 SET_SPARSEMATRIX(set_int8_sparsematrix, int8_t, int8_t, "%d") 00791 SET_SPARSEMATRIX(set_int_sparsematrix, int32_t, int32_t, "%i") 00792 SET_SPARSEMATRIX(set_uint_sparsematrix, uint32_t, uint32_t, "%u") 00793 SET_SPARSEMATRIX(set_long_sparsematrix, int64_t, long long int, "%lli") 00794 SET_SPARSEMATRIX(set_ulong_sparsematrix, uint64_t, long long unsigned int, "%llu") 00795 SET_SPARSEMATRIX(set_short_sparsematrix, int16_t, int16_t, "%i") 00796 SET_SPARSEMATRIX(set_word_sparsematrix, uint16_t, uint16_t, "%u") 00797 SET_SPARSEMATRIX(set_shortreal_sparsematrix, float32_t, float32_t, "%f") 00798 SET_SPARSEMATRIX(set_real_sparsematrix, float64_t, float64_t, "%f") 00799 SET_SPARSEMATRIX(set_longreal_sparsematrix, floatmax_t, floatmax_t, "%Lf") 00800 #undef SET_SPARSEMATRIX 00801 00802 void CAsciiFile::set_byte_string_list(const TString<uint8_t>* strings, int32_t num_str) 00803 { 00804 if (!(file && strings)) 00805 SG_ERROR("File or strings invalid.\n"); 00806 00807 for (int32_t i=0; i<num_str; i++) 00808 { 00809 int32_t len = strings[i].length; 00810 fwrite(strings[i].string, sizeof(uint8_t), len, file); 00811 fprintf(file, "\n"); 00812 } 00813 } 00814 00815 void CAsciiFile::set_int8_string_list(const TString<int8_t>* strings, int32_t num_str) 00816 { 00817 if (!(file && strings)) 00818 SG_ERROR("File or strings invalid.\n"); 00819 00820 for (int32_t i=0; i<num_str; i++) 00821 { 00822 int32_t len = strings[i].length; 00823 fwrite(strings[i].string, sizeof(int8_t), len, file); 00824 fprintf(file, "\n"); 00825 } 00826 } 00827 00828 void CAsciiFile::set_char_string_list(const TString<char>* strings, int32_t num_str) 00829 { 00830 if (!(file && strings)) 00831 SG_ERROR("File or strings invalid.\n"); 00832 00833 for (int32_t i=0; i<num_str; i++) 00834 { 00835 int32_t len = strings[i].length; 00836 fwrite(strings[i].string, sizeof(char), len, file); 00837 fprintf(file, "\n"); 00838 } 00839 } 00840 00841 void CAsciiFile::set_int_string_list(const TString<int32_t>* strings, int32_t num_str) 00842 { 00843 } 00844 00845 void CAsciiFile::set_uint_string_list(const TString<uint32_t>* strings, int32_t num_str) 00846 { 00847 } 00848 00849 void CAsciiFile::set_short_string_list(const TString<int16_t>* strings, int32_t num_str) 00850 { 00851 } 00852 00853 void CAsciiFile::set_word_string_list(const TString<uint16_t>* strings, int32_t num_str) 00854 { 00855 } 00856 00857 void CAsciiFile::set_long_string_list(const TString<int64_t>* strings, int32_t num_str) 00858 { 00859 } 00860 00861 void CAsciiFile::set_ulong_string_list(const TString<uint64_t>* strings, int32_t num_str) 00862 { 00863 } 00864 00865 void CAsciiFile::set_shortreal_string_list(const TString<float32_t>* strings, int32_t num_str) 00866 { 00867 } 00868 00869 void CAsciiFile::set_real_string_list(const TString<float64_t>* strings, int32_t num_str) 00870 { 00871 } 00872 00873 void CAsciiFile::set_longreal_string_list(const TString<floatmax_t>* strings, int32_t num_str) 00874 { 00875 } 00876 00877 template <class T> void CAsciiFile::append_item( 00878 DynArray<T>* items, char* ptr_data, char* ptr_item) 00879 { 00880 size_t len=(ptr_data-ptr_item)/sizeof(char); 00881 char* item=new char[len+1]; 00882 memset(item, 0, sizeof(char)*(len+1)); 00883 item=strncpy(item, ptr_item, len); 00884 00885 SG_DEBUG("current %c, len %d, item %s\n", *ptr_data, len, item); 00886 items->append_element(item); 00887 }