SimpleFile.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #ifndef __SIMPLEFILE_H__
00012 #define __SIMPLEFILE_H__
00013 
00014 #include "lib/io.h"
00015 #include "base/SGObject.h"
00016 
00017 #include <stdio.h>
00018 #include <string.h>
00019 #include <sys/mman.h>
00020 
00021 namespace shogun
00022 {
00027 template <class T> class CSimpleFile : public CSGObject
00028 {
00029     public:
00036         CSimpleFile(char* fname, FILE* f)
00037         : CSGObject(), line_buffer_size(1024*1024), line_buffer(NULL)
00038         {
00039             file=f;
00040             filename=strdup(fname);
00041             status = (file!=NULL && filename!=NULL);
00042         }
00043 
00044         virtual ~CSimpleFile()
00045         {
00046             free(filename);
00047             free_line_buffer();
00048         }
00049 
00056         T* load(T* target, int64_t& num=0)
00057         {
00058             if (status)
00059             {
00060                 status=false;
00061 
00062                 if (num==0)
00063                 {
00064                     bool seek_status=true;
00065                     int64_t cur_pos=ftell(file);
00066 
00067                     if (cur_pos!=-1)
00068                     {
00069                         if (!fseek(file, 0, SEEK_END))
00070                         {
00071                             if ((num=(int64_t) ftell(file)) != -1)
00072                             {
00073                                 SG_INFO( "file of size %ld bytes == %ld entries detected\n", num,num/sizeof(T));
00074                                 num/=sizeof(T);
00075                             }
00076                             else
00077                                 seek_status=false;
00078                         }
00079                         else
00080                             seek_status=false;
00081                     }
00082 
00083                     if ((fseek(file,cur_pos, SEEK_SET)) == -1)
00084                         seek_status=false;
00085 
00086                     if (!seek_status)
00087                     {
00088                         SG_ERROR( "filesize autodetection failed\n");
00089                         num=0;
00090                         return NULL;
00091                     }
00092                 }
00093 
00094                 if (num>0)
00095                 {
00096                     if (!target)
00097                         target=new T[num];
00098 
00099                     if (target)
00100                     {
00101                         size_t num_read=fread((void*) target, sizeof(T), num, file);
00102                         status=((int64_t) num_read == num);
00103 
00104                         if (!status)
00105                             SG_ERROR( "only %ld of %ld entries read. io error\n", (int64_t) num_read, num);
00106                     }
00107                     else
00108                         SG_ERROR( "failed to allocate memory while trying to read %ld entries from file \"s\"\n", (int64_t) num, filename);
00109                 }
00110                 return target;
00111             }
00112             else 
00113             {
00114                 num=-1;
00115                 return NULL;
00116             }
00117         }
00118 
00125         bool save(T* target, int64_t num)
00126         {
00127             if (status)
00128             {
00129                 status=false;
00130                 if (num>0)
00131                 {
00132                     if (!target)
00133                         target=new T[num];
00134 
00135                     if (target)
00136                     {
00137                         status=(fwrite((void*) target, sizeof(T), num, file)==
00138                             (size_t) num);
00139                     }
00140                 }
00141             }
00142             return status;
00143         }
00144 
00150         void get_buffered_line(char* line, uint64_t len)
00151         {
00152 
00153             /*
00154             if (!line_buffer)
00155             {
00156                 line_buffer=new char[line_buffer_size];
00157                 size_t num_read=fread((void*) target, sizeof(T), num, file);
00158 
00159                     if (target)
00160                     {
00161                         size_t num_read=fread((void*) target, sizeof(T), num, file);
00162                         status=((int64_t) num_read == num);
00163 
00164                         if (!status)
00165                             SG_ERROR( "only %ld of %ld entries read. io error\n", (int64_t) num_read, num);
00166                     }
00167                     else
00168                         SG_ERROR( "failed to allocate memory while trying to read %ld entries from file \"s\"\n", (int64_t) num, filename);
00169 
00170                         */
00171         }
00172 
00174         void free_line_buffer()
00175         {
00176             delete[] line_buffer;
00177             line_buffer=NULL;
00178         }
00179 
00184         inline void set_line_buffer_size(int32_t bufsize)
00185         {
00186             if (bufsize<=0)
00187                 bufsize=1024*1024;
00188 
00189             free_line_buffer();
00190             line_buffer_size=bufsize;
00191         }
00192 
00197         inline bool is_ok() { return status; }
00198 
00200         inline virtual const char* get_name() const { return "SimpleFile"; }
00201 
00202     protected:
00204         FILE* file;
00206         bool status;
00208         char task;
00210         char* filename;
00211 
00213         int32_t line_buffer_size;
00215         char* line_buffer;
00216 };
00217 }
00218 #endif

SHOGUN Machine Learning Toolbox - Documentation