MemoryMappedFile.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #ifndef __MEMORYMAPPEDFILE_H__
00012 #define __MEMORYMAPPEDFILE_H__
00013 
00014 #include "lib/io.h"
00015 #include "base/SGObject.h"
00016 
00017 #include <stdio.h>
00018 #include <string.h>
00019 #include <sys/mman.h>
00020 #include <sys/stat.h>
00021 #include <sys/types.h>
00022 #include <fcntl.h>
00023 #include <unistd.h>
00024 
00025 namespace shogun
00026 {
00031 template <class T> class CMemoryMappedFile : public CSGObject
00032 {
00033     public:
00047         CMemoryMappedFile(const char* fname, char flag='r', uint64_t fsize=0)
00048         : CSGObject()
00049         {
00050             last_written_byte=0;
00051             rw=flag;
00052 
00053             int open_flags;
00054             int mmap_prot;
00055             int mmap_flags;
00056 
00057             if (rw=='w')
00058             {
00059                 open_flags=O_RDWR | O_CREAT;
00060                 mmap_prot=PROT_READ|PROT_WRITE;
00061                 mmap_flags=MAP_SHARED;
00062             }
00063             else if (rw=='r')
00064             {
00065                 open_flags=O_RDONLY;
00066                 mmap_prot=PROT_READ;
00067                 mmap_flags=MAP_PRIVATE;
00068             }
00069             else
00070                 SG_ERROR("Unknown flags\n");
00071 
00072             fd = open(fname, open_flags, S_IRWXU | S_IRWXG | S_IRWXO);
00073             if (fd == -1)
00074                 SG_ERROR("Error opening file\n");
00075 
00076             if (rw=='w' && fsize)
00077             {
00078                 uint8_t byte=0;
00079                 if (lseek(fd, fsize, SEEK_SET) != fsize || write(fd, &byte, 1) != 1)
00080                     SG_ERROR("Error creating file of size %ld bytes\n", fsize);
00081             }
00082 
00083             struct stat sb;
00084             if (fstat(fd, &sb) == -1)
00085                 SG_ERROR("Error determining file size\n");
00086 
00087             length = sb.st_size;
00088             address = mmap(NULL, length, mmap_prot, mmap_flags, fd, 0);
00089             if (address == MAP_FAILED)
00090                 SG_ERROR("Error mapping file");
00091         }
00092 
00094         virtual ~CMemoryMappedFile()
00095         {
00096             munmap(address, length);
00097             if (rw=='w' && last_written_byte && ftruncate(fd, last_written_byte) == -1)
00098 
00099             {
00100                 close(fd);
00101                 SG_ERROR("Error Truncating file to %ld bytes\n", last_written_byte);
00102             }
00103             close(fd);
00104         }
00105 
00115         inline T* get_map()
00116         {
00117             return (T*) address;
00118         }
00119 
00124         uint64_t get_length()
00125         {
00126             return length/sizeof(T);
00127         }
00128 
00133         uint64_t get_size()
00134         {
00135             return length;
00136         }
00137 
00149         char* get_line(uint64_t& len, uint64_t& offs)
00150         {
00151             char* s = (char*) address;
00152             for (uint64_t i=offs; i<length; i++)
00153             {
00154                 if (s[i] == '\n')
00155                 {
00156                     char* line=&s[offs];
00157                     len=i-offs;
00158                     offs=i+1;
00159                     return line;
00160                 }
00161             }
00162 
00163             len=0;
00164             offs=length;
00165             return NULL;
00166         }
00167 
00178         void write_line(const char* line, uint64_t len, uint64_t& offs)
00179         {
00180             char* s = ((char*) address) + offs;
00181             if (len+1+offs > length)
00182                 SG_ERROR("Writing beyond size of file\n");
00183 
00184             for (uint64_t i=0; i<len; i++)
00185                 s[i] = line[i];
00186 
00187             s[len]='\n';
00188             offs+=length+1;
00189             last_written_byte=offs-1;
00190         }
00191 
00203         inline void set_truncate_size(uint64_t* sz=0)
00204         {
00205             last_written_byte=sz;
00206         }
00207 
00212         int32_t get_num_lines()
00213         {
00214             char* s = (char*) address;
00215             int32_t linecount=0;
00216             for (uint64_t i=0; i<length; i++)
00217             {
00218                 if (s[i] == '\n')
00219                     linecount++;
00220             }
00221 
00222             return linecount;
00223         }
00224 
00232         inline T operator[](int32_t index) const
00233         {
00234             return ((T*)address)[index];
00235         }
00236 
00238         inline virtual const char* get_name() const { return "MemoryMappedFile"; }
00239 
00240     protected:
00242         int fd;
00244         uint64_t length;
00246         void* address;
00248         char rw;
00249 
00251         uint64_t last_written_byte;
00252 };
00253 }
00254 #endif

SHOGUN Machine Learning Toolbox - Documentation