System documentation of the GNU Image-Finding Tool

CAcIFFileSystem.h
00001 /* -*- mode: c++ -*- 
00002 */
00003 /* 
00004 
00005     GIFT, a flexible content based image retrieval system.
00006     Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva
00007 
00008      Copyright (C) 2003, 2004 Bayreuth University
00009       2005 Bamberg University
00010     This program is free software; you can redistribute it and/or modify
00011     it under the terms of the GNU General Public License as published by
00012     the Free Software Foundation; either version 2 of the License, or
00013     (at your option) any later version.
00014 
00015     This program is distributed in the hope that it will be useful,
00016     but WITHOUT ANY WARRANTY; without even the implied warranty of
00017     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018     GNU General Public License for more details.
00019 
00020     You should have received a copy of the GNU General Public License
00021     along with this program; if not, write to the Free Software
00022     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 
00024 */
00025 // -*- mode: c++ -*-
00026 
00027 
00028 class CXMLElement;
00029 
00030 /*
00031 *
00032 * 
00033 *  This class manages the access to the inverted file as well 
00034 *    as its generation
00035 *
00036 *
00037 *
00038 * modification history:
00039 *
00040 * WM   1099 changed documentation format
00041 *           completed documentation
00042 * HM 090399 created the documentation
00043 * WM   1098 created the file
00044 *
00045 *
00046 *
00047 * compiler defines used:
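00048 *   HAS_HASH_MAP  if defined, HASH_MAP expands to hash_map, otherwise to map
00049 *   V295          if defined, mInvertedFileBuffer is a CArraySelfDestroyPointer<char>, otherwise a string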
00050 */
00051 
00052 #ifndef _CACIFFILESYSTEM
00053 #define _CACIFFILESYSTEM
00054 #include "libGIFTAcInvertedFile/include/uses-declarations.h"
00055 #include <string>
00056 #include "libMRML/include/TID.h"
00057 #include "libMRML/include/CSelfDestroyPointer.h"
00058 #include "libMRML/include/CArraySelfDestroyPointer.h"
00059 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h"
00060 #include "libMRML/include/CMutex.h" // multi threading
00061 //#include "CCollectionFrequencyList.h"
00062 #include "libGIFTAcInvertedFile/include/CADIHash.h"
00063 #include "libGIFTAcURL2FTS/include/CAcURL2FTS.h"
00064 #include "libGIFTAcInvertedFile/include/CAcInvertedFile.h"
00065 #include <iostream>
00066 #include <fstream>
00067 #include <map>
00068 #include <vector>
00069 #ifdef HAS_HASH_MAP
00070 #include <hash_map>
00071 #define HASH_MAP hash_map
00072 #else
00073 #define HASH_MAP map
00074 #endif
00075 #include <functional>
00076 #include <algorithm>
00077 
00078 #include "libMRML/include/CMagic.h"
00079 
00080 
00081 typedef TID TFeatureID ;
00082 
// CAcIFFileSystem: accessor class publicly derived from CAcInvertedFile.
// According to the header comment of this file, it manages the access to
// the inverted file as well as its generation.
//
// Only declarations are visible in this listing. Remarks marked
// "presumably" or "TODO confirm" are inferred from names and types and
// must be checked against the implementation.
//
// NOTE(review): string, list, pair, istream, ifstream, ostream and
// streampos are used unqualified here; presumably uses-declarations.h
// provides the matching using-declarations -- TODO confirm. <list> is not
// included directly by this header.
00093 class CAcIFFileSystem:public CAcInvertedFile{  
00094 
00095 protected:
        // Mutex member (CMutex.h is included "for multi threading"); what it
        // protects is not visible in this header.
00097   CMutex mMutex;
        // Pointer wrapper around a CAcURL2FTS object; presumably the accessor
        // translating URLs to feature files -- TODO confirm.
00103   CSelfDestroyPointer<CAcURL2FTS> mURL2FTS;
        // Presumably the value reported by getMaximumFeatureID() -- TODO confirm.
00105   TID mMaximumFeatureID;
        // Buffer associated with the inverted file. Its type depends on the
        // V295 define: a string when V295 is NOT defined, otherwise a
        // CArraySelfDestroyPointer<char>.
00108 #ifndef V295
00109   string mInvertedFileBuffer;
00110 #else
00111   CArraySelfDestroyPointer<char> mInvertedFileBuffer;
00112 #endif
00113 
        // Presumably the base name of temporary files used while indexing
        // -- TODO confirm.
00115   string mTemporaryIndexingFileBase;
        // Input stream for the inverted file. Declared mutable, so const
        // member functions may modify it.
00117   mutable CSelfDestroyPointer<istream> mInvertedFile;
00118 
        // Input file stream for the offset file; mutable for the same reason.
00120   mutable ifstream mOffsetFile;
00121 
        // Input file stream for the feature description file.
00123   ifstream mFeatureDescriptionFile;
00124 
        // Name of the inverted file (presumably the file behind mInvertedFile).
00126   string mInvertedFileName;
00127 
        // Name of the offset file (presumably the file behind mOffsetFile).
00129   string mOffsetFileName;
00130 
        // Name of the feature description file (presumably the file behind
        // mFeatureDescriptionFile).
00132   string mFeatureDescriptionFileName;
00133 
        // Map type from a TID to a stream position. HASH_MAP is hash_map when
        // HAS_HASH_MAP is defined, map otherwise.
00135   typedef HASH_MAP<TID,streampos> CIDToOffset;//new hash
        // ID -> stream position table; presumably feature ID -> offset into
        // the inverted file -- TODO confirm.
00137   CIDToOffset mIDToOffset;
00138 
        // TID -> double table; by its name, feature ID -> collection
        // frequency. Mutable, so const member functions may modify it
        // (whether it is used as a cache is not visible here).
00140   mutable HASH_MAP<TID,double> mFeatureToCollectionFrequency;//new hash
00141 
        // TID -> unsigned int table; presumably what getFeatureDescription()
        // looks up. The meaning of the unsigned value is not visible here.
00145   HASH_MAP<TID,unsigned int> mFeatureDescription;//new hash_
00146 
        // Per-document information container (CADIHash); presumably the
        // source for the DIDTo...() queries below -- TODO confirm.
00150   CADIHash mDocumentInformation;
00152 
        // Writes one offset-file element to an already opened output stream.
        //   inFeatureID      : ID of the feature the element belongs to
        //   inPosition       : stream position stored for that feature
        //   inOpenOffsetFile : output stream that receives the element
        // The on-disk format is not visible in this header.
00155   void writeOffsetFileElement(TID inFeatureID,
00156                               streampos inPosition,
00157                               ostream& inOpenOffsetFile);
        // Returns a pointer to a CDocumentFrequencyList for the file named
        // inFileName. Ownership of the result is not visible here
        // -- TODO confirm who deletes it.
00159   CDocumentFrequencyList* getFeatureFile(string inFileName)const;
00160 public:
        // Function-call operator without arguments, returning bool. The
        // meaning of the result is not visible here (presumably a validity
        // check, compare operator bool() below) -- TODO confirm.
00162   bool operator()()const;
00163 
        // Constructs the accessor from an XML element; by the parameter name
        // this element describes the collection.
00190   CAcIFFileSystem(const CXMLElement& inCollectionElement);
        // Initialisation step returning bool. The meaning of the bool
        // parameter and of the result is not visible here -- TODO confirm.
00192   bool init(bool);
00193 
        // Destructor.
00195   ~CAcIFFileSystem();
00196   
        // Returns a string for the given ID; by its name, the URL of the
        // document with that ID.
00198   string IDToURL(TID inID)const;
00199 
        // Returns a pointer to the CDocumentFrequencyList for a feature ID.
        // Ownership of the result is not visible here -- TODO confirm.
00203   CDocumentFrequencyList* FeatureToList(TFeatureID)const;
00204 
        // Returns a pointer to a CDocumentFrequencyList for the document
        // given by URL. Ownership of the result: TODO confirm.
00206   CDocumentFrequencyList* URLToFeatureList(string inURL)const;
00207 
        // Same kind of result as URLToFeatureList(), but the document is
        // given by its ID. Ownership of the result: TODO confirm.
00209   CDocumentFrequencyList* DIDToFeatureList(TID inDID)const;
00210 
00212 
00213 
        // Returns a double for a feature ID; by its name, the collection
        // frequency of that feature.
00217   double FeatureToCollectionFrequency(TFeatureID)const;
00218 
        // Returns the unsigned int description value for a feature ID;
        // the encoding of that value is not visible here.
00220   unsigned int getFeatureDescription(TID inFeatureID)const;
00222 
        // Per-document values, each taking a document ID (TID) and returning
        // a double. By their names: the maximum document frequency, the sum
        // of squared document frequencies, and a sum combining squared
        // document frequency with log inverse collection frequency. The
        // exact formulas are not visible here -- TODO confirm.
00226   double DIDToMaxDocumentFrequency(TID)const;
00227 
00229   double DIDToDFSquareSum(TID)const;
00230 
00232   double DIDToSquareDFLogICFSum(TID)const;
00234 
00235   /*@name Inverted File Generation and Consistency Checking*/
00237 
        // Generates the inverted file; returns bool (presumably success).
00245   bool generateInvertedFile();
00246 
        // A second generation routine with the same signature; how it
        // differs from generateInvertedFile() is not visible here.
00254   bool newGenerateInvertedFile();
00255 
        // Consistency check returning bool; what is checked is not visible
        // in this header.
00258   bool checkConsistency();
00259 
        // Looks for an entry described by feature ID, document ID and
        // document frequency; returns bool (presumably "found"). Which
        // stream is searched is not visible here -- TODO confirm.
00266   bool findWithinStream(TID inFeatureID,
00267                         TID inDocumentID,
00268                         double inDocumentFrequency)const;
00269   
00271 
        // Virtual. Maps a URL to a pair of bool and TID; presumably the bool
        // tells whether the URL was found -- TODO confirm.
00277   virtual pair<bool,TID> URLToID(const string& inURL)const;
00278   
        // Takes a list<TID> by non-const reference; presumably filled with
        // the IDs of all documents -- TODO confirm.
00280   void getAllIDs(list<TID>&)const;
        // Same pattern as getAllIDs(), with CAccessorElement entries.
00283   void getAllAccessorElements(list<CAccessorElement>&)const;
        // Takes a list<TID> by non-const reference plus a size; presumably
        // fills the list with that many randomly chosen IDs -- TODO confirm.
00288   void getRandomIDs(list<TID>&,
00289                     list<TID>::size_type)const;
        // Same pattern as getRandomIDs(), with CAccessorElement entries:
        //   outResult : list receiving the result
        //   inSize    : requested size
00298   void getRandomAccessorElements(list<CAccessorElement>& outResult,
00299                                   list<CAccessorElement>::size_type inSize)const;
        // Returns an int size; presumably the number of documents in the
        // collection -- TODO confirm.
00301   int size()const;
00303 
        // Returns a TID; by its name, the largest feature ID
        // (presumably mMaximumFeatureID).
00304   TID getMaximumFeatureID()const;
        // Returns a pointer to a list<TID>; by its name, all feature IDs.
        // Ownership of the result is not visible here -- TODO confirm.
00312   list<TID>* getAllFeatureIDs()const;
        // Virtual. Maps an ID to a pair of bool and CAccessorElement;
        // presumably the bool tells whether the ID was found -- TODO confirm.
00318   virtual pair<bool,CAccessorElement> IDToAccessorElement(TID inID)const;
        // Conversion to bool. It is not declared explicit, so it also takes
        // part in implicit conversions. The meaning of the result is not
        // visible here.
00320   operator bool()const;
00321 
00322 };
00323 
00324 #endif

Need to discuss something? Want to contribute? Contact
help-gift@gnu.org. Generated using Doxygen.