System documentation of the GNU Image-Finding Tool

merge_sort_streams.h
00001 /* -*- mode: c++ -*- 
00002  */
00003 /* 
00004    
00005     GIFT, a flexible content based image retrieval system.
00006     Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva, University of Bayreuth
00007 
00008      Copyright (C) 2003, 2004 Bayreuth University
00009       2005 Bamberg University
00010     This program is free software; you can redistribute it and/or modify
00011     it under the terms of the GNU General Public License as published by
00012     the Free Software Foundation; either version 2 of the License, or
00013     (at your option) any later version.
00014 
00015     This program is distributed in the hope that it will be useful,
00016     but WITHOUT ANY WARRANTY; without even the implied warranty of
00017     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018     GNU General Public License for more details.
00019 
00020     You should have received a copy of the GNU General Public License
00021     along with this program; if not, write to the Free Software
00022     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 
00024 */
00025 #ifndef _MERGE_SORT
00026 #define _MERGE_SORT
00027 
00028 //#warning FIXME: this needs to be added to the GIFT distro. It speeds up the generation
00029 //#warning of inverted files considerably
00030 //(done)
00031 
00032 #include <memory> // auto_ptr
00033 #include <algorithm> // using swap
00034 #include <fstream>   // file access
00035 #include <cassert>
00036 
00037 using namespace std;
00038 
00039 //#define STREAMPOS_T fstream::pos_type
00040 
00043 template<typename T>
00044 class CStreamPos{
00046   T mContent;
00047 public:
00048   CStreamPos(T inContent):mContent(inContent){
00049     
00050   }
00051 
00052   CStreamPos(long int inContent):mContent(inContent){
00053     
00054   }
00055 
00056   operator T()const{
00057     return mContent;
00058   }
00059   operator long int()const{
00060     return mContent;
00061   }
00062 
00063   CStreamPos<T>& operator++ (){
00064     mContent=mContent+static_cast<T>(1);
00065     return *this;
00066   }
00067   CStreamPos<T> operator++ (int){
00068     CStreamPos<T> lReturnValue=*this;
00069     mContent=mContent+static_cast<T>(1);
00070     return lReturnValue;
00071   }
00072 
00073   CStreamPos<T> operator% (int inMod)const{
00074     return mContent % inMod;
00075   }
00076   CStreamPos<T> operator/ (int inMod)const{
00077     return mContent / inMod;
00078   }
00079   bool operator< (CStreamPos<T>& inThan)const{
00080     return this->mContent < inThan.mContent;
00081   }
00082   bool operator! ()const{
00083     return !(this->mContent);
00084   }
00085   CStreamPos<T> operator+ (CStreamPos<T>& inSummand){
00086     return CStreamPos<T>(mContent + inSummand.mContent);
00087   }
00088 };
00089 
00090 #if __GNUC__==2
00091 #define STREAMPOS_T long long int
00092 #else
00093 #define STREAMPOS_T CStreamPos<fstream::pos_type>
00094 #endif
00095 // ex long long int
00096 
00107 template<class T>
00108 void merge_streams(istream& in1, const STREAMPOS_T inCount1,
00109                    istream& in2, const STREAMPOS_T inCount2,
00110                    ostream& out,
00111                    int inNumberOfPageElements=1){
00112 
00113   {
00114 //     cout << "Merging: " 
00115 //       << inCount1
00116 //       << ","
00117 //       << inCount2
00118 //       << endl;
00119     const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements);
00120 
00121 
00122 
00123     if(!(inCount1)){// if there is nothing to merge
00124       for(STREAMPOS_T i=0;//...copy
00125           i<inCount2;
00126           i++){
00127         T l2;
00128         in2.read((char*)&l2,
00129                  sizeof(l2));
00130         assert(in2);
00131 
00132         out.write((char*)&l2,
00133                   sizeof(l2));
00134         assert(out);
00135       }
00136       return;//and return
00137     }
00138     if(!inCount2){//if there is nothing to merge
00139       for(STREAMPOS_T i=0;//...copy
00140           i<inCount1;
00141           i++){
00142         T l1;
00143         in1.read((char*)&l1,
00144                  sizeof(l1));
00145         assert(in1);
00146         out.write((char*)&l1,
00147                   sizeof(l1));
00148         assert(out);
00149       }
00150       return;//and return
00151     }
00152     
00153     STREAMPOS_T lI1(0);
00154     STREAMPOS_T lI2(0);
00155     
00156     //read the first record from both files
00157     T l1;
00158     in1.read((char*)&l1,
00159              sizeof(l1));
00160     assert(in1);
00161     T l2;
00162     in2.read((char*)&l2,
00163              sizeof(l2));
00164     assert(in2);
00165     
00166     while((lI1<inCount1)
00167           &&(lI2<inCount2)){
00168       if(l1<l2){
00169         //
00170         out.write((char*)&l1,
00171                   sizeof(l1));
00172         assert(out);
00173 
00174         //
00175         lI1++;
00176         if(lI1<inCount1){
00177           in1.read((char*)&l1,
00178                    sizeof(l1));
00179           assert(in1);
00180         }
00181       }else{
00182         //
00183         out.write((char*)&l2,
00184                   sizeof(l2));
00185         //
00186         lI2++;
00187         if(lI2<inCount2){
00188           in2.read((char*)&l2,
00189                    sizeof(l2));
00190           assert(in2);
00191         }
00192       }
00193     }
00194     while(lI1<inCount1){
00195       //
00196       out.write((char*)&l1,
00197                 sizeof(l1));
00198       //
00199       lI1++;
00200       if(lI1<inCount1){
00201         in1.read((char*)&l1,
00202                  sizeof(l1));
00203         assert(in1);
00204       }
00205     }
00206     while(lI2<inCount2){
00207       //
00208       out.write((char*)&l2,
00209                 sizeof(l2));
00210       assert(out);
00211       //
00212       lI2++;
00213       if(lI2<inCount2){
00214         in2.read((char*)&l2,
00215                  sizeof(l2));
00216         assert(in2);
00217       }
00218     }
00219   }
00220 }
00221 template<typename T>
00222 void first_level_quicksort(int inNumberOfPageElements,
00223                            const char* inFileToBeSortedName,
00224                            const char* inTemporaryName){
00225 
00226   cout << "Starting quicksort: "
00227        << inNumberOfPageElements 
00228        << " elements per page." << endl
00229        << "Sorting files " << inFileToBeSortedName << endl
00230        << "to            " << inTemporaryName << endl;
00231   cout << "NOW ALLOCATING A PAGE" << inNumberOfPageElements << endl;
00232   auto_ptr<T> lPage(new T[inNumberOfPageElements]);
00233 
00234   cout << "H" << flush;  
00235 
00236   const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements);
00237 
00238   cout << "I" << flush;
00239 
00240   STREAMPOS_T lFileSize(0);
00241   ifstream lToBeSorted1(inFileToBeSortedName);
00242   assert(lToBeSorted1);
00243   lToBeSorted1.seekg(0,
00244                     ios::end);
00245   lFileSize=lToBeSorted1.tellg();
00246   lToBeSorted1.clear();
00247   lToBeSorted1.seekg(0,
00248                     ios::beg);
00249   cout << "E" << flush;
00250 
00251   ofstream lTemporary(inTemporaryName);
00252   assert(lTemporary);
00253   cout << "R" << flush;
00254 
00255   STREAMPOS_T lSum(0);
00256 
00257   T* lBegin(lPage.get());
00258   T* lEnd(lPage.get());
00259   
00260   cout << "FIRSTLEVELQUICK" << lFileSize << ";" << lSum<< endl;
00261   while((lSum<lFileSize)
00262         && lToBeSorted1){
00263 
00264     cout << "." << flush;
00265 
00266     int lRead(0);
00267     if(lSum+lPageSize < lFileSize){
00268       lToBeSorted1.read((char*)lPage.get(),
00269                         lPageSize);
00270       lRead=lPageSize;
00271     }else{
00272       lToBeSorted1.read((char*)lPage.get(),
00273                         lFileSize-lSum);      
00274       lRead=lFileSize-lSum;
00275     }
00276     if(lRead){
00277       lEnd=lBegin+(lRead)/sizeof(T);
00278       sort(lBegin,lEnd);
00279       lTemporary.write((char*)lPage.get(),lRead);
00280       assert(lTemporary);
00281     }
00282   }
00283   cout << "." << endl;
00284 
00285 }
00286 
00297 template<class T>
00298 char* merge_sort_streams(const char* inFileToBeSortedName,
00299                         const char* inTemporaryName,
00300                         int inNumberOfPageElements=(1 << 20)
00301                         ){
00302 
00303   const char* lFileToBeSortedName(inFileToBeSortedName);
00304   const char* lTemporaryName(inTemporaryName);
00305 
00306   STREAMPOS_T lFileSize(0);
00307   ifstream lToBeSorted1(inFileToBeSortedName);
00308   lToBeSorted1.seekg(0,
00309                     ios::end);
00310   lFileSize=lToBeSorted1.tellg();
00311   lToBeSorted1.close();
00312   
00313   ofstream lTemporary;
00314   ifstream lToBeSorted2;
00315  
00316 #ifdef first_level_quick
00317   first_level_quicksort<T>(inNumberOfPageElements,
00318                            inFileToBeSortedName,
00319                            inTemporaryName);
00320   swap(lFileToBeSortedName,
00321        lTemporaryName);
00322 #else 
00323   cout << "STARTING mit MERGESIZE1" << endl;
00324   inNumberOfPageElements=1;
00325 #endif
00326   STREAMPOS_T lCount(0);
00327   for(STREAMPOS_T iMergeSize(sizeof(T)*inNumberOfPageElements);
00328       (iMergeSize < lFileSize)
00329         || !(lCount%2)
00330         // ||(lCount%2) makes sure that we will get 
00331         // the result in inFileToBeSorted
00332         // the ! is, because we have have already
00333         // the quicksort sorting pass behind us
00334         ;
00335       (iMergeSize = iMergeSize << 1),
00336         (lCount=lCount+static_cast<STREAMPOS_T>(1))){
00337     
00338     cout << "MERGESORT MergeSize " 
00339          << iMergeSize 
00340          << endl;
00341     
00342     lToBeSorted1.open(lFileToBeSortedName);
00343     lToBeSorted1.clear();
00344 
00345     lToBeSorted2.open(lFileToBeSortedName);
00346     lToBeSorted2.clear();
00347 
00348     lTemporary.open(lTemporaryName);
00349     lTemporary.clear();
00350     
00351     
00352     for(STREAMPOS_T i(0);
00353         i<lFileSize;
00354         i = i + iMergeSize*2){
00355       lToBeSorted1.seekg(i);
00356 
00357       if(!lToBeSorted1){
00358         cerr << __FILE__ << ":" << __LINE__ << " lToBeSorted false, after seekg("
00359              << static_cast<long int>(i)
00360              << ")"
00361              << endl;
00362       }
00363       
00364       assert(lToBeSorted1);
00365 
00366       if(i+iMergeSize<lFileSize){
00367         lToBeSorted2.seekg(i+iMergeSize);
00368         assert(lToBeSorted2);
00369       }
00370 
00371       STREAMPOS_T lMergeSize1=lFileSize-i;
00372       if(lFileSize < i){// underflow
00373         lMergeSize1=0;//correct underflow
00374       }
00375       STREAMPOS_T lMergeSize2=lFileSize-i-iMergeSize;
00376       if(lFileSize < i + iMergeSize){// underflow of lMergeSize2
00377         lMergeSize2=0;//correct underflow
00378       }
00379       
00380       // lToBeSorted1.clear();
00381       // lToBeSorted2.clear();
00382       
00383       if(lMergeSize1>iMergeSize){
00384         lMergeSize1=iMergeSize;
00385       }
00386       if(lMergeSize2>iMergeSize){
00387         lMergeSize2=iMergeSize;
00388       }
00389 
00390 #if __GNUC__==2
00391       merge_streams<T>(lToBeSorted1,
00392                        lMergeSize1/sizeof(T),
00393                        lToBeSorted2,
00394                        lMergeSize2/sizeof(T),
00395                        lTemporary,
00396                        inNumberOfPageElements
00397                        );
00398 #else
00399       merge_streams<T>(lToBeSorted1,
00400                        lMergeSize1.operator/(sizeof(T)),
00401                        lToBeSorted2,
00402                        lMergeSize2.operator/(sizeof(T)),
00403                        lTemporary,
00404                        inNumberOfPageElements
00405                        );
00406 #endif
00407     }
00408     
00409     lTemporary.close();
00410     lToBeSorted1.close();
00411     lToBeSorted2.close();
00412     swap(lFileToBeSortedName,
00413          lTemporaryName);
00414     cout << "endmerge" << endl;
00415   }
00416   return strdup(lFileToBeSortedName);
00417 }
00418 #endif

Need for discussion? Want to contribute? Contact
help-gift@gnu.org Generated using Doxygen