CLAW Library (a C++ Library Absolutely Wonderful) 1.5.5
|
00001 /* 00002 CLAW - a C++ Library Absolutely Wonderful 00003 00004 CLAW is a free library without any particular aim but being useful to 00005 anyone. 00006 00007 Copyright (C) 2005-2010 Julien Jorge 00008 00009 This library is free software; you can redistribute it and/or 00010 modify it under the terms of the GNU Lesser General Public 00011 License as published by the Free Software Foundation; either 00012 version 2.1 of the License, or (at your option) any later version. 00013 00014 This library is distributed in the hope that it will be useful, 00015 but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 Lesser General Public License for more details. 00018 00019 You should have received a copy of the GNU Lesser General Public 00020 License along with this library; if not, write to the Free Software 00021 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00022 00023 contact: julien_jorge@yahoo.fr 00024 */ 00030 #include <map> 00031 #include <assert.h> 00032 #include <claw/it_index.hpp> 00033 00034 /*---------------------------------------------------------------------------*/ 00043 template<class RandomIterator> 00044 unsigned int 00045 claw::text::kmp<RandomIterator>:: 00046 common_prefix_length( const RandomIterator begin_1, 00047 const RandomIterator begin_2, 00048 const RandomIterator end_1, const RandomIterator end_2 00049 ) const 00050 { 00051 unsigned int count = 0; 00052 RandomIterator it_1 = begin_1, it_2 = begin_2; 00053 bool quit = false; 00054 00055 while ( (it_1 != end_1) && (it_2 != end_2) && ! quit ) 00056 if ( *it_1 == *it_2 ) 00057 { 00058 ++it_1; 00059 ++it_2; 00060 ++count; 00061 } 00062 else 00063 quit = true; 00064 00065 return count; 00066 } // common_prefix_length() 00067 00068 /*---------------------------------------------------------------------------*/ 00076 template<class RandomIterator> 00077 void claw::text::kmp<RandomIterator>::z_boxes(const RandomIterator begin, 00078 const RandomIterator end, 00079 std::map<unsigned int,unsigned int>& out) const 00080 { 00081 // right and left bounds of the current item's Z-box. 00082 claw::it_index<RandomIterator> it_r(begin); 00083 claw::it_index<RandomIterator> it_l(begin); 00084 claw::it_index<RandomIterator> it_k(begin); // increment on the items 00085 unsigned int z; // le Zi of the current position 00086 00087 for (++it_k; it_k!=end; ++it_k) 00088 { 00089 if (it_k > it_r) 00090 { 00091 z = common_prefix_length(begin, it_k, end, end); 00092 00093 if ( z > 0 ) // set the Z-box 00094 { 00095 out[it_k] = z; 00096 it_l = it_k; 00097 it_r = it_k.operator+(z) - 1; 00098 } 00099 } 00100 else /* k <= r */ 00101 { 00102 unsigned int k_bis = it_k - it_l; 00103 claw::it_index<RandomIterator> it_b(it_r - it_k); 00104 00105 if ( out.find(k_bis) == out.end() ) 00106 z = 0; 00107 else 00108 z = out[k_bis]; 00109 00110 if ( z <= (unsigned int)it_b ) 00111 { 00112 if ( z > 0 ) 00113 out[it_k] = z; 00114 } 00115 else 00116 { 00117 claw::it_index<RandomIterator> it_q = it_r + 1; 00118 00119 it_q += common_prefix_length(it_q, it_b+1, end, end); 00120 00121 out[it_k] = it_q - it_k; 00122 it_r = it_q - 1; 00123 it_l = it_k; 00124 } 00125 } 00126 } 00127 } // z_boxes() 00128 00129 /*---------------------------------------------------------------------------*/ 00137 template<class RandomIterator> 00138 void claw::text::kmp<RandomIterator>::spi_prime(const RandomIterator begin, 00139 const RandomIterator end, 00140 std::map<unsigned int, unsigned int>& out) const 00141 { 00142 std::map<unsigned int, unsigned int> z; // pattern's Z-boxes. 00143 unsigned int j; // loop index. 00144 00145 // set Z-boxes 00146 z_boxes(begin, end, z); 00147 00148 // calculates spi' (from end to begining) 00149 j=end-begin; 00150 00151 do 00152 { 00153 --j; 00154 if (z.find(j) != z.end()) 00155 out[j + z[j] - 1] = z[j]; 00156 } 00157 while (j!=0); 00158 } // spi_prime() 00159 00160 /*---------------------------------------------------------------------------*/ 00171 template<class RandomIterator> 00172 template<class UnaryPredicate> 00173 void claw::text::kmp<RandomIterator>::operator() 00174 (const RandomIterator pattern_begin, const RandomIterator pattern_end, 00175 const RandomIterator text_begin, const RandomIterator text_end, 00176 UnaryPredicate& action ) const 00177 { 00178 std::map<unsigned int, unsigned int> spi; // pattern's spi' 00179 claw::it_index<RandomIterator> it_p(pattern_begin,1); 00180 claw::it_index<RandomIterator> it_t(text_begin,1); 00181 bool stop = false; // result of the last call to the predicate action 00182 00183 const int pattern_length = pattern_end - pattern_begin; 00184 const int text_length = text_end - text_begin; 00185 00186 assert(pattern_begin != pattern_end); 00187 00188 spi_prime(pattern_begin, pattern_end, spi); 00189 00190 unsigned int i = 0; 00191 00192 while ( ((int)it_t <= text_length - (pattern_length - it_p)) && !stop ) 00193 { 00194 unsigned int common; 00195 00196 common = common_prefix_length(it_p, it_t, pattern_end, text_end); 00197 i += common; 00198 it_p += common; 00199 it_t += common; 00200 00201 if (it_p == 1) 00202 ++it_t; 00203 else 00204 { 00205 if ( (int)it_p == pattern_length+1 ) 00206 stop = !action( (int)it_t - pattern_length-1 ); 00207 00208 i = spi[i-1]; 00209 it_p.set(pattern_begin+i, i+1); 00210 } 00211 } 00212 } // operator()