libstdc++
|
00001 // Locale support (codecvt) -*- C++ -*- 00002 00003 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009 00004 // Free Software Foundation, Inc. 00005 // 00006 // This file is part of the GNU ISO C++ Library. This library is free 00007 // software; you can redistribute it and/or modify it under the 00008 // terms of the GNU General Public License as published by the 00009 // Free Software Foundation; either version 3, or (at your option) 00010 // any later version. 00011 00012 // This library is distributed in the hope that it will be useful, 00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 // GNU General Public License for more details. 00016 00017 // Under Section 7 of GPL version 3, you are granted additional 00018 // permissions described in the GCC Runtime Library Exception, version 00019 // 3.1, as published by the Free Software Foundation. 00020 00021 // You should have received a copy of the GNU General Public License and 00022 // a copy of the GCC Runtime Library Exception along with this program; 00023 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00024 // <http://www.gnu.org/licenses/>. 00025 00026 // 00027 // ISO C++ 14882: 22.2.1.5 Template class codecvt 00028 // 00029 00030 // Written by Benjamin Kosnik <bkoz@redhat.com> 00031 00032 /** @file ext/codecvt_specializations.h 00033 * This file is a GNU extension to the Standard C++ Library. 00034 */ 00035 00036 #ifndef _EXT_CODECVT_SPECIALIZATIONS_H 00037 #define _EXT_CODECVT_SPECIALIZATIONS_H 1 00038 00039 #include <bits/c++config.h> 00040 #include <locale> 00041 #include <iconv.h> 00042 00043 _GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx) 00044 00045 /// Extension to use iconv for dealing with character encodings. 00046 // This includes conversions and comparisons between various character 00047 // sets. This object encapsulates data that may need to be shared between 00048 // char_traits, codecvt and ctype. 00049 class encoding_state 00050 { 00051 public: 00052 // Types: 00053 // NB: A conversion descriptor subsumes and enhances the 00054 // functionality of a simple state type such as mbstate_t. 00055 typedef iconv_t descriptor_type; 00056 00057 protected: 00058 // Name of internal character set encoding. 00059 std::string _M_int_enc; 00060 00061 // Name of external character set encoding. 00062 std::string _M_ext_enc; 00063 00064 // Conversion descriptor between external encoding to internal encoding. 00065 descriptor_type _M_in_desc; 00066 00067 // Conversion descriptor between internal encoding to external encoding. 00068 descriptor_type _M_out_desc; 00069 00070 // The byte-order marker for the external encoding, if necessary. 00071 int _M_ext_bom; 00072 00073 // The byte-order marker for the internal encoding, if necessary. 00074 int _M_int_bom; 00075 00076 // Number of external bytes needed to construct one complete 00077 // character in the internal encoding. 00078 // NB: -1 indicates variable, or stateful, encodings. 00079 int _M_bytes; 00080 00081 public: 00082 explicit 00083 encoding_state() 00084 : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0) 00085 { } 00086 00087 explicit 00088 encoding_state(const char* __int, const char* __ext, 00089 int __ibom = 0, int __ebom = 0, int __bytes = 1) 00090 : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0), 00091 _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes) 00092 { init(); } 00093 00094 // 21.1.2 traits typedefs 00095 // p4 00096 // typedef STATE_T state_type 00097 // requires: state_type shall meet the requirements of 00098 // CopyConstructible types (20.1.3) 00099 // NB: This does not preserve the actual state of the conversion 00100 // descriptor member, but it does duplicate the encoding 00101 // information. 00102 encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0) 00103 { construct(__obj); } 00104 00105 // Need assignment operator as well. 00106 encoding_state& 00107 operator=(const encoding_state& __obj) 00108 { 00109 construct(__obj); 00110 return *this; 00111 } 00112 00113 ~encoding_state() 00114 { destroy(); } 00115 00116 bool 00117 good() const throw() 00118 { 00119 const descriptor_type __err = (iconv_t)(-1); 00120 bool __test = _M_in_desc && _M_in_desc != __err; 00121 __test &= _M_out_desc && _M_out_desc != __err; 00122 return __test; 00123 } 00124 00125 int 00126 character_ratio() const 00127 { return _M_bytes; } 00128 00129 const std::string 00130 internal_encoding() const 00131 { return _M_int_enc; } 00132 00133 int 00134 internal_bom() const 00135 { return _M_int_bom; } 00136 00137 const std::string 00138 external_encoding() const 00139 { return _M_ext_enc; } 00140 00141 int 00142 external_bom() const 00143 { return _M_ext_bom; } 00144 00145 const descriptor_type& 00146 in_descriptor() const 00147 { return _M_in_desc; } 00148 00149 const descriptor_type& 00150 out_descriptor() const 00151 { return _M_out_desc; } 00152 00153 protected: 00154 void 00155 init() 00156 { 00157 const descriptor_type __err = (iconv_t)(-1); 00158 const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size(); 00159 if (!_M_in_desc && __have_encodings) 00160 { 00161 _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str()); 00162 if (_M_in_desc == __err) 00163 std::__throw_runtime_error(__N("encoding_state::_M_init " 00164 "creating iconv input descriptor failed")); 00165 } 00166 if (!_M_out_desc && __have_encodings) 00167 { 00168 _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str()); 00169 if (_M_out_desc == __err) 00170 std::__throw_runtime_error(__N("encoding_state::_M_init " 00171 "creating iconv output descriptor failed")); 00172 } 00173 } 00174 00175 void 00176 construct(const encoding_state& __obj) 00177 { 00178 destroy(); 00179 _M_int_enc = __obj._M_int_enc; 00180 _M_ext_enc = __obj._M_ext_enc; 00181 _M_ext_bom = __obj._M_ext_bom; 00182 _M_int_bom = __obj._M_int_bom; 00183 _M_bytes = __obj._M_bytes; 00184 init(); 00185 } 00186 00187 void 00188 destroy() throw() 00189 { 00190 const descriptor_type __err = (iconv_t)(-1); 00191 if (_M_in_desc && _M_in_desc != __err) 00192 { 00193 iconv_close(_M_in_desc); 00194 _M_in_desc = 0; 00195 } 00196 if (_M_out_desc && _M_out_desc != __err) 00197 { 00198 iconv_close(_M_out_desc); 00199 _M_out_desc = 0; 00200 } 00201 } 00202 }; 00203 00204 /// encoding_char_traits 00205 // Custom traits type with encoding_state for the state type, and the 00206 // associated fpos<encoding_state> for the position type, all other 00207 // bits equivalent to the required char_traits instantiations. 00208 template<typename _CharT> 00209 struct encoding_char_traits : public std::char_traits<_CharT> 00210 { 00211 typedef encoding_state state_type; 00212 typedef typename std::fpos<state_type> pos_type; 00213 }; 00214 00215 _GLIBCXX_END_NAMESPACE 00216 00217 00218 _GLIBCXX_BEGIN_NAMESPACE(std) 00219 00220 using __gnu_cxx::encoding_state; 00221 00222 /// codecvt<InternT, _ExternT, encoding_state> specialization. 00223 // This partial specialization takes advantage of iconv to provide 00224 // code conversions between a large number of character encodings. 00225 template<typename _InternT, typename _ExternT> 00226 class codecvt<_InternT, _ExternT, encoding_state> 00227 : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state> 00228 { 00229 public: 00230 // Types: 00231 typedef codecvt_base::result result; 00232 typedef _InternT intern_type; 00233 typedef _ExternT extern_type; 00234 typedef __gnu_cxx::encoding_state state_type; 00235 typedef state_type::descriptor_type descriptor_type; 00236 00237 // Data Members: 00238 static locale::id id; 00239 00240 explicit 00241 codecvt(size_t __refs = 0) 00242 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 00243 { } 00244 00245 explicit 00246 codecvt(state_type& __enc, size_t __refs = 0) 00247 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 00248 { } 00249 00250 protected: 00251 virtual 00252 ~codecvt() { } 00253 00254 virtual result 00255 do_out(state_type& __state, const intern_type* __from, 00256 const intern_type* __from_end, const intern_type*& __from_next, 00257 extern_type* __to, extern_type* __to_end, 00258 extern_type*& __to_next) const; 00259 00260 virtual result 00261 do_unshift(state_type& __state, extern_type* __to, 00262 extern_type* __to_end, extern_type*& __to_next) const; 00263 00264 virtual result 00265 do_in(state_type& __state, const extern_type* __from, 00266 const extern_type* __from_end, const extern_type*& __from_next, 00267 intern_type* __to, intern_type* __to_end, 00268 intern_type*& __to_next) const; 00269 00270 virtual int 00271 do_encoding() const throw(); 00272 00273 virtual bool 00274 do_always_noconv() const throw(); 00275 00276 virtual int 00277 do_length(state_type&, const extern_type* __from, 00278 const extern_type* __end, size_t __max) const; 00279 00280 virtual int 00281 do_max_length() const throw(); 00282 }; 00283 00284 template<typename _InternT, typename _ExternT> 00285 locale::id 00286 codecvt<_InternT, _ExternT, encoding_state>::id; 00287 00288 // This adaptor works around the signature problems of the second 00289 // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2 00290 // uses 'char**', which matches the POSIX 1003.1-2001 standard. 00291 // Using this adaptor, g++ will do the work for us. 00292 template<typename _Tp> 00293 inline size_t 00294 __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*), 00295 iconv_t __cd, char** __inbuf, size_t* __inbytes, 00296 char** __outbuf, size_t* __outbytes) 00297 { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); } 00298 00299 template<typename _InternT, typename _ExternT> 00300 codecvt_base::result 00301 codecvt<_InternT, _ExternT, encoding_state>:: 00302 do_out(state_type& __state, const intern_type* __from, 00303 const intern_type* __from_end, const intern_type*& __from_next, 00304 extern_type* __to, extern_type* __to_end, 00305 extern_type*& __to_next) const 00306 { 00307 result __ret = codecvt_base::error; 00308 if (__state.good()) 00309 { 00310 const descriptor_type& __desc = __state.out_descriptor(); 00311 const size_t __fmultiple = sizeof(intern_type); 00312 size_t __fbytes = __fmultiple * (__from_end - __from); 00313 const size_t __tmultiple = sizeof(extern_type); 00314 size_t __tbytes = __tmultiple * (__to_end - __to); 00315 00316 // Argument list for iconv specifies a byte sequence. Thus, 00317 // all to/from arrays must be brutally casted to char*. 00318 char* __cto = reinterpret_cast<char*>(__to); 00319 char* __cfrom; 00320 size_t __conv; 00321 00322 // Some encodings need a byte order marker as the first item 00323 // in the byte stream, to designate endian-ness. The default 00324 // value for the byte order marker is NULL, so if this is 00325 // the case, it's not necessary and we can just go on our 00326 // merry way. 00327 int __int_bom = __state.internal_bom(); 00328 if (__int_bom) 00329 { 00330 size_t __size = __from_end - __from; 00331 intern_type* __cfixed = static_cast<intern_type*> 00332 (__builtin_alloca(sizeof(intern_type) * (__size + 1))); 00333 __cfixed[0] = static_cast<intern_type>(__int_bom); 00334 char_traits<intern_type>::copy(__cfixed + 1, __from, __size); 00335 __cfrom = reinterpret_cast<char*>(__cfixed); 00336 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 00337 &__fbytes, &__cto, &__tbytes); 00338 } 00339 else 00340 { 00341 intern_type* __cfixed = const_cast<intern_type*>(__from); 00342 __cfrom = reinterpret_cast<char*>(__cfixed); 00343 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes, 00344 &__cto, &__tbytes); 00345 } 00346 00347 if (__conv != size_t(-1)) 00348 { 00349 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 00350 __to_next = reinterpret_cast<extern_type*>(__cto); 00351 __ret = codecvt_base::ok; 00352 } 00353 else 00354 { 00355 if (__fbytes < __fmultiple * (__from_end - __from)) 00356 { 00357 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 00358 __to_next = reinterpret_cast<extern_type*>(__cto); 00359 __ret = codecvt_base::partial; 00360 } 00361 else 00362 __ret = codecvt_base::error; 00363 } 00364 } 00365 return __ret; 00366 } 00367 00368 template<typename _InternT, typename _ExternT> 00369 codecvt_base::result 00370 codecvt<_InternT, _ExternT, encoding_state>:: 00371 do_unshift(state_type& __state, extern_type* __to, 00372 extern_type* __to_end, extern_type*& __to_next) const 00373 { 00374 result __ret = codecvt_base::error; 00375 if (__state.good()) 00376 { 00377 const descriptor_type& __desc = __state.in_descriptor(); 00378 const size_t __tmultiple = sizeof(intern_type); 00379 size_t __tlen = __tmultiple * (__to_end - __to); 00380 00381 // Argument list for iconv specifies a byte sequence. Thus, 00382 // all to/from arrays must be brutally casted to char*. 00383 char* __cto = reinterpret_cast<char*>(__to); 00384 size_t __conv = __iconv_adaptor(iconv,__desc, NULL, NULL, 00385 &__cto, &__tlen); 00386 00387 if (__conv != size_t(-1)) 00388 { 00389 __to_next = reinterpret_cast<extern_type*>(__cto); 00390 if (__tlen == __tmultiple * (__to_end - __to)) 00391 __ret = codecvt_base::noconv; 00392 else if (__tlen == 0) 00393 __ret = codecvt_base::ok; 00394 else 00395 __ret = codecvt_base::partial; 00396 } 00397 else 00398 __ret = codecvt_base::error; 00399 } 00400 return __ret; 00401 } 00402 00403 template<typename _InternT, typename _ExternT> 00404 codecvt_base::result 00405 codecvt<_InternT, _ExternT, encoding_state>:: 00406 do_in(state_type& __state, const extern_type* __from, 00407 const extern_type* __from_end, const extern_type*& __from_next, 00408 intern_type* __to, intern_type* __to_end, 00409 intern_type*& __to_next) const 00410 { 00411 result __ret = codecvt_base::error; 00412 if (__state.good()) 00413 { 00414 const descriptor_type& __desc = __state.in_descriptor(); 00415 const size_t __fmultiple = sizeof(extern_type); 00416 size_t __flen = __fmultiple * (__from_end - __from); 00417 const size_t __tmultiple = sizeof(intern_type); 00418 size_t __tlen = __tmultiple * (__to_end - __to); 00419 00420 // Argument list for iconv specifies a byte sequence. Thus, 00421 // all to/from arrays must be brutally casted to char*. 00422 char* __cto = reinterpret_cast<char*>(__to); 00423 char* __cfrom; 00424 size_t __conv; 00425 00426 // Some encodings need a byte order marker as the first item 00427 // in the byte stream, to designate endian-ness. The default 00428 // value for the byte order marker is NULL, so if this is 00429 // the case, it's not necessary and we can just go on our 00430 // merry way. 00431 int __ext_bom = __state.external_bom(); 00432 if (__ext_bom) 00433 { 00434 size_t __size = __from_end - __from; 00435 extern_type* __cfixed = static_cast<extern_type*> 00436 (__builtin_alloca(sizeof(extern_type) * (__size + 1))); 00437 __cfixed[0] = static_cast<extern_type>(__ext_bom); 00438 char_traits<extern_type>::copy(__cfixed + 1, __from, __size); 00439 __cfrom = reinterpret_cast<char*>(__cfixed); 00440 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 00441 &__flen, &__cto, &__tlen); 00442 } 00443 else 00444 { 00445 extern_type* __cfixed = const_cast<extern_type*>(__from); 00446 __cfrom = reinterpret_cast<char*>(__cfixed); 00447 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 00448 &__flen, &__cto, &__tlen); 00449 } 00450 00451 00452 if (__conv != size_t(-1)) 00453 { 00454 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 00455 __to_next = reinterpret_cast<intern_type*>(__cto); 00456 __ret = codecvt_base::ok; 00457 } 00458 else 00459 { 00460 if (__flen < static_cast<size_t>(__from_end - __from)) 00461 { 00462 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 00463 __to_next = reinterpret_cast<intern_type*>(__cto); 00464 __ret = codecvt_base::partial; 00465 } 00466 else 00467 __ret = codecvt_base::error; 00468 } 00469 } 00470 return __ret; 00471 } 00472 00473 template<typename _InternT, typename _ExternT> 00474 int 00475 codecvt<_InternT, _ExternT, encoding_state>:: 00476 do_encoding() const throw() 00477 { 00478 int __ret = 0; 00479 if (sizeof(_ExternT) <= sizeof(_InternT)) 00480 __ret = sizeof(_InternT) / sizeof(_ExternT); 00481 return __ret; 00482 } 00483 00484 template<typename _InternT, typename _ExternT> 00485 bool 00486 codecvt<_InternT, _ExternT, encoding_state>:: 00487 do_always_noconv() const throw() 00488 { return false; } 00489 00490 template<typename _InternT, typename _ExternT> 00491 int 00492 codecvt<_InternT, _ExternT, encoding_state>:: 00493 do_length(state_type&, const extern_type* __from, 00494 const extern_type* __end, size_t __max) const 00495 { return std::min(__max, static_cast<size_t>(__end - __from)); } 00496 00497 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00498 // 74. Garbled text for codecvt::do_max_length 00499 template<typename _InternT, typename _ExternT> 00500 int 00501 codecvt<_InternT, _ExternT, encoding_state>:: 00502 do_max_length() const throw() 00503 { return 1; } 00504 00505 _GLIBCXX_END_NAMESPACE 00506 00507 #endif