MyGUI  3.2.0
MyGUI_UString.cpp
Go to the documentation of this file.
1 
6 /*
7  This file is part of MyGUI.
8 
9  MyGUI is free software: you can redistribute it and/or modify
10  it under the terms of the GNU Lesser General Public License as published by
11  the Free Software Foundation, either version 3 of the License, or
12  (at your option) any later version.
13 
14  MyGUI is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU Lesser General Public License for more details.
18 
19  You should have received a copy of the GNU Lesser General Public License
20  along with MyGUI. If not, see <http://www.gnu.org/licenses/>.
21 */
22 #include "MyGUI_Precompiled.h"
23 #include "MyGUI_UString.h"
24 
25 namespace MyGUI
26 {
27 
28  //--------------------------------------------------------------------------
30  {
31  mString = 0;
32  }
33  //--------------------------------------------------------------------------
35  {
36  mIter += c;
37  }
38  //--------------------------------------------------------------------------
40  {
41  mIter -= c;
42  }
43  //--------------------------------------------------------------------------
45  {
46  mIter = i.mIter;
47  mString = i.mString;
48  }
49  //--------------------------------------------------------------------------
51  {
52  return mIter == mString->mData.begin();
53  }
54  //--------------------------------------------------------------------------
56  {
57  return mIter == mString->mData.end();
58  }
59  //--------------------------------------------------------------------------
61  {
62  return mIter - mString->mData.begin();
63  }
64  //--------------------------------------------------------------------------
66  {
67  mIter = mString->mData.begin() + index;
68  }
69  //--------------------------------------------------------------------------
71  {
72  size_type current_index = _get_index();
73  return mString->getChar( current_index );
74  }
75  //--------------------------------------------------------------------------
77  {
78  size_type current_index = _get_index();
79  int change = mString->setChar( current_index, uc );
80  _jump_to( current_index );
81  return change;
82  }
83  //--------------------------------------------------------------------------
85  {
86  _seekFwd( 1 ); // move 1 code point forward
87  if ( _test_end() ) return; // exit if we hit the end
88  if ( _utf16_surrogate_follow( mIter[0] ) ) {
89  // landing on a follow code point means we might be part of a bigger character
90  // so we test for that
91  code_point lead_half = 0;
92  //NB: we can't possibly be at the beginning here, so no need to test
93  lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
94  if ( _utf16_surrogate_lead( lead_half ) ) {
95  _seekFwd( 1 ); // if so, then advance 1 more code point
96  }
97  }
98  }
99  //--------------------------------------------------------------------------
101  {
102  _seekRev( 1 ); // move 1 code point backwards
103  if ( _test_begin() ) return; // exit if we hit the beginning
104  if ( _utf16_surrogate_follow( mIter[0] ) ) {
105  // landing on a follow code point means we might be part of a bigger character
106  // so we test for that
107  code_point lead_half = 0;
108  lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
109  if ( _utf16_surrogate_lead( lead_half ) ) {
110  _seekRev( 1 ); // if so, then rewind 1 more code point
111  }
112  }
113  }
114  //--------------------------------------------------------------------------
115  //--------------------------------------------------------------------------
116  //--------------------------------------------------------------------------
117  //--------------------------------------------------------------------------
119  {
120 
121  }
122  //--------------------------------------------------------------------------
124  {
125  _become( i );
126  }
127  //--------------------------------------------------------------------------
129  {
130  _seekFwd( 1 );
131  return *this;
132  }
133  //--------------------------------------------------------------------------
135  {
136  _fwd_iterator tmp( *this );
137  _seekFwd( 1 );
138  return tmp;
139  }
140  //--------------------------------------------------------------------------
142  {
143  _seekRev( 1 );
144  return *this;
145  }
146  //--------------------------------------------------------------------------
148  {
149  _fwd_iterator tmp( *this );
150  _seekRev( 1 );
151  return tmp;
152  }
153  //--------------------------------------------------------------------------
155  {
156  _fwd_iterator tmp( *this );
157  if ( n < 0 )
158  tmp._seekRev( -n );
159  else
160  tmp._seekFwd( n );
161  return tmp;
162  }
163  //--------------------------------------------------------------------------
165  {
166  _fwd_iterator tmp( *this );
167  if ( n < 0 )
168  tmp._seekFwd( -n );
169  else
170  tmp._seekRev( n );
171  return tmp;
172  }
173  //--------------------------------------------------------------------------
175  {
176  if ( n < 0 )
177  _seekRev( -n );
178  else
179  _seekFwd( n );
180  return *this;
181  }
182  //--------------------------------------------------------------------------
184  {
185  if ( n < 0 )
186  _seekFwd( -n );
187  else
188  _seekRev( n );
189  return *this;
190  }
191  //--------------------------------------------------------------------------
193  {
194  return *mIter;
195  }
196  //--------------------------------------------------------------------------
198  {
199  _fwd_iterator tmp( *this );
200  tmp += n;
201  return *tmp;
202  }
203  //--------------------------------------------------------------------------
205  {
206  _moveNext();
207  return *this;
208  }
209  //--------------------------------------------------------------------------
211  {
212  _movePrev();
213  return *this;
214  }
215  //--------------------------------------------------------------------------
217  {
218  return _getCharacter();
219  }
220  //--------------------------------------------------------------------------
222  {
223  return _setCharacter( uc );
224  }
225  //--------------------------------------------------------------------------
226  //--------------------------------------------------------------------------
227  //--------------------------------------------------------------------------
228  //--------------------------------------------------------------------------
230  {
231 
232  }
233  //--------------------------------------------------------------------------
235  {
236  _become( i );
237  }
238  //--------------------------------------------------------------------------
240  {
241  _become( i );
242  }
243  //--------------------------------------------------------------------------
245  {
246  _seekFwd( 1 );
247  return *this;
248  }
249  //--------------------------------------------------------------------------
251  {
252  _const_fwd_iterator tmp( *this );
253  _seekFwd( 1 );
254  return tmp;
255  }
256  //--------------------------------------------------------------------------
258  {
259  _seekRev( 1 );
260  return *this;
261  }
262  //--------------------------------------------------------------------------
264  {
265  _const_fwd_iterator tmp( *this );
266  _seekRev( 1 );
267  return tmp;
268  }
269  //--------------------------------------------------------------------------
271  {
272  _const_fwd_iterator tmp( *this );
273  if ( n < 0 )
274  tmp._seekRev( -n );
275  else
276  tmp._seekFwd( n );
277  return tmp;
278  }
279  //--------------------------------------------------------------------------
281  {
282  _const_fwd_iterator tmp( *this );
283  if ( n < 0 )
284  tmp._seekFwd( -n );
285  else
286  tmp._seekRev( n );
287  return tmp;
288  }
289  //--------------------------------------------------------------------------
291  {
292  if ( n < 0 )
293  _seekRev( -n );
294  else
295  _seekFwd( n );
296  return *this;
297  }
298  //--------------------------------------------------------------------------
300  {
301  if ( n < 0 )
302  _seekFwd( -n );
303  else
304  _seekRev( n );
305  return *this;
306  }
307  //--------------------------------------------------------------------------
309  {
310  return *mIter;
311  }
312  //--------------------------------------------------------------------------
314  {
315  _const_fwd_iterator tmp( *this );
316  tmp += n;
317  return *tmp;
318  }
319  //--------------------------------------------------------------------------
321  {
322  _moveNext();
323  return *this;
324  }
325  //--------------------------------------------------------------------------
327  {
328  _movePrev();
329  return *this;
330  }
331  //--------------------------------------------------------------------------
333  {
334  return _getCharacter();
335  }
336  //--------------------------------------------------------------------------
337  //--------------------------------------------------------------------------
338  //--------------------------------------------------------------------------
339  //--------------------------------------------------------------------------
341  {
342 
343  }
344  //--------------------------------------------------------------------------
346  {
347  _become( i );
348  }
349  //--------------------------------------------------------------------------
351  {
352  _seekRev( 1 );
353  return *this;
354  }
355  //--------------------------------------------------------------------------
357  {
358  _rev_iterator tmp( *this );
359  _seekRev( 1 );
360  return tmp;
361  }
362  //--------------------------------------------------------------------------
364  {
365  _seekFwd( 1 );
366  return *this;
367  }
368  //--------------------------------------------------------------------------
370  {
371  _rev_iterator tmp( *this );
372  _seekFwd( 1 );
373  return tmp;
374  }
375  //--------------------------------------------------------------------------
377  {
378  _rev_iterator tmp( *this );
379  if ( n < 0 )
380  tmp._seekFwd( -n );
381  else
382  tmp._seekRev( n );
383  return tmp;
384  }
385  //--------------------------------------------------------------------------
387  {
388  _rev_iterator tmp( *this );
389  if ( n < 0 )
390  tmp._seekRev( -n );
391  else
392  tmp._seekFwd( n );
393  return tmp;
394  }
395  //--------------------------------------------------------------------------
397  {
398  if ( n < 0 )
399  _seekFwd( -n );
400  else
401  _seekRev( n );
402  return *this;
403  }
404  //--------------------------------------------------------------------------
406  {
407  if ( n < 0 )
408  _seekRev( -n );
409  else
410  _seekFwd( n );
411  return *this;
412  }
413  //--------------------------------------------------------------------------
415  {
416  return mIter[-1];
417  }
418  //--------------------------------------------------------------------------
420  {
421  _rev_iterator tmp( *this );
422  tmp -= n;
423  return *tmp;
424  }
425  //--------------------------------------------------------------------------
426  //--------------------------------------------------------------------------
427  //--------------------------------------------------------------------------
428  //--------------------------------------------------------------------------
430  {
431 
432  }
433  //--------------------------------------------------------------------------
435  {
436  _become( i );
437  }
438  //--------------------------------------------------------------------------
440  {
441  _become( i );
442  }
443  //--------------------------------------------------------------------------
445  {
446  _seekRev( 1 );
447  return *this;
448  }
449  //--------------------------------------------------------------------------
451  {
452  _const_rev_iterator tmp( *this );
453  _seekRev( 1 );
454  return tmp;
455  }
456  //--------------------------------------------------------------------------
458  {
459  _seekFwd( 1 );
460  return *this;
461  }
462  //--------------------------------------------------------------------------
464  {
465  _const_rev_iterator tmp( *this );
466  _seekFwd( 1 );
467  return tmp;
468  }
469  //--------------------------------------------------------------------------
471  {
472  _const_rev_iterator tmp( *this );
473  if ( n < 0 )
474  tmp._seekFwd( -n );
475  else
476  tmp._seekRev( n );
477  return tmp;
478  }
479  //--------------------------------------------------------------------------
481  {
482  _const_rev_iterator tmp( *this );
483  if ( n < 0 )
484  tmp._seekRev( -n );
485  else
486  tmp._seekFwd( n );
487  return tmp;
488  }
489  //--------------------------------------------------------------------------
491  {
492  if ( n < 0 )
493  _seekFwd( -n );
494  else
495  _seekRev( n );
496  return *this;
497  }
498  //--------------------------------------------------------------------------
500  {
501  if ( n < 0 )
502  _seekRev( -n );
503  else
504  _seekFwd( n );
505  return *this;
506  }
507  //--------------------------------------------------------------------------
509  {
510  return mIter[-1];
511  }
512  //--------------------------------------------------------------------------
514  {
515  _const_rev_iterator tmp( *this );
516  tmp -= n;
517  return *tmp;
518  }
519  //--------------------------------------------------------------------------
520  //--------------------------------------------------------------------------
521  //--------------------------------------------------------------------------
522  //--------------------------------------------------------------------------
524  {
525  _init();
526  }
527  //--------------------------------------------------------------------------
528  UString::UString( const UString& copy )
529  {
530  _init();
531  mData = copy.mData;
532  }
533  //--------------------------------------------------------------------------
535  {
536  _init();
537  assign( length, ch );
538  }
539  //--------------------------------------------------------------------------
541  {
542  _init();
543  assign( str );
544  }
545  //--------------------------------------------------------------------------
547  {
548  _init();
549  assign( str, length );
550  }
551  //--------------------------------------------------------------------------
553  {
554  _init();
555  assign( str, index, length );
556  }
557  //--------------------------------------------------------------------------
558 #if MYGUI_IS_NATIVE_WCHAR_T
559  UString::UString( const wchar_t* w_str )
560  {
561  _init();
562  assign( w_str );
563  }
564  //--------------------------------------------------------------------------
565  UString::UString( const wchar_t* w_str, size_type length )
566  {
567  _init();
568  assign( w_str, length );
569  }
570 #endif
571  //--------------------------------------------------------------------------
572  UString::UString( const std::wstring& wstr )
573  {
574  _init();
575  assign( wstr );
576  }
577  //--------------------------------------------------------------------------
578  UString::UString( const char* c_str )
579  {
580  _init();
581  assign( c_str );
582  }
583  //--------------------------------------------------------------------------
585  {
586  _init();
587  assign( c_str, length );
588  }
589  //--------------------------------------------------------------------------
590  UString::UString( const std::string& str )
591  {
592  _init();
593  assign( str );
594  }
595  //--------------------------------------------------------------------------
597  {
598  _cleanBuffer();
599  }
600  //--------------------------------------------------------------------------
602  {
603  return mData.size();
604  }
605  //--------------------------------------------------------------------------
607  {
608  return size();
609  }
610  //--------------------------------------------------------------------------
612  {
613  const_iterator i = begin(), ie = end();
614  size_type c = 0;
615  while ( i != ie ) {
616  i.moveNext();
617  ++c;
618  }
619  return c;
620  }
621  //--------------------------------------------------------------------------
623  {
624  return mData.max_size();
625  }
626  //--------------------------------------------------------------------------
628  {
629  mData.reserve( size );
630  }
631  //--------------------------------------------------------------------------
632  void UString::resize( size_type num, const code_point& val /*= 0 */ )
633  {
634  mData.resize( num, val );
635  }
636  //--------------------------------------------------------------------------
637  void UString::swap( UString& from )
638  {
639  mData.swap( from.mData );
640  }
641  //--------------------------------------------------------------------------
642  bool UString::empty() const
643  {
644  return mData.empty();
645  }
646  //--------------------------------------------------------------------------
648  {
649  return mData.c_str();
650  }
651  //--------------------------------------------------------------------------
653  {
654  return c_str();
655  }
656  //--------------------------------------------------------------------------
658  {
659  return mData.capacity();
660  }
661  //--------------------------------------------------------------------------
663  {
664  mData.clear();
665  }
666  //--------------------------------------------------------------------------
667  UString UString::substr( size_type index, size_type num /*= npos */ ) const
668  {
669  // this could avoid the extra copy if we used a private specialty constructor
670  dstring data = mData.substr( index, num );
671  UString tmp;
672  tmp.mData.swap( data );
673  return tmp;
674  }
675  //--------------------------------------------------------------------------
677  {
678  code_point cp[2];
679  size_t c = _utf32_to_utf16( val, cp );
680  if ( c > 0 ) push_back( cp[0] );
681  if ( c > 1 ) push_back( cp[1] );
682  }
683  //--------------------------------------------------------------------------
684 #if MYGUI_IS_NATIVE_WCHAR_T
685  void UString::push_back( wchar_t val )
686  {
687  // we do this because the Unicode method still preserves UTF-16 code points
688  mData.push_back( static_cast<code_point>( val ) );
689  }
690 #endif
691  //--------------------------------------------------------------------------
693  {
694  mData.push_back( val );
695  }
696 
697  void UString::push_back( char val )
698  {
699  mData.push_back( static_cast<code_point>( val ) );
700  }
701 
703  {
704  const_iterator i, ie = end();
705  for ( i = begin(); i != ie; i.moveNext() ) {
706  if ( i.getCharacter() == ch )
707  return true;
708  }
709  return false;
710  }
711 
712  const std::string& UString::asUTF8() const
713  {
714  _load_buffer_UTF8();
715  return *m_buffer.mStrBuffer;
716  }
717 
718  const char* UString::asUTF8_c_str() const
719  {
720  _load_buffer_UTF8();
721  return m_buffer.mStrBuffer->c_str();
722  }
723 
725  {
726  _load_buffer_UTF32();
727  return *m_buffer.mUTF32StrBuffer;
728  }
729 
731  {
732  _load_buffer_UTF32();
733  return m_buffer.mUTF32StrBuffer->c_str();
734  }
735 
736  const std::wstring& UString::asWStr() const
737  {
738  _load_buffer_WStr();
739  return *m_buffer.mWStrBuffer;
740  }
741 
742  const wchar_t* UString::asWStr_c_str() const
743  {
744  _load_buffer_WStr();
745  return m_buffer.mWStrBuffer->c_str();
746  }
747 
749  {
750  return mData.at( loc );
751  }
752 
754  {
755  return mData.at( loc );
756  }
757 
759  {
760  const code_point* ptr = c_str();
761  unicode_char uc;
762  size_t l = _utf16_char_length( ptr[loc] );
763  code_point cp[2] = { /* blame the code beautifier */
764  0, 0
765  };
766  cp[0] = ptr[loc];
767 
768  if ( l == 2 && ( loc + 1 ) < mData.length() ) {
769  cp[1] = ptr[loc+1];
770  }
771  _utf16_to_utf32( cp, uc );
772  return uc;
773  }
774 
776  {
777  code_point cp[2] = { /* blame the code beautifier */
778  0, 0
779  };
780  size_t l = _utf32_to_utf16( ch, cp );
781  unicode_char existingChar = getChar( loc );
782  size_t existingSize = _utf16_char_length( existingChar );
783  size_t newSize = _utf16_char_length( ch );
784 
785  if ( newSize > existingSize ) {
786  at( loc ) = cp[0];
787  insert( loc + 1, 1, cp[1] );
788  return 1;
789  }
790  if ( newSize < existingSize ) {
791  erase( loc, 1 );
792  at( loc ) = cp[0];
793  return -1;
794  }
795 
796  // newSize == existingSize
797  at( loc ) = cp[0];
798  if ( l == 2 ) at( loc + 1 ) = cp[1];
799  return 0;
800  }
801 
803  {
804  iterator i;
805  i.mIter = mData.begin();
806  i.mString = this;
807  return i;
808  }
809 
811  {
812  const_iterator i;
813  i.mIter = const_cast<UString*>( this )->mData.begin();
814  i.mString = const_cast<UString*>( this );
815  return i;
816  }
817 
819  {
820  iterator i;
821  i.mIter = mData.end();
822  i.mString = this;
823  return i;
824  }
825 
827  {
828  const_iterator i;
829  i.mIter = const_cast<UString*>( this )->mData.end();
830  i.mString = const_cast<UString*>( this );
831  return i;
832  }
833 
835  {
837  i.mIter = mData.end();
838  i.mString = this;
839  return i;
840  }
841 
843  {
845  i.mIter = const_cast<UString*>( this )->mData.end();
846  i.mString = const_cast<UString*>( this );
847  return i;
848  }
849 
851  {
853  i.mIter = mData.begin();
854  i.mString = this;
855  return i;
856  }
857 
859  {
861  i.mIter = const_cast<UString*>( this )->mData.begin();
862  i.mString = const_cast<UString*>( this );
863  return i;
864  }
865 
867  {
868  mData.assign( start.mIter, end.mIter );
869  return *this;
870  }
871 
873  {
874  mData.assign( str.mData );
875  return *this;
876  }
877 
879  {
880  mData.assign( str );
881  return *this;
882  }
883 
885  {
886  mData.assign( str, num );
887  return *this;
888  }
889 
891  {
892  mData.assign( str.mData, index, len );
893  return *this;
894  }
895 
897  {
898  mData.assign( num, ch );
899  return *this;
900  }
901 
902  UString& UString::assign( const std::wstring& wstr )
903  {
904  mData.clear();
905  mData.reserve( wstr.length() ); // best guess bulk allocate
906 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
907  code_point tmp;
908  std::wstring::const_iterator i, ie = wstr.end();
909  for ( i = wstr.begin(); i != ie; i++ ) {
910  tmp = static_cast<code_point>( *i );
911  mData.push_back( tmp );
912  }
913 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
914  code_point cp[3] = {0, 0, 0};
915  unicode_char tmp;
916  std::wstring::const_iterator i, ie = wstr.end();
917  for ( i = wstr.begin(); i != ie; i++ ) {
918  tmp = static_cast<unicode_char>( *i );
919  size_t l = _utf32_to_utf16( tmp, cp );
920  if ( l > 0 ) mData.push_back( cp[0] );
921  if ( l > 1 ) mData.push_back( cp[1] );
922  }
923 #endif
924  return *this;
925  }
926 
927 #if MYGUI_IS_NATIVE_WCHAR_T
928  UString& UString::assign( const wchar_t* w_str )
929  {
930  std::wstring tmp;
931  tmp.assign( w_str );
932  return assign( tmp );
933  }
934 
935  UString& UString::assign( const wchar_t* w_str, size_type num )
936  {
937  std::wstring tmp;
938  tmp.assign( w_str, num );
939  return assign( tmp );
940  }
941 #endif
942 
943  UString& UString::assign( const std::string& str )
944  {
945  size_type len = _verifyUTF8( str );
946  clear(); // empty our contents, if there are any
947  reserve( len ); // best guess bulk capacity growth
948 
949  // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
950  // then converting it to UTF-16, then finally appending the data buffer
951 
952  unicode_char uc; // temporary Unicode character buffer
953  unsigned char utf8buf[7]; // temporary UTF-8 buffer
954  utf8buf[6] = 0;
955  size_t utf8len; // UTF-8 length
956  code_point utf16buff[3]; // temporary UTF-16 buffer
957  utf16buff[2] = 0;
958  size_t utf16len; // UTF-16 length
959 
960  std::string::const_iterator i, ie = str.end();
961  for ( i = str.begin(); i != ie; i++ ) {
962  utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load
963  for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes
964  utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
965  }
966  utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
967  utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion
968  i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop
969 
970  utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion
971  append( utf16buff, utf16len ); // append the characters to the string
972  }
973  return *this;
974  }
975 
976  UString& UString::assign( const char* c_str )
977  {
978  std::string tmp( c_str );
979  return assign( tmp );
980  }
981 
982  UString& UString::assign( const char* c_str, size_type num )
983  {
984  std::string tmp;
985  tmp.assign( c_str, num );
986  return assign( tmp );
987  }
988 
990  {
991  mData.append( str.mData );
992  return *this;
993  }
994 
996  {
997  mData.append( str );
998  return *this;
999  }
1000 
1002  {
1003  mData.append( str.mData, index, len );
1004  return *this;
1005  }
1006 
1008  {
1009  mData.append( str, num );
1010  return *this;
1011  }
1012 
1014  {
1015  mData.append( num, ch );
1016  return *this;
1017  }
1018 
1020  {
1021  mData.append( start.mIter, end.mIter );
1022  return *this;
1023  }
1024 
1025 #if MYGUI_IS_NATIVE_WCHAR_T
1026  UString& UString::append( const wchar_t* w_str, size_type num )
1027  {
1028  std::wstring tmp( w_str, num );
1029  return append( tmp );
1030  }
1031 
1032  UString& UString::append( size_type num, wchar_t ch )
1033  {
1034  return append( num, static_cast<unicode_char>( ch ) );
1035  }
1036 #endif
1038  {
1039  UString tmp( c_str, num );
1040  append( tmp );
1041  return *this;
1042  }
1043 
1045  {
1046  append( num, static_cast<code_point>( ch ) );
1047  return *this;
1048  }
1049 
1051  {
1052  code_point cp[2] = {0, 0};
1053  if ( _utf32_to_utf16( ch, cp ) == 2 ) {
1054  for ( size_type i = 0; i < num; i++ ) {
1055  append( 1, cp[0] );
1056  append( 1, cp[1] );
1057  }
1058  } else {
1059  for ( size_type i = 0; i < num; i++ ) {
1060  append( 1, cp[0] );
1061  }
1062  }
1063  return *this;
1064  }
1065 
1067  {
1068  iterator ret;
1069  ret.mIter = mData.insert( i.mIter, ch );
1070  ret.mString = this;
1071  return ret;
1072  }
1073 
1075  {
1076  mData.insert( index, str.mData );
1077  return *this;
1078  }
1079 
1080  UString& UString::insert( size_type index1, const UString& str, size_type index2, size_type num )
1081  {
1082  mData.insert( index1, str.mData, index2, num );
1083  return *this;
1084  }
1085 
1087  {
1088  mData.insert( i.mIter, start.mIter, end.mIter );
1089  }
1090 
1092  {
1093  mData.insert( index, str, num );
1094  return *this;
1095  }
1096 
1097 #if MYGUI_IS_NATIVE_WCHAR_T
1098  UString& UString::insert( size_type index, const wchar_t* w_str, size_type num )
1099  {
1100  UString tmp( w_str, num );
1101  insert( index, tmp );
1102  return *this;
1103  }
1104 #endif
1105 
1106  UString& UString::insert( size_type index, const char* c_str, size_type num )
1107  {
1108  UString tmp( c_str, num );
1109  insert( index, tmp );
1110  return *this;
1111  }
1112 
1114  {
1115  mData.insert( index, num, ch );
1116  return *this;
1117  }
1118 
1119 #if MYGUI_IS_NATIVE_WCHAR_T
1120  UString& UString::insert( size_type index, size_type num, wchar_t ch )
1121  {
1122  insert( index, num, static_cast<unicode_char>( ch ) );
1123  return *this;
1124  }
1125 #endif
1126 
1127  UString& UString::insert( size_type index, size_type num, char ch )
1128  {
1129  insert( index, num, static_cast<code_point>( ch ) );
1130  return *this;
1131  }
1132 
1134  {
1135  code_point cp[3] = {0, 0, 0};
1136  size_t l = _utf32_to_utf16( ch, cp );
1137  if ( l == 1 ) {
1138  return insert( index, num, cp[0] );
1139  }
1140  for ( size_type c = 0; c < num; c++ ) {
1141  // insert in reverse order to preserve ordering after insert
1142  insert( index, 1, cp[1] );
1143  insert( index, 1, cp[0] );
1144  }
1145  return *this;
1146  }
1147 
1148  void UString::insert( iterator i, size_type num, const code_point& ch )
1149  {
1150  mData.insert( i.mIter, num, ch );
1151  }
1152 #if MYGUI_IS_NATIVE_WCHAR_T
1153  void UString::insert( iterator i, size_type num, const wchar_t& ch )
1154  {
1155  insert( i, num, static_cast<unicode_char>( ch ) );
1156  }
1157 #endif
1158 
1159  void UString::insert( iterator i, size_type num, const char& ch )
1160  {
1161  insert( i, num, static_cast<code_point>( ch ) );
1162  }
1163 
1165  {
1166  code_point cp[3] = {0, 0, 0};
1167  size_t l = _utf32_to_utf16( ch, cp );
1168  if ( l == 1 ) {
1169  insert( i, num, cp[0] );
1170  } else {
1171  for ( size_type c = 0; c < num; c++ ) {
1172  // insert in reverse order to preserve ordering after insert
1173  insert( i, 1, cp[1] );
1174  insert( i, 1, cp[0] );
1175  }
1176  }
1177  }
1178 
1180  {
1181  iterator ret;
1182  ret.mIter = mData.erase( loc.mIter );
1183  ret.mString = this;
1184  return ret;
1185  }
1186 
1188  {
1189  iterator ret;
1190  ret.mIter = mData.erase( start.mIter, end.mIter );
1191  ret.mString = this;
1192  return ret;
1193  }
1194 
1195  UString& UString::erase( size_type index /*= 0*/, size_type num /*= npos */ )
1196  {
1197  if ( num == npos )
1198  mData.erase( index );
1199  else
1200  mData.erase( index, num );
1201  return *this;
1202  }
1203 
1204  UString& UString::replace( size_type index1, size_type num1, const UString& str )
1205  {
1206  mData.replace( index1, num1, str.mData, 0, npos );
1207  return *this;
1208  }
1209 
1210  UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type num2 )
1211  {
1212  mData.replace( index1, num1, str.mData, 0, num2 );
1213  return *this;
1214  }
1215 
1216  UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
1217  {
1218  mData.replace( index1, num1, str.mData, index2, num2 );
1219  return *this;
1220  }
1221 
1222  UString& UString::replace( iterator start, iterator end, const UString& str, size_type num /*= npos */ )
1223  {
1224  _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1225 
1226  size_type index1 = begin() - st;
1227  size_type num1 = end - st;
1228  return replace( index1, num1, str, 0, num );
1229  }
1230 
1232  {
1233  mData.replace( index, num1, num2, ch );
1234  return *this;
1235  }
1236 
1238  {
1239  _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1240 
1241  size_type index1 = begin() - st;
1242  size_type num1 = end - st;
1243  return replace( index1, num1, num, ch );
1244  }
1245 
1246  int UString::compare( const UString& str ) const
1247  {
1248  return mData.compare( str.mData );
1249  }
1250 
1251  int UString::compare( const code_point* str ) const
1252  {
1253  return mData.compare( str );
1254  }
1255 
1256  int UString::compare( size_type index, size_type length, const UString& str ) const
1257  {
1258  return mData.compare( index, length, str.mData );
1259  }
1260 
1261  int UString::compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
1262  {
1263  return mData.compare( index, length, str.mData, index2, length2 );
1264  }
1265 
1266  int UString::compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
1267  {
1268  return mData.compare( index, length, str, length2 );
1269  }
1270 
1271 #if MYGUI_IS_NATIVE_WCHAR_T
1272  int UString::compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
1273  {
1274  UString tmp( w_str, length2 );
1275  return compare( index, length, tmp );
1276  }
1277 #endif
1278 
1279  int UString::compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
1280  {
1281  UString tmp( c_str, length2 );
1282  return compare( index, length, tmp );
1283  }
1284 
1285  UString::size_type UString::find( const UString& str, size_type index /*= 0 */ ) const
1286  {
1287  return mData.find( str.c_str(), index );
1288  }
1289 
1291  {
1292  UString tmp( cp_str );
1293  return mData.find( tmp.c_str(), index, length );
1294  }
1295 
1297  {
1298  UString tmp( c_str );
1299  return mData.find( tmp.c_str(), index, length );
1300  }
1301 
1302 #if MYGUI_IS_NATIVE_WCHAR_T
1303  UString::size_type UString::find( const wchar_t* w_str, size_type index, size_type length ) const
1304  {
1305  UString tmp( w_str );
1306  return mData.find( tmp.c_str(), index, length );
1307  }
1308 #endif
1309 
1310  UString::size_type UString::find( char ch, size_type index /*= 0 */ ) const
1311  {
1312  return find( static_cast<code_point>( ch ), index );
1313  }
1314 
1316  {
1317  return mData.find( ch, index );
1318  }
1319 
1320 #if MYGUI_IS_NATIVE_WCHAR_T
1321  UString::size_type UString::find( wchar_t ch, size_type index /*= 0 */ ) const
1322  {
1323  return find( static_cast<unicode_char>( ch ), index );
1324  }
1325 #endif
1326 
1328  {
1329  code_point cp[3] = {0, 0, 0};
1330  size_t l = _utf32_to_utf16( ch, cp );
1331  return find( UString( cp, l ), index );
1332  }
1333 
1334  UString::size_type UString::rfind( const UString& str, size_type index /*= 0 */ ) const
1335  {
1336  return mData.rfind( str.c_str(), index );
1337  }
1338 
1340  {
1341  UString tmp( cp_str );
1342  return mData.rfind( tmp.c_str(), index, num );
1343  }
1344 
1345  UString::size_type UString::rfind( const char* c_str, size_type index, size_type num ) const
1346  {
1347  UString tmp( c_str );
1348  return mData.rfind( tmp.c_str(), index, num );
1349  }
1350 
1351 #if MYGUI_IS_NATIVE_WCHAR_T
1352  UString::size_type UString::rfind( const wchar_t* w_str, size_type index, size_type num ) const
1353  {
1354  UString tmp( w_str );
1355  return mData.rfind( tmp.c_str(), index, num );
1356  }
1357 #endif
1358 
1359  UString::size_type UString::rfind( char ch, size_type index /*= 0 */ ) const
1360  {
1361  return rfind( static_cast<code_point>( ch ), index );
1362  }
1363 
1365  {
1366  return mData.rfind( ch, index );
1367  }
1368 
1369 #if MYGUI_IS_NATIVE_WCHAR_T
1370  UString::size_type UString::rfind( wchar_t ch, size_type index /*= 0 */ ) const
1371  {
1372  return rfind( static_cast<unicode_char>( ch ), index );
1373  }
1374 #endif
1375 
1377  {
1378  code_point cp[3] = {0, 0, 0};
1379  size_t l = _utf32_to_utf16( ch, cp );
1380  return rfind( UString( cp, l ), index );
1381  }
1382 
1383  UString::size_type UString::find_first_of( const UString &str, size_type index /*= 0*/, size_type num /*= npos */ ) const
1384  {
1385  size_type i = 0;
1386  const size_type len = length();
1387  while ( i < num && ( index + i ) < len ) {
1388  unicode_char ch = getChar( index + i );
1389  if ( str.inString( ch ) )
1390  return index + i;
1391  i += _utf16_char_length( ch ); // increment by the Unicode character length
1392  }
1393  return npos;
1394  }
1395 
1397  {
1398  UString tmp;
1399  tmp.assign( 1, ch );
1400  return find_first_of( tmp, index );
1401  }
1402 
1403  UString::size_type UString::find_first_of( char ch, size_type index /*= 0 */ ) const
1404  {
1405  return find_first_of( static_cast<code_point>( ch ), index );
1406  }
1407 
1408 #if MYGUI_IS_NATIVE_WCHAR_T
1409  UString::size_type UString::find_first_of( wchar_t ch, size_type index /*= 0 */ ) const
1410  {
1411  return find_first_of( static_cast<unicode_char>( ch ), index );
1412  }
1413 #endif
1414 
1416  {
1417  code_point cp[3] = {0, 0, 0};
1418  size_t l = _utf32_to_utf16( ch, cp );
1419  return find_first_of( UString( cp, l ), index );
1420  }
1421 
1422  UString::size_type UString::find_first_not_of( const UString& str, size_type index /*= 0*/, size_type num /*= npos */ ) const
1423  {
1424  size_type i = 0;
1425  const size_type len = length();
1426  while ( i < num && ( index + i ) < len ) {
1427  unicode_char ch = getChar( index + i );
1428  if ( !str.inString( ch ) )
1429  return index + i;
1430  i += _utf16_char_length( ch ); // increment by the Unicode character length
1431  }
1432  return npos;
1433  }
1434 
1436  {
1437  UString tmp;
1438  tmp.assign( 1, ch );
1439  return find_first_not_of( tmp, index );
1440  }
1441 
1443  {
1444  return find_first_not_of( static_cast<code_point>( ch ), index );
1445  }
1446 
1447 #if MYGUI_IS_NATIVE_WCHAR_T
1448  UString::size_type UString::find_first_not_of( wchar_t ch, size_type index /*= 0 */ ) const
1449  {
1450  return find_first_not_of( static_cast<unicode_char>( ch ), index );
1451  }
1452 #endif
1453 
1455  {
1456  code_point cp[3] = {0, 0, 0};
1457  size_t l = _utf32_to_utf16( ch, cp );
1458  return find_first_not_of( UString( cp, l ), index );
1459  }
1460 
1461  UString::size_type UString::find_last_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
1462  {
1463  size_type i = 0;
1464  const size_type len = length();
1465  if ( index > len ) index = len - 1;
1466 
1467  while ( i < num && ( index - i ) != npos ) {
1468  size_type j = index - i;
1469  // careful to step full Unicode characters
1470  if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
1471  j = index - ++i;
1472  }
1473  // and back to the usual dull test
1474  unicode_char ch = getChar( j );
1475  if ( str.inString( ch ) )
1476  return j;
1477  i++;
1478  }
1479  return npos;
1480  }
1481 
1483  {
1484  UString tmp;
1485  tmp.assign( 1, ch );
1486  return find_last_of( tmp, index );
1487  }
1488 
1489 #if MYGUI_IS_NATIVE_WCHAR_T
1490  UString::size_type UString::find_last_of( wchar_t ch, size_type index /*= npos */ ) const
1491  {
1492  return find_last_of( static_cast<unicode_char>( ch ), index );
1493  }
1494 #endif
1495 
1497  {
1498  code_point cp[3] = {0, 0, 0};
1499  size_t l = _utf32_to_utf16( ch, cp );
1500  return find_last_of( UString( cp, l ), index );
1501  }
1502 
1503  UString::size_type UString::find_last_not_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
1504  {
1505  size_type i = 0;
1506  const size_type len = length();
1507  if ( index > len ) index = len - 1;
1508 
1509  while ( i < num && ( index - i ) != npos ) {
1510  size_type j = index - i;
1511  // careful to step full Unicode characters
1512  if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
1513  j = index - ++i;
1514  }
1515  // and back to the usual dull test
1516  unicode_char ch = getChar( j );
1517  if ( !str.inString( ch ) )
1518  return j;
1519  i++;
1520  }
1521  return npos;
1522  }
1523 
1525  {
1526  UString tmp;
1527  tmp.assign( 1, ch );
1528  return find_last_not_of( tmp, index );
1529  }
1530 
1531  UString::size_type UString::find_last_not_of( char ch, size_type index /*= npos */ ) const
1532  {
1533  return find_last_not_of( static_cast<code_point>( ch ), index );
1534  }
1535 
1536 #if MYGUI_IS_NATIVE_WCHAR_T
1537  UString::size_type UString::find_last_not_of( wchar_t ch, size_type index /*= npos */ ) const
1538  {
1539  return find_last_not_of( static_cast<unicode_char>( ch ), index );
1540  }
1541 #endif
1542 
1544  {
1545  code_point cp[3] = {0, 0, 0};
1546  size_t l = _utf32_to_utf16( ch, cp );
1547  return find_last_not_of( UString( cp, l ), index );
1548  }
1549 
1550  bool UString::operator<( const UString& right ) const
1551  {
1552  return compare( right ) < 0;
1553  }
1554 
1555  bool UString::operator<=( const UString& right ) const
1556  {
1557  return compare( right ) <= 0;
1558  }
1559 
1561  {
1562  return assign( s );
1563  }
1564 
1566  {
1567  clear();
1568  return append( 1, ch );
1569  }
1570 
1572  {
1573  clear();
1574  return append( 1, ch );
1575  }
1576 
1577 #if MYGUI_IS_NATIVE_WCHAR_T
1578  UString& UString::operator=( wchar_t ch )
1579  {
1580  clear();
1581  return append( 1, ch );
1582  }
1583 #endif
1584 
1586  {
1587  clear();
1588  return append( 1, ch );
1589  }
1590 
1591  bool UString::operator>( const UString& right ) const
1592  {
1593  return compare( right ) > 0;
1594  }
1595 
1596  bool UString::operator>=( const UString& right ) const
1597  {
1598  return compare( right ) >= 0;
1599  }
1600 
1601  bool UString::operator==( const UString& right ) const
1602  {
1603  return compare( right ) == 0;
1604  }
1605 
1606  bool UString::operator!=( const UString& right ) const
1607  {
1608  return !operator==( right );
1609  }
1610 
1612  {
1613  return at( index );
1614  }
1615 
1617  {
1618  return at( index );
1619  }
1620 
1621  UString::operator std::string() const
1622  {
1623  return std::string( asUTF8() );
1624  }
1625 
1627  UString::operator std::wstring() const
1628  {
1629  return std::wstring( asWStr() );
1630  }
1631 
1632 
1634  {
1635  if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range
1636  return false; // it matches a surrogate pair signature
1637  return true; // everything else is a standalone code point
1638  }
1639 
1641  {
1642  if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair
1643  return true; // it is a 1st word
1644  return false; // it isn't
1645  }
1646 
1648  {
1649  if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair
1650  return true; // it is a 2nd word
1651  return false; // everything else isn't
1652  }
1653 
1655  {
1656  if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair
1657  return 2; // if it is, then we are 2 words long
1658  return 1; // otherwise we are only 1 word long
1659  }
1660 
1662  {
1663  if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum
1664  return 2; // if so, we need a surrogate pair
1665  return 1; // otherwise we can stuff it into a single word
1666  }
1667 
1668  size_t UString::_utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
1669  {
1670  const code_point& cp1 = in_cp[0];
1671  const code_point& cp2 = in_cp[1];
1672  bool wordPair = false;
1673 
1674  // does it look like a surrogate pair?
1675  if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
1676  // looks like one, but does the other half match the algorithm as well?
1677  if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
1678  wordPair = true; // yep!
1679  }
1680 
1681  if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value
1682  out_uc = cp1;
1683  return 1;
1684  }
1685 
1686  unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers
1687  cU -= 0xD800; // remove the encoding markers
1688  cL -= 0xDC00;
1689 
1690  out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location
1691  out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits
1692  out_uc += 0x10000; // add back in the value offset
1693 
1694  return 2; // this whole operation takes to words, so that's what we'll return
1695  }
1696 
1697  size_t UString::_utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
1698  {
1699  if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them
1700  out_cp[0] = static_cast<code_point>(in_uc);
1701  return 1;
1702  }
1703  unicode_char uc = in_uc; // copy to writable buffer
1704  unsigned short tmp; // single code point buffer
1705  uc -= 0x10000; // subtract value offset
1706 
1707  //process upper word
1708  tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF); // grab the upper 10 bits
1709  tmp += 0xD800; // add encoding offset
1710  out_cp[0] = tmp; // write
1711 
1712  // process lower word
1713  tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits
1714  tmp += 0xDC00; // add encoding offset
1715  out_cp[1] = tmp; // write
1716 
1717  return 2; // return used word count (2 for surrogate pairs)
1718  }
1719 
1720  bool UString::_utf8_start_char( unsigned char cp )
1721  {
1722  return ( cp & ~_cont_mask ) != _cont;
1723  }
1724 
1725  size_t UString::_utf8_char_length( unsigned char cp )
1726  {
1727  if ( !( cp & 0x80 ) ) return 1;
1728  if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
1729  if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
1730  if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
1731  if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
1732  if (( cp & ~_lead5_mask ) == _lead5 ) return 6;
1733  throw invalid_data( "invalid UTF-8 sequence header value" );
1734  }
1735 
1737  {
1738  /*
1739  7 bit: U-00000000 - U-0000007F: 0xxxxxxx
1740  11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
1741  16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
1742  21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1743  26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1744  31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1745  */
1746  if ( !( uc & ~0x0000007F ) ) return 1;
1747  if ( !( uc & ~0x000007FF ) ) return 2;
1748  if ( !( uc & ~0x0000FFFF ) ) return 3;
1749  if ( !( uc & ~0x001FFFFF ) ) return 4;
1750  if ( !( uc & ~0x03FFFFFF ) ) return 5;
1751  if ( !( uc & ~0x7FFFFFFF ) ) return 6;
1752  throw invalid_data( "invalid UTF-32 value" );
1753  }
1754 
1755  size_t UString::_utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
1756  {
1757  size_t len = _utf8_char_length( in_cp[0] );
1758  if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit
1759  out_uc = in_cp[0];
1760  return 1;
1761  }
1762 
1763  unicode_char c = 0; // temporary buffer
1764  size_t i = 0;
1765  switch ( len ) { // load header byte
1766  case 6:
1767  c = in_cp[i] & _lead5_mask;
1768  break;
1769  case 5:
1770  c = in_cp[i] & _lead4_mask;
1771  break;
1772  case 4:
1773  c = in_cp[i] & _lead3_mask;
1774  break;
1775  case 3:
1776  c = in_cp[i] & _lead2_mask;
1777  break;
1778  case 2:
1779  c = in_cp[i] & _lead1_mask;
1780  break;
1781  }
1782 
1783  for ( ++i; i < len; i++ ) { // load each continuation byte
1784  if (( in_cp[i] & ~_cont_mask ) != _cont )
1785  throw invalid_data( "bad UTF-8 continuation byte" );
1786  c <<= 6;
1787  c |= ( in_cp[i] & _cont_mask );
1788  }
1789 
1790  out_uc = c; // write the final value and return the used byte length
1791  return len;
1792  }
1793 
1794  size_t UString::_utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
1795  {
1796  size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence
1797  unicode_char c = in_uc; // copy to temp buffer
1798 
1799  //stuff all of the lower bits
1800  for ( size_t i = len - 1; i > 0; i-- ) {
1801  out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
1802  c >>= 6;
1803  }
1804 
1805  //now write the header byte
1806  switch ( len ) {
1807  case 6:
1808  out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
1809  break;
1810  case 5:
1811  out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
1812  break;
1813  case 4:
1814  out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
1815  break;
1816  case 3:
1817  out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
1818  break;
1819  case 2:
1820  out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
1821  break;
1822  case 1:
1823  default:
1824  out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F);
1825  break;
1826  }
1827 
1828  // return the byte length of the sequence
1829  return len;
1830  }
1831 
1833  {
1834  std::string tmp( reinterpret_cast<const char*>( c_str ) );
1835  return _verifyUTF8( tmp );
1836  }
1837 
1838  UString::size_type UString::_verifyUTF8( const std::string& str )
1839  {
1840  std::string::const_iterator i, ie = str.end();
1841  i = str.begin();
1842  size_type length = 0;
1843 
1844  while ( i != ie ) {
1845  // characters pass until we find an extended sequence
1846  if (( *i ) & 0x80 ) {
1847  unsigned char c = ( *i );
1848  size_t contBytes = 0;
1849 
1850  // get continuation byte count and test for overlong sequences
1851  if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte
1852  if ( c == _lead1 ) throw invalid_data( "overlong UTF-8 sequence" );
1853  contBytes = 1;
1854 
1855  } else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes
1856  contBytes = 2;
1857  if ( c == _lead2 ) { // possible overlong UTF-8 sequence
1858  c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1859  if (( c & _lead2 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
1860  }
1861 
1862  } else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes
1863  contBytes = 3;
1864  if ( c == _lead3 ) { // possible overlong UTF-8 sequence
1865  c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1866  if (( c & _lead3 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
1867  }
1868 
1869  } else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes
1870  contBytes = 4;
1871  if ( c == _lead4 ) { // possible overlong UTF-8 sequence
1872  c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1873  if (( c & _lead4 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
1874  }
1875 
1876  } else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes
1877  contBytes = 5;
1878  if ( c == _lead5 ) { // possible overlong UTF-8 sequence
1879  c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1880  if (( c & _lead5 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
1881  }
1882  }
1883 
1884  // check remaining continuation bytes for
1885  while ( contBytes-- ) {
1886  c = ( *( ++i ) ); // get next byte in sequence
1887  if (( c & ~_cont_mask ) != _cont )
1888  throw invalid_data( "bad UTF-8 continuation byte" );
1889  }
1890  }
1891  length++;
1892  i++;
1893  }
1894  return length;
1895  }
1896 
1897  void UString::_init()
1898  {
1899  m_buffer.mVoidBuffer = 0;
1900  m_bufferType = bt_none;
1901  m_bufferSize = 0;
1902  }
1903 
1904  void UString::_cleanBuffer() const
1905  {
1906  if ( m_buffer.mVoidBuffer != 0 ) {
1907  switch ( m_bufferType ) {
1908  case bt_string:
1909  delete m_buffer.mStrBuffer;
1910  break;
1911  case bt_wstring:
1912  delete m_buffer.mWStrBuffer;
1913  break;
1914  case bt_utf32string:
1915  delete m_buffer.mUTF32StrBuffer;
1916  break;
1917  case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
1918  default:
1919  //delete m_buffer.mVoidBuffer;
1920  // delete void* is undefined, don't do that
1921  assert("This should never happen - mVoidBuffer should never contain something if we "
1922  "don't know the type");
1923  break;
1924  }
1925  m_buffer.mVoidBuffer = 0;
1926  m_bufferSize = 0;
1927  m_bufferType = bt_none;
1928  }
1929  }
1930 
1931  void UString::_getBufferStr() const
1932  {
1933  if ( m_bufferType != bt_string ) {
1934  _cleanBuffer();
1935  m_buffer.mStrBuffer = new std::string();
1936  m_bufferType = bt_string;
1937  }
1938  m_buffer.mStrBuffer->clear();
1939  }
1940 
1941  void UString::_getBufferWStr() const
1942  {
1943  if ( m_bufferType != bt_wstring ) {
1944  _cleanBuffer();
1945  m_buffer.mWStrBuffer = new std::wstring();
1946  m_bufferType = bt_wstring;
1947  }
1948  m_buffer.mWStrBuffer->clear();
1949  }
1950 
1951  void UString::_getBufferUTF32Str() const
1952  {
1953  if ( m_bufferType != bt_utf32string ) {
1954  _cleanBuffer();
1955  m_buffer.mUTF32StrBuffer = new utf32string();
1956  m_bufferType = bt_utf32string;
1957  }
1958  m_buffer.mUTF32StrBuffer->clear();
1959  }
1960 
1961  void UString::_load_buffer_UTF8() const
1962  {
1963  _getBufferStr();
1964  std::string& buffer = ( *m_buffer.mStrBuffer );
1965  buffer.reserve( length() );
1966 
1967  unsigned char utf8buf[6];
1968  char* charbuf = ( char* )utf8buf;
1969  unicode_char c;
1970  size_t len;
1971 
1972  const_iterator i, ie = end();
1973  for ( i = begin(); i != ie; i.moveNext() ) {
1974  c = i.getCharacter();
1975  len = _utf32_to_utf8( c, utf8buf );
1976  size_t j = 0;
1977  while ( j < len )
1978  buffer.push_back( charbuf[j++] );
1979  }
1980  }
1981 
1982  void UString::_load_buffer_WStr() const
1983  {
1984  _getBufferWStr();
1985  std::wstring& buffer = ( *m_buffer.mWStrBuffer );
1986  buffer.reserve( length() ); // may over reserve, but should be close enough
1987 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
1988  const_iterator i, ie = end();
1989  for ( i = begin(); i != ie; ++i ) {
1990  buffer.push_back(( wchar_t )( *i ) );
1991  }
1992 #else // wchar_t fits UTF-32
1993  unicode_char c;
1994  const_iterator i, ie = end();
1995  for ( i = begin(); i != ie; i.moveNext() ) {
1996  c = i.getCharacter();
1997  buffer.push_back(( wchar_t )c );
1998  }
1999 #endif
2000  }
2001 
2002  void UString::_load_buffer_UTF32() const
2003  {
2004  _getBufferUTF32Str();
2005  utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2006  buffer.reserve( length() ); // may over reserve, but should be close enough
2007 
2008  unicode_char c;
2009 
2010  const_iterator i, ie = end();
2011  for ( i = begin(); i != ie; i.moveNext() ) {
2012  c = i.getCharacter();
2013  buffer.push_back( c );
2014  }
2015  }
2016 
2017 } // namespace MyGUI