00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include <config.h>
00017
00018 #include <drizzled/internal/m_string.h>
00019 #include <drizzled/charset_info.h>
00020 #include <cerrno>
00021 #include <cstdio>
00022 #include <cstdlib>
00023
00024 #include <stdarg.h>
00025
00026 #include <algorithm>
00027
00028 using namespace std;
00029
00030 namespace drizzled
00031 {
00032
00033
00034
00035
00036
00037 size_t my_strnxfrmlen_simple(const CHARSET_INFO * const cs, size_t len)
00038 {
00039 return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
00040 }
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050 size_t my_snprintf_8bit(const CHARSET_INFO * const,
00051 char* to, size_t n,
00052 const char* fmt, ...)
00053 {
00054 va_list args;
00055 int result;
00056 va_start(args,fmt);
00057 result= vsnprintf(to, n, fmt, args);
00058 va_end(args);
00059 return result;
00060 }
00061
00062
00063 long my_strntol_8bit(const CHARSET_INFO * const cs,
00064 const char *nptr, size_t l, int base,
00065 char **endptr, int *err)
00066 {
00067 int negative;
00068 uint32_t cutoff;
00069 uint32_t cutlim;
00070 uint32_t i;
00071 const char *s;
00072 unsigned char c;
00073 const char *save, *e;
00074 int overflow;
00075
00076 *err= 0;
00077 #ifdef NOT_USED
00078 if (base < 0 || base == 1 || base > 36)
00079 base = 10;
00080 #endif
00081
00082 s = nptr;
00083 e = nptr+l;
00084
00085 for ( ; s<e && my_isspace(cs, *s) ; s++) {}
00086
00087 if (s == e)
00088 {
00089 goto noconv;
00090 }
00091
00092
00093 if (*s == '-')
00094 {
00095 negative = 1;
00096 ++s;
00097 }
00098 else if (*s == '+')
00099 {
00100 negative = 0;
00101 ++s;
00102 }
00103 else
00104 negative = 0;
00105
00106 #ifdef NOT_USED
00107 if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
00108 s += 2;
00109 #endif
00110
00111 #ifdef NOT_USED
00112 if (base == 0)
00113 {
00114 if (*s == '0')
00115 {
00116 if (s[1]=='X' || s[1]=='x')
00117 {
00118 s += 2;
00119 base = 16;
00120 }
00121 else
00122 base = 8;
00123 }
00124 else
00125 base = 10;
00126 }
00127 #endif
00128
00129 save = s;
00130 cutoff = (UINT32_MAX) / (uint32_t) base;
00131 cutlim = (uint32_t) ((UINT32_MAX) % (uint32_t) base);
00132
00133 overflow = 0;
00134 i = 0;
00135 for (c = *s; s != e; c = *++s)
00136 {
00137 if (c>='0' && c<='9')
00138 c -= '0';
00139 else if (c>='A' && c<='Z')
00140 c = c - 'A' + 10;
00141 else if (c>='a' && c<='z')
00142 c = c - 'a' + 10;
00143 else
00144 break;
00145 if (c >= base)
00146 break;
00147 if (i > cutoff || (i == cutoff && c > cutlim))
00148 overflow = 1;
00149 else
00150 {
00151 i *= (uint32_t) base;
00152 i += c;
00153 }
00154 }
00155
00156 if (s == save)
00157 goto noconv;
00158
00159 if (endptr != NULL)
00160 *endptr = (char *) s;
00161
00162 if (negative)
00163 {
00164 if (i > (uint32_t) INT32_MIN)
00165 overflow = 1;
00166 }
00167 else if (i > INT32_MAX)
00168 overflow = 1;
00169
00170 if (overflow)
00171 {
00172 err[0]= ERANGE;
00173 return negative ? INT32_MIN : INT32_MAX;
00174 }
00175
00176 return (negative ? -((long) i) : (long) i);
00177
00178 noconv:
00179 err[0]= EDOM;
00180 if (endptr != NULL)
00181 *endptr = (char *) nptr;
00182 return 0L;
00183 }
00184
00185
00186 ulong my_strntoul_8bit(const CHARSET_INFO * const cs,
00187 const char *nptr, size_t l, int base,
00188 char **endptr, int *err)
00189 {
00190 int negative;
00191 uint32_t cutoff;
00192 uint32_t cutlim;
00193 uint32_t i;
00194 const char *s;
00195 unsigned char c;
00196 const char *save, *e;
00197 int overflow;
00198
00199 *err= 0;
00200 #ifdef NOT_USED
00201 if (base < 0 || base == 1 || base > 36)
00202 base = 10;
00203 #endif
00204
00205 s = nptr;
00206 e = nptr+l;
00207
00208 for( ; s<e && my_isspace(cs, *s); s++) {}
00209
00210 if (s==e)
00211 {
00212 goto noconv;
00213 }
00214
00215 if (*s == '-')
00216 {
00217 negative = 1;
00218 ++s;
00219 }
00220 else if (*s == '+')
00221 {
00222 negative = 0;
00223 ++s;
00224 }
00225 else
00226 negative = 0;
00227
00228 #ifdef NOT_USED
00229 if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
00230 s += 2;
00231 #endif
00232
00233 #ifdef NOT_USED
00234 if (base == 0)
00235 {
00236 if (*s == '0')
00237 {
00238 if (s[1]=='X' || s[1]=='x')
00239 {
00240 s += 2;
00241 base = 16;
00242 }
00243 else
00244 base = 8;
00245 }
00246 else
00247 base = 10;
00248 }
00249 #endif
00250
00251 save = s;
00252 cutoff = (UINT32_MAX) / (uint32_t) base;
00253 cutlim = (uint32_t) ((UINT32_MAX) % (uint32_t) base);
00254 overflow = 0;
00255 i = 0;
00256
00257 for (c = *s; s != e; c = *++s)
00258 {
00259 if (c>='0' && c<='9')
00260 c -= '0';
00261 else if (c>='A' && c<='Z')
00262 c = c - 'A' + 10;
00263 else if (c>='a' && c<='z')
00264 c = c - 'a' + 10;
00265 else
00266 break;
00267 if (c >= base)
00268 break;
00269 if (i > cutoff || (i == cutoff && c > cutlim))
00270 overflow = 1;
00271 else
00272 {
00273 i *= (uint32_t) base;
00274 i += c;
00275 }
00276 }
00277
00278 if (s == save)
00279 goto noconv;
00280
00281 if (endptr != NULL)
00282 *endptr = (char *) s;
00283
00284 if (overflow)
00285 {
00286 err[0]= ERANGE;
00287 return UINT32_MAX;
00288 }
00289
00290 return (negative ? -((long) i) : (long) i);
00291
00292 noconv:
00293 err[0]= EDOM;
00294 if (endptr != NULL)
00295 *endptr = (char *) nptr;
00296 return 0L;
00297 }
00298
00299
00300 int64_t my_strntoll_8bit(const CHARSET_INFO * const cs,
00301 const char *nptr, size_t l, int base,
00302 char **endptr,int *err)
00303 {
00304 int negative;
00305 uint64_t cutoff;
00306 uint32_t cutlim;
00307 uint64_t i;
00308 const char *s, *e;
00309 const char *save;
00310 int overflow;
00311
00312 *err= 0;
00313 #ifdef NOT_USED
00314 if (base < 0 || base == 1 || base > 36)
00315 base = 10;
00316 #endif
00317
00318 s = nptr;
00319 e = nptr+l;
00320
00321 for(; s<e && my_isspace(cs,*s); s++) {}
00322
00323 if (s == e)
00324 {
00325 goto noconv;
00326 }
00327
00328 if (*s == '-')
00329 {
00330 negative = 1;
00331 ++s;
00332 }
00333 else if (*s == '+')
00334 {
00335 negative = 0;
00336 ++s;
00337 }
00338 else
00339 negative = 0;
00340
00341 #ifdef NOT_USED
00342 if (base == 16 && s[0] == '0' && (s[1]=='X'|| s[1]=='x'))
00343 s += 2;
00344 #endif
00345
00346 #ifdef NOT_USED
00347 if (base == 0)
00348 {
00349 if (*s == '0')
00350 {
00351 if (s[1]=='X' || s[1]=='x')
00352 {
00353 s += 2;
00354 base = 16;
00355 }
00356 else
00357 base = 8;
00358 }
00359 else
00360 base = 10;
00361 }
00362 #endif
00363
00364 save = s;
00365
00366 cutoff = (~(uint64_t) 0) / (unsigned long int) base;
00367 cutlim = (uint32_t) ((~(uint64_t) 0) % (unsigned long int) base);
00368
00369 overflow = 0;
00370 i = 0;
00371 for ( ; s != e; s++)
00372 {
00373 unsigned char c= *s;
00374 if (c>='0' && c<='9')
00375 c -= '0';
00376 else if (c>='A' && c<='Z')
00377 c = c - 'A' + 10;
00378 else if (c>='a' && c<='z')
00379 c = c - 'a' + 10;
00380 else
00381 break;
00382 if (c >= base)
00383 break;
00384 if (i > cutoff || (i == cutoff && c > cutlim))
00385 overflow = 1;
00386 else
00387 {
00388 i *= (uint64_t) base;
00389 i += c;
00390 }
00391 }
00392
00393 if (s == save)
00394 goto noconv;
00395
00396 if (endptr != NULL)
00397 *endptr = (char *) s;
00398
00399 if (negative)
00400 {
00401 if (i > (uint64_t) INT64_MIN)
00402 overflow = 1;
00403 }
00404 else if (i > (uint64_t) INT64_MAX)
00405 overflow = 1;
00406
00407 if (overflow)
00408 {
00409 err[0]= ERANGE;
00410 return negative ? INT64_MIN : INT64_MAX;
00411 }
00412
00413 return (negative ? -((int64_t) i) : (int64_t) i);
00414
00415 noconv:
00416 err[0]= EDOM;
00417 if (endptr != NULL)
00418 *endptr = (char *) nptr;
00419 return 0L;
00420 }
00421
00422
00423 uint64_t my_strntoull_8bit(const CHARSET_INFO * const cs,
00424 const char *nptr, size_t l, int base,
00425 char **endptr, int *err)
00426 {
00427 int negative;
00428 uint64_t cutoff;
00429 uint32_t cutlim;
00430 uint64_t i;
00431 const char *s, *e;
00432 const char *save;
00433 int overflow;
00434
00435 *err= 0;
00436 #ifdef NOT_USED
00437 if (base < 0 || base == 1 || base > 36)
00438 base = 10;
00439 #endif
00440
00441 s = nptr;
00442 e = nptr+l;
00443
00444 for(; s<e && my_isspace(cs,*s); s++) {}
00445
00446 if (s == e)
00447 {
00448 goto noconv;
00449 }
00450
00451 if (*s == '-')
00452 {
00453 negative = 1;
00454 ++s;
00455 }
00456 else if (*s == '+')
00457 {
00458 negative = 0;
00459 ++s;
00460 }
00461 else
00462 negative = 0;
00463
00464 #ifdef NOT_USED
00465 if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
00466 s += 2;
00467 #endif
00468
00469 #ifdef NOT_USED
00470 if (base == 0)
00471 {
00472 if (*s == '0')
00473 {
00474 if (s[1]=='X' || s[1]=='x')
00475 {
00476 s += 2;
00477 base = 16;
00478 }
00479 else
00480 base = 8;
00481 }
00482 else
00483 base = 10;
00484 }
00485 #endif
00486
00487 save = s;
00488
00489 cutoff = (~(uint64_t) 0) / (unsigned long int) base;
00490 cutlim = (uint32_t) ((~(uint64_t) 0) % (unsigned long int) base);
00491
00492 overflow = 0;
00493 i = 0;
00494 for ( ; s != e; s++)
00495 {
00496 unsigned char c= *s;
00497
00498 if (c>='0' && c<='9')
00499 c -= '0';
00500 else if (c>='A' && c<='Z')
00501 c = c - 'A' + 10;
00502 else if (c>='a' && c<='z')
00503 c = c - 'a' + 10;
00504 else
00505 break;
00506 if (c >= base)
00507 break;
00508 if (i > cutoff || (i == cutoff && c > cutlim))
00509 overflow = 1;
00510 else
00511 {
00512 i *= (uint64_t) base;
00513 i += c;
00514 }
00515 }
00516
00517 if (s == save)
00518 goto noconv;
00519
00520 if (endptr != NULL)
00521 *endptr = (char *) s;
00522
00523 if (overflow)
00524 {
00525 err[0]= ERANGE;
00526 return (~(uint64_t) 0);
00527 }
00528
00529 return (negative ? -((int64_t) i) : (int64_t) i);
00530
00531 noconv:
00532 err[0]= EDOM;
00533 if (endptr != NULL)
00534 *endptr = (char *) nptr;
00535 return 0L;
00536 }
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562 double my_strntod_8bit(const CHARSET_INFO * const,
00563 char *str, size_t length,
00564 char **end, int *err)
00565 {
00566 if (length == INT32_MAX)
00567 length= 65535;
00568 *end= str + length;
00569 return internal::my_strtod(str, end, err);
00570 }
00571
00572
00573
00574
00575
00576
00577
00578
00579 size_t my_long10_to_str_8bit(const CHARSET_INFO * const,
00580 char *dst, size_t len, int radix, long int val)
00581 {
00582 char buffer[66];
00583 char *p, *e;
00584 long int new_val;
00585 uint32_t sign=0;
00586 unsigned long int uval = (unsigned long int) val;
00587
00588 e = p = &buffer[sizeof(buffer)-1];
00589 *p= 0;
00590
00591 if (radix < 0)
00592 {
00593 if (val < 0)
00594 {
00595
00596 uval= (unsigned long int)0 - uval;
00597 *dst++= '-';
00598 len--;
00599 sign= 1;
00600 }
00601 }
00602
00603 new_val = (long) (uval / 10);
00604 *--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
00605 val = new_val;
00606
00607 while (val != 0)
00608 {
00609 new_val=val/10;
00610 *--p = '0' + (char) (val-new_val*10);
00611 val= new_val;
00612 }
00613
00614 len= min(len, (size_t) (e-p));
00615 memcpy(dst, p, len);
00616 return len+sign;
00617 }
00618
00619
00620 size_t my_int64_t10_to_str_8bit(const CHARSET_INFO * const,
00621 char *dst, size_t len, int radix,
00622 int64_t val)
00623 {
00624 char buffer[65];
00625 char *p, *e;
00626 long long_val;
00627 uint32_t sign= 0;
00628 uint64_t uval = (uint64_t)val;
00629
00630 if (radix < 0)
00631 {
00632 if (val < 0)
00633 {
00634
00635 uval = (uint64_t)0 - uval;
00636 *dst++= '-';
00637 len--;
00638 sign= 1;
00639 }
00640 }
00641
00642 e = p = &buffer[sizeof(buffer)-1];
00643 *p= 0;
00644
00645 if (uval == 0)
00646 {
00647 *--p= '0';
00648 len= 1;
00649 goto cnv;
00650 }
00651
00652 while (uval > (uint64_t) LONG_MAX)
00653 {
00654 uint64_t quo= uval/(uint32_t) 10;
00655 uint32_t rem= (uint32_t) (uval- quo* (uint32_t) 10);
00656 *--p = '0' + rem;
00657 uval= quo;
00658 }
00659
00660 long_val= (long) uval;
00661 while (long_val != 0)
00662 {
00663 long quo= long_val/10;
00664 *--p = (char) ('0' + (long_val - quo*10));
00665 long_val= quo;
00666 }
00667
00668 len= min(len, (size_t) (e-p));
00669 cnv:
00670 memcpy(dst, p, len);
00671 return len+sign;
00672 }
00673
00674
00675
00676
00677
00678
00679
00680
00681
00682 inline static int likeconv(const charset_info_st *cs, const char c)
00683 {
00684 #ifdef LIKE_CMP_TOUPPER
00685 return (unsigned char) my_toupper(cs, c);
00686 #else
00687 return cs->sort_order[(unsigned char)c];
00688 #endif
00689 }
00690
00691
00692 inline static const char* inc_ptr(const charset_info_st *cs, const char *str, const char *str_end)
00693 {
00694
00695
00696 (void)cs;
00697 (void)str_end;
00698 return str++;
00699 }
00700
00701 int my_wildcmp_8bit(const CHARSET_INFO * const cs,
00702 const char *str,const char *str_end,
00703 const char *wildstr,const char *wildend,
00704 int escape, int w_one, int w_many)
00705 {
00706 int result= -1;
00707
00708 while (wildstr != wildend)
00709 {
00710 while (*wildstr != w_many && *wildstr != w_one)
00711 {
00712 if (*wildstr == escape && wildstr+1 != wildend)
00713 wildstr++;
00714
00715 if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
00716 return(1);
00717 if (wildstr == wildend)
00718 return(str != str_end);
00719 result=1;
00720 }
00721 if (*wildstr == w_one)
00722 {
00723 do
00724 {
00725 if (str == str_end)
00726 return(result);
00727 inc_ptr(cs,str,str_end);
00728 } while (++wildstr < wildend && *wildstr == w_one);
00729 if (wildstr == wildend)
00730 break;
00731 }
00732 if (*wildstr == w_many)
00733 {
00734 unsigned char cmp;
00735
00736 wildstr++;
00737
00738 for (; wildstr != wildend ; wildstr++)
00739 {
00740 if (*wildstr == w_many)
00741 continue;
00742 if (*wildstr == w_one)
00743 {
00744 if (str == str_end)
00745 return(-1);
00746 inc_ptr(cs,str,str_end);
00747 continue;
00748 }
00749 break;
00750 }
00751 if (wildstr == wildend)
00752 return(0);
00753 if (str == str_end)
00754 return(-1);
00755
00756 if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
00757 cmp= *++wildstr;
00758
00759 inc_ptr(cs,wildstr,wildend);
00760 cmp=likeconv(cs,cmp);
00761 do
00762 {
00763 while (str != str_end && (unsigned char) likeconv(cs,*str) != cmp)
00764 str++;
00765 if (str++ == str_end) return(-1);
00766 {
00767 int tmp=my_wildcmp_8bit(cs,str,str_end,wildstr,wildend,escape,w_one,
00768 w_many);
00769 if (tmp <= 0)
00770 return(tmp);
00771 }
00772 } while (str != str_end && wildstr[0] != w_many);
00773 return(-1);
00774 }
00775 }
00776 return(str != str_end ? 1 : 0);
00777 }
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798 bool my_like_range_simple(const CHARSET_INFO * const cs,
00799 const char *ptr, size_t ptr_length,
00800 char escape, char w_one, char w_many,
00801 size_t res_length,
00802 char *min_str,char *max_str,
00803 size_t *min_length, size_t *max_length)
00804 {
00805 const char *end= ptr + ptr_length;
00806 char *min_org=min_str;
00807 char *min_end=min_str+res_length;
00808 size_t charlen= res_length / cs->mbmaxlen;
00809
00810 for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
00811 {
00812 if (*ptr == escape && ptr+1 != end)
00813 {
00814 ptr++;
00815 *min_str++= *max_str++ = *ptr;
00816 continue;
00817 }
00818 if (*ptr == w_one)
00819 {
00820 *min_str++='\0';
00821 *max_str++= (char) cs->max_sort_char;
00822 continue;
00823 }
00824 if (*ptr == w_many)
00825 {
00826
00827 *min_length= ((cs->state & MY_CS_BINSORT) ?
00828 (size_t) (min_str - min_org) :
00829 res_length);
00830 *max_length= res_length;
00831 do
00832 {
00833 *min_str++= 0;
00834 *max_str++= (char) cs->max_sort_char;
00835 } while (min_str != min_end);
00836 return 0;
00837 }
00838 *min_str++= *max_str++ = *ptr;
00839 }
00840
00841 *min_length= *max_length = (size_t) (min_str - min_org);
00842 while (min_str != min_end)
00843 *min_str++= *max_str++ = ' ';
00844 return 0;
00845 }
00846
00847
00848 size_t my_scan_8bit(const CHARSET_INFO * const cs, const char *str, const char *end, int sq)
00849 {
00850 const char *str0= str;
00851 switch (sq)
00852 {
00853 case MY_SEQ_INTTAIL:
00854 if (*str == '.')
00855 {
00856 for(str++ ; str != end && *str == '0' ; str++) {}
00857 return (size_t) (str - str0);
00858 }
00859 return 0;
00860
00861 case MY_SEQ_SPACES:
00862 for ( ; str < end ; str++)
00863 {
00864 if (!my_isspace(cs,*str))
00865 break;
00866 }
00867 return (size_t) (str - str0);
00868 default:
00869 return 0;
00870 }
00871 }
00872
00873
00874 void my_fill_8bit(const CHARSET_INFO * const,
00875 char *s, size_t l, int fill)
00876 {
00877 memset(s, fill, l);
00878 }
00879
00880
00881 size_t my_numchars_8bit(const CHARSET_INFO * const,
00882 const char *b, const char *e)
00883 {
00884 return (size_t) (e - b);
00885 }
00886
00887
00888 size_t my_numcells_8bit(const CHARSET_INFO * const,
00889 const char *b, const char *e)
00890 {
00891 return (size_t) (e - b);
00892 }
00893
00894
00895 size_t my_charpos_8bit(const CHARSET_INFO * const,
00896 const char *, const char *, size_t pos)
00897 {
00898 return pos;
00899 }
00900
00901
00902 size_t my_well_formed_len_8bit(const CHARSET_INFO * const,
00903 const char *start, const char *end,
00904 size_t nchars, int *error)
00905 {
00906 size_t nbytes= (size_t) (end-start);
00907 *error= 0;
00908 return min(nbytes, nchars);
00909 }
00910
00911
00912 size_t my_lengthsp_8bit(const CHARSET_INFO * const,
00913 const char *ptr, size_t length)
00914 {
00915 const char *end;
00916 end= (const char *) internal::skip_trailing_space((const unsigned char *)ptr, length);
00917 return (size_t) (end-ptr);
00918 }
00919
00920
00921 uint32_t my_instr_simple(const CHARSET_INFO * const cs,
00922 const char *b, size_t b_length,
00923 const char *s, size_t s_length,
00924 my_match_t *match, uint32_t nmatch)
00925 {
00926 const unsigned char *str, *search, *end, *search_end;
00927
00928 if (s_length <= b_length)
00929 {
00930 if (!s_length)
00931 {
00932 if (nmatch)
00933 {
00934 match->beg= 0;
00935 match->end= 0;
00936 match->mb_len= 0;
00937 }
00938 return 1;
00939 }
00940
00941 str= (const unsigned char*) b;
00942 search= (const unsigned char*) s;
00943 end= (const unsigned char*) b+b_length-s_length+1;
00944 search_end= (const unsigned char*) s + s_length;
00945
00946 skip:
00947 while (str != end)
00948 {
00949 if (cs->sort_order[*str++] == cs->sort_order[*search])
00950 {
00951 const unsigned char *i,*j;
00952
00953 i= str;
00954 j= search+1;
00955
00956 while (j != search_end)
00957 if (cs->sort_order[*i++] != cs->sort_order[*j++])
00958 goto skip;
00959
00960 if (nmatch > 0)
00961 {
00962 match[0].beg= 0;
00963 match[0].end= (size_t) (str- (const unsigned char*)b-1);
00964 match[0].mb_len= match[0].end;
00965
00966 if (nmatch > 1)
00967 {
00968 match[1].beg= match[0].end;
00969 match[1].end= match[0].end+s_length;
00970 match[1].mb_len= match[1].end-match[1].beg;
00971 }
00972 }
00973 return 2;
00974 }
00975 }
00976 }
00977 return 0;
00978 }
00979
00980
00981 typedef struct
00982 {
00983 int nchars;
00984 MY_UNI_IDX uidx;
00985 } uni_idx;
00986
/* Number of code points per plane, and number of planes. */
#define PLANE_SIZE 0x100
#define PLANE_NUM 0x100

/**
  Plane index of a 16-bit code point: its high byte, reduced modulo
  PLANE_NUM.
*/
inline static int plane_number(uint16_t x)
{
  const int high_byte= x >> 8;
  return high_byte % PLANE_NUM;
}
00993
00994 static int pcmp(const void * f, const void * s)
00995 {
00996 const uni_idx *F= (const uni_idx*) f;
00997 const uni_idx *S= (const uni_idx*) s;
00998 int res;
00999
01000 if (!(res=((S->nchars)-(F->nchars))))
01001 res=((F->uidx.from)-(S->uidx.to));
01002 return res;
01003 }
01004
01005 static bool create_fromuni(CHARSET_INFO *cs, cs_alloc_func alloc)
01006 {
01007 uni_idx idx[PLANE_NUM];
01008 int i,n;
01009
01010
01011
01012
01013
01014
01015
01016 if (!cs->tab_to_uni)
01017 return true;
01018
01019
01020 memset(idx, 0, sizeof(idx));
01021
01022
01023 for (i=0; i< 0x100; i++)
01024 {
01025 uint16_t wc=cs->tab_to_uni[i];
01026 int pl= plane_number(wc);
01027
01028 if (wc || !i)
01029 {
01030 if (!idx[pl].nchars)
01031 {
01032 idx[pl].uidx.from=wc;
01033 idx[pl].uidx.to=wc;
01034 }else
01035 {
01036 idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
01037 idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
01038 }
01039 idx[pl].nchars++;
01040 }
01041 }
01042
01043
01044 qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
01045
01046 for (i=0; i < PLANE_NUM; i++)
01047 {
01048 int ch,numchars;
01049
01050
01051 if (!idx[i].nchars)
01052 break;
01053
01054 numchars=idx[i].uidx.to-idx[i].uidx.from+1;
01055 if (!(idx[i].uidx.tab=(unsigned char*) alloc(numchars * sizeof(*idx[i].uidx.tab))))
01056 return true;
01057
01058 memset(idx[i].uidx.tab, 0, numchars*sizeof(*idx[i].uidx.tab));
01059
01060 for (ch=1; ch < PLANE_SIZE; ch++)
01061 {
01062 uint16_t wc=cs->tab_to_uni[ch];
01063 if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
01064 {
01065 int ofs= wc - idx[i].uidx.from;
01066 idx[i].uidx.tab[ofs]= ch;
01067 }
01068 }
01069 }
01070
01071
01072 n=i;
01073 if (!(cs->tab_from_uni= (MY_UNI_IDX*) alloc(sizeof(MY_UNI_IDX)*(n+1))))
01074 return true;
01075
01076 for (i=0; i< n; i++)
01077 cs->tab_from_uni[i]= idx[i].uidx;
01078
01079
01080 memset(&cs->tab_from_uni[i], 0, sizeof(MY_UNI_IDX));
01081 return false;
01082 }
01083
01084 bool my_cset_init_8bit(CHARSET_INFO *cs, cs_alloc_func alloc)
01085 {
01086 cs->caseup_multiply= 1;
01087 cs->casedn_multiply= 1;
01088 cs->pad_char= ' ';
01089 return create_fromuni(cs, alloc);
01090 }
01091
01092 static void set_max_sort_char(CHARSET_INFO *cs)
01093 {
01094 unsigned char max_char;
01095 uint32_t i;
01096
01097 if (!cs->sort_order)
01098 return;
01099
01100 max_char=cs->sort_order[(unsigned char) cs->max_sort_char];
01101 for (i= 0; i < 256; i++)
01102 {
01103 if ((unsigned char) cs->sort_order[i] > max_char)
01104 {
01105 max_char=(unsigned char) cs->sort_order[i];
01106 cs->max_sort_char= i;
01107 }
01108 }
01109 }
01110
01111 bool my_coll_init_simple(CHARSET_INFO *cs, cs_alloc_func)
01112 {
01113 set_max_sort_char(cs);
01114 return false;
01115 }
01116
01117
01118 int64_t my_strtoll10_8bit(const CHARSET_INFO * const,
01119 const char *nptr, char **endptr, int *error)
01120 {
01121 return internal::my_strtoll10(nptr, endptr, error);
01122 }
01123
01124
01125 int my_mb_ctype_8bit(const CHARSET_INFO * const cs, int *ctype,
01126 const unsigned char *s, const unsigned char *e)
01127 {
01128 if (s >= e)
01129 {
01130 *ctype= 0;
01131 return MY_CS_TOOSMALL;
01132 }
01133 *ctype= cs->ctype[*s + 1];
01134 return 1;
01135 }
01136
01137
/* Largest unsigned 64-bit value, redefined locally. */
#undef UINT64_MAX
#define UINT64_MAX (~(uint64_t) 0)

/*
  A decimal accumulator can take one more digit without overflowing when it
  is below CUTOFF, or equal to CUTOFF with the new digit not above CUTLIM.
*/
#define CUTOFF (UINT64_MAX / 10)
#define CUTLIM (UINT64_MAX % 10)
/* Number of decimal digits in the largest unsigned 64-bit value. */
#define DIGITS_IN_ULONGLONG 20

/* Powers of ten: d10[n] == 10^n for n in [0, DIGITS_IN_ULONGLONG). Read-only. */
static const uint64_t d10[DIGITS_IN_ULONGLONG]=
{
  1,
  10,
  100,
  1000,
  10000,
  100000,
  1000000,
  10000000,
  100000000,
  1000000000,
  10000000000ULL,
  100000000000ULL,
  1000000000000ULL,
  10000000000000ULL,
  100000000000000ULL,
  1000000000000000ULL,
  10000000000000000ULL,
  100000000000000000ULL,
  1000000000000000000ULL,
  10000000000000000000ULL
};
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225
/**
  Convert a length-bounded decimal string, optionally with a fractional part
  and an exponent, to a 64-bit integer, rounding to the nearest integer.

  The character set argument is unused.

  @param str            Start of the input.
  @param length         Number of bytes available at str.
  @param unsigned_flag  Non-zero: the result is an unsigned 64-bit value and
                        negative non-zero input is out of range. Zero: the
                        result must fit in a signed 64-bit value.
  @param endptr         Receives the position where parsing stopped.
  @param error          Set to 0 on success, EDOM when no digits were found,
                        ERANGE when the value is out of range.

  @return The converted value. On ERANGE the result is clamped: 0 for
          negative input in unsigned mode, UINT64_MAX for unsigned overflow,
          INT64_MIN / INT64_MAX (cast to uint64_t) in signed mode.
*/
01226 uint64_t
01227 my_strntoull10rnd_8bit(const CHARSET_INFO * const,
01228 const char *str, size_t length, int unsigned_flag,
01229 char **endptr, int *error)
01230 {
01231 const char *dot, *end9, *beg, *end= str + length;
01232 uint64_t ull;
01233 ulong ul;
01234 unsigned char ch;
01235 int shift= 0, digits= 0, negative, addon;
01236
01237
/* Skip leading spaces and tabs. */
01238 for ( ; str < end && (*str == ' ' || *str == '\t') ; str++) {}
01239
01240 if (str >= end)
01241 goto ret_edom;
01242
/* Optional sign; a sign with nothing after it is not a number. */
01243 if ((negative= (*str == '-')) || *str=='+')
01244 {
01245 if (++str == end)
01246 goto ret_edom;
01247 }
01248
01249 beg= str;
01250 end9= (str + 9) > end ? end : (str + 9);
01251
/* Accumulate up to nine leading digits in 'ul'. */
01252 for (ul= 0 ; str < end9 && (ch= (unsigned char) (*str - '0')) < 10; str++)
01253 {
01254 ul= ul * 10 + ch;
01255 }
01256
/* The whole input was consumed by the loop above: return directly. */
01257 if (str >= end)
01258 {
01259 *endptr= (char*) str;
01260 if (negative)
01261 {
01262 if (unsigned_flag)
01263 {
01264 *error= ul ? ERANGE : 0;
01265 return 0;
01266 }
01267 else
01268 {
01269 *error= 0;
01270 return (uint64_t) (int64_t) -(long) ul;
01271 }
01272 }
01273 else
01274 {
01275 *error=0;
01276 return (uint64_t) ul;
01277 }
01278 }
01279
01280 digits= str - beg;
01281
01282
/*
  Continue in 64-bit arithmetic. 'dot' remembers the position just after
  the decimal point once one has been seen.
*/
01283 for (dot= NULL, ull= ul; str < end; str++)
01284 {
01285 if ((ch= (unsigned char) (*str - '0')) < 10)
01286 {
01287 if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM))
01288 {
01289 ull= ull * 10 + ch;
01290 digits++;
01291 continue;
01292 }
01293
01294
01295
01296
01297
/*
  This digit no longer fits in 64 bits. Decide whether to round up
  ('addon'), then skip the remaining digits: integer digits that are
  skipped are counted in 'shift' as powers of ten still to be applied.
*/
01298 if (ull == CUTOFF)
01299 {
01300 ull= UINT64_MAX;
01301 addon= 1;
01302 str++;
01303 }
01304 else
01305 addon= (*str >= '5');
01306 if (!dot)
01307 {
01308 for ( ; str < end && (ch= (unsigned char) (*str - '0')) < 10; shift++, str++) {}
01309 if (str < end && *str == '.')
01310 {
01311 str++;
01312 for ( ; str < end && (ch= (unsigned char) (*str - '0')) < 10; str++) {}
01313 }
01314 }
01315 else
01316 {
01317 shift= dot - str;
01318 for ( ; str < end && (ch= (unsigned char) (*str - '0')) < 10; str++) {}
01319 }
01320 goto exp;
01321 }
01322
01323 if (*str == '.')
01324 {
01325 if (dot)
01326 {
01327
/* A second decimal point ends the number. */
01328 addon= 0;
01329 goto exp;
01330 }
01331 else
01332 {
01333 dot= str + 1;
01334 }
01335 continue;
01336 }
01337
01338
/* Any other character ends the mantissa. */
01339 break;
01340 }
/* Digits accumulated after the decimal point count as a negative shift. */
01341 shift= dot ? dot - str : 0;
01342 addon= 0;
01343
01344 exp:
01345
/* No digit at all: report EDOM with the position just after the sign. */
01346 if (!digits)
01347 {
01348 str= beg;
01349 goto ret_edom;
01350 }
01351
/* Optional exponent: 'e' or 'E', optional sign, decimal digits. */
01352 if (str < end && (*str == 'e' || *str == 'E'))
01353 {
01354 str++;
01355 if (str < end)
01356 {
01357 int negative_exp, exponent;
01358 if ((negative_exp= (*str == '-')) || *str=='+')
01359 {
01360 if (++str == end)
01361 goto ret_sign;
01362 }
01363 for (exponent= 0 ;
01364 str < end && (ch= (unsigned char) (*str - '0')) < 10;
01365 str++)
01366 {
01367 exponent= exponent * 10 + ch;
01368 }
01369 shift+= negative_exp ? -exponent : exponent;
01370 }
01371 }
01372
/* No scaling needed: only apply the pending round-up, if any. */
01373 if (shift == 0)
01374 {
01375 if (addon)
01376 {
01377 if (ull == UINT64_MAX)
01378 goto ret_too_big;
01379 ull++;
01380 }
01381 goto ret_sign;
01382 }
01383
/* Negative shift: divide by 10^-shift, rounding half up. */
01384 if (shift < 0)
01385 {
01386 uint64_t d, r;
01387
01388 if (-shift >= DIGITS_IN_ULONGLONG)
01389 goto ret_zero;
01390
01391 d= d10[-shift];
01392 r= (ull % d) * 2;
01393 ull /= d;
01394 if (r >= d)
01395 ull++;
01396 goto ret_sign;
01397 }
01398
/* Positive shift beyond the table size: only zero stays in range. */
01399 if (shift > DIGITS_IN_ULONGLONG)
01400 {
01401 if (!ull)
01402 goto ret_sign;
01403 goto ret_too_big;
01404 }
01405
/* Multiply by ten 'shift' times, checking for overflow before each step. */
01406 for ( ; shift > 0; shift--, ull*= 10)
01407 {
01408 if (ull > CUTOFF)
01409 goto ret_too_big;
01410 }
01411
/* Apply the sign and the range limits of the requested result type. */
01412 ret_sign:
01413 *endptr= (char*) str;
01414
01415 if (!unsigned_flag)
01416 {
01417 if (negative)
01418 {
01419 if (ull > (uint64_t) INT64_MIN)
01420 {
01421 *error= ERANGE;
01422 return (uint64_t) INT64_MIN;
01423 }
01424 *error= 0;
01425 return (uint64_t) -(int64_t) ull;
01426 }
01427 else
01428 {
01429 if (ull > (uint64_t) INT64_MAX)
01430 {
01431 *error= ERANGE;
01432 return (uint64_t) INT64_MAX;
01433 }
01434 *error= 0;
01435 return ull;
01436 }
01437 }
01438
01439
/* Unsigned result requested: a negative non-zero value is out of range. */
01440 if (negative && ull)
01441 {
01442 *error= ERANGE;
01443 return 0;
01444 }
01445 *error= 0;
01446 return ull;
01447
01448 ret_zero:
01449 *endptr= (char*) str;
01450 *error= 0;
01451 return 0;
01452
01453 ret_edom:
01454 *endptr= (char*) str;
01455 *error= EDOM;
01456 return 0;
01457
01458 ret_too_big:
01459 *endptr= (char*) str;
01460 *error= ERANGE;
01461 return unsigned_flag ?
01462 UINT64_MAX :
01463 negative ? (uint64_t) INT64_MIN : (uint64_t) INT64_MAX;
01464 }
01465
01466
01467
01468
01469
01470
01471
01472
01473
01474
01475
01476
01477
01478
01479
01480
01481
01482
01483
01484
01485
01486
01487
01488
01489
01490
01491
01492
01493
01494
01495
01496
01497
01498
01499
01500
01501
01502
01503
01504
01505 bool my_propagate_simple(const CHARSET_INFO * const, const unsigned char *,
01506 size_t)
01507 {
01508 return 1;
01509 }
01510
01511
01512 bool my_propagate_complex(const CHARSET_INFO * const, const unsigned char *,
01513 size_t)
01514 {
01515 return 0;
01516 }
01517
01518
01519
01520
01521
01522
01523
01524
01525
01526
01527
01528
01529
01530
01531
01532
01533
01534
01535
01536
01537 uint32_t my_strxfrm_flag_normalize(uint32_t flags, uint32_t maximum)
01538 {
01539 assert(maximum >= 1 && maximum <= MY_STRXFRM_NLEVELS);
01540
01541
01542 if (!(flags & MY_STRXFRM_LEVEL_ALL))
01543 {
01544 static uint32_t def_level_flags[]= {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F };
01545 uint32_t flag_pad= flags & MY_STRXFRM_PAD_WITH_SPACE;
01546 flags= def_level_flags[maximum] | flag_pad;
01547 }
01548 else
01549 {
01550 uint32_t i;
01551 uint32_t flag_lev= flags & MY_STRXFRM_LEVEL_ALL;
01552 uint32_t flag_dsc= (flags >> MY_STRXFRM_DESC_SHIFT) & MY_STRXFRM_LEVEL_ALL;
01553 uint32_t flag_rev= (flags >> MY_STRXFRM_REVERSE_SHIFT) & MY_STRXFRM_LEVEL_ALL;
01554 uint32_t flag_pad= flags & MY_STRXFRM_PAD_WITH_SPACE;
01555
01556
01557
01558
01559
01560 for (maximum--, flags= 0, i= 0; i < MY_STRXFRM_NLEVELS; i++)
01561 {
01562 uint32_t src_bit= 1 << i;
01563 uint32_t dst_bit= 1 << min(i, maximum);
01564 if (flag_lev & src_bit)
01565 {
01566 flags|= dst_bit;
01567 flags|= (flag_dsc & dst_bit) << MY_STRXFRM_DESC_SHIFT;
01568 flags|= (flag_rev & dst_bit) << MY_STRXFRM_REVERSE_SHIFT;
01569 }
01570 else
01571 {
01572
01573 assert(!(flag_dsc & src_bit) && !(flag_rev & src_bit));
01574 }
01575 }
01576 flags|= flag_pad;
01577 }
01578
01579 return flags;
01580 }
01581
01582
01583
01584
01585
01586
01587
01588
01589
01590
01591
01592
01593
01594
01595
01596
01597
01598
01599
01600
01601
01602
01603
01604
01605
01606 void my_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *strend,
01607 uint32_t flags, uint32_t level)
01608 {
01609 if (flags & (MY_STRXFRM_DESC_LEVEL1 << level))
01610 {
01611 if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
01612 {
01613 for (strend--; str <= strend;)
01614 {
01615 unsigned char tmp= *str;
01616 *str++= ~*strend;
01617 *strend--= ~tmp;
01618 }
01619 }
01620 else
01621 {
01622 for (; str < strend; str++)
01623 *str= ~*str;
01624 }
01625 }
01626 else if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
01627 {
01628 for (strend--; str < strend;)
01629 {
01630 unsigned char tmp= *str;
01631 *str++= *strend;
01632 *strend--= tmp;
01633 }
01634 }
01635 }
01636
01637
01638 size_t
01639 my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO * const cs,
01640 unsigned char *str, unsigned char *frmend, unsigned char *strend,
01641 uint32_t nweights, uint32_t flags, uint32_t level)
01642 {
01643 if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
01644 {
01645 uint32_t fill_length= min((uint32_t) (strend - frmend), nweights * cs->mbminlen);
01646 cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
01647 frmend+= fill_length;
01648 }
01649 my_strxfrm_desc_and_reverse(str, frmend, flags, level);
01650 return frmend - str;
01651 }
01652
01653 }