Drizzled Public API Documentation

ctype-bin.cc
00001 /* Copyright (C) 2002 MySQL AB & tommy@valley.ne.jp.
00002 
00003    This library is free software; you can redistribute it and/or
00004    modify it under the terms of the GNU Library General Public
00005    License as published by the Free Software Foundation; version 2
00006    of the License.
00007 
00008    This library is distributed in the hope that it will be useful,
00009    but WITHOUT ANY WARRANTY; without even the implied warranty of
00010    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00011    Library General Public License for more details.
00012 
00013    You should have received a copy of the GNU Library General Public
00014    License along with this library; if not, write to the Free
00015    Software Foundation, Inc., 51 Franklin Place - Suite 330, Boston,
00016    MA 02110-1301, USA */
00017 
00018 /* This file is for binary pseudo charset, created by bar@mysql.com */
00019 
00020 
00021 #include <config.h>
00022 #include <drizzled/definitions.h>
00023 #include <drizzled/internal/m_string.h>
00024 #include <drizzled/charset_info.h>
00025 
00026 #include <algorithm>
00027 
00028 using namespace std;
00029 
00030 namespace drizzled
00031 {
00032 
00033 void my_hash_sort_bin(const CHARSET_INFO * const,
00034                       const unsigned char *key, size_t len,
00035                       uint32_t *nr1, uint32_t *nr2);
00036 
00037 
/*
  Character-type flag table used as the 'ctype' member of my_charset_bin.

  The table holds 257 entries: a single leading 0 followed by 256 flag
  bytes.  The first 128 of those are non-zero, the last 128 are all zero.
  NOTE(review): presumably the leading entry lets the table be indexed
  with (byte value + 1) -- confirm against the ctype accessors.
*/
static unsigned char ctype_bin[]=
{
    0,                                                  /* entry 0 */
   /* entries 1..16 */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
   /* entries 17..32 */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
   /* entries 33..48 */
   72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
   /* entries 49..64 */
  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
   /* entries 65..80 */
   16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,
   /* entries 81..96 */
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,
   /* entries 97..112 */
   16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,
   /* entries 113..128 */
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,
   /* entries 129..256: no flags set */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
};
00058 
00059 
/*
  Identity mapping over all 256 byte values (entry i holds i).
  Serves as the dummy toupper / tolower / sortorder table: my_charset_bin
  uses it for both to_lower and to_upper, so no byte is ever changed.
*/

static unsigned char bin_char_array[] =
{
  /* 0..127 */
    0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
   16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
   32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
   48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
   64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
   80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
   96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
  /* 128..255 */
  128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
  144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
  160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
  176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
  192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
  208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
  224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
  240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
00081 
00082 
00083 bool my_coll_init_8bit_bin(CHARSET_INFO *cs,
00084                            cs_alloc_func)
00085 {
00086   cs->max_sort_char=255;
00087   return false;
00088 }
00089 
00090 int my_strnncoll_binary(const CHARSET_INFO * const,
00091                         const unsigned char *s, size_t slen,
00092                         const unsigned char *t, size_t tlen,
00093                         bool t_is_prefix)
00094 {
00095   size_t len= min(slen,tlen);
00096   int cmp= memcmp(s,t,len);
00097   return cmp ? cmp : static_cast<int>((t_is_prefix ? len : slen) - tlen);
00098 }
00099 
00100 
00101 size_t my_lengthsp_binary(const CHARSET_INFO * const,
00102                           const char *, size_t length)
00103 {
00104   return length;
00105 }
00106 
00107 
00108 /*
00109   Compare two strings. Result is sign(first_argument - second_argument)
00110 
00111   SYNOPSIS
00112     my_strnncollsp_binary()
00113     cs      Chararacter set
00114     s     String to compare
00115     slen    Length of 's'
00116     t     String to compare
00117     tlen    Length of 't'
00118 
00119   NOTE
00120    This function is used for real binary strings, i.e. for
00121    BLOB, BINARY(N) and VARBINARY(N).
00122    It compares trailing spaces as spaces.
00123 
00124   RETURN
00125   < 0 s < t
00126   0 s == t
00127   > 0 s > t
00128 */
00129 
00130 int my_strnncollsp_binary(const CHARSET_INFO * const cs,
00131                           const unsigned char *s, size_t slen,
00132                           const unsigned char *t, size_t tlen,
00133                           bool)
00134 {
00135   return my_strnncoll_binary(cs,s,slen,t,tlen,0);
00136 }
00137 
00138 
00139 int my_strnncoll_8bit_bin(const CHARSET_INFO * const,
00140                           const unsigned char *s, size_t slen,
00141                           const unsigned char *t, size_t tlen,
00142                           bool t_is_prefix)
00143 {
00144   size_t len= min(slen,tlen);
00145   int cmp= memcmp(s,t,len);
00146   return cmp ? cmp : static_cast<int>((t_is_prefix ? len : slen) - tlen);
00147 }
00148 
00149 
00150 /*
00151   Compare two strings. Result is sign(first_argument - second_argument)
00152 
00153   SYNOPSIS
00154     my_strnncollsp_8bit_bin()
00155     cs      Chararacter set
00156     s     String to compare
00157     slen    Length of 's'
00158     t     String to compare
00159     tlen    Length of 't'
00160     diff_if_only_endspace_difference
00161             Set to 1 if the strings should be regarded as different
00162                         if they only difference in end space
00163 
00164   NOTE
00165    This function is used for character strings with binary collations.
00166    The shorter string is extended with end space to be as long as the longer
00167    one.
00168 
00169   RETURN
00170   < 0 s < t
00171   0 s == t
00172   > 0 s > t
00173 */
00174 
00175 int my_strnncollsp_8bit_bin(const CHARSET_INFO * const,
00176                             const unsigned char *a, size_t a_length,
00177                             const unsigned char *b, size_t b_length,
00178                             bool diff_if_only_endspace_difference)
00179 {
00180   const unsigned char *end;
00181   size_t length;
00182   int res;
00183 
00184 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
00185   diff_if_only_endspace_difference= 0;
00186 #endif
00187 
00188   end= a + (length= min(a_length, b_length));
00189   while (a < end)
00190   {
00191     if (*a++ != *b++)
00192       return a[-1] - b[-1];
00193   }
00194   res= 0;
00195   if (a_length != b_length)
00196   {
00197     int swap= 1;
00198     /*
00199       Check the next not space character of the longer key. If it's < ' ',
00200       then it's smaller than the other key.
00201     */
00202     if (diff_if_only_endspace_difference)
00203       res= 1;                                   /* Assume 'a' is bigger */
00204     if (a_length < b_length)
00205     {
00206       /* put shorter key in s */
00207       a_length= b_length;
00208       a= b;
00209       swap= -1;         /* swap sign of result */
00210       res= -res;
00211     }
00212     for (end= a + a_length-length; a < end ; a++)
00213     {
00214       if (*a != ' ')
00215   return (*a < ' ') ? -swap : swap;
00216     }
00217   }
00218   return res;
00219 }
00220 
00221 
00222 /* This function is used for all conversion functions */
00223 
00224 size_t my_case_str_bin(const CHARSET_INFO * const, char *)
00225 {
00226   return 0;
00227 }
00228 
00229 
00230 size_t my_case_bin(const CHARSET_INFO * const, char *,
00231                    size_t srclen, char *, size_t)
00232 {
00233   return srclen;
00234 }
00235 
00236 
00237 int my_strcasecmp_bin(const CHARSET_INFO * const,
00238                       const char *s, const char *t)
00239 {
00240   return strcmp(s,t);
00241 }
00242 
00243 
00244 uint32_t my_mbcharlen_8bit(const CHARSET_INFO * const, uint32_t)
00245 {
00246   return 1;
00247 }
00248 
00249 
00250 int my_mb_wc_bin(const CHARSET_INFO * const,
00251                  my_wc_t *wc, const unsigned char *str,
00252                  const unsigned char *end)
00253 {
00254   if (str >= end)
00255     return MY_CS_TOOSMALL;
00256 
00257   *wc=str[0];
00258   return 1;
00259 }
00260 
00261 
00262 int my_wc_mb_bin(const CHARSET_INFO * const, my_wc_t wc,
00263                  unsigned char *str, unsigned char *end)
00264 {
00265   if (str >= end)
00266     return MY_CS_TOOSMALL;
00267 
00268   if (wc < 256)
00269   {
00270     str[0]= wc;
00271     return 1;
00272   }
00273   return MY_CS_ILUNI;
00274 }
00275 
00276 
00277 void my_hash_sort_8bit_bin(const CHARSET_INFO * const,
00278                            const unsigned char *key, size_t len,
00279                            uint32_t *nr1, uint32_t *nr2)
00280 {
00281   const unsigned char *pos = key;
00282 
00283   /*
00284      Remove trailing spaces. We have to do this to be able to compare
00285     'A ' and 'A' as identical
00286   */
00287   key= internal::skip_trailing_space(key, len);
00288 
00289   for (; pos < key ; pos++)
00290   {
00291     nr1[0]^= (((nr1[0] & 63) + nr2[0]) * *pos) + (nr1[0] << 8);
00292     nr2[0]+=3;
00293   }
00294 }
00295 
00296 
00297 void my_hash_sort_bin(const CHARSET_INFO * const,
00298                       const unsigned char *key, size_t len,
00299                       uint32_t *nr1, uint32_t *nr2)
00300 {
00301   const unsigned char *pos = key;
00302 
00303   key+= len;
00304 
00305   for (; pos < key ; pos++)
00306   {
00307     nr1[0]^= (((nr1[0] & 63) + nr2[0]) * *pos) + (nr1[0] << 8);
00308     nr2[0]+=3;
00309   }
00310 }
00311 
00312 
00313 /*
00314   The following defines is here to keep the following code identical to
00315   the one in ctype-simple.c
00316 */
00317 
00318 #define likeconv(s,A) (A)
00319 #define INC_PTR(cs,A,B) (A)++
00320 
00321 
00322 int my_wildcmp_bin(const CHARSET_INFO * const cs,
00323                    const char *str,const char *str_end,
00324                    const char *wildstr,const char *wildend,
00325                    int escape, int w_one, int w_many)
00326 {
00327   int result= -1;     /* Not found, using wildcards */
00328 
00329   while (wildstr != wildend)
00330   {
00331     while (*wildstr != w_many && *wildstr != w_one)
00332     {
00333       if (*wildstr == escape && wildstr+1 != wildend)
00334   wildstr++;
00335       if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
00336   return(1);      /* No match */
00337       if (wildstr == wildend)
00338   return(str != str_end);   /* Match if both are at end */
00339       result=1;       /* Found an anchor char */
00340     }
00341     if (*wildstr == w_one)
00342     {
00343       do
00344       {
00345   if (str == str_end)   /* Skip one char if possible */
00346     return(result);
00347   INC_PTR(cs,str,str_end);
00348       } while (++wildstr < wildend && *wildstr == w_one);
00349       if (wildstr == wildend)
00350   break;
00351     }
00352     if (*wildstr == w_many)
00353     {         /* Found w_many */
00354       unsigned char cmp;
00355       wildstr++;
00356       /* Remove any '%' and '_' from the wild search string */
00357       for (; wildstr != wildend ; wildstr++)
00358       {
00359   if (*wildstr == w_many)
00360     continue;
00361   if (*wildstr == w_one)
00362   {
00363     if (str == str_end)
00364       return(-1);
00365     INC_PTR(cs,str,str_end);
00366     continue;
00367   }
00368   break;        /* Not a wild character */
00369       }
00370       if (wildstr == wildend)
00371   return(0);      /* match if w_many is last */
00372       if (str == str_end)
00373   return(-1);
00374 
00375       if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
00376   cmp= *++wildstr;
00377 
00378       INC_PTR(cs,wildstr,wildend);  /* This is compared through cmp */
00379       cmp=likeconv(cs,cmp);
00380       do
00381       {
00382   while (str != str_end && (unsigned char) likeconv(cs,*str) != cmp)
00383     str++;
00384   if (str++ == str_end)
00385     return(-1);
00386   {
00387     int tmp=my_wildcmp_bin(cs,str,str_end,wildstr,wildend,escape,w_one,
00388          w_many);
00389     if (tmp <= 0)
00390       return(tmp);
00391   }
00392       } while (str != str_end && wildstr[0] != w_many);
00393       return(-1);
00394     }
00395   }
00396   return(str != str_end ? 1 : 0);
00397 }
00398 
00399 
00400 size_t
00401 my_strnxfrm_8bit_bin(const CHARSET_INFO * const cs,
00402                      unsigned char * dst, size_t dstlen, uint32_t nweights,
00403                      const unsigned char *src, size_t srclen, uint32_t flags)
00404 {
00405   set_if_smaller(srclen, dstlen);
00406   set_if_smaller(srclen, (size_t) nweights);
00407   if (dst != src)
00408     memcpy(dst, src, srclen);
00409   return my_strxfrm_pad_desc_and_reverse(cs, dst, dst + srclen, dst + dstlen,
00410                                          nweights - srclen, flags, 0);
00411 }
00412 
00413 
00414 uint32_t my_instr_bin(const CHARSET_INFO * const,
00415                       const char *b, size_t b_length,
00416                       const char *s, size_t s_length,
00417                       my_match_t *match, uint32_t nmatch)
00418 {
00419   const unsigned char *str, *search, *end, *search_end;
00420 
00421   if (s_length <= b_length)
00422   {
00423     if (!s_length)
00424     {
00425       if (nmatch)
00426       {
00427         match->beg= 0;
00428         match->end= 0;
00429         match->mb_len= 0;
00430       }
00431       return 1;   /* Empty string is always found */
00432     }
00433 
00434     str= (const unsigned char*) b;
00435     search= (const unsigned char*) s;
00436     end= (const unsigned char*) b+b_length-s_length+1;
00437     search_end= (const unsigned char*) s + s_length;
00438 
00439 skip:
00440     while (str != end)
00441     {
00442       if ( (*str++) == (*search))
00443       {
00444   const unsigned char *i,*j;
00445 
00446   i= str;
00447   j= search+1;
00448 
00449   while (j != search_end)
00450     if ((*i++) != (*j++))
00451             goto skip;
00452 
00453         if (nmatch > 0)
00454   {
00455     match[0].beg= 0;
00456     match[0].end= (size_t) (str- (const unsigned char*)b-1);
00457     match[0].mb_len= match[0].end;
00458 
00459     if (nmatch > 1)
00460     {
00461       match[1].beg= match[0].end;
00462       match[1].end= match[0].end+s_length;
00463       match[1].mb_len= match[1].end-match[1].beg;
00464     }
00465   }
00466   return 2;
00467       }
00468     }
00469   }
00470   return 0;
00471 }
00472 
00473 
00474 static MY_COLLATION_HANDLER my_collation_binary_handler =
00475 {
00476   NULL,     /* init */
00477   my_strnncoll_binary,
00478   my_strnncollsp_binary,
00479   my_strnxfrm_8bit_bin,
00480   my_strnxfrmlen_simple,
00481   my_like_range_simple,
00482   my_wildcmp_bin,
00483   my_strcasecmp_bin,
00484   my_instr_bin,
00485   my_hash_sort_bin,
00486   my_propagate_simple
00487 };
00488 
00489 
00490 static MY_CHARSET_HANDLER my_charset_handler=
00491 {
00492   NULL,     /* init */
00493   NULL,     /* ismbchar      */
00494   my_mbcharlen_8bit,  /* mbcharlen     */
00495   my_numchars_8bit,
00496   my_charpos_8bit,
00497   my_well_formed_len_8bit,
00498   my_lengthsp_binary,
00499   my_numcells_8bit,
00500   my_mb_wc_bin,
00501   my_wc_mb_bin,
00502   my_mb_ctype_8bit,
00503   my_case_str_bin,
00504   my_case_str_bin,
00505   my_case_bin,
00506   my_case_bin,
00507   my_snprintf_8bit,
00508   my_long10_to_str_8bit,
00509   my_int64_t10_to_str_8bit,
00510   my_fill_8bit,
00511   my_strntol_8bit,
00512   my_strntoul_8bit,
00513   my_strntoll_8bit,
00514   my_strntoull_8bit,
00515   my_strntod_8bit,
00516   my_strtoll10_8bit,
00517   my_strntoull10rnd_8bit,
00518   my_scan_8bit
00519 };
00520 
00521 
00522 DRIZZLED_API CHARSET_INFO my_charset_bin =
00523 {
00524     63,0,0,     /* number        */
00525     MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PRIMARY,/* state */
00526     "binary",     /* cs name    */
00527     "binary",     /* name          */
00528     "",       /* comment       */
00529     NULL,     /* tailoring     */
00530     ctype_bin,      /* ctype         */
00531     bin_char_array,   /* to_lower      */
00532     bin_char_array,   /* to_upper      */
00533     NULL,     /* sort_order    */
00534     NULL,     /* contractions */
00535     NULL,     /* sort_order_big*/
00536     NULL,     /* tab_to_uni    */
00537     NULL,     /* tab_from_uni  */
00538     my_unicase_default,         /* caseinfo     */
00539     NULL,     /* state_map    */
00540     NULL,     /* ident_map    */
00541     1,        /* strxfrm_multiply */
00542     1,                          /* caseup_multiply  */
00543     1,                          /* casedn_multiply  */
00544     1,        /* mbminlen      */
00545     1,        /* mbmaxlen      */
00546     0,        /* min_sort_char */
00547     255,      /* max_sort_char */
00548     0,                          /* pad char      */
00549     0,                          /* escape_with_backslash_is_dangerous */
00550     1,                          /* levels_for_compare */
00551     1,                          /* levels_for_order   */
00552     &my_charset_handler,
00553     &my_collation_binary_handler
00554 };
00555 
00556 } /* namespace drizzled */