Drizzled Public API Documentation

sql_string.cc
1 /* Copyright (C) 2000 MySQL AB
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 /* This file is originally from the mysql distribution. Coded by monty */
17 
18 #include <config.h>
19 
20 #include <drizzled/definitions.h>
21 #include <drizzled/internal/my_sys.h>
22 #include <drizzled/internal/m_string.h>
23 #include <drizzled/memory/root.h>
24 #include <drizzled/charset.h>
25 
26 #include <algorithm>
27 
28 #include <drizzled/sql_string.h>
29 
30 using namespace std;
31 
32 namespace drizzled {
33 
34 /*****************************************************************************
35 ** String functions
36 *****************************************************************************/
37 
38 String::String()
39  : Ptr(NULL),
40  str_length(0),
41  Alloced_length(0),
42  alloced(false),
43  str_charset(&my_charset_bin)
44 { }
45 
46 
47 String::String(size_t length_arg)
48  : Ptr(NULL),
49  str_length(0),
50  Alloced_length(0),
51  alloced(false),
52  str_charset(&my_charset_bin)
53 {
54  (void) real_alloc(length_arg);
55 }
56 
57 String::String(const char *str, const charset_info_st * const cs)
58  : Ptr(const_cast<char *>(str)),
59  str_length(static_cast<size_t>(strlen(str))),
60  Alloced_length(0),
61  alloced(false),
62  str_charset(cs)
63 { }
64 
65 
66 String::String(const char *str, size_t len, const charset_info_st * const cs)
67  : Ptr(const_cast<char *>(str)),
68  str_length(len),
69  Alloced_length(0),
70  alloced(false),
71  str_charset(cs)
72 { }
73 
74 
75 String::String(char *str, size_t len, const charset_info_st * const cs)
76  : Ptr(str),
77  str_length(len),
78  Alloced_length(len),
79  alloced(false),
80  str_charset(cs)
81 { }
82 
83 
84 String::String(const String &str)
85  : Ptr(str.Ptr),
86  str_length(str.str_length),
87  Alloced_length(str.Alloced_length),
88  alloced(false),
89  str_charset(str.str_charset)
90 { }
91 
92 
93 void *String::operator new(size_t size, memory::Root *mem_root)
94 {
95  return mem_root->alloc(size);
96 }
97 
98 String::~String() { free(); }
99 
100 void String::real_alloc(size_t arg_length)
101 {
102  arg_length=ALIGN_SIZE(arg_length+1);
103  str_length=0;
104  if (Alloced_length < arg_length)
105  {
106  if (Alloced_length > 0)
107  free();
108  Ptr=(char*) malloc(arg_length);
109  Alloced_length=arg_length;
110  alloced=1;
111  }
112  Ptr[0]=0;
113 }
114 
115 
116 /*
117 ** Check that string is big enough. Set string[alloc_length] to 0
118 ** (for C functions)
119 */
120 
121 void String::realloc(size_t alloc_length)
122 {
123  size_t len=ALIGN_SIZE(alloc_length+1);
124  if (Alloced_length < len)
125  {
126  char *new_ptr;
127  if (alloced)
128  {
129  new_ptr= (char*) ::realloc(Ptr,len);
130  Ptr=new_ptr;
131  Alloced_length=len;
132  }
133  else
134  {
135  new_ptr= (char*) malloc(len);
136  if (str_length) // Avoid bugs in memcpy on AIX
137  memcpy(new_ptr,Ptr,str_length);
138  new_ptr[str_length]=0;
139  Ptr=new_ptr;
140  Alloced_length=len;
141  alloced=1;
142  }
143  }
144  Ptr[alloc_length]=0; // This make other funcs shorter
145 }
146 
147 void String::set_int(int64_t num, bool unsigned_flag, const charset_info_st * const cs)
148 {
149  size_t l= 20 * cs->mbmaxlen + 1;
150  alloc(l);
151  str_length=(size_t) (cs->cset->int64_t10_to_str)(cs, Ptr, l, unsigned_flag ? 10 : -10,num);
152  str_charset=cs;
153 }
154 
155 void String::set_real(double num,size_t decimals, const charset_info_st * const cs)
156 {
157  char buff[FLOATING_POINT_BUFFER];
158  size_t len;
159 
160  str_charset=cs;
161  if (decimals >= NOT_FIXED_DEC)
162  {
163  len= internal::my_gcvt(num, internal::MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
164  copy(buff, len, cs);
165  return;
166  }
167  len= internal::my_fcvt(num, decimals, buff, NULL);
168  copy(buff, len, cs);
169 }
170 
171 
172 void String::copy()
173 {
174  if (!alloced)
175  {
176  Alloced_length=0; // Force realloc
177  realloc(str_length);
178  }
179 }
180 
181 void String::copy(const String &str)
182 {
183  alloc(str.str_length);
184  str_length=str.str_length;
185  memmove(Ptr, str.Ptr, str_length); // May be overlapping
186  Ptr[str_length]=0;
187  str_charset= str.str_charset;
188 }
189 
190 void String::copy(const std::string& arg, const charset_info_st* cs) // Allocate new string
191 {
192  alloc(arg.size());
193  str_length= arg.size();
194  memcpy(Ptr, arg.c_str(), arg.size());
195  Ptr[arg.size()]= 0;
196  str_charset= cs;
197 }
198 
199 void String::copy(const char *str,size_t arg_length, const charset_info_st* cs)
200 {
201  alloc(arg_length);
202  if ((str_length=arg_length))
203  memcpy(Ptr,str,arg_length);
204  Ptr[arg_length]=0;
205  str_charset=cs;
206 }
207 
/*
  Checks whether the source string can be copied to the destination string
  as-is, i.e. without conversion.

  SYNOPSIS

  needs_conversion()
  arg_length      Length of string to copy.
  from_cs         Character set to copy from
  to_cs           Character set to copy to
  size_t *offset  Returns number of unaligned characters.
                  (Note: the current signature has no offset parameter.)

  RETURN
  0 No conversion needed
  1 Either character set conversion or adding leading zeros
    (e.g. for UCS-2) must be done

  NOTE
  to_cs may be NULL for "no conversion" if the system variable
  character_set_results is NULL.
*/
229 
230 bool String::needs_conversion(size_t arg_length, const charset_info_st* from_cs, const charset_info_st* to_cs)
231 {
232  if (!to_cs ||
233  to_cs == &my_charset_bin ||
234  to_cs == from_cs ||
235  my_charset_same(from_cs, to_cs) ||
236  (from_cs == &my_charset_bin && not (arg_length % to_cs->mbminlen)))
237  return false;
238  return true;
239 }
240 
/*
  Set a string to the value of a latin1-string, keeping the original charset

  SYNOPSIS
  set_ascii()   (this comment formerly named the function copy_or_set())
  str           String of a simple charset (latin1)
  arg_length    Length of string

  IMPLEMENTATION
  If the string object is of a simple character set, set it to point to the
  given string.
  If not, make a copy and convert it to the new character set.

  RETURN
  The function is declared void, so the status codes below are not
  reported to the caller:
  0 ok
  1 Could not allocate result buffer

*/
259 
260 void String::set_ascii(const char *str, size_t arg_length)
261 {
262  if (str_charset->mbminlen == 1)
263  {
264  set(str, arg_length, str_charset);
265  return;
266  }
267  copy(str, arg_length, str_charset);
268 }
269 
270 
/*
  Append an ASCII string to a string of the current character set
*/
274 
275 void String::append(const char *s,size_t arg_length)
276 {
277  if (arg_length == 0)
278  {
279  return;
280  }
281 
282  /*
283  For an ASCII compatinble string we can just append.
284  */
285  realloc(str_length + arg_length);
286  memcpy(Ptr + str_length, s, arg_length);
287  str_length+= arg_length;
288 }
289 
290 void String::append(str_ref s)
291 {
292  append(s.data(), s.size());
293 }
294 
295 void String::append_with_prefill(const char *s,size_t arg_length, size_t full_length, char fill_char)
296 {
297  int t_length= arg_length > full_length ? arg_length : full_length;
298 
299  realloc(str_length + t_length);
300  t_length= full_length - arg_length;
301  if (t_length > 0)
302  {
303  memset(Ptr+str_length, fill_char, t_length);
304  str_length=str_length + t_length;
305  }
306  append(s, arg_length);
307 }
308 
309 size_t String::numchars() const
310 {
311  return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
312 }
313 
314 int String::charpos(int i,size_t offset) const
315 {
316  return i <= 0 ? i : str_charset->cset->charpos(str_charset, Ptr + offset, Ptr + str_length, i);
317 }
318 
319 int String::strstr(const String &s, size_t offset)
320 {
321  if (s.length() + offset <= str_length)
322  {
323  if (!s.length())
324  return ((int) offset); // Empty string is always found
325 
326  const char *str = Ptr+offset;
327  const char *search=s.ptr();
328  const char *last=Ptr+str_length-s.length()+1;
329  const char *search_end=s.ptr()+s.length();
330 skip:
331  while (str != last)
332  {
333  if (*str++ == *search)
334  {
335  const char* i= str;
336  const char* j= search + 1;
337  while (j != search_end)
338  if (*i++ != *j++) goto skip;
339  return (int) (str - Ptr) - 1;
340  }
341  }
342  }
343  return -1;
344 }
345 
346 /*
347 ** Search string from end. Offset is offset to the end of string
348 */
349 
350 int String::strrstr(const String &s,size_t offset)
351 {
352  if (s.length() <= offset && offset <= str_length)
353  {
354  if (!s.length())
355  return offset; // Empty string is always found
356  const char *str = Ptr+offset-1;
357  const char *search=s.ptr()+s.length()-1;
358 
359  const char *last=Ptr+s.length()-2;
360  const char *search_end=s.ptr()-1;
361 skip:
362  while (str != last)
363  {
364  if (*str-- == *search)
365  {
366  const char* i= str;
367  const char* j= search-1;
368  while (j != search_end)
369  if (*i-- != *j--) goto skip;
370  return (int) (i-Ptr) + 1;
371  }
372  }
373  }
374  return -1;
375 }
376 
377 /*
378  Replace substring with string
379  If wrong parameter or not enough memory, do nothing
380 */
381 
382 void String::replace(size_t offset,size_t arg_length,const String &to)
383 {
384  replace(offset,arg_length,to.ptr(),to.length());
385 }
386 
387 void String::replace(size_t offset,size_t arg_length,
388  const char *to, size_t to_length)
389 {
390  long diff = (long) to_length-(long) arg_length;
391  if (offset+arg_length <= str_length)
392  {
393  if (diff < 0)
394  {
395  if (to_length)
396  memcpy(Ptr+offset,to,to_length);
397  memmove(Ptr+offset+to_length, Ptr+offset+arg_length,
398  str_length-offset-arg_length);
399  }
400  else
401  {
402  if (diff)
403  {
404  realloc(str_length+(size_t) diff);
405  internal::bmove_upp((unsigned char*) Ptr+str_length+diff,
406  (unsigned char*) Ptr+str_length,
407  str_length-offset-arg_length);
408  }
409  if (to_length)
410  memcpy(Ptr+offset,to,to_length);
411  }
412  str_length+=(size_t) diff;
413  }
414 }
415 
416 
417 
418 /*
419  Compare strings according to collation, without end space.
420 
421  SYNOPSIS
422  sortcmp()
423  s First string
424  t Second string
425  cs Collation
426 
427  NOTE:
428  Normally this is case sensitive comparison
429 
430  RETURN
431  < 0 s < t
432  0 s == t
433  > 0 s > t
434 */
435 
436 
437 int sortcmp(const String *s,const String *t, const charset_info_st * const cs)
438 {
439  return cs->coll->strnncollsp(cs,
440  (unsigned char *) s->ptr(),s->length(),
441  (unsigned char *) t->ptr(),t->length(), 0);
442 }
443 
444 
445 /*
446  Compare strings byte by byte. End spaces are also compared.
447 
448  SYNOPSIS
449  stringcmp()
450  s First string
451  t Second string
452 
453  NOTE:
454  Strings are compared as a stream of unsigned chars
455 
456  RETURN
457  < 0 s < t
458  0 s == t
459  > 0 s > t
460 */
461 
462 
463 int stringcmp(const String *s,const String *t)
464 {
465  size_t s_len= s->length(), t_len= t->length(), len= min(s_len,t_len);
466  int cmp= memcmp(s->ptr(), t->ptr(), len);
467  return (cmp) ? cmp : (int) (s_len - t_len);
468 }
469 
470 
471 String *copy_if_not_alloced(String *to,String *from,size_t from_length)
472 {
473  if (from->Alloced_length >= from_length)
474  return from;
475  if (from->alloced || !to || from == to)
476  {
477  (void) from->realloc(from_length);
478  return from;
479  }
480  to->realloc(from_length);
481  if ((to->str_length= min(from->str_length,from_length)))
482  memcpy(to->Ptr,from->Ptr,to->str_length);
483  to->str_charset=from->str_charset;
484  return to;
485 }
486 
487 
488 /****************************************************************************
489  Help functions
490 ****************************************************************************/
491 
/*
  Copy a string,
  with optional character set conversion,
  with optional left padding (for binary -> UCS2 conversion)

  SYNOPSIS
  well_formed_copy_nchars()
  to                        Store result here
  to_length                 Maximum length of "to" string
  to_cs                     Character set of "to" string
  from                      Copy from here
  from_length               Length of from string
  from_cs                   From character set
  nchars                    Copy no more than nchars characters
  well_formed_error_pos     Return position when "from" is not well formed
                            or NULL otherwise.
  cannot_convert_error_pos  Return position where a non-convertible
                            character was met, or NULL otherwise.
  from_end_pos              Return position where scanning of "from"
                            string stopped.
  NOTES

  RETURN
  number of bytes copied to 'to'
*/
517 
518 
519 size_t
520 well_formed_copy_nchars(const charset_info_st * const to_cs,
521  char *to, size_t to_length,
522  const charset_info_st * const from_cs,
523  const char *from, size_t from_length,
524  size_t nchars,
525  const char **well_formed_error_pos,
526  const char **cannot_convert_error_pos,
527  const char **from_end_pos)
528 {
529  size_t res;
530 
531  assert((to_cs == &my_charset_bin) ||
532  (from_cs == &my_charset_bin) ||
533  (to_cs == from_cs) ||
534  my_charset_same(from_cs, to_cs));
535 
536  if (to_length < to_cs->mbminlen || !nchars)
537  {
538  *from_end_pos= from;
539  *cannot_convert_error_pos= NULL;
540  *well_formed_error_pos= NULL;
541  return 0;
542  }
543 
544  if (to_cs == &my_charset_bin)
545  {
546  res= min(min(nchars, to_length), from_length);
547  memmove(to, from, res);
548  *from_end_pos= from + res;
549  *well_formed_error_pos= NULL;
550  *cannot_convert_error_pos= NULL;
551  }
552  else
553  {
554  int well_formed_error;
555  size_t from_offset;
556 
557  if ((from_offset= (from_length % to_cs->mbminlen)) &&
558  (from_cs == &my_charset_bin))
559  {
560  /*
561  Copying from BINARY to UCS2 needs to prepend zeros sometimes:
562  INSERT INTO t1 (ucs2_column) VALUES (0x01);
563  0x01 -> 0x0001
564  */
565  size_t pad_length= to_cs->mbminlen - from_offset;
566  memset(to, 0, pad_length);
567  memmove(to + pad_length, from, from_offset);
568  nchars--;
569  from+= from_offset;
570  from_length-= from_offset;
571  to+= to_cs->mbminlen;
572  to_length-= to_cs->mbminlen;
573  }
574 
575  set_if_smaller(from_length, to_length);
576  res= to_cs->cset->well_formed_len(*to_cs, str_ref(from, from_length), nchars, &well_formed_error);
577  memmove(to, from, res);
578  *from_end_pos= from + res;
579  *well_formed_error_pos= well_formed_error ? from + res : NULL;
580  *cannot_convert_error_pos= NULL;
581  if (from_offset)
582  res+= to_cs->mbminlen;
583  }
584 
585  return res;
586 }
587 
588 void String::print(String& str) const
589 {
590  const char* last= Ptr + str_length;
591  for (const char* st= Ptr; st < last; st++)
592  {
593  unsigned char c= *st;
594  switch (c)
595  {
596  case '\\':
597  str.append("\\\\", sizeof("\\\\")-1);
598  break;
599  case '\0':
600  str.append("\\0", sizeof("\\0")-1);
601  break;
602  case '\'':
603  str.append("\\'", sizeof("\\'")-1);
604  break;
605  case '\n':
606  str.append("\\n", sizeof("\\n")-1);
607  break;
608  case '\r':
609  str.append("\\r", sizeof("\\r")-1);
610  break;
611  case '\032': // Ctrl-Z
612  str.append("\\Z", sizeof("\\Z")-1);
613  break;
614  default:
615  str.append(c);
616  }
617  }
618 }
619 
620 /*
621  Quote the given identifier.
622  If the given identifier is empty, it will be quoted.
623 
624  SYNOPSIS
625  append_identifier()
626  name the identifier to be appended
627  name_length length of the appending identifier
628 */
629 
630 /* Factor the extern out */
631 extern const charset_info_st *system_charset_info;
632 
633 void String::append_identifier(const char *name, size_t in_length)
634 {
635  // The identifier must be quoted as it includes a quote character or it's a keyword
636 
637  reserve(in_length * 2 + 2);
638  const char quote_char= '`';
639  append(&quote_char, 1);
640 
641  for (const char* name_end= name+in_length ; name < name_end ; name+= in_length)
642  {
643  unsigned char chr= (unsigned char) *name;
644  in_length= my_mbcharlen(system_charset_info, chr);
645  /*
646  my_mbcharlen can return 0 on a wrong multibyte
647  sequence. It is possible when upgrading from 4.0,
648  and identifier contains some accented characters.
649  The manual says it does not work. So we'll just
650  change length to 1 not to hang in the endless loop.
651  */
652  if (!in_length)
653  in_length= 1;
654  if (in_length == 1 && chr == (unsigned char) quote_char)
655  append(&quote_char, 1);
656  append(name, in_length);
657  }
658  append(&quote_char, 1);
659 }
660 
661 void String::append_identifier(str_ref v)
662 {
663  append_identifier(v.data(), v.size());
664 }
665 
666 bool check_if_only_end_space(const charset_info_st * const cs, char *str, char *end)
667 {
668  return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
669 }
670 
671 std::ostream& operator<<(std::ostream& output, const String &str)
672 {
673  output << "String:(";
674  output << const_cast<String&>(str).c_str();
675  output << ", ";
676  output << str.length();
677  output << ")";
678 
679  return output; // for multiple << operators.
680 }
681 
682 } /* namespace drizzled */
683 
684 bool operator==(const drizzled::String &s1, const drizzled::String &s2)
685 {
686  return stringcmp(&s1,&s2) == 0;
687 }
688 
689 bool operator!=(const drizzled::String &s1, const drizzled::String &s2)
690 {
691  return !(s1 == s2);
692 }
693