00001 /* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*- 00002 * vim:expandtab:shiftwidth=2:tabstop=2:smarttab: 00003 * 00004 * Copyright (C) 2008-2009 Sun Microsystems, Inc. 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; version 2 of the License. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program; if not, write to the Free Software 00017 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00018 */ 00019 00020 #pragma once 00021 00022 namespace drizzled { 00023 namespace optimizer { 00024 00025 class RangeParameter; 00026 00027 /* 00028 A construction block of the SEL_ARG-graph. 00029 00030 The following description only covers graphs of SEL_ARG objects with 00031 sel_arg->type==KEY_RANGE: 00032 00033 One SEL_ARG object represents an "elementary interval" in form 00034 00035 min_value <=? table.keypartX <=? max_value 00036 00037 The interval is a non-empty interval of any kind: with[out] minimum/maximum 00038 bound, [half]open/closed, single-point interval, etc. 00039 00040 1. SEL_ARG GRAPH STRUCTURE 00041 00042 SEL_ARG objects are linked together in a graph. The meaning of the graph 00043 is better demostrated by an example: 00044 00045 tree->keys[i] 00046 | 00047 | $ $ 00048 | part=1 $ part=2 $ part=3 00049 | $ $ 00050 | +-------+ $ +-------+ $ +--------+ 00051 | | kp1<1 |--$-->| kp2=5 |--$-->| kp3=10 | 00052 | +-------+ $ +-------+ $ +--------+ 00053 | | $ $ | 00054 | | $ $ +--------+ 00055 | | $ $ | kp3=12 | 00056 | | $ $ +--------+ 00057 | +-------+ $ $ 00058 \->| kp1=2 |--$--------------$-+ 00059 +-------+ $ $ | +--------+ 00060 | $ $ ==>| kp3=11 | 00061 +-------+ $ $ | +--------+ 00062 | kp1=3 |--$--------------$-+ | 00063 +-------+ $ $ +--------+ 00064 | $ $ | kp3=14 | 00065 ... $ $ +--------+ 00066 00067 The entire graph is partitioned into "interval lists". 00068 00069 An interval list is a sequence of ordered disjoint intervals over the same 00070 key part. SEL_ARG are linked via "next" and "prev" pointers. Additionally, 00071 all intervals in the list form an RB-tree, linked via left/right/parent 00072 pointers. The RB-tree root SEL_ARG object will be further called "root of the 00073 interval list". 00074 00075 In the example pic, there are 4 interval lists: 00076 "kp<1 OR kp1=2 OR kp1=3", "kp2=5", "kp3=10 OR kp3=12", "kp3=11 OR kp3=13". 00077 The vertical lines represent SEL_ARG::next/prev pointers. 00078 00079 In an interval list, each member X may have SEL_ARG::next_key_part pointer 00080 pointing to the root of another interval list Y. The pointed interval list 00081 must cover a key part with greater number (i.e. Y->part > X->part). 00082 00083 In the example pic, the next_key_part pointers are represented by 00084 horisontal lines. 00085 00086 2. SEL_ARG GRAPH SEMANTICS 00087 00088 It represents a condition in a special form (we don't have a name for it ATM) 00089 The SEL_ARG::next/prev is "OR", and next_key_part is "AND". 00090 00091 For example, the picture represents the condition in form: 00092 (kp1 < 1 AND kp2=5 AND (kp3=10 OR kp3=12)) OR 00093 (kp1=2 AND (kp3=11 OR kp3=14)) OR 00094 (kp1=3 AND (kp3=11 OR kp3=14)) 00095 00096 00097 3. SEL_ARG GRAPH USE 00098 00099 Use get_mm_tree() to construct SEL_ARG graph from WHERE condition. 00100 Then walk the SEL_ARG graph and get a list of dijsoint ordered key 00101 intervals (i.e. intervals in form 00102 00103 (constA1, .., const1_K) < (keypart1,.., keypartK) < (constB1, .., constB_K) 00104 00105 Those intervals can be used to access the index. The uses are in: 00106 - check_quick_select() - Walk the SEL_ARG graph and find an estimate of 00107 how many table records are contained within all 00108 intervals. 00109 - get_quick_select() - Walk the SEL_ARG, materialize the key intervals, 00110 and create QuickRangeSelect object that will 00111 read records within these intervals. 00112 00113 4. SPACE COMPLEXITY NOTES 00114 00115 SEL_ARG graph is a representation of an ordered disjoint sequence of 00116 intervals over the ordered set of index tuple values. 00117 00118 For multi-part keys, one can construct a WHERE expression such that its 00119 list of intervals will be of combinatorial size. Here is an example: 00120 00121 (keypart1 IN (1,2, ..., n1)) AND 00122 (keypart2 IN (1,2, ..., n2)) AND 00123 (keypart3 IN (1,2, ..., n3)) 00124 00125 For this WHERE clause the list of intervals will have n1*n2*n3 intervals 00126 of form 00127 00128 (keypart1, keypart2, keypart3) = (k1, k2, k3), where 1 <= k{i} <= n{i} 00129 00130 SEL_ARG graph structure aims to reduce the amount of required space by 00131 "sharing" the elementary intervals when possible (the pic at the 00132 beginning of this comment has examples of such sharing). The sharing may 00133 prevent combinatorial blowup: 00134 00135 There are WHERE clauses that have combinatorial-size interval lists but 00136 will be represented by a compact SEL_ARG graph. 00137 Example: 00138 (keypartN IN (1,2, ..., n1)) AND 00139 ... 00140 (keypart2 IN (1,2, ..., n2)) AND 00141 (keypart1 IN (1,2, ..., n3)) 00142 00143 but not in all cases: 00144 00145 - There are WHERE clauses that do have a compact SEL_ARG-graph 00146 representation but get_mm_tree() and its callees will construct a 00147 graph of combinatorial size. 00148 Example: 00149 (keypart1 IN (1,2, ..., n1)) AND 00150 (keypart2 IN (1,2, ..., n2)) AND 00151 ... 00152 (keypartN IN (1,2, ..., n3)) 00153 00154 - There are WHERE clauses for which the minimal possible SEL_ARG graph 00155 representation will have combinatorial size. 00156 Example: 00157 By induction: Let's take any interval on some keypart in the middle: 00158 00159 kp15=c0 00160 00161 Then let's AND it with this interval 'structure' from preceding and 00162 following keyparts: 00163 00164 (kp14=c1 AND kp16=c3) OR keypart14=c2) (*) 00165 00166 We will obtain this SEL_ARG graph: 00167 00168 kp14 $ kp15 $ kp16 00169 $ $ 00170 +---------+ $ +---------+ $ +---------+ 00171 | kp14=c1 |--$-->| kp15=c0 |--$-->| kp16=c3 | 00172 +---------+ $ +---------+ $ +---------+ 00173 | $ $ 00174 +---------+ $ +---------+ $ 00175 | kp14=c2 |--$-->| kp15=c0 | $ 00176 +---------+ $ +---------+ $ 00177 $ $ 00178 00179 Note that we had to duplicate "kp15=c0" and there was no way to avoid 00180 that. 00181 The induction step: AND the obtained expression with another "wrapping" 00182 expression like (*). 00183 When the process ends because of the limit on max. number of keyparts 00184 we'll have: 00185 00186 WHERE clause length is O(3*#max_keyparts) 00187 SEL_ARG graph size is O(2^(#max_keyparts/2)) 00188 00189 (it is also possible to construct a case where instead of 2 in 2^n we 00190 have a bigger constant, e.g. 4, and get a graph with 4^(31/2)= 2^31 00191 nodes) 00192 00193 We avoid consuming too much memory by setting a limit on the number of 00194 SEL_ARG object we can construct during one range analysis invocation. 00195 */ 00196 00197 class SEL_ARG :public memory::SqlAlloc 00198 { 00199 public: 00200 uint8_t min_flag,max_flag,maybe_flag; 00201 uint8_t part; // Which key part 00202 uint8_t maybe_null; 00203 /* 00204 Number of children of this element in the RB-tree, plus 1 for this 00205 element itself. 00206 */ 00207 uint16_t elements; 00208 /* 00209 Valid only for elements which are RB-tree roots: Number of times this 00210 RB-tree is referred to (it is referred by SEL_ARG::next_key_part or by 00211 SEL_TREE::keys[i] or by a temporary SEL_ARG* variable) 00212 */ 00213 ulong use_count; 00214 00215 Field *field; 00216 unsigned char *min_value,*max_value; // Pointer to range 00217 00218 /* 00219 eq_tree() requires that left == right == 0 if the type is MAYBE_KEY. 00220 */ 00221 SEL_ARG *left,*right; /* R-B tree children */ 00222 SEL_ARG *next,*prev; /* Links for bi-directional interval list */ 00223 SEL_ARG *parent; /* R-B tree parent */ 00224 SEL_ARG *next_key_part; 00225 enum leaf_color { BLACK,RED } color; 00226 enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type; 00227 00228 enum 00229 { 00230 MAX_SEL_ARGS = 16000 00231 }; 00232 00233 SEL_ARG() {} 00234 00235 SEL_ARG(SEL_ARG &); 00236 00237 SEL_ARG(Field *,const unsigned char *, const unsigned char *); 00238 00239 SEL_ARG(Field *field, 00240 uint8_t part, 00241 unsigned char *min_value, 00242 unsigned char *max_value, 00243 uint8_t min_flag, 00244 uint8_t max_flag, 00245 uint8_t maybe_flag); 00246 00247 SEL_ARG(enum Type type_arg) 00248 : 00249 min_flag(0), 00250 elements(1), 00251 use_count(1), 00252 left(0), 00253 right(0), 00254 next_key_part(0), 00255 color(BLACK), 00256 type(type_arg) 00257 {} 00258 00259 int size() const 00260 { 00261 return elements; 00262 } 00263 00264 inline bool is_same(SEL_ARG *arg) 00265 { 00266 if (type != arg->type || part != arg->part) 00267 return 0; 00268 if (type != KEY_RANGE) 00269 return 1; 00270 return (cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0); 00271 } 00272 00273 inline void merge_flags(SEL_ARG *arg) 00274 { 00275 maybe_flag|= arg->maybe_flag; 00276 } 00277 00278 inline void maybe_smaller() 00279 { 00280 maybe_flag= 1; 00281 } 00282 00283 /* Return true iff it's a single-point null interval */ 00284 inline bool is_null_interval() 00285 { 00286 return (maybe_null && max_value[0] == 1); 00287 } 00288 00289 inline int cmp_min_to_min(SEL_ARG *arg) 00290 { 00291 return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag); 00292 } 00293 00294 inline int cmp_min_to_max(SEL_ARG *arg) 00295 { 00296 return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag); 00297 } 00298 00299 inline int cmp_max_to_max(SEL_ARG *arg) 00300 { 00301 return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag); 00302 } 00303 00304 inline int cmp_max_to_min(SEL_ARG *arg) 00305 { 00306 return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag); 00307 } 00308 00309 SEL_ARG *clone_and(SEL_ARG *arg); 00310 00311 SEL_ARG *clone_first(SEL_ARG *arg); 00312 00313 SEL_ARG *clone_last(SEL_ARG *arg); 00314 00315 SEL_ARG *clone(RangeParameter *param, SEL_ARG *new_parent, SEL_ARG **next); 00316 00317 bool copy_min(SEL_ARG *arg); 00318 00319 bool copy_max(SEL_ARG *arg); 00320 00321 void copy_min_to_min(SEL_ARG *arg); 00322 00323 void copy_min_to_max(SEL_ARG *arg); 00324 00325 void copy_max_to_min(SEL_ARG *arg); 00326 00327 /* returns a number of keypart values (0 or 1) appended to the key buffer */ 00328 int store_min(uint32_t length, unsigned char **min_key, uint32_t min_key_flag); 00329 00330 /* returns a number of keypart values (0 or 1) appended to the key buffer */ 00331 int store_max(uint32_t length, unsigned char **max_key, uint32_t max_key_flag); 00332 00333 /* returns a number of keypart values appended to the key buffer */ 00334 int store_min_key(KEY_PART *key, unsigned char **range_key, uint32_t *range_key_flag); 00335 00336 /* returns a number of keypart values appended to the key buffer */ 00337 int store_max_key(KEY_PART *key, unsigned char **range_key, uint32_t *range_key_flag); 00338 00339 SEL_ARG *insert(SEL_ARG *key); 00340 SEL_ARG *tree_delete(SEL_ARG *key); 00341 SEL_ARG *find_range(SEL_ARG *key); 00342 SEL_ARG *rb_insert(SEL_ARG *leaf); 00343 00344 friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par); 00345 00346 SEL_ARG *first(); 00347 00348 SEL_ARG *last(); 00349 00350 void make_root(); 00351 00352 inline bool simple_key() 00353 { 00354 return (! next_key_part && elements == 1); 00355 } 00356 00357 void increment_use_count(long count) 00358 { 00359 if (next_key_part) 00360 { 00361 next_key_part->use_count+= count; 00362 count*= (next_key_part->use_count - count); 00363 for (SEL_ARG *pos= next_key_part->first(); pos; pos= pos->next) 00364 if (pos->next_key_part) 00365 pos->increment_use_count(count); 00366 } 00367 } 00368 00369 void free_tree() 00370 { 00371 for (SEL_ARG *pos= first(); pos; pos= pos->next) 00372 if (pos->next_key_part) 00373 { 00374 pos->next_key_part->use_count--; 00375 pos->next_key_part->free_tree(); 00376 } 00377 } 00378 00379 inline SEL_ARG **parent_ptr() 00380 { 00381 return parent->left == this ? &parent->left : &parent->right; 00382 } 00383 00384 00385 /* 00386 Check if this SEL_ARG object represents a single-point interval 00387 00388 SYNOPSIS 00389 is_singlepoint() 00390 00391 DESCRIPTION 00392 Check if this SEL_ARG object (not tree) represents a single-point 00393 interval, i.e. if it represents a "keypart = const" or 00394 "keypart IS NULL". 00395 00396 RETURN 00397 true This SEL_ARG object represents a singlepoint interval 00398 false Otherwise 00399 */ 00400 00401 bool is_singlepoint() 00402 { 00403 /* 00404 Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field) 00405 flags, and the same for right edge. 00406 */ 00407 if (min_flag || max_flag) 00408 return false; 00409 unsigned char *min_val= min_value; 00410 unsigned char *max_val= max_value; 00411 00412 if (maybe_null) 00413 { 00414 /* First byte is a NULL value indicator */ 00415 if (*min_val != *max_val) 00416 return false; 00417 00418 if (*min_val) 00419 return true; /* This "x IS NULL" */ 00420 min_val++; 00421 max_val++; 00422 } 00423 return ! field->key_cmp(min_val, max_val); 00424 } 00425 00426 SEL_ARG *clone_tree(RangeParameter *param); 00427 00428 private: 00429 00430 /* 00431 Check if a compare is ok, when one takes ranges in account 00432 Returns -2 or 2 if the ranges where 'joined' like < 2 and >= 2 00433 */ 00434 int sel_cmp(Field *in_field, 00435 unsigned char *a, 00436 unsigned char *b, 00437 uint8_t a_flag, 00438 uint8_t b_flag) 00439 { 00440 int cmp= 0; 00441 /* First check if there was a compare to a min or max element */ 00442 if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) 00443 { 00444 if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) == 00445 (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))) 00446 return 0; 00447 return (a_flag & NO_MIN_RANGE) ? -1 : 1; 00448 } 00449 if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) 00450 return (b_flag & NO_MIN_RANGE) ? 1 : -1; 00451 00452 if (in_field->real_maybe_null()) // If null is part of key 00453 { 00454 if (*a != *b) 00455 { 00456 return *a ? -1 : 1; 00457 } 00458 if (*a) 00459 goto end; // NULL where equal 00460 a++; b++; // Skip NULL marker 00461 } 00462 cmp= in_field->key_cmp(a , b); 00463 if (cmp) return cmp < 0 ? -1 : 1; // The values differed 00464 00465 // Check if the compared equal arguments was defined with open/closed range 00466 end: 00467 if (a_flag & (NEAR_MIN | NEAR_MAX)) 00468 { 00469 if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX))) 00470 return 0; 00471 if (! (b_flag & (NEAR_MIN | NEAR_MAX))) 00472 return (a_flag & NEAR_MIN) ? 2 : -2; 00473 return (a_flag & NEAR_MIN) ? 1 : -1; 00474 } 00475 if (b_flag & (NEAR_MIN | NEAR_MAX)) 00476 return (b_flag & NEAR_MIN) ? -2 : 2; 00477 return 0; // The elements where equal 00478 } 00479 00480 00481 }; 00482 00483 SEL_ARG *rb_delete_fixup(SEL_ARG *root, 00484 SEL_ARG *key, 00485 SEL_ARG *par); 00486 00487 extern SEL_ARG null_element; 00488 00489 } /* namespace optimizer */ 00490 00491 } /* namespace drizzled */ 00492