Drizzled Public API Documentation

key_field.cc
00001 /* - mode: c; c-basic-offset: 2; indent-tabs-mode: nil; -*-
00002  *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
00003  *
00004  *  Copyright (C) 2008-2009 Sun Microsystems, Inc.
00005  *
00006  *  This program is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License as published by
00008  *  the Free Software Foundation; either version 2 of the License, or
00009  *  (at your option) any later version.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License
00017  *  along with this program; if not, write to the Free Software
00018  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00019  */
00020 
00021 #include <config.h>
00022 
00023 #include <drizzled/sql_select.h>
00024 #include <drizzled/nested_join.h>
00025 #include <drizzled/item/cmpfunc.h>
00026 #include <drizzled/table.h>
00027 #include <drizzled/optimizer/key_field.h>
00028 #include <drizzled/optimizer/key_use.h>
00029 #include <drizzled/sql_lex.h>
00030 #include <drizzled/item/subselect.h>
00031 
00032 #include <vector>
00033 
00034 using namespace std;
00035 
00036 namespace drizzled
00037 {
00038 
00039 void optimizer::add_key_part(DYNAMIC_ARRAY *keyuse_array,
00040                              optimizer::KeyField *key_field)
00041 {
00042   Field *field= key_field->getField();
00043   Table *form= field->getTable();
00044 
00045   if (key_field->isEqualityCondition() &&
00046       ! (key_field->getOptimizeFlags() & KEY_OPTIMIZE_EXISTS))
00047   {
00048     for (uint32_t key= 0; key < form->sizeKeys(); key++)
00049     {
00050       if (! (form->keys_in_use_for_query.test(key)))
00051         continue;
00052 
00053       uint32_t key_parts= (uint32_t) form->key_info[key].key_parts;
00054       for (uint32_t part= 0; part < key_parts; part++)
00055       {
00056         if (field->eq(form->key_info[key].key_part[part].field))
00057         {
00058           optimizer::KeyUse keyuse(field->getTable(),
00059                                    key_field->getValue(),
00060                                    key_field->getValue()->used_tables(),
00061                                    key,
00062                                    part,
00063                                    key_field->getOptimizeFlags() & KEY_OPTIMIZE_REF_OR_NULL,
00064                                    1 << part,
00065                                    0,
00066                                    key_field->rejectNullValues(),
00067                                    key_field->getConditionalGuard());
00068           keyuse_array->push_back(&keyuse);
00069         }
00070       }
00071     }
00072   }
00073 }
00074 
00075 void optimizer::add_key_fields_for_nj(Join *join,
00076                                       TableList *nested_join_table,
00077                                       optimizer::KeyField **end,
00078                                       uint32_t *and_level,
00079                                       vector<optimizer::SargableParam> &sargables)
00080 {
00081   List<TableList>::iterator li(nested_join_table->getNestedJoin()->join_list.begin());
00082   List<TableList>::iterator li2(nested_join_table->getNestedJoin()->join_list.begin());
00083   bool have_another= false;
00084   table_map tables= 0;
00085   TableList *table;
00086   assert(nested_join_table->getNestedJoin());
00087 
00088   while ((table= li++) || (have_another && (li=li2, have_another=false,
00089                                             (table= li++))))
00090   {
00091     if (table->getNestedJoin())
00092     {
00093       if (! table->on_expr)
00094       {
00095         /* It's a semi-join nest. Walk into it as if it wasn't a nest */
00096         have_another= true;
00097         li2= li;
00098         li= List<TableList>::iterator(table->getNestedJoin()->join_list.begin());
00099       }
00100       else
00101         add_key_fields_for_nj(join, table, end, and_level, sargables);
00102     }
00103     else
00104       if (! table->on_expr)
00105         tables|= table->table->map;
00106   }
00107   if (nested_join_table->on_expr)
00108   {
00109     add_key_fields(join,
00110                    end,
00111                    and_level,
00112                    nested_join_table->on_expr,
00113                    tables,
00114                    sargables);
00115   }
00116 }
00117 
00118 optimizer::KeyField *optimizer::merge_key_fields(optimizer::KeyField *start,
00119                                                  optimizer::KeyField *new_fields,
00120                                                  optimizer::KeyField *end,
00121                                                  uint32_t and_level)
00122 {
00123   if (start == new_fields)
00124     return start;       // Impossible or
00125   if (new_fields == end)
00126     return start;       // No new fields, skip all
00127 
00128   optimizer::KeyField *first_free= new_fields;
00129 
00130   /* Mark all found fields in old array */
00131   for (; new_fields != end; new_fields++)
00132   {
00133     for (optimizer::KeyField *old= start; old != first_free; old++)
00134     {
00135       if (old->getField() == new_fields->getField())
00136       {
00137         /*
00138           NOTE: below const_item() call really works as "!used_tables()", i.e.
00139           it can return false where it is feasible to make it return true.
00140 
00141           The cause is as follows: Some of the tables are already known to be
00142           const tables (the detection code is in make_join_statistics(),
00143           above the update_ref_and_keys() call), but we didn't propagate
00144           information about this: Table::const_table is not set to true, and
00145           Item::update_used_tables() hasn't been called for each item.
00146           The result of this is that we're missing some 'ref' accesses.
00147           TODO: OptimizerTeam: Fix this
00148         */
00149         if (! new_fields->getValue()->const_item())
00150         {
00151           /*
00152             If the value matches, we can use the key reference.
00153             If not, we keep it until we have examined all new values
00154           */
00155           if (old->getValue()->eq(new_fields->getValue(), old->getField()->binary()))
00156           {
00157             old->setLevel(and_level);
00158             old->setOptimizeFlags(((old->getOptimizeFlags() &
00159                                     new_fields->getOptimizeFlags() &
00160                                     KEY_OPTIMIZE_EXISTS) |
00161                                    ((old->getOptimizeFlags() |
00162                                      new_fields->getOptimizeFlags()) &
00163                                     KEY_OPTIMIZE_REF_OR_NULL)));
00164             old->setRejectNullValues(old->rejectNullValues() &&
00165                                      new_fields->rejectNullValues());
00166           }
00167         }
00168         else if (old->isEqualityCondition() &&
00169                  new_fields->isEqualityCondition() &&
00170                  old->getValue()->eq_by_collation(new_fields->getValue(),
00171                                                   old->getField()->binary(),
00172                                                   old->getField()->charset()))
00173 
00174         {
00175           old->setLevel(and_level);
00176           old->setOptimizeFlags(((old->getOptimizeFlags() &
00177                                   new_fields->getOptimizeFlags() &
00178                                   KEY_OPTIMIZE_EXISTS) |
00179                                  ((old->getOptimizeFlags() |
00180                                    new_fields->getOptimizeFlags()) &
00181                                  KEY_OPTIMIZE_REF_OR_NULL)));
00182           old->setRejectNullValues(old->rejectNullValues() &&
00183                                    new_fields->rejectNullValues());
00184         }
00185         else if (old->isEqualityCondition() &&
00186                  new_fields->isEqualityCondition() &&
00187                  ((old->getValue()->const_item() &&
00188                    old->getValue()->is_null()) ||
00189                    new_fields->getValue()->is_null()))
00190         {
00191           /* field = expression OR field IS NULL */
00192           old->setLevel(and_level);
00193           old->setOptimizeFlags(KEY_OPTIMIZE_REF_OR_NULL);
00194           /*
00195             Remember the NOT NULL value unless the value does not depend
00196             on other tables.
00197            */
00198           if (! old->getValue()->used_tables() &&
00199               old->getValue()->is_null())
00200           {
00201             old->setValue(new_fields->getValue());
00202           }
00203           /* The referred expression can be NULL: */
00204           old->setRejectNullValues(false);
00205         }
00206         else
00207         {
00208           /*
00209             We are comparing two different const.  In this case we can't
00210             use a key-lookup on this so it's better to remove the value
00211             and let the range optimzier handle it
00212           */
00213           if (old == --first_free)    // If last item
00214             break;
00215           *old= *first_free;      // Remove old value
00216           old--;        // Retry this value
00217         }
00218       }
00219     }
00220   }
00221   /* Remove all not used items */
00222   for (optimizer::KeyField *old= start; old != first_free;)
00223   {
00224     if (old->getLevel() != and_level)
00225     {           // Not used in all levels
00226       if (old == --first_free)
00227         break;
00228       *old= *first_free;      // Remove old value
00229       continue;
00230     }
00231     old++;
00232   }
00233   return first_free;
00234 }
00235 
00236 void optimizer::add_key_field(optimizer::KeyField **key_fields,
00237                               uint32_t and_level,
00238                               Item_func *cond,
00239                               Field *field,
00240                               bool eq_func,
00241                               Item **value,
00242                               uint32_t num_values,
00243                               table_map usable_tables,
00244                               vector<optimizer::SargableParam> &sargables)
00245 {
00246   uint32_t exists_optimize= 0;
00247   if (! (field->flags & PART_KEY_FLAG))
00248   {
00249     // Don't remove column IS NULL on a LEFT JOIN table
00250     if (! eq_func || (*value)->type() != Item::NULL_ITEM ||
00251         ! field->getTable()->maybe_null || field->null_ptr)
00252       return;         // Not a key. Skip it
00253     exists_optimize= KEY_OPTIMIZE_EXISTS;
00254     assert(num_values == 1);
00255   }
00256   else
00257   {
00258     table_map used_tables= 0;
00259     bool optimizable= 0;
00260     for (uint32_t i= 0; i < num_values; i++)
00261     {
00262       used_tables|= (value[i])->used_tables();
00263       if (! ((value[i])->used_tables() & (field->getTable()->map | RAND_TABLE_BIT)))
00264         optimizable= 1;
00265     }
00266     if (! optimizable)
00267       return;
00268     if (! (usable_tables & field->getTable()->map))
00269     {
00270       if (! eq_func || (*value)->type() != Item::NULL_ITEM ||
00271           ! field->getTable()->maybe_null || field->null_ptr)
00272         return;         // Can't use left join optimize
00273       exists_optimize= KEY_OPTIMIZE_EXISTS;
00274     }
00275     else
00276     {
00277       JoinTable *stat= field->getTable()->reginfo.join_tab;
00278       key_map possible_keys= field->key_start;
00279       possible_keys&= field->getTable()->keys_in_use_for_query;
00280       stat[0].keys|= possible_keys;             // Add possible keys
00281 
00282       /*
00283         Save the following cases:
00284         Field op constant
00285         Field LIKE constant where constant doesn't start with a wildcard
00286         Field = field2 where field2 is in a different table
00287         Field op formula
00288         Field IS NULL
00289         Field IS NOT NULL
00290         Field BETWEEN ...
00291         Field IN ...
00292       */
00293       stat[0].key_dependent|= used_tables;
00294 
00295       bool is_const= 1;
00296       for (uint32_t i= 0; i < num_values; i++)
00297       {
00298         if (! (is_const&= value[i]->const_item()))
00299           break;
00300       }
00301       if (is_const)
00302         stat[0].const_keys|= possible_keys;
00303       else if (! eq_func)
00304       {
00305         /*
00306           Save info to be able check whether this predicate can be
00307           considered as sargable for range analisis after reading const tables.
00308           We do not save info about equalities as update_const_equal_items
00309           will take care of updating info on keys from sargable equalities.
00310         */
00311         optimizer::SargableParam tmp(field, value, num_values);
00312         sargables.push_back(tmp);
00313       }
00314       /*
00315         We can't always use indexes when comparing a string index to a
00316         number. cmp_type() is checked to allow compare of dates to numbers.
00317         eq_func is NEVER true when num_values > 1
00318        */
00319       if (! eq_func)
00320       {
00321         /*
00322           Additional optimization: if we're processing
00323           "t.key BETWEEN c1 AND c1" then proceed as if we were processing
00324           "t.key = c1".
00325           TODO: This is a very limited fix. A more generic fix is possible.
00326           There are 2 options:
00327           A) Make equality propagation code be able to handle BETWEEN
00328              (including cases like t1.key BETWEEN t2.key AND t3.key)
00329           B) Make range optimizer to infer additional "t.key = c" equalities
00330              and use them in equality propagation process (see details in
00331              OptimizerKBAndTodo)
00332         */
00333         if ((cond->functype() != Item_func::BETWEEN) ||
00334             ((Item_func_between*) cond)->negated ||
00335             ! value[0]->eq(value[1], field->binary()))
00336           return;
00337         eq_func= true;
00338       }
00339 
00340       if (field->result_type() == STRING_RESULT)
00341       {
00342         if ((*value)->result_type() != STRING_RESULT)
00343         {
00344           if (field->cmp_type() != (*value)->result_type())
00345             return;
00346         }
00347         else
00348         {
00349           /*
00350             We can't use indexes if the effective collation
00351             of the operation differ from the field collation.
00352           */
00353           if (field->cmp_type() == STRING_RESULT &&
00354               ((Field_str*)field)->charset() != cond->compare_collation())
00355             return;
00356         }
00357       }
00358     }
00359   }
00360   /*
00361     For the moment eq_func is always true. This slot is reserved for future
00362     extensions where we want to remembers other things than just eq comparisons
00363   */
00364   assert(eq_func);
00365   /* Store possible eq field */
00366   (*key_fields)->setField(field);
00367   (*key_fields)->setEqualityConditionUsed(eq_func);
00368   (*key_fields)->setValue(*value);
00369   (*key_fields)->setLevel(and_level);
00370   (*key_fields)->setOptimizeFlags(exists_optimize);
00371   /*
00372     If the condition has form "tbl.keypart = othertbl.field" and
00373     othertbl.field can be NULL, there will be no matches if othertbl.field
00374     has NULL value.
00375     We use null_rejecting in add_not_null_conds() to add
00376     'othertbl.field IS NOT NULL' to tab->select_cond.
00377   */
00378   (*key_fields)->setRejectNullValues((cond->functype() == Item_func::EQ_FUNC ||
00379                                       cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
00380                                      ((*value)->type() == Item::FIELD_ITEM) &&
00381                                      ((Item_field*)*value)->field->maybe_null());
00382   (*key_fields)->setConditionalGuard(NULL);
00383   (*key_fields)++;
00384 }
00385 
00386 void optimizer::add_key_equal_fields(optimizer::KeyField **key_fields,
00387                                      uint32_t and_level,
00388                                      Item_func *cond,
00389                                      Item_field *field_item,
00390                                      bool eq_func,
00391                                      Item **val,
00392                                      uint32_t num_values,
00393                                      table_map usable_tables,
00394                                      vector<optimizer::SargableParam> &sargables)
00395 {
00396   Field *field= field_item->field;
00397   add_key_field(key_fields, and_level, cond, field,
00398                 eq_func, val, num_values, usable_tables, sargables);
00399   Item_equal *item_equal= field_item->item_equal;
00400   if (item_equal)
00401   {
00402     /*
00403       Add to the set of possible key values every substitution of
00404       the field for an equal field included into item_equal
00405     */
00406     Item_equal_iterator it(item_equal->begin());
00407     Item_field *item;
00408     while ((item= it++))
00409     {
00410       if (! field->eq(item->field))
00411       {
00412         add_key_field(key_fields, and_level, cond, item->field,
00413                       eq_func, val, num_values, usable_tables,
00414                       sargables);
00415       }
00416     }
00417   }
00418 }
00419 
00420 void optimizer::add_key_fields(Join *join,
00421                                optimizer::KeyField **key_fields,
00422                                uint32_t *and_level,
00423                                COND *cond,
00424                                table_map usable_tables,
00425                                vector<optimizer::SargableParam> &sargables)
00426 {
00427   if (cond->type() == Item_func::COND_ITEM)
00428   {
00429     List<Item>::iterator li(((Item_cond*) cond)->argument_list()->begin());
00430     optimizer::KeyField *org_key_fields= *key_fields;
00431 
00432     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
00433     {
00434       Item *item;
00435       while ((item= li++))
00436       {
00437         add_key_fields(join,
00438                        key_fields,
00439                        and_level,
00440                        item,
00441                        usable_tables,
00442                        sargables);
00443       }
00444       for (; org_key_fields != *key_fields; org_key_fields++)
00445         org_key_fields->setLevel(*and_level);
00446     }
00447     else
00448     {
00449       (*and_level)++;
00450       add_key_fields(join,
00451                      key_fields,
00452                      and_level,
00453                      li++,
00454                      usable_tables,
00455                      sargables);
00456       Item *item;
00457       while ((item= li++))
00458       {
00459         optimizer::KeyField *start_key_fields= *key_fields;
00460         (*and_level)++;
00461         add_key_fields(join,
00462                        key_fields,
00463                        and_level,
00464                        item,
00465                        usable_tables,
00466                        sargables);
00467         *key_fields= merge_key_fields(org_key_fields, start_key_fields,
00468                                       *key_fields, ++(*and_level));
00469       }
00470     }
00471     return;
00472   }
00473 
00474   /*
00475     Subquery optimization: Conditions that are pushed down into subqueries
00476     are wrapped into Item_func_trig_cond. We process the wrapped condition
00477     but need to set cond_guard for KeyUse elements generated from it.
00478   */
00479   {
00480     if (cond->type() == Item::FUNC_ITEM &&
00481         ((Item_func*)cond)->functype() == Item_func::TRIG_COND_FUNC)
00482     {
00483       Item *cond_arg= ((Item_func*)cond)->arguments()[0];
00484       if (! join->group_list &&
00485           ! join->order &&
00486           join->unit->item &&
00487           join->unit->item->substype() == Item_subselect::IN_SUBS &&
00488           ! join->unit->is_union())
00489       {
00490         optimizer::KeyField *save= *key_fields;
00491         add_key_fields(join,
00492                        key_fields,
00493                        and_level,
00494                        cond_arg,
00495                        usable_tables,
00496                        sargables);
00497         /* Indicate that this ref access candidate is for subquery lookup */
00498         for (; save != *key_fields; save++)
00499           save->setConditionalGuard(((Item_func_trig_cond*)cond)->get_trig_var());
00500       }
00501       return;
00502     }
00503   }
00504 
00505   /* If item is of type 'field op field/constant' add it to key_fields */
00506   if (cond->type() != Item::FUNC_ITEM)
00507     return;
00508   Item_func *cond_func= (Item_func*) cond;
00509   switch (cond_func->select_optimize())
00510   {
00511   case Item_func::OPTIMIZE_NONE:
00512     break;
00513   case Item_func::OPTIMIZE_KEY:
00514   {
00515     Item **values;
00516     // BETWEEN, IN, NE
00517     if (cond_func->key_item()->real_item()->type() == Item::FIELD_ITEM &&
00518         ! (cond_func->used_tables() & OUTER_REF_TABLE_BIT))
00519     {
00520       values= cond_func->arguments() + 1;
00521       if (cond_func->functype() == Item_func::NE_FUNC &&
00522           cond_func->arguments()[1]->real_item()->type() == Item::FIELD_ITEM &&
00523           ! (cond_func->arguments()[0]->used_tables() & OUTER_REF_TABLE_BIT))
00524         values--;
00525       assert(cond_func->functype() != Item_func::IN_FUNC ||
00526              cond_func->argument_count() != 2);
00527       add_key_equal_fields(key_fields, *and_level, cond_func,
00528                            (Item_field*) (cond_func->key_item()->real_item()),
00529                            0, values,
00530                            cond_func->argument_count()-1,
00531                            usable_tables, sargables);
00532     }
00533     if (cond_func->functype() == Item_func::BETWEEN)
00534     {
00535       values= cond_func->arguments();
00536       for (uint32_t i= 1 ; i < cond_func->argument_count(); i++)
00537       {
00538         Item_field *field_item;
00539         if (cond_func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM
00540             &&
00541             ! (cond_func->arguments()[i]->used_tables() & OUTER_REF_TABLE_BIT))
00542         {
00543           field_item= (Item_field *) (cond_func->arguments()[i]->real_item());
00544           add_key_equal_fields(key_fields, *and_level, cond_func,
00545                                field_item, 0, values, 1, usable_tables,
00546                                sargables);
00547         }
00548       }
00549     }
00550     break;
00551   }
00552   case Item_func::OPTIMIZE_OP:
00553   {
00554     bool equal_func= (cond_func->functype() == Item_func::EQ_FUNC ||
00555                       cond_func->functype() == Item_func::EQUAL_FUNC);
00556 
00557     if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM &&
00558         ! (cond_func->arguments()[0]->used_tables() & OUTER_REF_TABLE_BIT))
00559     {
00560       add_key_equal_fields(key_fields, *and_level, cond_func,
00561                            (Item_field*) (cond_func->arguments()[0])->real_item(),
00562                            equal_func,
00563                            cond_func->arguments()+1, 1, usable_tables,
00564                            sargables);
00565     }
00566     if (cond_func->arguments()[1]->real_item()->type() == Item::FIELD_ITEM &&
00567         cond_func->functype() != Item_func::LIKE_FUNC &&
00568         ! (cond_func->arguments()[1]->used_tables() & OUTER_REF_TABLE_BIT))
00569     {
00570       add_key_equal_fields(key_fields, *and_level, cond_func,
00571                            (Item_field*) (cond_func->arguments()[1])->real_item(), equal_func,
00572                            cond_func->arguments(),1,usable_tables,
00573                            sargables);
00574     }
00575     break;
00576   }
00577   case Item_func::OPTIMIZE_NULL:
00578     /* column_name IS [NOT] NULL */
00579     if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM &&
00580         ! (cond_func->used_tables() & OUTER_REF_TABLE_BIT))
00581     {
00582       Item *tmp= new Item_null;
00583       if (unlikely(! tmp))                       // Should never be true
00584         return;
00585       add_key_equal_fields(key_fields, *and_level, cond_func,
00586                            (Item_field*) (cond_func->arguments()[0])->real_item(),
00587                            cond_func->functype() == Item_func::ISNULL_FUNC,
00588                            &tmp, 1, usable_tables, sargables);
00589     }
00590     break;
00591   case Item_func::OPTIMIZE_EQUAL:
00592     Item_equal *item_equal= (Item_equal *) cond;
00593     Item *const_item= item_equal->get_const();
00594     Item_equal_iterator it(item_equal->begin());
00595     Item_field *item;
00596     if (const_item)
00597     {
00598       /*
00599         For each field field1 from item_equal consider the equality
00600         field1=const_item as a condition allowing an index access of the table
00601         with field1 by the keys value of field1.
00602       */
00603       while ((item= it++))
00604       {
00605         add_key_field(key_fields, *and_level, cond_func, item->field,
00606                       true, &const_item, 1, usable_tables, sargables);
00607       }
00608     }
00609     else
00610     {
00611       /*
00612         Consider all pairs of different fields included into item_equal.
00613         For each of them (field1, field1) consider the equality
00614         field1=field2 as a condition allowing an index access of the table
00615         with field1 by the keys value of field2.
00616       */
00617       Item_equal_iterator fi(item_equal->begin());
00618       while ((item= fi++))
00619       {
00620         Field *field= item->field;
00621         while ((item= it++))
00622         {
00623           if (! field->eq(item->field))
00624           {
00625             add_key_field(key_fields, *and_level, cond_func, field,
00626                           true, (Item **) &item, 1, usable_tables,
00627                           sargables);
00628           }
00629         }
00630         it= item_equal->begin();
00631       }
00632     }
00633     break;
00634   }
00635 }
00636 
00637 } /* namespace drizzled */