Drizzled Public API Documentation

sum.h
00001 /* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
00002  *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
00003  *
00004  *  Copyright (C) 2008 Sun Microsystems, Inc.
00005  *
00006  *  This program is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License as published by
00008  *  the Free Software Foundation; version 2 of the License.
00009  *
00010  *  This program is distributed in the hope that it will be useful,
00011  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  *  GNU General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with this program; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00018  */
00019 
00020 #pragma once
00021 
00022 /* classes for sum functions */
00023 
00024 #include <drizzled/tree.h>
00025 #include <drizzled/hybrid_type.h>
00026 #include <drizzled/item.h>
00027 #include <drizzled/item/field.h>
00028 #include <drizzled/item/bin_string.h>
00029 #include <drizzled/charset_info.h>
00030 
00031 namespace drizzled
00032 {
00033 
00034 int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
00035                                        const void* key2);
00036 
00037 int group_concat_key_cmp_with_order(void* arg, const void* key1,
00038                                     const void* key2);
00039 
00040 class Select_Lex;
00041 struct Order;
00042 
00043 /*
00044   Class Item_sum is the base class used for special expressions that SQL calls
00045   'set functions'. These expressions are formed with the help of aggregate
00046   functions such as SUM, MAX, GROUP_CONCAT etc.
00047 
00048  GENERAL NOTES
00049 
00050   A set function cannot be used in certain positions where expressions are
00051   accepted. There are some quite explicable restrictions for the usage of
00052   set functions.
00053 
00054   In the query:
00055     SELECT AVG(b) FROM t1 WHERE SUM(b) > 20 GROUP by a
00056   the usage of the set function AVG(b) is legal, while the usage of SUM(b)
00057   is illegal. A WHERE condition must contain expressions that can be
00058   evaluated for each row of the table. Yet the expression SUM(b) can be
00059   evaluated only for each group of rows with the same value of column a.
00060   In the query:
00061     SELECT AVG(b) FROM t1 WHERE c > 30 GROUP BY a HAVING SUM(b) > 20
00062   both set function expressions AVG(b) and SUM(b) are legal.
00063 
00064   We can say that in a query without nested selects an occurrence of a
00065   set function in an expression of the SELECT list or/and in the HAVING
00066   clause is legal, while in the WHERE clause it's illegal.
00067 
00068   The general rule to detect whether a set function is legal in a query with
00069   nested subqueries is much more complicated.
00070 
00071   Consider the the following query:
00072     SELECT t1.a FROM t1 GROUP BY t1.a
00073       HAVING t1.a > ALL (SELECT t2.c FROM t2 WHERE SUM(t1.b) < t2.c).
00074   The set function SUM(b) is used here in the WHERE clause of the subquery.
00075   Nevertheless it is legal since it is under the HAVING clause of the query
00076   to which this function relates. The expression SUM(t1.b) is evaluated
00077   for each group defined in the main query, not for groups of the subquery.
00078 
00079   The problem of finding the query where to aggregate a particular
00080   set function is not so simple as it seems to be.
00081 
00082   In the query:
00083     SELECT t1.a FROM t1 GROUP BY t1.a
00084      HAVING t1.a > ALL(SELECT t2.c FROM t2 GROUP BY t2.c
00085                          HAVING SUM(t1.a) < t2.c)
00086   the set function can be evaluated for both outer and inner selects.
00087   If we evaluate SUM(t1.a) for the outer query then we get the value of t1.a
00088   multiplied by the cardinality of a group in table t1. In this case
00089   in each correlated subquery SUM(t1.a) is used as a constant. But we also
00090   can evaluate SUM(t1.a) for the inner query. In this case t1.a will be a
00091   constant for each correlated subquery and summation is performed
00092   for each group of table t2.
00093   (Here it makes sense to remind that the query
00094     SELECT c FROM t GROUP BY a HAVING SUM(1) < a
00095   is quite legal in our SQL).
00096 
00097   So depending on what query we assign the set function to we
00098   can get different result sets.
00099 
00100   The general rule to detect the query where a set function is to be
00101   evaluated can be formulated as follows.
00102   Consider a set function S(E) where E is an expression with occurrences
00103   of column references C1, ..., CN. Resolve these column references against
00104   subqueries that contain the set function S(E). Let Q be the innermost
00105   subquery of those subqueries. (It should be noted here that S(E)
00106   in no way can be evaluated in the subquery embedding the subquery Q,
00107   otherwise S(E) would refer to at least one unbound column reference)
00108   If S(E) is used in a construct of Q where set functions are allowed then
00109   we evaluate S(E) in Q.
00110   Otherwise we look for a innermost subquery containing S(E) of those where
00111   usage of S(E) is allowed.
00112 
00113   Let's demonstrate how this rule is applied to the following queries.
00114 
00115   1. SELECT t1.a FROM t1 GROUP BY t1.a
00116        HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b
00117                            HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c
00118                                                 HAVING SUM(t1.a+t2.b) < t3.c))
00119   For this query the set function SUM(t1.a+t2.b) depends on t1.a and t2.b
00120   with t1.a defined in the outermost query, and t2.b defined for its
00121   subquery. The set function is in the HAVING clause of the subquery and can
00122   be evaluated in this subquery.
00123 
00124   2. SELECT t1.a FROM t1 GROUP BY t1.a
00125        HAVING t1.a > ALL(SELECT t2.b FROM t2
00126                            WHERE t2.b > ALL (SELECT t3.c FROM t3 GROUP BY t3.c
00127                                                HAVING SUM(t1.a+t2.b) < t3.c))
00128   Here the set function SUM(t1.a+t2.b)is in the WHERE clause of the second
00129   subquery - the most upper subquery where t1.a and t2.b are defined.
00130   If we evaluate the function in this subquery we violate the context rules.
00131   So we evaluate the function in the third subquery (over table t3) where it
00132   is used under the HAVING clause.
00133 
00134   3. SELECT t1.a FROM t1 GROUP BY t1.a
00135        HAVING t1.a > ALL(SELECT t2.b FROM t2
00136                            WHERE t2.b > ALL (SELECT t3.c FROM t3
00137                                                WHERE SUM(t1.a+t2.b) < t3.c))
00138   In this query evaluation of SUM(t1.a+t2.b) is not legal neither in the second
00139   nor in the third subqueries. So this query is invalid.
00140 
00141   Mostly set functions cannot be nested. In the query
00142     SELECT t1.a from t1 GROUP BY t1.a HAVING AVG(SUM(t1.b)) > 20
00143   the expression SUM(b) is not acceptable, though it is under a HAVING clause.
00144   Yet it is acceptable in the query:
00145     SELECT t.1 FROM t1 GROUP BY t1.a HAVING SUM(t1.b) > 20.
00146 
00147   An argument of a set function does not have to be a reference to a table
00148   column as we saw it in examples above. This can be a more complex expression
00149     SELECT t1.a FROM t1 GROUP BY t1.a HAVING SUM(t1.b+1) > 20.
00150   The expression SUM(t1.b+1) has a very clear semantics in this context:
00151   we sum up the values of t1.b+1 where t1.b varies for all values within a
00152   group of rows that contain the same t1.a value.
00153 
00154   A set function for an outer query yields a constant within a subquery. So
00155   the semantics of the query
00156     SELECT t1.a FROM t1 GROUP BY t1.a
00157       HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
00158                         HAVING AVG(t2.c+SUM(t1.b)) > 20)
00159   is still clear. For a group of the rows with the same t1.a values we
00160   calculate the value of SUM(t1.b). This value 's' is substituted in the
00161   the subquery:
00162     SELECT t2.c FROM t2 GROUP BY t2.c HAVING AVG(t2.c+s)
00163   than returns some result set.
00164 
00165   By the same reason the following query with a subquery
00166     SELECT t1.a FROM t1 GROUP BY t1.a
00167       HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
00168                         HAVING AVG(SUM(t1.b)) > 20)
00169   is also acceptable.
00170 
00171  IMPLEMENTATION NOTES
00172 
00173   Three methods were added to the class to check the constraints specified
00174   in the previous section. These methods utilize several new members.
00175 
00176   The field 'nest_level' contains the number of the level for the subquery
00177   containing the set function. The main SELECT is of level 0, its subqueries
00178   are of levels 1, the subqueries of the latter are of level 2 and so on.
00179 
00180   The field 'aggr_level' is to contain the nest level of the subquery
00181   where the set function is aggregated.
00182 
00183   The field 'max_arg_level' is for the maximun of the nest levels of the
00184   unbound column references occurred in the set function. A column reference
00185   is unbound  within a set function if it is not bound by any subquery
00186   used as a subexpression in this function. A column reference is bound by
00187   a subquery if it is a reference to the column by which the aggregation
00188   of some set function that is used in the subquery is calculated.
00189   For the set function used in the query
00190     SELECT t1.a FROM t1 GROUP BY t1.a
00191       HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b
00192                           HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c
00193                                               HAVING SUM(t1.a+t2.b) < t3.c))
00194   the value of max_arg_level is equal to 1 since t1.a is bound in the main
00195   query, and t2.b is bound by the first subquery whose nest level is 1.
00196   Obviously a set function cannot be aggregated in the subquery whose
00197   nest level is less than max_arg_level. (Yet it can be aggregated in the
00198   subqueries whose nest level is greater than max_arg_level.)
00199   In the query
00200     SELECT t.a FROM t1 HAVING AVG(t1.a+(SELECT MIN(t2.c) FROM t2))
00201   the value of the max_arg_level for the AVG set function is 0 since
00202   the reference t2.c is bound in the subquery.
00203 
00204   The field 'max_sum_func_level' is to contain the maximum of the
00205   nest levels of the set functions that are used as subexpressions of
00206   the arguments of the given set function, but not aggregated in any
00207   subquery within this set function. A nested set function s1 can be
00208   used within set function s0 only if s1.max_sum_func_level <
00209   s0.max_sum_func_level. Set function s1 is considered as nested
00210   for set function s0 if s1 is not calculated in any subquery
00211   within s0.
00212 
00213   A set function that is used as a subexpression in an argument of another
00214   set function refers to the latter via the field 'in_sum_func'.
00215 
00216   The condition imposed on the usage of set functions are checked when
00217   we traverse query subexpressions with the help of the recursive method
00218   fix_fields. When we apply this method to an object of the class
00219   Item_sum, first, on the descent, we call the method init_sum_func_check
00220   that initialize members used at checking. Then, on the ascent, we
00221   call the method check_sum_func that validates the set function usage
00222   and reports an error if it is illegal.
00223   The method register_sum_func serves to link the items for the set functions
00224   that are aggregated in the embedding (sub)queries. Circular chains of such
00225   functions are attached to the corresponding Select_Lex structures
00226   through the field inner_sum_func_list.
00227 
00228   Exploiting the fact that the members mentioned above are used in one
00229   recursive function we could have allocated them on the thread stack.
00230   Yet we don't do it now.
00231 
00232   We assume that the nesting level of subquries does not exceed 127.
00233   TODO: to catch queries where the limit is exceeded to make the
00234   code clean here.
00235 
00236 */
00237 
00238 class Item_sum :public Item_result_field
00239 {
00240 public:
00241   enum Sumfunctype
00242   { COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC,
00243     AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC,
00244     VARIANCE_FUNC, SUM_BIT_FUNC, GROUP_CONCAT_FUNC
00245   };
00246 
00247   Item **args, *tmp_args[2];
00248   Item **ref_by; /* pointer to a ref to the object used to register it */
00249   Item_sum *next; /* next in the circular chain of registered objects  */
00250   uint32_t arg_count;
00251   Item_sum *in_sum_func;  /* embedding set function if any */
00252   Select_Lex * aggr_sel; /* select where the function is aggregated       */
00253   int8_t nest_level;        /* number of the nesting level of the set function */
00254   int8_t aggr_level;        /* nesting level of the aggregating subquery       */
00255   int8_t max_arg_level;     /* max level of unbound column references          */
00256   int8_t max_sum_func_level;/* max level of aggregation for embedded functions */
00257   bool quick_group;     /* If incremental update of fields */
00258   /*
00259     This list is used by the check for mixing non aggregated fields and
00260     sum functions in the ONLY_FULL_GROUP_BY_MODE. We save all outer fields
00261     directly or indirectly used under this function it as it's unclear
00262     at the moment of fixing outer field whether it's aggregated or not.
00263   */
00264   List<Item_field> outer_fields;
00265 
00266 protected:
00267   table_map used_tables_cache;
00268   bool forced_const;
00269 
00270 public:
00271 
00272   void mark_as_sum_func();
00273   Item_sum() :arg_count(0), quick_group(1), forced_const(false)
00274   {
00275     mark_as_sum_func();
00276   }
00277   Item_sum(Item *a) :args(tmp_args), arg_count(1), quick_group(1),
00278     forced_const(false)
00279   {
00280     args[0]=a;
00281     mark_as_sum_func();
00282   }
00283   Item_sum( Item *a, Item *b ) :args(tmp_args), arg_count(2), quick_group(1),
00284     forced_const(false)
00285   {
00286     args[0]=a; args[1]=b;
00287     mark_as_sum_func();
00288   }
00289   Item_sum(List<Item> &list);
00290   //Copy constructor, need to perform subselects with temporary tables
00291   Item_sum(Session *session, Item_sum *item);
00292   enum Type type() const { return SUM_FUNC_ITEM; }
00293   virtual enum Sumfunctype sum_func () const=0;
00294 
00295   /*
00296     This method is similar to add(), but it is called when the current
00297     aggregation group changes. Thus it performs a combination of
00298     clear() and add().
00299   */
00300   inline bool reset() { clear(); return add(); };
00301 
00302   /*
00303     Prepare this item for evaluation of an aggregate value. This is
00304     called by reset() when a group changes, or, for correlated
00305     subqueries, between subquery executions.  E.g. for COUNT(), this
00306     method should set count= 0;
00307   */
00308   virtual void clear()= 0;
00309 
00310   /*
00311     This method is called for the next row in the same group. Its
00312     purpose is to aggregate the new value to the previous values in
00313     the group (i.e. since clear() was called last time). For example,
00314     for COUNT(), do count++.
00315   */
00316   virtual bool add()=0;
00317 
00318   /*
00319     Called when new group is started and results are being saved in
00320     a temporary table. Similar to reset(), but must also store value in
00321     result_field. Like reset() it is supposed to reset start value to
00322     default.
00323     This set of methods (reult_field(), reset_field, update_field()) of
00324     Item_sum is used only if quick_group is not null. Otherwise
00325     copy_or_same() is used to obtain a copy of this item.
00326   */
00327   virtual void reset_field()=0;
00328   /*
00329     Called for each new value in the group, when temporary table is in use.
00330     Similar to add(), but uses temporary table field to obtain current value,
00331     Updated value is then saved in the field.
00332   */
00333   virtual void update_field()=0;
00334   virtual bool keep_field_type(void) const { return 0; }
00335   virtual void fix_length_and_dec() { maybe_null=1; null_value=1; }
00336   /*
00337     This method is used for debug purposes to print the name of an
00338     item to the debug log. The second use of this method is as
00339     a helper function of print(), where it is applicable.
00340     To suit both goals it should return a meaningful,
00341     distinguishable and sintactically correct string.  This method
00342     should not be used for runtime type identification, use enum
00343     {Sum}Functype and Item_func::functype()/Item_sum::sum_func()
00344     instead.
00345 
00346     NOTE: for Items inherited from Item_sum, func_name() return part of
00347     function name till first argument (including '(') to make difference in
00348     names for functions with 'distinct' clause and without 'distinct' and
00349     also to make printing of items inherited from Item_sum uniform.
00350   */
00351   virtual const char *func_name() const= 0;
00352   virtual Item *result_item(Field *field)
00353     { return new Item_field(field); }
00354   table_map used_tables() const { return used_tables_cache; }
00355   void update_used_tables ();
00356   void cleanup()
00357   {
00358     Item::cleanup();
00359     forced_const= false;
00360   }
00361   bool is_null() { return null_value; }
00362   void make_const ()
00363   {
00364     used_tables_cache= 0;
00365     forced_const= true;
00366   }
00367   virtual bool const_item() const { return forced_const; }
00368   virtual bool const_during_execution() const { return false; }
00369   void make_field(SendField *field);
00370   virtual void print(String *str);
00371   void fix_num_length_and_dec();
00372 
00373   /*
00374     This function is called by the execution engine to assign 'NO ROWS
00375     FOUND' value to an aggregate item, when the underlying result set
00376     has no rows. Such value, in a general case, may be different from
00377     the default value of the item after 'clear()': e.g. a numeric item
00378     may be initialized to 0 by clear() and to NULL by
00379     no_rows_in_result().
00380   */
00381   void no_rows_in_result() { clear(); }
00382 
00383   virtual bool setup(Session *) {return 0;}
00384   virtual void make_unique(void) {}
00385   Item *get_tmp_table_item(Session *session);
00386   virtual Field *create_tmp_field(bool group, Table *table,
00387                                   uint32_t convert_blob_length);
00388   bool walk(Item_processor processor, bool walk_subquery, unsigned char *argument);
00389   bool init_sum_func_check(Session *session);
00390   bool check_sum_func(Session *session, Item **ref);
00391   bool register_sum_func(Session *session, Item **ref);
00392   Select_Lex *depended_from()
00393     { return (nest_level == aggr_level ? 0 : aggr_sel); }
00394 };
00395 
00396 
00397 class Item_sum_num :public Item_sum
00398 {
00399 protected:
00400   /*
00401    val_xxx() functions may be called several times during the execution of a
00402    query. Derived classes that require extensive calculation in val_xxx()
00403    maintain cache of aggregate value. This variable governs the validity of
00404    that cache.
00405   */
00406   bool is_evaluated;
00407 public:
00408   Item_sum_num() :Item_sum(),is_evaluated(false) {}
00409   Item_sum_num(Item *item_par)
00410     :Item_sum(item_par), is_evaluated(false) {}
00411   Item_sum_num(Item *a, Item* b) :Item_sum(a,b),is_evaluated(false) {}
00412   Item_sum_num(List<Item> &list)
00413     :Item_sum(list), is_evaluated(false) {}
00414   Item_sum_num(Session *session, Item_sum_num *item)
00415     :Item_sum(session, item),is_evaluated(item->is_evaluated) {}
00416   bool fix_fields(Session *, Item **);
00417   int64_t val_int();
00418   String *val_str(String*str);
00419   type::Decimal *val_decimal(type::Decimal *);
00420   void reset_field();
00421 };
00422 
00423 
00424 class Item_sum_int :public Item_sum_num
00425 {
00426 public:
00427   Item_sum_int(Item *item_par) :Item_sum_num(item_par) {}
00428   Item_sum_int(List<Item> &list) :Item_sum_num(list) {}
00429   Item_sum_int(Session *session, Item_sum_int *item) :Item_sum_num(session, item) {}
00430   double val_real() { assert(fixed == 1); return (double) val_int(); }
00431   String *val_str(String*str);
00432   type::Decimal *val_decimal(type::Decimal *);
00433   enum Item_result result_type () const { return INT_RESULT; }
00434   void fix_length_and_dec()
00435   { decimals=0; max_length=21; maybe_null=null_value=0; }
00436 };
00437 
00438 
00439 class Item_sum_sum :public Item_sum_num
00440 {
00441 protected:
00442   Item_result hybrid_type;
00443   double sum;
00444   type::Decimal dec_buffs[2];
00445   uint32_t curr_dec_buff;
00446   void fix_length_and_dec();
00447 
00448 public:
00449   Item_sum_sum(Item *item_par) :Item_sum_num(item_par) {}
00450   Item_sum_sum(Session *session, Item_sum_sum *item);
00451   enum Sumfunctype sum_func () const {return SUM_FUNC;}
00452   void clear();
00453   bool add();
00454   double val_real();
00455   int64_t val_int();
00456   String *val_str(String*str);
00457   type::Decimal *val_decimal(type::Decimal *);
00458   enum Item_result result_type () const { return hybrid_type; }
00459   void reset_field();
00460   void update_field();
00461   void no_rows_in_result() {}
00462   const char *func_name() const { return "sum("; }
00463   Item *copy_or_same(Session* session);
00464 };
00465 
00466 
00467 
00468 /* Common class for SUM(DISTINCT), AVG(DISTINCT) */
00469 
00470 class Unique;
00471 
00472 class Item_sum_distinct :public Item_sum_num
00473 {
00474 protected:
00475   /* storage for the summation result */
00476   uint64_t count;
00477   Hybrid_type val;
00478   /* storage for unique elements */
00479   Unique *tree;
00480   Table *table;
00481   enum enum_field_types table_field_type;
00482   uint32_t tree_key_length;
00483 protected:
00484   Item_sum_distinct(Session *session, Item_sum_distinct *item);
00485 public:
00486   Item_sum_distinct(Item *item_par);
00487   ~Item_sum_distinct();
00488 
00489   bool setup(Session *session);
00490   void clear();
00491   void cleanup();
00492   bool add();
00493   double val_real();
00494   type::Decimal *val_decimal(type::Decimal *);
00495   int64_t val_int();
00496   String *val_str(String *str);
00497 
00498   /* XXX: does it need make_unique? */
00499 
00500   enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
00501   void reset_field() {} // not used
00502   void update_field() {} // not used
00503   virtual void no_rows_in_result() {}
00504   void fix_length_and_dec();
00505   enum Item_result result_type () const;
00506   virtual void calculate_val_and_count();
00507   virtual bool unique_walk_function(void *elem);
00508 };
00509 
00510 
00511 /*
00512   Item_sum_sum_distinct - implementation of SUM(DISTINCT expr).
00513   See also: MySQL manual, chapter 'Adding New Functions To MySQL'
00514   and comments in item_sum.cc.
00515 */
00516 
00517 class Item_sum_sum_distinct :public Item_sum_distinct
00518 {
00519 private:
00520   Item_sum_sum_distinct(Session *session, Item_sum_sum_distinct *item)
00521     :Item_sum_distinct(session, item) {}
00522 public:
00523   Item_sum_sum_distinct(Item *item_arg) :Item_sum_distinct(item_arg) {}
00524 
00525   enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
00526   const char *func_name() const { return "sum(distinct "; }
00527   Item *copy_or_same(Session* session) { return new Item_sum_sum_distinct(session, this); }
00528 };
00529 
00530 
00531 /* Item_sum_avg_distinct - SELECT AVG(DISTINCT expr) FROM ... */
00532 
00533 class Item_sum_avg_distinct: public Item_sum_distinct
00534 {
00535 private:
00536   Item_sum_avg_distinct(Session *session, Item_sum_avg_distinct *original)
00537     :Item_sum_distinct(session, original) {}
00538 public:
00539   uint32_t prec_increment;
00540   Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
00541 
00542   void fix_length_and_dec();
00543   virtual void calculate_val_and_count();
00544   enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
00545   const char *func_name() const { return "avg(distinct "; }
00546   Item *copy_or_same(Session* session) { return new Item_sum_avg_distinct(session, this); }
00547 };
00548 
00549 
00550 class Item_sum_count :public Item_sum_int
00551 {
00552   int64_t count;
00553 
00554   public:
00555   Item_sum_count(Item *item_par)
00556     :Item_sum_int(item_par),count(0)
00557   {}
00558   Item_sum_count(Session *session, Item_sum_count *item)
00559     :Item_sum_int(session, item), count(item->count)
00560   {}
00561   enum Sumfunctype sum_func () const { return COUNT_FUNC; }
00562   void clear();
00563   void no_rows_in_result() { count=0; }
00564   bool add();
00565   void make_const_count(int64_t count_arg)
00566   {
00567     count=count_arg;
00568     Item_sum::make_const();
00569   }
00570   int64_t val_int();
00571   void reset_field();
00572   void cleanup();
00573   void update_field();
00574   const char *func_name() const { return "count("; }
00575   Item *copy_or_same(Session* session);
00576 };
00577 
00578 
00579 class Tmp_Table_Param;
00580 
00581 class Item_sum_count_distinct :public Item_sum_int
00582 {
00583   Table *table;
00584   uint32_t *field_lengths;
00585   Tmp_Table_Param *tmp_table_param;
00586   bool force_copy_fields;
00587   /*
00588     If there are no blobs, we can use a tree, which
00589     is faster than heap table. In that case, we still use the table
00590     to help get things set up, but we insert nothing in it
00591   */
00592   Unique *tree;
00593   /*
00594    Storage for the value of count between calls to val_int() so val_int()
00595    will not recalculate on each call. Validitiy of the value is stored in
00596    is_evaluated.
00597   */
00598   int64_t count;
00599   /*
00600     Following is 0 normal object and pointer to original one for copy
00601     (to correctly free resources)
00602   */
00603   Item_sum_count_distinct *original;
00604   uint32_t tree_key_length;
00605 
00606 
00607   bool always_null;   // Set to 1 if the result is always NULL
00608 
00609 
00610   friend int composite_key_cmp(void* arg, unsigned char* key1, unsigned char* key2);
00611   friend int simple_str_key_cmp(void* arg, unsigned char* key1, unsigned char* key2);
00612 
00613 public:
00614   Item_sum_count_distinct(List<Item> &list)
00615     :Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
00616      force_copy_fields(0), tree(0), count(0),
00617      original(0), always_null(false)
00618   { quick_group= 0; }
00619   Item_sum_count_distinct(Session *session, Item_sum_count_distinct *item)
00620     :Item_sum_int(session, item), table(item->table),
00621      field_lengths(item->field_lengths),
00622      tmp_table_param(item->tmp_table_param),
00623      force_copy_fields(0), tree(item->tree), count(item->count),
00624      original(item), tree_key_length(item->tree_key_length),
00625      always_null(item->always_null)
00626   {}
00627   ~Item_sum_count_distinct();
00628 
00629   void cleanup();
00630 
00631   enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
00632   void clear();
00633   bool add();
00634   int64_t val_int();
00635   void reset_field() { return ;}    // Never called
00636   void update_field() { return ; }    // Never called
00637   const char *func_name() const { return "count(distinct "; }
00638   bool setup(Session *session);
00639   void make_unique();
00640   Item *copy_or_same(Session* session);
00641   void no_rows_in_result() {}
00642 };
00643 
00644 
00645 /* Item to get the value of a stored sum function */
00646 
00647 class Item_sum_avg;
00648 
00649 class Item_avg_field :public Item_result_field
00650 {
00651 public:
00652   Field *field;
00653   Item_result hybrid_type;
00654   uint32_t f_precision, f_scale, dec_bin_size;
00655   uint32_t prec_increment;
00656   Item_avg_field(Item_result res_type, Item_sum_avg *item);
00657   enum Type type() const { return FIELD_AVG_ITEM; }
00658   double val_real();
00659   int64_t val_int();
00660   type::Decimal *val_decimal(type::Decimal *);
00661   bool is_null() { update_null_value(); return null_value; }
00662   String *val_str(String*);
00663   enum_field_types field_type() const
00664   {
00665     return hybrid_type == DECIMAL_RESULT ?
00666       DRIZZLE_TYPE_DECIMAL : DRIZZLE_TYPE_DOUBLE;
00667   }
00668   void fix_length_and_dec() {}
00669   enum Item_result result_type () const { return hybrid_type; }
00670 };
00671 
00672 
00673 class Item_sum_avg :public Item_sum_sum
00674 {
00675 public:
00676   uint64_t count;
00677   uint32_t prec_increment;
00678   uint32_t f_precision, f_scale, dec_bin_size;
00679 
00680   Item_sum_avg(Item *item_par) :Item_sum_sum(item_par), count(0) {}
00681   Item_sum_avg(Session *session, Item_sum_avg *item)
00682     :Item_sum_sum(session, item), count(item->count),
00683     prec_increment(item->prec_increment) {}
00684 
00685   void fix_length_and_dec();
00686   enum Sumfunctype sum_func () const {return AVG_FUNC;}
00687   void clear();
00688   bool add();
00689   double val_real();
00690   // In SPs we might force the "wrong" type with select into a declare variable
00691   int64_t val_int();
00692   type::Decimal *val_decimal(type::Decimal *);
00693   String *val_str(String *str);
00694   void reset_field();
00695   void update_field();
00696   Item *result_item(Field *)
00697   { return new Item_avg_field(hybrid_type, this); }
00698   void no_rows_in_result() {}
00699   const char *func_name() const { return "avg("; }
00700   Item *copy_or_same(Session* session);
00701   Field *create_tmp_field(bool group, Table *table, uint32_t convert_blob_length);
00702   void cleanup()
00703   {
00704     count= 0;
00705     Item_sum_sum::cleanup();
00706   }
00707 };
00708 
00709 class Item_sum_variance;
00710 
00711 class Item_variance_field :public Item_result_field
00712 {
00713 public:
00714   Field *field;
00715   Item_result hybrid_type;
00716   uint32_t f_precision0, f_scale0;
00717   uint32_t f_precision1, f_scale1;
00718   uint32_t dec_bin_size0, dec_bin_size1;
00719   uint32_t sample;
00720   uint32_t prec_increment;
00721   Item_variance_field(Item_sum_variance *item);
00722   enum Type type() const {return FIELD_VARIANCE_ITEM; }
00723   double val_real();
00724   int64_t val_int();
00725   String *val_str(String *str)
00726   { return val_string_from_real(str); }
00727   type::Decimal *val_decimal(type::Decimal *dec_buf)
00728   { return val_decimal_from_real(dec_buf); }
00729   bool is_null() { update_null_value(); return null_value; }
00730   enum_field_types field_type() const
00731   {
00732     return hybrid_type == DECIMAL_RESULT ?
00733       DRIZZLE_TYPE_DECIMAL : DRIZZLE_TYPE_DOUBLE;
00734   }
00735   void fix_length_and_dec() {}
00736   enum Item_result result_type () const { return hybrid_type; }
00737 };
00738 
00739 
00740 /*
00741   variance(a) =
00742 
00743   =  sum (ai - avg(a))^2 / count(a) )
00744   =  sum (ai^2 - 2*ai*avg(a) + avg(a)^2) / count(a)
00745   =  (sum(ai^2) - sum(2*ai*avg(a)) + sum(avg(a)^2))/count(a) =
00746   =  (sum(ai^2) - 2*avg(a)*sum(a) + count(a)*avg(a)^2)/count(a) =
00747   =  (sum(ai^2) - 2*sum(a)*sum(a)/count(a) + count(a)*sum(a)^2/count(a)^2 )/count(a) =
00748   =  (sum(ai^2) - 2*sum(a)^2/count(a) + sum(a)^2/count(a) )/count(a) =
00749   =  (sum(ai^2) - sum(a)^2/count(a))/count(a)
00750 
00751 But, this falls prey to catastrophic cancellation.  Instead, use the recurrence formulas
00752 
00753   M_{1} = x_{1}, ~ M_{k} = M_{k-1} + (x_{k} - M_{k-1}) / k newline
00754   S_{1} = 0, ~ S_{k} = S_{k-1} + (x_{k} - M_{k-1}) times (x_{k} - M_{k}) newline
00755   for 2 <= k <= n newline
00756   ital variance = S_{n} / (n-1)
00757 
00758 */
00759 
00760 class Item_sum_variance : public Item_sum_num
00761 {
00762   void fix_length_and_dec();
00763 
00764 public:
00765   Item_result hybrid_type;
00766   int cur_dec;
00767   double recurrence_m, recurrence_s;    /* Used in recurrence relation. */
00768   uint64_t count;
00769   uint32_t f_precision0, f_scale0;
00770   uint32_t f_precision1, f_scale1;
00771   uint32_t dec_bin_size0, dec_bin_size1;
00772   uint32_t sample;
00773   uint32_t prec_increment;
00774 
00775   Item_sum_variance(Item *item_par, uint32_t sample_arg) :Item_sum_num(item_par),
00776     hybrid_type(REAL_RESULT), count(0), sample(sample_arg)
00777     {}
00778   Item_sum_variance(Session *session, Item_sum_variance *item);
00779   enum Sumfunctype sum_func () const { return VARIANCE_FUNC; }
00780   void clear();
00781   bool add();
00782   double val_real();
00783   int64_t val_int();
00784   type::Decimal *val_decimal(type::Decimal *);
00785   void reset_field();
00786   void update_field();
00787   Item *result_item(Field *)
00788   { return new Item_variance_field(this); }
00789   void no_rows_in_result() {}
00790   const char *func_name() const
00791     { return sample ? "var_samp(" : "variance("; }
00792   Item *copy_or_same(Session* session);
00793   Field *create_tmp_field(bool group, Table *table, uint32_t convert_blob_length);
00794   enum Item_result result_type () const { return REAL_RESULT; }
00795   void cleanup()
00796   {
00797     count= 0;
00798     Item_sum_num::cleanup();
00799   }
00800 };
00801 
00802 class Item_sum_std;
00803 
00804 class Item_std_field :public Item_variance_field
00805 {
00806 public:
00807   Item_std_field(Item_sum_std *item);
00808   enum Type type() const { return FIELD_STD_ITEM; }
00809   double val_real();
00810   type::Decimal *val_decimal(type::Decimal *);
00811   enum Item_result result_type () const { return REAL_RESULT; }
00812   enum_field_types field_type() const { return DRIZZLE_TYPE_DOUBLE;}
00813 };
00814 
00815 /*
00816    standard_deviation(a) = sqrt(variance(a))
00817 */
00818 
00819 class Item_sum_std :public Item_sum_variance
00820 {
00821   public:
00822   Item_sum_std(Item *item_par, uint32_t sample_arg)
00823     :Item_sum_variance(item_par, sample_arg) {}
00824   Item_sum_std(Session *session, Item_sum_std *item)
00825     :Item_sum_variance(session, item)
00826     {}
00827   enum Sumfunctype sum_func () const { return STD_FUNC; }
00828   double val_real();
00829   Item *result_item(Field *)
00830     { return new Item_std_field(this); }
00831   const char *func_name() const { return "std("; }
00832   Item *copy_or_same(Session* session);
00833   enum Item_result result_type () const { return REAL_RESULT; }
00834   enum_field_types field_type() const { return DRIZZLE_TYPE_DOUBLE;}
00835 };
00836 
00837 // This class is a string or number function depending on num_func
00838 
00839 class Item_sum_hybrid :public Item_sum
00840 {
00841 protected:
00842   String value,tmp_value;
00843   double sum;
00844   int64_t sum_int;
00845   type::Decimal sum_dec;
00846   Item_result hybrid_type;
00847   enum_field_types hybrid_field_type;
00848   int cmp_sign;
00849   bool was_values;  // Set if we have found at least one row (for max/min only)
00850 
00851   public:
00852   Item_sum_hybrid(Item *item_par,int sign)
00853     :Item_sum(item_par), sum(0.0), sum_int(0),
00854     hybrid_type(INT_RESULT), hybrid_field_type(DRIZZLE_TYPE_LONGLONG),
00855     cmp_sign(sign), was_values(true)
00856   { collation.set(&my_charset_bin); }
00857   Item_sum_hybrid(Session *session, Item_sum_hybrid *item);
00858   bool fix_fields(Session *, Item **);
00859   void clear();
00860   double val_real();
00861   int64_t val_int();
00862   type::Decimal *val_decimal(type::Decimal *);
00863   void reset_field();
00864   String *val_str(String *);
00865   bool keep_field_type(void) const { return 1; }
00866   enum Item_result result_type () const { return hybrid_type; }
00867   enum enum_field_types field_type() const { return hybrid_field_type; }
00868   void update_field();
00869   void min_max_update_str_field();
00870   void min_max_update_real_field();
00871   void min_max_update_int_field();
00872   void min_max_update_decimal_field();
00873   void cleanup();
00874   bool any_value() { return was_values; }
00875   void no_rows_in_result();
00876   Field *create_tmp_field(bool group, Table *table,
00877         uint32_t convert_blob_length);
00878 };
00879 
00880 
00881 class Item_sum_min :public Item_sum_hybrid
00882 {
00883 public:
00884   Item_sum_min(Item *item_par) :Item_sum_hybrid(item_par,1) {}
00885   Item_sum_min(Session *session, Item_sum_min *item) :Item_sum_hybrid(session, item) {}
00886   enum Sumfunctype sum_func () const {return MIN_FUNC;}
00887 
00888   bool add();
00889   const char *func_name() const { return "min("; }
00890   Item *copy_or_same(Session* session);
00891 };
00892 
00893 
00894 class Item_sum_max :public Item_sum_hybrid
00895 {
00896 public:
00897   Item_sum_max(Item *item_par) :Item_sum_hybrid(item_par,-1) {}
00898   Item_sum_max(Session *session, Item_sum_max *item) :Item_sum_hybrid(session, item) {}
00899   enum Sumfunctype sum_func () const {return MAX_FUNC;}
00900 
00901   bool add();
00902   const char *func_name() const { return "max("; }
00903   Item *copy_or_same(Session* session);
00904 };
00905 
00906 
00907 class Item_sum_bit :public Item_sum_int
00908 {
00909 protected:
00910   uint64_t reset_bits,bits;
00911 
00912 public:
00913   Item_sum_bit(Item *item_par,uint64_t reset_arg)
00914     :Item_sum_int(item_par),reset_bits(reset_arg),bits(reset_arg) {}
00915   Item_sum_bit(Session *session, Item_sum_bit *item):
00916     Item_sum_int(session, item), reset_bits(item->reset_bits), bits(item->bits) {}
00917   enum Sumfunctype sum_func () const {return SUM_BIT_FUNC;}
00918   void clear();
00919   int64_t val_int();
00920   void reset_field();
00921   void update_field();
00922   void fix_length_and_dec()
00923   { decimals= 0; max_length=21; unsigned_flag= 1; maybe_null= null_value= 0; }
00924   void cleanup()
00925   {
00926     bits= reset_bits;
00927     Item_sum_int::cleanup();
00928   }
00929 };
00930 
00931 
00932 class Item_sum_or :public Item_sum_bit
00933 {
00934 public:
00935   Item_sum_or(Item *item_par) :Item_sum_bit(item_par,0) {}
00936   Item_sum_or(Session *session, Item_sum_or *item) :Item_sum_bit(session, item) {}
00937   bool add();
00938   const char *func_name() const { return "bit_or("; }
00939   Item *copy_or_same(Session* session);
00940 };
00941 
00942 
00943 class Item_sum_and :public Item_sum_bit
00944 {
00945   public:
00946   Item_sum_and(Item *item_par) :Item_sum_bit(item_par, UINT64_MAX) {}
00947   Item_sum_and(Session *session, Item_sum_and *item) :Item_sum_bit(session, item) {}
00948   bool add();
00949   const char *func_name() const { return "bit_and("; }
00950   Item *copy_or_same(Session* session);
00951 };
00952 
00953 class Item_sum_xor :public Item_sum_bit
00954 {
00955   public:
00956   Item_sum_xor(Item *item_par) :Item_sum_bit(item_par,0) {}
00957   Item_sum_xor(Session *session, Item_sum_xor *item) :Item_sum_bit(session, item) {}
00958   bool add();
00959   const char *func_name() const { return "bit_xor("; }
00960   Item *copy_or_same(Session* session);
00961 };
00962 
00963 
00964 
00965 class DRIZZLE_ERROR;
00966 
00967 class Item_func_group_concat : public Item_sum
00968 {
00969   Tmp_Table_Param *tmp_table_param;
00970   DRIZZLE_ERROR *warning;
00971   String result;
00972   String *separator;
00973   TREE tree_base;
00974   TREE *tree;
00975 
00983   Unique *unique_filter;
00984   Table *table;
00985   Order **order;
00986   Name_resolution_context *context;
00988   uint32_t arg_count_order;
00990   uint32_t arg_count_field;
00991   uint32_t count_cut_values;
00992   bool distinct;
00993   bool warning_for_row;
00994   bool always_null;
00995   bool force_copy_fields;
00996   bool no_appended;
00997   /*
00998     Following is 0 normal object and pointer to original one for copy
00999     (to correctly free resources)
01000   */
01001   Item_func_group_concat *original;
01002 
01003   friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
01004                                                 const void* key2);
01005   friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
01006                const void* key2);
01007   friend int dump_leaf_key(unsigned char* key, uint32_t,
01008                            Item_func_group_concat *group_concat_item);
01009 
01010 public:
01011   Item_func_group_concat(Name_resolution_context *context_arg,
01012                          bool is_distinct, List<Item> *is_select,
01013                          SQL_LIST *is_order, String *is_separator);
01014 
01015   Item_func_group_concat(Session *session, Item_func_group_concat *item);
01016   ~Item_func_group_concat();
01017   void cleanup();
01018 
01019   enum Sumfunctype sum_func () const {return GROUP_CONCAT_FUNC;}
01020   const char *func_name() const { return "group_concat"; }
01021   virtual Item_result result_type () const { return STRING_RESULT; }
01022   enum_field_types field_type() const
01023   {
01024     if (max_length/collation.collation->mbmaxlen > CONVERT_IF_BIGGER_TO_BLOB )
01025       return DRIZZLE_TYPE_BLOB;
01026     else
01027       return DRIZZLE_TYPE_VARCHAR;
01028   }
01029   void clear();
01030   bool add();
01031   void reset_field() { assert(0); }        // not used
01032   void update_field() { assert(0); }       // not used
01033   bool fix_fields(Session *,Item **);
01034   bool setup(Session *session);
01035   void make_unique();
01036   double val_real();
01037   int64_t val_int();
01038   type::Decimal *val_decimal(type::Decimal *decimal_value)
01039   {
01040     return val_decimal_from_string(decimal_value);
01041   }
01042   String* val_str(String* str);
01043   Item *copy_or_same(Session* session);
01044   void no_rows_in_result() {}
01045   virtual void print(String *str);
01046   virtual bool change_context_processor(unsigned char *cntx)
01047     { context= (Name_resolution_context *)cntx; return false; }
01048 };
01049 
01050 } /* namespace drizzled */
01051