Skip to content

Commit

Permalink
sql: simplify/optimize/fix the aggregate functions.
Browse files Browse the repository at this point in the history
Prior to this patch, `varianceAggregate`, `sumAggregate` (and
indirectly `avg` and `stddev` aggregates which used the former) tried
to be "smart" and share code between various argument types. However
this actually caused extra complexity and run-time overhead. Indeed,
every time the Add method was called:

- a type switch was performed on the argument (although every value
  added to a given variance aggregator will always be of the same
  type)

- a check was made whether an implementation for its argument's type
  had been instantiated already, to instantiate it if needed (although
  given the argument type is known after type checking, this
  instantiation could have been performed prior to the first Add call)

This patch addresses this shortcoming by separating implementations by
argument type.

In addition, minor optimizations were also implemented:

- the error return value was removed, since there is no error condition.
- the `sum` (and thus `avg`) aggregates now use a single `int64` as
  long as the sum does not overflow.

Also a long-standing bug with COUNT was fixed (the COUNT of a tuple of
NULL values is 1, not 0).

```
name                          old time/op    new time/op    delta
AvgAggregateInt1K-6             57.1µs ± 1%    55.3µs ± 0%   -3.27%          (p=0.000 n=9+8)
AvgAggregateSmallInt1K-6        60.1µs ± 1%    17.8µs ± 2%  -70.33%         (p=0.000 n=9+10)
AvgAggregateFloat1K-6           10.8µs ± 3%     8.8µs ± 1%  -19.02%         (p=0.000 n=10+9)
AvgAggregateDecimal1K-6          252µs ± 3%     249µs ± 3%     ~           (p=0.105 n=10+10)
CountAggregate1K-6              5.47µs ± 2%    3.83µs ± 1%  -29.99%        (p=0.000 n=10+10)
SumAggregateInt1K-6             52.1µs ± 1%    49.2µs ± 1%   -5.59%        (p=0.000 n=10+10)
SumAggregateSmallInt1K-6        55.0µs ± 1%    13.3µs ± 2%  -75.85%        (p=0.000 n=10+10)
SumAggregateFloat1K-6           7.04µs ± 5%    5.22µs ± 1%  -25.79%        (p=0.000 n=10+10)
SumAggregateDecimal1K-6          246µs ± 2%     237µs ± 2%   -3.77%        (p=0.000 n=10+10)
MaxAggregateInt1K-6             8.92µs ± 1%    8.48µs ± 2%   -4.99%         (p=0.000 n=8+10)
MaxAggregateFloat1K-6           9.26µs ± 2%    9.94µs ± 1%   +7.29%        (p=0.000 n=10+10)
MaxAggregateDecimal1K-6          113µs ±12%     126µs ±43%     ~            (p=0.661 n=9+10)
MinAggregateInt1K-6             9.52µs ± 1%    9.23µs ± 1%   -3.02%         (p=0.000 n=9+10)
MinAggregateFloat1K-6           9.12µs ± 1%    8.88µs ± 1%   -2.57%         (p=0.000 n=8+10)
MinAggregateDecimal1K-6          223µs ± 5%     224µs ± 2%     ~            (p=0.842 n=10+9)
VarianceAggregateInt1K-6        1.49ms ± 1%    1.49ms ± 1%     ~            (p=0.274 n=10+8)
VarianceAggregateFloat1K-6      14.2µs ± 1%    11.7µs ± 1%  -17.42%         (p=0.000 n=9+10)
VarianceAggregateDecimal1K-6    1.36ms ± 2%    1.31ms ± 2%   -4.12%        (p=0.000 n=10+10)
StddevAggregateInt1K-6          1.69ms ± 1%    1.65ms ± 2%   -2.47%        (p=0.000 n=10+10)
StddevAggregateFloat1K-6        15.7µs ± 1%    11.8µs ± 2%  -24.81%         (p=0.000 n=9+10)
StddevAggregateDecimal1K-6      1.38ms ± 1%    1.32ms ± 5%   -4.41%         (p=0.000 n=9+10)

name                          old alloc/op   new alloc/op   delta
AvgAggregateInt1K-6               772B ± 0%      692B ± 0%  -10.36%        (p=0.000 n=10+10)
AvgAggregateSmallInt1K-6          756B ± 0%      628B ± 0%  -16.93%        (p=0.000 n=10+10)
AvgAggregateFloat1K-6             128B ± 0%       64B ± 0%  -50.00%        (p=0.000 n=10+10)
AvgAggregateDecimal1K-6          142kB ± 1%     142kB ± 1%     ~           (p=0.239 n=10+10)
CountAggregate1K-6               16.0B ± 0%     16.0B ± 0%     ~     (all samples are equal)
SumAggregateInt1K-6               304B ± 0%      192B ± 0%  -36.84%        (p=0.000 n=10+10)
SumAggregateSmallInt1K-6          304B ± 0%      144B ± 0%  -52.63%        (p=0.000 n=10+10)
SumAggregateFloat1K-6             120B ± 0%       24B ± 0%  -80.00%        (p=0.000 n=10+10)
SumAggregateDecimal1K-6          142kB ± 1%     141kB ± 2%     ~           (p=0.123 n=10+10)
MaxAggregateInt1K-6              16.0B ± 0%     16.0B ± 0%     ~     (all samples are equal)
MaxAggregateFloat1K-6            16.0B ± 0%     16.0B ± 0%     ~     (all samples are equal)
MaxAggregateDecimal1K-6         57.7kB ±17%    67.7kB ±58%     ~            (p=0.645 n=9+10)
MinAggregateInt1K-6              16.0B ± 0%     16.0B ± 0%     ~     (all samples are equal)
MinAggregateFloat1K-6            16.0B ± 0%     16.0B ± 0%     ~     (all samples are equal)
MinAggregateDecimal1K-6          138kB ± 5%     140kB ± 2%     ~            (p=0.305 n=10+9)
VarianceAggregateInt1K-6         773kB ± 0%     773kB ± 0%     ~             (p=0.743 n=8+9)
VarianceAggregateFloat1K-6        104B ± 0%       40B ± 0%  -61.54%        (p=0.000 n=10+10)
VarianceAggregateDecimal1K-6     709kB ± 2%     706kB ± 1%     ~           (p=0.436 n=10+10)
StddevAggregateInt1K-6           869kB ± 0%     868kB ± 0%     ~            (p=0.128 n=9+10)
StddevAggregateFloat1K-6          112B ± 0%       64B ± 0%  -42.86%        (p=0.000 n=10+10)
StddevAggregateDecimal1K-6       716kB ± 1%     723kB ± 2%   +1.06%         (p=0.028 n=9+10)

name                          old allocs/op  new allocs/op  delta
AvgAggregateInt1K-6               16.0 ± 0%      15.0 ± 0%   -6.25%        (p=0.000 n=10+10)
AvgAggregateSmallInt1K-6          16.0 ± 0%      14.0 ± 0%  -12.50%        (p=0.000 n=10+10)
AvgAggregateFloat1K-6             3.00 ± 0%      4.00 ± 0%  +33.33%        (p=0.000 n=10+10)
AvgAggregateDecimal1K-6          2.97k ± 1%     2.96k ± 1%     ~           (p=0.239 n=10+10)
CountAggregate1K-6                2.00 ± 0%      2.00 ± 0%     ~     (all samples are equal)
SumAggregateInt1K-6               5.00 ± 0%      3.00 ± 0%  -40.00%        (p=0.000 n=10+10)
SumAggregateSmallInt1K-6          5.00 ± 0%      2.00 ± 0%  -60.00%        (p=0.000 n=10+10)
SumAggregateFloat1K-6             2.00 ± 0%      2.00 ± 0%     ~     (all samples are equal)
SumAggregateDecimal1K-6          2.95k ± 1%     2.93k ± 2%     ~           (p=0.128 n=10+10)
MaxAggregateInt1K-6               1.00 ± 0%      1.00 ± 0%     ~     (all samples are equal)
MaxAggregateFloat1K-6             1.00 ± 0%      1.00 ± 0%     ~     (all samples are equal)
MaxAggregateDecimal1K-6          1.20k ±17%     1.41k ±58%     ~            (p=0.645 n=9+10)
MinAggregateInt1K-6               1.00 ± 0%      1.00 ± 0%     ~     (all samples are equal)
MinAggregateFloat1K-6             1.00 ± 0%      1.00 ± 0%     ~     (all samples are equal)
MinAggregateDecimal1K-6          2.88k ± 5%     2.92k ± 2%     ~            (p=0.305 n=10+9)
VarianceAggregateInt1K-6         17.4k ± 0%     17.4k ± 0%     ~             (p=0.799 n=8+9)
VarianceAggregateFloat1K-6        3.00 ± 0%      2.00 ± 0%  -33.33%        (p=0.000 n=10+10)
VarianceAggregateDecimal1K-6     15.9k ± 1%     16.0k ± 1%     ~            (p=0.905 n=9+10)
StddevAggregateInt1K-6           19.1k ± 0%     19.1k ± 0%     ~           (p=0.288 n=10+10)
StddevAggregateFloat1K-6          4.00 ± 0%      4.00 ± 0%     ~     (all samples are equal)
StddevAggregateDecimal1K-6       16.1k ± 0%     16.3k ± 1%     ~            (p=0.079 n=9+10)
```
  • Loading branch information
knz committed Aug 20, 2016
1 parent e4892bf commit 72a5c19
Show file tree
Hide file tree
Showing 5 changed files with 260 additions and 242 deletions.
26 changes: 10 additions & 16 deletions sql/group.go
Original file line number Diff line number Diff line change
Expand Up @@ -487,31 +487,27 @@ func (v *extractAggregatesVisitor) VisitPre(expr parser.Expr) (recurse bool, new

switch t := expr.(type) {
case *parser.FuncExpr:
fn, err := t.Name.Normalize()
if err != nil {
v.err = err
return false, expr
}

if impl, ok := parser.Aggregates[strings.ToLower(fn.Function())]; ok {
if agg := t.GetAggregateConstructor(); agg != nil {
if len(t.Exprs) != 1 {
// Type checking has already run on these expressions thus
// if an aggregate function of the wrong arity gets here,
// something has gone really wrong.
panic(fmt.Sprintf("%q has %d arguments (expected 1)", fn, len(t.Exprs)))
panic(fmt.Sprintf("%q has %d arguments (expected 1)", t.Name, len(t.Exprs)))
}

argExpr := t.Exprs[0]

defer v.subAggregateVisitor.Reset()
parser.WalkExprConst(&v.subAggregateVisitor, t.Exprs[0])
parser.WalkExprConst(&v.subAggregateVisitor, argExpr)
if v.subAggregateVisitor.Aggregated {
v.err = fmt.Errorf("aggregate function calls cannot be nested under %s", t.Name)
return false, expr
}

f := &aggregateFuncHolder{
expr: t,
arg: t.Exprs[0].(parser.TypedExpr),
create: impl[0].AggregateFunc,
arg: argExpr.(parser.TypedExpr),
create: agg,
group: v.n,
buckets: make(map[string]parser.AggregateFunc),
}
Expand Down Expand Up @@ -603,7 +599,8 @@ func (a *aggregateFuncHolder) add(bucket []byte, d parser.Datum) error {
a.buckets[string(bucket)] = impl
}

return impl.Add(d)
impl.Add(d)
return nil
}

func (*aggregateFuncHolder) Variable() {}
Expand Down Expand Up @@ -632,10 +629,7 @@ func (a *aggregateFuncHolder) Eval(ctx *parser.EvalContext) (parser.Datum, error
found = a.create()
}

datum, err := found.Result()
if err != nil {
return nil, err
}
datum := found.Result()

// This is almost certainly the identity. Oh well.
return datum.Eval(ctx)
Expand Down
Loading

0 comments on commit 72a5c19

Please sign in to comment.