From b8fb06a52397463bfe9cffc2c89fe71eba56b2ca Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sun, 17 Jul 2022 03:38:35 +0000 Subject: [PATCH 1/7] chore(release): 0.8.0 --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd052e1f7..feb6da2d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ Release Notes --- +## [0.8.0](https://github.com/substrait-io/substrait/compare/v0.7.0...v0.8.0) (2022-07-17) + + +### ⚠ BREAKING CHANGES + +* The signature of divide functions for multiple types now specify an enumeration prior to specifying operands. + +### Bug Fixes + +* add overflow behavior to integer division ([#223](https://github.com/substrait-io/substrait/issues/223)) ([cf552d7](https://github.com/substrait-io/substrait/commit/cf552d7c76da9a91bce992391356c6ffb5a969ac)) + ## [0.7.0](https://github.com/substrait-io/substrait/compare/v0.6.0...v0.7.0) (2022-07-11) From 4b2072a40447a4f1a3f6875fa0476cc57145ba30 Mon Sep 17 00:00:00 2001 From: weijie Date: Tue, 26 Jul 2022 00:15:11 +0800 Subject: [PATCH 2/7] feat: support window functions (#224) Co-authored-by: Jacques Nadeau --- extensions/functions_arithmetic.yaml | 56 ++++++++++++++++++++++++++++ text/simple_extensions_schema.yaml | 7 +++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index a750df682..ffd43f4c4 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -362,3 +362,59 @@ aggregate_functions: decomposable: MANY intermediate: fp64? return: fp64? +window_functions: + - name: "row_number" + description: "the number of the current row within its partition." + impls: + - args: [] + nullability: DECLARED_OUTPUT + decomposable: NONE + return: i64? + window_type: PARTITION + - name: "rank" + description: "the rank of the current row, with gaps." 
+ impls: + - args: [] + nullability: DECLARED_OUTPUT + decomposable: NONE + return: i64? + window_type: PARTITION + - name: "dense_rank" + description: "the rank of the current row, without gaps." + impls: + - args: [] + nullability: DECLARED_OUTPUT + decomposable: NONE + return: i64? + window_type: PARTITION + - name: "percent_rank" + description: "the relative rank of the current row." + impls: + - args: [] + nullability: DECLARED_OUTPUT + decomposable: NONE + return: fp64? + window_type: PARTITION + - name: "cume_dist" + description: "the cumulative distribution." + impls: + - args: [] + nullability: DECLARED_OUTPUT + decomposable: NONE + return: fp64? + window_type: PARTITION + - name: "ntile" + description: "Return an integer ranging from 1 to the argument value, dividing the partition as equally as possible." + impls: + - args: + - value: i32 + nullability: DECLARED_OUTPUT + decomposable: NONE + return: i32? + window_type: PARTITION + - args: + - value: i64 + nullability: DECLARED_OUTPUT + decomposable: NONE + return: i64? 
+ window_type: PARTITION diff --git a/text/simple_extensions_schema.yaml b/text/simple_extensions_schema.yaml index 6fe274c78..911aa0bf7 100644 --- a/text/simple_extensions_schema.yaml +++ b/text/simple_extensions_schema.yaml @@ -45,6 +45,10 @@ properties: type: array items: $ref: "#/$defs/aggregateFunction" + window_functions: + type: array + items: + $ref: "#/$defs/windowFunction" $defs: type: @@ -220,6 +224,7 @@ $defs: $ref: "#/$defs/maxset" decomposable: $ref: "#/$defs/decomposable" + windowFunction: type: object additionalProperties: false @@ -235,7 +240,7 @@ $defs: items: type: object additionalProperties: false - required: [ intermediate, return ] + required: [ return ] properties: args: $ref: "#/$defs/arguments" From 66a347603bd0a2cba27d749864a9bdb1164eb1dd Mon Sep 17 00:00:00 2001 From: Almann Goo Date: Mon, 25 Jul 2022 11:05:16 -0700 Subject: [PATCH 3/7] fix: removes cast function definition (#253) Given that the `Expression` node has an explicit `Cast` variant that was added in #88, the `cast` function definition (added in #152) seems redundant. BREAKING CHANGE: Existing plans that are modeling `cast` with the `cast` function (as opposed to the `cast` expression) will no longer be valid. All producers/consumers should use the `cast` expression type. --- extensions/functions_cast.yaml | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 extensions/functions_cast.yaml diff --git a/extensions/functions_cast.yaml b/extensions/functions_cast.yaml deleted file mode 100644 index 245d27946..000000000 --- a/extensions/functions_cast.yaml +++ /dev/null @@ -1,11 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: cast - description: Cast one type to another. 
- impls: - - args: - - value: any1 - - type: output - return: output From 5a8fff8c4be3a2dfac4d7fa9d100b1fd9ad66825 Mon Sep 17 00:00:00 2001 From: Jeroen van Straten Date: Mon, 25 Jul 2022 21:15:29 +0200 Subject: [PATCH 4/7] docs: clarify result of sum() aggregate for zero elements (#262) --- extensions/functions_arithmetic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index ffd43f4c4..13a38cc5e 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -183,7 +183,7 @@ scalar_functions: return: i64 aggregate_functions: - name: "sum" - description: Sum a set of values. + description: Sum a set of values. The sum of zero elements yields null. impls: - args: - options: [ SILENT, SATURATE, ERROR ] From 42d9ca31a032e1fac0248a998501241eaa27b56f Mon Sep 17 00:00:00 2001 From: Jeroen van Straten Date: Mon, 25 Jul 2022 21:17:23 +0200 Subject: [PATCH 5/7] docs: better explain aggregate relations (#260) BREAKING CHANGE: the grouping set index column now only exists if there is more than one grouping set. --- site/docs/relations/logical_relations.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index c62a015be..c409e3b16 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -282,7 +282,17 @@ The aggregate operation groups input data on one or more sets of grouping keys, | Inputs | 1 | | Outputs | 1 | | Property Maintenance | Maintains distribution if all distribution fields are contained in every grouping set. No orderedness guaranteed. 
| -| Direct Output Order | The list of distinct columns from each grouping set (ordered by their first appearance) followed by the list of measures in declaration order, followed by an integer describing the associated particular grouping set the value is derived from. | +| Direct Output Order | The list of distinct columns from each grouping set (ordered by their first appearance) followed by the list of measures in declaration order, followed by an `i32` describing the associated particular grouping set the value is derived from (if applicable). | + +In its simplest form, an aggregation has only measures. In this case, all records are folded into one, and a column is returned for each aggregate expression in the measures list. + +Grouping sets can be used for finer-grained control over which records are folded. Within a grouping set, two records will be folded together if and only if each expression in the grouping set yields the same value for both records. The values returned by the grouping sets will be returned as columns to the left of the columns for the aggregate expressions. If a grouping set contains no grouping expressions, all rows will be folded for that grouping set. + +It's possible to specify multiple grouping sets in a single aggregate operation. The grouping sets behave more or less independently, with each returned record belonging to one of the grouping sets. The values for the grouping expression columns that are not part of the grouping set for a particular record will be set to null. Two grouping expressions will be returned using the same column if the protobuf messages describing the expressions are equal. The columns for grouping expressions that do *not* appear in *all* grouping sets will be nullable (regardless of the nullability of the type returned by the grouping expression) to accommodate the null insertion. 
+ +To further disambiguate which record belongs to which grouping set, an aggregate relation with more than one grouping set receives an extra `i32` column on the right-hand side. The value of this field will be the zero-based index of the grouping set that yielded the record. + +If at least one grouping expression is present, the aggregation is allowed to not have any aggregate expressions. An aggregate relation is invalid if it would yield zero columns. ### Aggregate Properties @@ -290,7 +300,7 @@ The aggregate operation groups input data on one or more sets of grouping keys, | ---------------- | ------------------------------------------------------------ | --------------------------------------- | | Input | The relational input. | Required | | Grouping Sets | One or more grouping sets. | Optional, required if no measures. | -| Per Grouping Set | A list of expression grouping that the aggregation measured should be calculated for. | Optional, defaults to 0. | +| Per Grouping Set | A list of grouping expressions that the aggregation measures should be calculated for. | Optional. | | Measures | A list of one or more aggregate expressions along with an optional filter. | Optional, required if no grouping sets. 
| From f7c5da5625f50514ba70b9e8a32cb2e7085c24f1 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Mon, 25 Jul 2022 15:19:15 -0400 Subject: [PATCH 6/7] feat: add functions for arithmetic, rounding, logarithmic, and string transformations (#245) * feat: add functions for arithmetic, rounding, logarithmic, and string transformations Co-authored-by: Sanjiban Sengupta --- extensions/functions_arithmetic.yaml | 79 +++++++++++++++++++ extensions/functions_comparison.yaml | 10 +++ extensions/functions_logarithmic.yaml | 106 ++++++++++++++++++++++++++ extensions/functions_rounding.yaml | 23 ++++++ extensions/functions_string.yaml | 3 +- 5 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 extensions/functions_logarithmic.yaml create mode 100644 extensions/functions_rounding.yaml diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 13a38cc5e..4578c09e7 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -161,6 +161,36 @@ scalar_functions: - value: fp64 - value: fp64 return: fp64 + - + name: "negate" + description: "Negation of the value" + impls: + - args: + - options: [ SILENT, SATURATE, ERROR ] + required: false + - value: i8 + return: i8 + - args: + - options: [ SILENT, SATURATE, ERROR ] + required: false + - value: i16 + return: i16 + - args: + - options: [ SILENT, SATURATE, ERROR ] + required: false + - value: i32 + return: i32 + - args: + - options: [ SILENT, SATURATE, ERROR ] + required: false + - value: i64 + return: i64 + - args: + - value: fp32 + return: fp32 + - args: + - value: fp64 + return: fp64 - name: "modulus" description: "Get the remainder when dividing one value by another." @@ -181,6 +211,55 @@ scalar_functions: - value: i64 - value: i64 return: i64 + - + name: "power" + description: "Take the power with the first value as the base and second as exponent." 
+ impls: + - args: + - options: [ SILENT, SATURATE, ERROR ] + required: false + - value: i64 + - value: i64 + return: i64 + - args: + - value: fp32 + - value: fp32 + return: fp32 + - args: + - value: fp64 + - value: fp64 + return: fp64 + - + name: "sqrt" + description: "Square root of the value" + impls: + - args: + - name: rounding + options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + required: false + - name: on_domain_error + options: [ NAN, ERROR ] + required: false + - value: i64 + return: fp64 + - args: + - name: rounding + options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + required: false + - name: on_domain_error + options: [ NAN, ERROR ] + required: false + - value: fp32 + return: fp32 + - args: + - name: rounding + options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + required: false + - name: on_domain_error + options: [ NAN, ERROR ] + required: false + - value: fp64 + return: fp64 aggregate_functions: - name: "sum" description: Sum a set of values. The sum of zero elements yields null. diff --git a/extensions/functions_comparison.yaml b/extensions/functions_comparison.yaml index ab7d118d6..28e164371 100644 --- a/extensions/functions_comparison.yaml +++ b/extensions/functions_comparison.yaml @@ -73,4 +73,14 @@ scalar_functions: - value: any1 return: BOOLEAN nullability: DECLARED_OUTPUT + - + name: "is_nan" + description: Whether a value is not a number. 
+ impls: + - args: + - value: fp32 + return: BOOLEAN + - args: + - value: fp64 + return: BOOLEAN diff --git a/extensions/functions_logarithmic.yaml b/extensions/functions_logarithmic.yaml new file mode 100644 index 000000000..8f9925c28 --- /dev/null +++ b/extensions/functions_logarithmic.yaml @@ -0,0 +1,106 @@ +%YAML 1.2 +--- +scalar_functions: + - + name: "ln" + description: "Natural logarithm of the value" + impls: + - args: + - name: rounding + options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + required: false + - name: on_domain_error + options: [ NAN, ERROR ] + required: false + - value: fp32 + return: fp32 + - args: + - name: rounding + options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + required: false + - name: on_domain_error + options: [ NAN, ERROR ] + required: false + - value: fp64 + return: fp64 + - + name: "log10" + description: "Logarithm to base 10 of the value" + impls: + - args: + - name: rounding + options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + required: false + - name: on_domain_error + options: [ NAN, ERROR ] + required: false + - value: fp32 + return: fp32 + - args: + - name: rounding + options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + required: false + - name: on_domain_error + options: [ NAN, ERROR ] + required: false + - value: fp64 + return: fp64 + - + name: "log2" + description: "Logarithm to base 2 of the value" + impls: + - args: + - name: rounding + options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + required: false + - name: on_domain_error + options: [ NAN, ERROR ] + required: false + - value: fp32 + return: fp32 + - args: + - name: rounding + options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + required: false + - name: on_domain_error + options: [ NAN, ERROR ] + required: false + - value: fp64 + return: fp64 + - + name: "logb" + description: > + Logarithm of the value with the given base + + logb(x, 
b) => log_{b} (x) + impls: + - args: + - name: rounding + options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + required: false + - name: on_domain_error + options: [ NAN, ERROR ] + required: false + - value: fp32 + name: "x" + description: "The number `x` to compute the logarithm of" + - value: fp32 + name: "base" + description: "The logarithm base `b` to use" + return: fp32 + - args: + - name: rounding + options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] + required: false + - name: on_domain_error + options: [ NAN, ERROR ] + required: false + - value: fp64 + name: "x" + description: "The number `x` to compute the logarithm of" + - value: fp64 + name: "base" + description: "The logarithm base `b` to use" + return: fp64 + + diff --git a/extensions/functions_rounding.yaml b/extensions/functions_rounding.yaml new file mode 100644 index 000000000..5ffa93582 --- /dev/null +++ b/extensions/functions_rounding.yaml @@ -0,0 +1,23 @@ +%YAML 1.2 +--- +scalar_functions: + - + name: "ceil" + description: "Rounding to the ceiling of the value" + impls: + - args: + - value: fp32 + return: fp32 + - args: + - value: fp64 + return: fp64 + - + name: "floor" + description: "Rounding to the floor of the value" + impls: + - args: + - value: fp32 + return: fp32 + - args: + - value: fp64 + return: fp64 diff --git a/extensions/functions_string.yaml b/extensions/functions_string.yaml index 66b389301..f98132c92 100644 --- a/extensions/functions_string.yaml +++ b/extensions/functions_string.yaml @@ -44,7 +44,8 @@ scalar_functions: - value: i32 - value: i32 return: "string" - - name: starts_with + - + name: starts_with description: Whether this string starts with another string. 
impls: - args: From d6b9b344f0f0865573a79feb6ec818c146b47f62 Mon Sep 17 00:00:00 2001 From: Richard Tia Date: Mon, 25 Jul 2022 16:46:59 -0700 Subject: [PATCH 7/7] feat: add string containment functions (#256) --- extensions/functions_string.yaml | 84 ++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/extensions/functions_string.yaml b/extensions/functions_string.yaml index f98132c92..a6aa27502 100644 --- a/extensions/functions_string.yaml +++ b/extensions/functions_string.yaml @@ -164,3 +164,87 @@ scalar_functions: - value: "fixedchar" - value: "varchar" return: "BOOLEAN" + - + name: strpos + description: >- + Return the position of the first occurrence of a string in another string. The first + character of the string is at position 1. If no occurrence is found, 0 is returned. + impls: + - args: + - value: "string" + name: "input" + description: The input string. + - value: "string" + name: "substring" + description: The substring to search for. + return: i64 + - args: + - value: "varchar" + name: "input" + description: The input string. + - value: "varchar" + name: "substring" + description: The substring to search for. + return: i64 + - args: + - value: "fixedchar" + name: "input" + description: The input string. + - value: "fixedchar" + name: "substring" + description: The substring to search for. + return: i64 + - + name: count_substring + description: Return the number of non-overlapping occurrences of a substring in an input string. + impls: + - args: + - value: "string" + name: "input" + description: The input string. + - value: "string" + name: "substring" + description: The substring to count. + return: i64 + - args: + - value: "varchar" + name: "input" + description: The input string. + - value: "varchar" + name: "substring" + description: The substring to count. + return: i64 + - args: + - value: "fixedchar" + name: "input" + description: The input string. 
+ - value: "fixedchar" + name: "substring" + description: The substring to count. + return: i64 + - name: replace + description: >- + Replace all occurrences of the substring with the replacement string. + impls: + - args: + - value: "string" + name: "input" + description: Input string. + - value: "string" + name: "substring" + description: The substring to replace. + - value: "string" + name: "replacement" + description: The replacement string. + return: "string" + - args: + - value: "varchar" + name: "input" + description: Input string. + - value: "varchar" + name: "substring" + description: The substring to replace. + - value: "varchar" + name: "replacement" + description: The replacement string. + return: "varchar"