Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.calcite.udf.udaf;

import org.opensearch.sql.calcite.udf.UserDefinedAggFunction;

/**
* FIRST aggregation function - returns the first non-null value in natural document order. Returns
* NULL if no records exist, or if all records have NULL values for the field.
*/
public class FirstAggFunction implements UserDefinedAggFunction<FirstAggFunction.FirstAccumulator> {

@Override
public FirstAccumulator init() {
return new FirstAccumulator();
}

@Override
public Object result(FirstAccumulator accumulator) {
return accumulator.value();
}

@Override
public FirstAccumulator add(FirstAccumulator acc, Object... values) {
Object candidateValue = values[0];
// Only accept the first non-null value encountered
// Skip null values to find the first actual value
if (candidateValue != null) {
acc.setValue(candidateValue);
}
return acc;
}

public static class FirstAccumulator implements Accumulator {
private volatile Object first;
private volatile boolean hasValue;

public FirstAccumulator() {
this.first = null;
this.hasValue = false;
}

public synchronized void setValue(Object value) {
if (!hasValue) {
this.first = value;
this.hasValue = true;
}
}

@Override
public Object value(Object... argList) {
return first;
}

public int size() {
return hasValue ? 1 : 0;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.calcite.udf.udaf;

import org.opensearch.sql.calcite.udf.UserDefinedAggFunction;

/**
* LAST aggregation function - returns the last non-null value in natural document order. Returns
* NULL if no records exist, or if all records have NULL values for the field.
*/
public class LastAggFunction implements UserDefinedAggFunction<LastAggFunction.LastAccumulator> {

@Override
public LastAccumulator init() {
return new LastAccumulator();
}

@Override
public Object result(LastAccumulator accumulator) {
return accumulator.value();
}

@Override
public LastAccumulator add(LastAccumulator acc, Object... values) {
Object candidateValue = values[0];
// Only update with non-null values to keep the last non-null value
// Skip null values to preserve the previous non-null value
if (candidateValue != null) {
acc.setValue(candidateValue);
}
return acc;
}

public static class LastAccumulator implements Accumulator {
private volatile Object last;

public LastAccumulator() {
this.last = null;
}

public synchronized void setValue(Object value) {
this.last = value;
}

@Override
public Object value(Object... argList) {
return last;
}

public int size() {
return last != null ? 1 : 0;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,9 @@ public enum BuiltinFunctionName {
LIST(FunctionName.of("list")),
// Not always an aggregation query
NESTED(FunctionName.of("nested")),
// Document order aggregation functions
FIRST(FunctionName.of("first")),
LAST(FunctionName.of("last")),

/** Text Functions. */
ASCII(FunctionName.of("ascii")),
Expand Down Expand Up @@ -359,6 +362,8 @@ public enum BuiltinFunctionName {
.put("distinct_count_approx", BuiltinFunctionName.DISTINCT_COUNT_APPROX)
.put("list", BuiltinFunctionName.LIST)
.put("pattern", BuiltinFunctionName.INTERNAL_PATTERN)
.put("first", BuiltinFunctionName.FIRST)
.put("last", BuiltinFunctionName.LAST)
.build();

private static final Map<String, BuiltinFunctionName> WINDOW_FUNC_MAPPING =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import org.apache.calcite.sql.type.SqlTypeTransforms;
import org.apache.calcite.sql.util.ReflectiveSqlOperatorTable;
import org.apache.calcite.util.BuiltInMethod;
import org.opensearch.sql.calcite.udf.udaf.FirstAggFunction;
import org.opensearch.sql.calcite.udf.udaf.LastAggFunction;
import org.opensearch.sql.calcite.udf.udaf.ListAggFunction;
import org.opensearch.sql.calcite.udf.udaf.LogPatternAggFunction;
import org.opensearch.sql.calcite.udf.udaf.NullableSqlAvgAggFunction;
Expand Down Expand Up @@ -423,6 +425,12 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
"TAKE",
PPLReturnTypes.ARG0_ARRAY,
PPLOperandTypes.ANY_OPTIONAL_INTEGER);
public static final SqlAggFunction FIRST =
createUserDefinedAggFunction(
FirstAggFunction.class, "FIRST", ReturnTypes.ARG0, PPLOperandTypes.ANY_OPTIONAL_INTEGER);
public static final SqlAggFunction LAST =
createUserDefinedAggFunction(
LastAggFunction.class, "LAST", ReturnTypes.ARG0, PPLOperandTypes.ANY_OPTIONAL_INTEGER);
public static final SqlAggFunction PERCENTILE_APPROX =
createUserDefinedAggFunction(
PercentileApproxFunction.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.EXPM1;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.EXTRACT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.FILTER;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.FIRST;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.FLOOR;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.FORALL;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.FROM_DAYS;
Expand Down Expand Up @@ -102,6 +103,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_OBJECT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_SET;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_VALID;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LAST;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LAST_DAY;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LATEST;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LEFT;
Expand Down Expand Up @@ -1176,6 +1178,26 @@ void populate() {
},
wrapSqlOperandTypeChecker(
SqlStdOperatorTable.ARG_MAX.getOperandTypeChecker(), LATEST.name(), false));

// Register FIRST function - uses document order
register(
FIRST,
(distinct, field, argList, ctx) -> {
// Use our custom FirstAggFunction for document order aggregation
return ctx.relBuilder.aggregateCall(PPLBuiltinOperators.FIRST, field);
},
wrapSqlOperandTypeChecker(
PPLBuiltinOperators.FIRST.getOperandTypeChecker(), FIRST.name(), false));

// Register LAST function - uses document order
register(
LAST,
(distinct, field, argList, ctx) -> {
// Use our custom LastAggFunction for document order aggregation
return ctx.relBuilder.aggregateCall(PPLBuiltinOperators.LAST, field);
},
wrapSqlOperandTypeChecker(
PPLBuiltinOperators.LAST.getOperandTypeChecker(), LAST.name(), false));
}
}

Expand Down
76 changes: 75 additions & 1 deletion docs/user/ppl/cmd/stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ The following table dataSources the aggregation functions and also indicates how
+----------+-------------+-------------+
| MIN | Ignore | Ignore |
+----------+-------------+-------------+
| FIRST | Ignore | Ignore |
+----------+-------------+-------------+
| LAST | Ignore | Ignore |
+----------+-------------+-------------+
| LIST | Ignore | Ignore |
+----------+-------------+-------------+

Expand Down Expand Up @@ -463,13 +467,83 @@ Example with custom time field::
| inactive | users |
+----------------------------+----------+

FIRST
-----

Description
>>>>>>>>>>>

Version: 3.3.0

Usage: FIRST(field). Return the first non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field.

* field: mandatory. The field to return the first value for.

Note: This function requires Calcite to be enabled (see `Configuration`_ section above).

Example::

os> source=accounts | stats first(firstname) by gender;
fetched rows / total rows = 2/2
+------------------+--------+
| first(firstname) | gender |
|------------------+--------|
| Nanette | F |
| Amber | M |
+------------------+--------+

Example with count aggregation::

os> source=accounts | stats first(firstname), count() by gender;
fetched rows / total rows = 2/2
+------------------+---------+--------+
| first(firstname) | count() | gender |
|------------------+---------+--------|
| Nanette | 1 | F |
| Amber | 3 | M |
+------------------+---------+--------+

LAST
----

Description
>>>>>>>>>>>

Version: 3.3.0

Usage: LAST(field). Return the last non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field.

* field: mandatory. The field to return the last value for.

Note: This function requires Calcite to be enabled (see `Configuration`_ section above).

Example::

os> source=accounts | stats last(firstname) by gender;
fetched rows / total rows = 2/2
+-----------------+--------+
| last(firstname) | gender |
|-----------------+--------|
| Nanette | F |
| Dale | M |
+-----------------+--------+

Example with different fields::

os> source=accounts | stats first(account_number), last(balance), first(age);
fetched rows / total rows = 1/1
+-----------------------+---------------+------------+
| first(account_number) | last(balance) | first(age) |
|-----------------------+---------------+------------|
| 1 | 4180 | 32 |
+-----------------------+---------------+------------+

LIST
----

Description
>>>>>>>>>>>

=======
Version: 3.3.0 (Calcite engine only)

Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,19 @@ public void testExplainOnEarliestLatestWithCustomTimeField() throws IOException
TEST_INDEX_LOGS)));
}

// Only for Calcite
@Test
public void testExplainOnFirstLast() throws IOException {
String expected = loadExpectedPlan("explain_first_last.json");
assertJsonEqualsIgnoreId(
expected,
explainQueryToString(
String.format(
"source=%s | stats first(firstname) as first_name, last(firstname) as"
+ " last_name by gender",
TEST_INDEX_BANK)));
}

@Test
public void testListAggregationExplain() throws IOException {
String expected = loadExpectedPlan("explain_list_aggregation.json");
Expand Down
Loading
Loading