Skip to content

Commit 7186378

Browse files
Add FILLNULL command in PPL (#3032)
* Add FILLNULL command in PPL Signed-off-by: Norman Jordan <norman.jordan@improving.com>
1 parent 3b86612 commit 7186378

File tree

12 files changed

+374
-0
lines changed

12 files changed

+374
-0
lines changed

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import org.opensearch.sql.ast.tree.Dedupe;
4747
import org.opensearch.sql.ast.tree.Eval;
4848
import org.opensearch.sql.ast.tree.FetchCursor;
49+
import org.opensearch.sql.ast.tree.FillNull;
4950
import org.opensearch.sql.ast.tree.Filter;
5051
import org.opensearch.sql.ast.tree.Head;
5152
import org.opensearch.sql.ast.tree.Kmeans;
@@ -558,6 +559,29 @@ public LogicalPlan visitAD(AD node, AnalysisContext context) {
558559
return new LogicalAD(child, options);
559560
}
560561

562+
/** Build {@link LogicalAD} for fillnull command. */
563+
@Override
564+
public LogicalPlan visitFillNull(final FillNull node, final AnalysisContext context) {
565+
LogicalPlan child = node.getChild().get(0).accept(this, context);
566+
567+
ImmutableList.Builder<Pair<ReferenceExpression, Expression>> expressionsBuilder =
568+
new Builder<>();
569+
for (FillNull.NullableFieldFill fieldFill : node.getNullableFieldFills()) {
570+
Expression fieldExpr =
571+
expressionAnalyzer.analyze(fieldFill.getNullableFieldReference(), context);
572+
ReferenceExpression ref =
573+
DSL.ref(fieldFill.getNullableFieldReference().getField().toString(), fieldExpr.type());
574+
FunctionExpression ifNullFunction =
575+
DSL.ifnull(ref, expressionAnalyzer.analyze(fieldFill.getReplaceNullWithMe(), context));
576+
expressionsBuilder.add(new ImmutablePair<>(ref, ifNullFunction));
577+
TypeEnvironment typeEnvironment = context.peek();
578+
// define the new reference in type env.
579+
typeEnvironment.define(ref);
580+
}
581+
582+
return new LogicalEval(child, expressionsBuilder.build());
583+
}
584+
561585
/** Build {@link LogicalML} for ml command. */
562586
@Override
563587
public LogicalPlan visitML(ML node, AnalysisContext context) {

core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import org.opensearch.sql.ast.tree.Dedupe;
4646
import org.opensearch.sql.ast.tree.Eval;
4747
import org.opensearch.sql.ast.tree.FetchCursor;
48+
import org.opensearch.sql.ast.tree.FillNull;
4849
import org.opensearch.sql.ast.tree.Filter;
4950
import org.opensearch.sql.ast.tree.Head;
5051
import org.opensearch.sql.ast.tree.Kmeans;
@@ -312,4 +313,8 @@ public T visitFetchCursor(FetchCursor cursor, C context) {
312313
public T visitCloseCursor(CloseCursor closeCursor, C context) {
313314
return visitChildren(closeCursor, context);
314315
}
316+
317+
public T visitFillNull(FillNull fillNull, C context) {
318+
return visitChildren(fillNull, context);
319+
}
315320
}

core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55

66
package org.opensearch.sql.ast.dsl;
77

8+
import com.google.common.collect.ImmutableList;
89
import java.util.Arrays;
910
import java.util.List;
1011
import java.util.stream.Collectors;
1112
import lombok.experimental.UtilityClass;
13+
import org.apache.commons.lang3.tuple.ImmutablePair;
1214
import org.apache.commons.lang3.tuple.Pair;
1315
import org.opensearch.sql.ast.expression.AggregateFunction;
1416
import org.opensearch.sql.ast.expression.Alias;
@@ -46,6 +48,7 @@
4648
import org.opensearch.sql.ast.tree.Aggregation;
4749
import org.opensearch.sql.ast.tree.Dedupe;
4850
import org.opensearch.sql.ast.tree.Eval;
51+
import org.opensearch.sql.ast.tree.FillNull;
4952
import org.opensearch.sql.ast.tree.Filter;
5053
import org.opensearch.sql.ast.tree.Head;
5154
import org.opensearch.sql.ast.tree.Limit;
@@ -471,4 +474,22 @@ public static Parse parse(
471474
java.util.Map<String, Literal> arguments) {
472475
return new Parse(parseMethod, sourceField, pattern, arguments, input);
473476
}
477+
478+
public static FillNull fillNull(UnresolvedExpression replaceNullWithMe, Field... fields) {
479+
return new FillNull(
480+
FillNull.ContainNullableFieldFill.ofSameValue(
481+
replaceNullWithMe, ImmutableList.copyOf(fields)));
482+
}
483+
484+
public static FillNull fillNull(
485+
List<ImmutablePair<Field, UnresolvedExpression>> fieldAndReplacements) {
486+
ImmutableList.Builder<FillNull.NullableFieldFill> replacementsBuilder = ImmutableList.builder();
487+
for (ImmutablePair<Field, UnresolvedExpression> fieldAndReplacement : fieldAndReplacements) {
488+
replacementsBuilder.add(
489+
new FillNull.NullableFieldFill(
490+
fieldAndReplacement.getLeft(), fieldAndReplacement.getRight()));
491+
}
492+
return new FillNull(
493+
FillNull.ContainNullableFieldFill.ofVariousValue(replacementsBuilder.build()));
494+
}
474495
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ast.tree;
7+
8+
import java.util.List;
9+
import java.util.Objects;
10+
import lombok.AllArgsConstructor;
11+
import lombok.Getter;
12+
import lombok.NonNull;
13+
import lombok.RequiredArgsConstructor;
14+
import org.opensearch.sql.ast.AbstractNodeVisitor;
15+
import org.opensearch.sql.ast.Node;
16+
import org.opensearch.sql.ast.expression.Field;
17+
import org.opensearch.sql.ast.expression.UnresolvedExpression;
18+
19+
@RequiredArgsConstructor
20+
@AllArgsConstructor
21+
public class FillNull extends UnresolvedPlan {
22+
23+
@Getter
24+
@RequiredArgsConstructor
25+
public static class NullableFieldFill {
26+
@NonNull private final Field nullableFieldReference;
27+
@NonNull private final UnresolvedExpression replaceNullWithMe;
28+
}
29+
30+
public interface ContainNullableFieldFill {
31+
List<NullableFieldFill> getNullFieldFill();
32+
33+
static ContainNullableFieldFill ofVariousValue(List<NullableFieldFill> replacements) {
34+
return new VariousValueNullFill(replacements);
35+
}
36+
37+
static ContainNullableFieldFill ofSameValue(
38+
UnresolvedExpression replaceNullWithMe, List<Field> nullableFieldReferences) {
39+
return new SameValueNullFill(replaceNullWithMe, nullableFieldReferences);
40+
}
41+
}
42+
43+
private static class SameValueNullFill implements ContainNullableFieldFill {
44+
@Getter(onMethod_ = @Override)
45+
private final List<NullableFieldFill> nullFieldFill;
46+
47+
public SameValueNullFill(
48+
UnresolvedExpression replaceNullWithMe, List<Field> nullableFieldReferences) {
49+
Objects.requireNonNull(replaceNullWithMe, "Null replacement is required");
50+
this.nullFieldFill =
51+
Objects.requireNonNull(nullableFieldReferences, "Nullable field reference is required")
52+
.stream()
53+
.map(nullableReference -> new NullableFieldFill(nullableReference, replaceNullWithMe))
54+
.toList();
55+
}
56+
}
57+
58+
@RequiredArgsConstructor
59+
private static class VariousValueNullFill implements ContainNullableFieldFill {
60+
@NonNull
61+
@Getter(onMethod_ = @Override)
62+
private final List<NullableFieldFill> nullFieldFill;
63+
}
64+
65+
private UnresolvedPlan child;
66+
67+
@NonNull private final ContainNullableFieldFill containNullableFieldFill;
68+
69+
public List<NullableFieldFill> getNullableFieldFills() {
70+
return containNullableFieldFill.getNullFieldFill();
71+
}
72+
73+
@Override
74+
public UnresolvedPlan attach(UnresolvedPlan child) {
75+
this.child = child;
76+
return this;
77+
}
78+
79+
@Override
80+
public List<? extends Node> getChild() {
81+
return child == null ? List.of() : List.of(child);
82+
}
83+
84+
@Override
85+
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
86+
return nodeVisitor.visitFillNull(this, context);
87+
}
88+
}

core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
import org.opensearch.sql.ast.dsl.AstDSL;
7474
import org.opensearch.sql.ast.expression.Argument;
7575
import org.opensearch.sql.ast.expression.DataType;
76+
import org.opensearch.sql.ast.expression.Field;
7677
import org.opensearch.sql.ast.expression.HighlightFunction;
7778
import org.opensearch.sql.ast.expression.Literal;
7879
import org.opensearch.sql.ast.expression.ParseMethod;
@@ -81,6 +82,7 @@
8182
import org.opensearch.sql.ast.tree.AD;
8283
import org.opensearch.sql.ast.tree.CloseCursor;
8384
import org.opensearch.sql.ast.tree.FetchCursor;
85+
import org.opensearch.sql.ast.tree.FillNull;
8486
import org.opensearch.sql.ast.tree.Kmeans;
8587
import org.opensearch.sql.ast.tree.ML;
8688
import org.opensearch.sql.ast.tree.Paginate;
@@ -1437,6 +1439,48 @@ public void kmeanns_relation() {
14371439
new Kmeans(AstDSL.relation("schema"), argumentMap));
14381440
}
14391441

1442+
@Test
1443+
public void fillnull_same_value() {
1444+
assertAnalyzeEqual(
1445+
LogicalPlanDSL.eval(
1446+
LogicalPlanDSL.relation("schema", table),
1447+
ImmutablePair.of(
1448+
DSL.ref("integer_value", INTEGER),
1449+
DSL.ifnull(DSL.ref("integer_value", INTEGER), DSL.literal(0))),
1450+
ImmutablePair.of(
1451+
DSL.ref("int_null_value", INTEGER),
1452+
DSL.ifnull(DSL.ref("int_null_value", INTEGER), DSL.literal(0)))),
1453+
new FillNull(
1454+
AstDSL.relation("schema"),
1455+
FillNull.ContainNullableFieldFill.ofSameValue(
1456+
AstDSL.intLiteral(0),
1457+
ImmutableList.<Field>builder()
1458+
.add(AstDSL.field("integer_value"))
1459+
.add(AstDSL.field("int_null_value"))
1460+
.build())));
1461+
}
1462+
1463+
@Test
1464+
public void fillnull_various_values() {
1465+
assertAnalyzeEqual(
1466+
LogicalPlanDSL.eval(
1467+
LogicalPlanDSL.relation("schema", table),
1468+
ImmutablePair.of(
1469+
DSL.ref("integer_value", INTEGER),
1470+
DSL.ifnull(DSL.ref("integer_value", INTEGER), DSL.literal(0))),
1471+
ImmutablePair.of(
1472+
DSL.ref("int_null_value", INTEGER),
1473+
DSL.ifnull(DSL.ref("int_null_value", INTEGER), DSL.literal(1)))),
1474+
new FillNull(
1475+
AstDSL.relation("schema"),
1476+
FillNull.ContainNullableFieldFill.ofVariousValue(
1477+
ImmutableList.of(
1478+
new FillNull.NullableFieldFill(
1479+
AstDSL.field("integer_value"), AstDSL.intLiteral(0)),
1480+
new FillNull.NullableFieldFill(
1481+
AstDSL.field("int_null_value"), AstDSL.intLiteral(1))))));
1482+
}
1483+
14401484
@Test
14411485
public void ad_batchRCF_relation() {
14421486
Map<String, Literal> argumentMap =

docs/user/ppl/cmd/fillnull.rst

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
=============
2+
fillnull
3+
=============
4+
5+
.. rubric:: Table of contents
6+
7+
.. contents::
8+
:local:
9+
:depth: 2
10+
11+
12+
Description
13+
============
14+
| The ``fillnull`` command replaces null values for one or more fields.
15+
16+
17+
Syntax
18+
============
19+
fillnull "with" <expression> <field> ["," <field> ]...
20+
21+
* field: mandatory. Name of an existing field that was piped into ``fillnull``. Null values for all specified fields are replaced with the value of expression.
22+
* expression: mandatory. Any expression support by the system. The expression value type must match the type of field.
23+
24+
fillnull "using" <field> "=" <expression> ["," <field> "=" <expression> ]...
25+
26+
* field: mandatory. Name of an existing field that was piped into ``fillnull``.
27+
* expression: mandatory. Any expression support by the system. The expression value type must match the type of field.
28+
29+
Example 1: Replace null values with the same value for multiple fields
30+
======================================================================
31+
32+
The example show to replace null values for email and host with "<not found>".
33+
34+
PPL query::
35+
36+
os> source=accounts | fields email, host | fillnull with '<not found>' email, host ;
37+
fetched rows / total rows = 4/4
38+
+-----------------------+------------+
39+
| email | host |
40+
|-----------------------+------------|
41+
| amberduke@pyrami.com | pyrami.com |
42+
| hattiebond@netagy.com | netagy.com |
43+
| <not found> | |
44+
| daleadams@boink.com | boink.com |
45+
+-----------------------+------------+
46+
47+
Example 2: Replace null values for multiple fields with different values
48+
========================================================================
49+
50+
The example show to replace null values for email with "<not found>" and null values for host with "<no host>".
51+
52+
PPL query::
53+
54+
os> source=accounts | fields email, host | fillnull using email = '<not found>', host = '<no host>' ;
55+
fetched rows / total rows = 4/4
56+
+-----------------------+------------+
57+
| email | host |
58+
|-----------------------+------------|
59+
| amberduke@pyrami.com | pyrami.com |
60+
| hattiebond@netagy.com | netagy.com |
61+
| <not found> | |
62+
| daleadams@boink.com | boink.com |
63+
+-----------------------+------------+
64+
65+
Limitation
66+
==========
67+
The ``fillnull`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node.

ppl/src/main/antlr/OpenSearchPPLLexer.g4

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ NEW_FIELD: 'NEW_FIELD';
3535
KMEANS: 'KMEANS';
3636
AD: 'AD';
3737
ML: 'ML';
38+
FILLNULL: 'FILLNULL';
3839

3940
// COMMAND ASSIST KEYWORDS
4041
AS: 'AS';
@@ -44,6 +45,8 @@ INDEX: 'INDEX';
4445
D: 'D';
4546
DESC: 'DESC';
4647
DATASOURCES: 'DATASOURCES';
48+
USING: 'USING';
49+
WITH: 'WITH';
4750

4851
// CLAUSE KEYWORDS
4952
SORTBY: 'SORTBY';

ppl/src/main/antlr/OpenSearchPPLParser.g4

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ commands
4949
| kmeansCommand
5050
| adCommand
5151
| mlCommand
52+
| fillnullCommand
5253
;
5354

5455
searchCommand
@@ -127,6 +128,27 @@ patternsMethod
127128
| REGEX
128129
;
129130

131+
fillnullCommand
132+
: FILLNULL (fillNullWithTheSameValue
133+
| fillNullWithFieldVariousValues)
134+
;
135+
136+
fillNullWithTheSameValue
137+
: WITH nullReplacement IN nullableField (COMMA nullableField)*
138+
;
139+
140+
fillNullWithFieldVariousValues
141+
: USING nullableField EQUAL nullReplacement (COMMA nullableField EQUAL nullReplacement)*
142+
;
143+
144+
nullableField
145+
: fieldExpression
146+
;
147+
148+
nullReplacement
149+
: expression
150+
;
151+
130152
kmeansCommand
131153
: KMEANS (kmeansParameter)*
132154
;

0 commit comments

Comments
 (0)