Skip to content

Commit 3eb65d6

Browse files
committed
Remove optional seed from ES|QL SAMPLE (elastic#128887)
* Remove optional seed from ES|QL SAMPLE * make it clear that seed is for testing
1 parent b954ed7 commit 3eb65d6

File tree

18 files changed

+169
-267
lines changed

18 files changed

+169
-267
lines changed

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/SampleOperator.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import org.elasticsearch.TransportVersion;
1111
import org.elasticsearch.TransportVersions;
12+
import org.elasticsearch.common.Randomness;
1213
import org.elasticsearch.common.Strings;
1314
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1415
import org.elasticsearch.common.io.stream.StreamInput;
@@ -27,16 +28,29 @@
2728

2829
public class SampleOperator implements Operator {
2930

30-
public record Factory(double probability, int seed) implements OperatorFactory {
31+
public static class Factory implements OperatorFactory {
32+
33+
private final double probability;
34+
private final Integer seed;
35+
36+
public Factory(double probability) {
37+
this(probability, null);
38+
}
39+
40+
// visible for testing
41+
Factory(double probability, Integer seed) {
42+
this.probability = probability;
43+
this.seed = seed;
44+
}
3145

3246
@Override
3347
public SampleOperator get(DriverContext driverContext) {
34-
return new SampleOperator(probability, seed);
48+
return new SampleOperator(probability, seed == null ? Randomness.get().nextInt() : seed);
3549
}
3650

3751
@Override
3852
public String describe() {
39-
return "SampleOperator[probability = " + probability + ", seed = " + seed + "]";
53+
return "SampleOperator[probability = " + probability + "]";
4054
}
4155
}
4256

x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/SampleOperatorTests.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import static org.hamcrest.Matchers.equalTo;
2222
import static org.hamcrest.Matchers.greaterThan;
2323
import static org.hamcrest.Matchers.lessThan;
24-
import static org.hamcrest.Matchers.matchesPattern;
2524

2625
public class SampleOperatorTests extends OperatorTestCase {
2726

@@ -46,7 +45,7 @@ protected SampleOperator.Factory simple() {
4645

4746
@Override
4847
protected Matcher<String> expectedDescriptionOfSimple() {
49-
return matchesPattern("SampleOperator\\[probability = 0.5, seed = -?\\d+]");
48+
return equalTo("SampleOperator[probability = 0.5]");
5049
}
5150

5251
@Override

x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,5 +389,5 @@ completionCommand
389389
;
390390

391391
sampleCommand
392-
: DEV_SAMPLE probability=decimalValue seed=integerValue?
392+
: DEV_SAMPLE probability=decimalValue
393393
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineSample.java

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,7 @@ protected LogicalPlan rule(Sample sample, LogicalOptimizerContext context) {
6161
var child = sample.child();
6262
if (child instanceof Sample sampleChild) {
6363
var probability = combinedProbability(context, sample, sampleChild);
64-
var seed = combinedSeed(context, sample, sampleChild);
65-
plan = new Sample(sample.source(), probability, seed, sampleChild.child());
64+
plan = new Sample(sample.source(), probability, sampleChild.child());
6665
} else if (child instanceof Enrich
6766
|| child instanceof Eval
6867
|| child instanceof Filter
@@ -80,22 +79,4 @@ private static Expression combinedProbability(LogicalOptimizerContext context, S
8079
var childProbability = (double) Foldables.valueOf(context.foldCtx(), child.probability());
8180
return Literal.of(parent.probability(), parentProbability * childProbability);
8281
}
83-
84-
private static Expression combinedSeed(LogicalOptimizerContext context, Sample parent, Sample child) {
85-
var parentSeed = parent.seed();
86-
var childSeed = child.seed();
87-
Expression seed;
88-
if (parentSeed != null) {
89-
if (childSeed != null) {
90-
var seedValue = (int) Foldables.valueOf(context.foldCtx(), parentSeed);
91-
seedValue ^= (int) Foldables.valueOf(context.foldCtx(), childSeed);
92-
seed = Literal.of(parentSeed, seedValue);
93-
} else {
94-
seed = parentSeed;
95-
}
96-
} else {
97-
seed = childSeed;
98-
}
99-
return seed;
100-
}
10182
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushSampleToSource.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,6 @@ protected PhysicalPlan rule(SampleExec sample, LocalPhysicalOptimizerContext ctx
3333
}
3434

3535
var sampleQuery = new RandomSamplingQueryBuilder((double) Foldables.valueOf(ctx.foldCtx(), sample.probability()));
36-
if (sample.seed() != null) {
37-
sampleQuery.seed((int) Foldables.valueOf(ctx.foldCtx(), sample.seed()));
38-
}
3936

4037
fullQuery.filter(sampleQuery);
4138

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseParser.interp

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseParser.java

Lines changed: 118 additions & 133 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -708,20 +708,6 @@ public Literal inferenceId(EsqlBaseParser.IdentifierOrParameterContext ctx) {
708708

709709
public PlanFactory visitSampleCommand(EsqlBaseParser.SampleCommandContext ctx) {
710710
var probability = visitDecimalValue(ctx.probability);
711-
Literal seed;
712-
if (ctx.seed != null) {
713-
seed = visitIntegerValue(ctx.seed);
714-
if (seed.dataType() != DataType.INTEGER) {
715-
throw new ParsingException(
716-
seed.source(),
717-
"seed must be an integer, provided [{}] of type [{}]",
718-
ctx.seed.getText(),
719-
seed.dataType()
720-
);
721-
}
722-
} else {
723-
seed = null;
724-
}
725-
return plan -> new Sample(source(ctx), probability, seed, plan);
711+
return plan -> new Sample(source(ctx), probability, plan);
726712
}
727713
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Sample.java

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1010
import org.elasticsearch.common.io.stream.StreamInput;
1111
import org.elasticsearch.common.io.stream.StreamOutput;
12-
import org.elasticsearch.core.Nullable;
1312
import org.elasticsearch.search.aggregations.bucket.sampler.random.RandomSamplingQuery;
1413
import org.elasticsearch.xpack.esql.capabilities.PostAnalysisVerificationAware;
1514
import org.elasticsearch.xpack.esql.capabilities.TelemetryAware;
@@ -30,19 +29,16 @@ public class Sample extends UnaryPlan implements TelemetryAware, PostAnalysisVer
3029
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Sample", Sample::new);
3130

3231
private final Expression probability;
33-
private final Expression seed;
3432

35-
public Sample(Source source, Expression probability, @Nullable Expression seed, LogicalPlan child) {
33+
public Sample(Source source, Expression probability, LogicalPlan child) {
3634
super(source, child);
3735
this.probability = probability;
38-
this.seed = seed;
3936
}
4037

4138
private Sample(StreamInput in) throws IOException {
4239
this(
4340
Source.readFrom((PlanStreamInput) in),
4441
in.readNamedWriteable(Expression.class), // probability
45-
in.readOptionalNamedWriteable(Expression.class), // seed
4642
in.readNamedWriteable(LogicalPlan.class) // child
4743
);
4844
}
@@ -51,7 +47,6 @@ private Sample(StreamInput in) throws IOException {
5147
public void writeTo(StreamOutput out) throws IOException {
5248
source().writeTo(out);
5349
out.writeNamedWriteable(probability);
54-
out.writeOptionalNamedWriteable(seed);
5550
out.writeNamedWriteable(child());
5651
}
5752

@@ -62,30 +57,26 @@ public String getWriteableName() {
6257

6358
@Override
6459
protected NodeInfo<Sample> info() {
65-
return NodeInfo.create(this, Sample::new, probability, seed, child());
60+
return NodeInfo.create(this, Sample::new, probability, child());
6661
}
6762

6863
@Override
6964
public Sample replaceChild(LogicalPlan newChild) {
70-
return new Sample(source(), probability, seed, newChild);
65+
return new Sample(source(), probability, newChild);
7166
}
7267

7368
public Expression probability() {
7469
return probability;
7570
}
7671

77-
public Expression seed() {
78-
return seed;
79-
}
80-
8172
@Override
8273
public boolean expressionsResolved() {
83-
return probability.resolved() && (seed == null || seed.resolved());
74+
return probability.resolved();
8475
}
8576

8677
@Override
8778
public int hashCode() {
88-
return Objects.hash(probability, seed, child());
79+
return Objects.hash(probability, child());
8980
}
9081

9182
@Override
@@ -99,7 +90,7 @@ public boolean equals(Object obj) {
9990

10091
var other = (Sample) obj;
10192

102-
return Objects.equals(probability, other.probability) && Objects.equals(seed, other.seed) && Objects.equals(child(), other.child());
93+
return Objects.equals(probability, other.probability) && Objects.equals(child(), other.child());
10394
}
10495

10596
@Override

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/SampleExec.java

Lines changed: 7 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,6 @@
1010
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1111
import org.elasticsearch.common.io.stream.StreamInput;
1212
import org.elasticsearch.common.io.stream.StreamOutput;
13-
import org.elasticsearch.core.Nullable;
14-
import org.elasticsearch.xpack.esql.capabilities.PostPhysicalOptimizationVerificationAware;
15-
import org.elasticsearch.xpack.esql.common.Failures;
1613
import org.elasticsearch.xpack.esql.core.expression.Expression;
1714
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
1815
import org.elasticsearch.xpack.esql.core.tree.Source;
@@ -21,30 +18,25 @@
2118
import java.io.IOException;
2219
import java.util.Objects;
2320

24-
import static org.elasticsearch.xpack.esql.common.Failure.fail;
25-
26-
public class SampleExec extends UnaryExec implements PostPhysicalOptimizationVerificationAware {
21+
public class SampleExec extends UnaryExec {
2722
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
2823
PhysicalPlan.class,
2924
"SampleExec",
3025
SampleExec::new
3126
);
3227

3328
private final Expression probability;
34-
private final Expression seed;
3529

36-
public SampleExec(Source source, PhysicalPlan child, Expression probability, @Nullable Expression seed) {
30+
public SampleExec(Source source, PhysicalPlan child, Expression probability) {
3731
super(source, child);
3832
this.probability = probability;
39-
this.seed = seed;
4033
}
4134

4235
public SampleExec(StreamInput in) throws IOException {
4336
this(
4437
Source.readFrom((PlanStreamInput) in),
4538
in.readNamedWriteable(PhysicalPlan.class), // child
46-
in.readNamedWriteable(Expression.class), // probability
47-
in.readOptionalNamedWriteable(Expression.class) // seed
39+
in.readNamedWriteable(Expression.class) // probability
4840
);
4941
}
5042

@@ -53,17 +45,16 @@ public void writeTo(StreamOutput out) throws IOException {
5345
source().writeTo(out);
5446
out.writeNamedWriteable(child());
5547
out.writeNamedWriteable(probability);
56-
out.writeOptionalNamedWriteable(seed);
5748
}
5849

5950
@Override
6051
public UnaryExec replaceChild(PhysicalPlan newChild) {
61-
return new SampleExec(source(), newChild, probability, seed);
52+
return new SampleExec(source(), newChild, probability);
6253
}
6354

6455
@Override
6556
protected NodeInfo<? extends PhysicalPlan> info() {
66-
return NodeInfo.create(this, SampleExec::new, child(), probability, seed);
57+
return NodeInfo.create(this, SampleExec::new, child(), probability);
6758
}
6859

6960
/**
@@ -78,13 +69,9 @@ public Expression probability() {
7869
return probability;
7970
}
8071

81-
public Expression seed() {
82-
return seed;
83-
}
84-
8572
@Override
8673
public int hashCode() {
87-
return Objects.hash(child(), probability, seed);
74+
return Objects.hash(child(), probability);
8875
}
8976

9077
@Override
@@ -98,17 +85,6 @@ public boolean equals(Object obj) {
9885

9986
var other = (SampleExec) obj;
10087

101-
return Objects.equals(child(), other.child()) && Objects.equals(probability, other.probability) && Objects.equals(seed, other.seed);
102-
}
103-
104-
@Override
105-
public void postPhysicalOptimizationVerification(Failures failures) {
106-
// It's currently impossible in ES|QL to handle all data in deterministic order, therefore
107-
// a fixed random seed in the sample operator doesn't work as intended and is disallowed.
108-
// TODO: fix this.
109-
if (seed != null) {
110-
// TODO: what should the error message here be? This doesn't seem right.
111-
failures.add(fail(seed, "Seed not supported when sampling can't be pushed down to Lucene"));
112-
}
88+
return Objects.equals(child(), other.child()) && Objects.equals(probability, other.probability);
11389
}
11490
}

0 commit comments

Comments
 (0)