Skip to content

ES|QL: Improve generative tests for FORK [130015] #131206

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,46 @@ public static CommandGenerator randomPipeCommandGenerator() {
return randomFrom(PIPE_COMMANDS);
}

public interface Executor {
void run(CommandGenerator generator, CommandGenerator.CommandDescription current);

List<CommandGenerator.CommandDescription> previousCommands();

boolean continueExecuting();

List<EsqlQueryGenerator.Column> currentSchema();

}

public static void generatePipeline(
final int depth,
CommandGenerator commandGenerator,
final CommandGenerator.QuerySchema schema,
Executor executor
) {
CommandGenerator.CommandDescription desc = commandGenerator.generate(List.of(), List.of(), schema);
executor.run(commandGenerator, desc);
if (executor.continueExecuting() == false) {
return;
}

for (int j = 0; j < depth; j++) {
if (executor.currentSchema().isEmpty()) {
break;
}
commandGenerator = EsqlQueryGenerator.randomPipeCommandGenerator();
desc = commandGenerator.generate(executor.previousCommands(), executor.currentSchema(), schema);
if (desc == CommandGenerator.EMPTY_DESCRIPTION) {
continue;
}

executor.run(commandGenerator, desc);
if (executor.continueExecuting() == false) {
break;
}
}
}

public static String booleanExpression(List<Column> previousOutput) {
// TODO LIKE, RLIKE, functions etc.
return switch (randomIntBetween(0, 3)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,43 +88,53 @@ public void test() throws IOException {
List<LookupIdx> lookupIndices = lookupIndices();
List<CsvTestsDataLoader.EnrichConfig> policies = availableEnrichPolicies();
CommandGenerator.QuerySchema mappingInfo = new CommandGenerator.QuerySchema(indices, lookupIndices, policies);
EsqlQueryGenerator.QueryExecuted previousResult = null;

for (int i = 0; i < ITERATIONS; i++) {
List<CommandGenerator.CommandDescription> previousCommands = new ArrayList<>();
CommandGenerator commandGenerator = EsqlQueryGenerator.sourceCommand();
CommandGenerator.CommandDescription desc = commandGenerator.generate(List.of(), List.of(), mappingInfo);
String command = desc.commandString();
EsqlQueryGenerator.QueryExecuted result = execute(command, 0);
if (result.exception() != null) {
checkException(result);
continue;
}
if (checkResults(List.of(), commandGenerator, desc, null, result).success() == false) {
continue;
}
previousResult = result;
previousCommands.add(desc);
for (int j = 0; j < MAX_DEPTH; j++) {
if (result.outputSchema().isEmpty()) {
break;
var exec = new EsqlQueryGenerator.Executor() {
@Override
public void run(CommandGenerator generator, CommandGenerator.CommandDescription current) {
previousCommands.add(current);
final String command = current.commandString();

final EsqlQueryGenerator.QueryExecuted result = previousResult == null
? execute(command, 0)
: execute(previousResult.query() + command, previousResult.depth());
previousResult = result;

final boolean hasException = result.exception() != null;
if (hasException || checkResults(List.of(), generator, current, previousResult, result).success() == false) {
if (hasException) {
checkException(result);
}
continueExecuting = false;
currentSchema = List.of();
} else {
continueExecuting = true;
currentSchema = result.outputSchema();
}
}
commandGenerator = EsqlQueryGenerator.randomPipeCommandGenerator();
desc = commandGenerator.generate(previousCommands, result.outputSchema(), mappingInfo);
if (desc == CommandGenerator.EMPTY_DESCRIPTION) {
continue;

@Override
public List<CommandGenerator.CommandDescription> previousCommands() {
return previousCommands;
}
command = desc.commandString();
result = execute(result.query() + command, result.depth() + 1);
if (result.exception() != null) {
checkException(result);
break;

@Override
public boolean continueExecuting() {
return continueExecuting;
}
if (checkResults(previousCommands, commandGenerator, desc, previousResult, result).success() == false) {
break;

@Override
public List<EsqlQueryGenerator.Column> currentSchema() {
return currentSchema;
}
previousCommands.add(desc);
previousResult = result;
}

boolean continueExecuting;
List<EsqlQueryGenerator.Column> currentSchema;
final List<CommandGenerator.CommandDescription> previousCommands = new ArrayList<>();
EsqlQueryGenerator.QueryExecuted previousResult;
};
EsqlQueryGenerator.generatePipeline(MAX_DEPTH, EsqlQueryGenerator.sourceCommand(), mappingInfo, exec);
}
}

Expand Down Expand Up @@ -164,7 +174,7 @@ private void checkException(EsqlQueryGenerator.QueryExecuted query) {
}

@SuppressWarnings("unchecked")
private EsqlQueryGenerator.QueryExecuted execute(String command, int depth) {
public static EsqlQueryGenerator.QueryExecuted execute(String command, int depth) {
try {
Map<String, Object> a = RestEsqlTestCase.runEsql(
new RestEsqlTestCase.RequestObjectBuilder().query(command).build(),
Expand All @@ -184,7 +194,7 @@ private EsqlQueryGenerator.QueryExecuted execute(String command, int depth) {
}

@SuppressWarnings("unchecked")
private List<EsqlQueryGenerator.Column> outputSchema(Map<String, Object> a) {
private static List<EsqlQueryGenerator.Column> outputSchema(Map<String, Object> a) {
List<Map<String, String>> cols = (List<Map<String, String>>) a.get("columns");
if (cols == null) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@
package org.elasticsearch.xpack.esql.qa.rest.generative.command.pipe;

import org.elasticsearch.xpack.esql.qa.rest.generative.EsqlQueryGenerator;
import org.elasticsearch.xpack.esql.qa.rest.generative.GenerativeRestTest;
import org.elasticsearch.xpack.esql.qa.rest.generative.command.CommandGenerator;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static org.elasticsearch.test.ESTestCase.randomIntBetween;

Expand All @@ -28,17 +31,103 @@ public CommandDescription generate(
) {
// FORK can only be allowed once - so we skip adding another FORK if we already have one
// otherwise, most generated queries would only result in a validation error
StringBuilder completeCommand = new StringBuilder();
for (CommandDescription command : previousCommands) {
if (command.commandName().equals(FORK)) {
return new CommandDescription(FORK, this, " ", Map.of());
return EMPTY_DESCRIPTION;
}

completeCommand.append(command.commandString());
}

int n = randomIntBetween(2, 3);
final int branchCount = randomIntBetween(2, 3);
final int branchToRetain = randomIntBetween(1, branchCount);

StringBuilder forkCmd = new StringBuilder(" | FORK ");
for (int i = 0; i < branchCount; i++) {
var expr = WhereGenerator.randomExpression(randomIntBetween(1, 2), previousOutput);
if (expr == null) {
expr = "true";
}
forkCmd.append(" (").append("where ").append(expr);

var exec = new EsqlQueryGenerator.Executor() {
@Override
public void run(CommandGenerator generator, CommandDescription current) {
final String command = current.commandString();

// Try appending new command to parent of Fork. If we successfully execute (without exception) AND still retain the same
// schema (all Fork branches must have the same schema), we append the command.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a strict constraint? Isn't it enough that there are no type conflicts between branches?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We would generate more interesting commands if we did not have this restriction here.
Right now AFAICS when I run this, the FORK branches contain mostly WHERE/MV_EXPAND/SORT and ENRICH sometimes.

FORK branches don't need to have the same schema - we just need to be sure that if a column is present in multiple branches, it has the same data type everywhere.

Copy link
Contributor Author

@svilen-mihaylov-elastic svilen-mihaylov-elastic Jul 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes correct, the schemas do not need to be exactly the same, just the overlapping columns need to have the same type.

I spend some time thinking about this and decided to go with the simple solution (for now). I think it will be challenging to allow different schemas which have compatible types AND allow for (mostly independent) fork sub-pipelines. Particularly for non-trivial subpipelines (> 5 stages), it will be non-trivial to keep adding random stages and checking if the types remain compatible. This may lead to a lot of repetitions and discarded results which will make the test run (possibly) a lot slower.

I will update the comment., and in any case, options are open later to iterate to further on this condition to balance coverage of Fork sub-pipelines and performance of the test itself.

final EsqlQueryGenerator.QueryExecuted result = previousResult == null
? GenerativeRestTest.execute(command, 0)
: GenerativeRestTest.execute(previousResult.query() + command, previousResult.depth());
previousResult = result;

continueExecuting = result.exception() == null && result.outputSchema().equals(previousOutput);
if (continueExecuting) {
previousCommands.add(current);
}
}

@Override
public List<CommandDescription> previousCommands() {
return previousCommands;
}

@Override
public boolean continueExecuting() {
return continueExecuting;
}

String cmd = " | FORK " + "( WHERE true ) ".repeat(n) + " | WHERE _fork == \"fork" + randomIntBetween(1, n) + "\" | DROP _fork";
@Override
public List<EsqlQueryGenerator.Column> currentSchema() {
return previousOutput;
}

final List<CommandGenerator.CommandDescription> previousCommands = new ArrayList<>();
boolean continueExecuting;
EsqlQueryGenerator.QueryExecuted previousResult;
};

var gen = new CommandGenerator() {
@Override
public CommandDescription generate(
List<CommandDescription> previousCommands,
List<EsqlQueryGenerator.Column> previousOutput,
QuerySchema schema
) {
return new CommandDescription(FORK, this, completeCommand.toString(), Map.of());
}

@Override
public ValidationResult validateOutput(
List<CommandDescription> previousCommands,
CommandDescription command,
List<EsqlQueryGenerator.Column> previousColumns,
List<List<Object>> previousOutput,
List<EsqlQueryGenerator.Column> columns,
List<List<Object>> output
) {
return VALIDATION_OK;
}
};

EsqlQueryGenerator.generatePipeline(3, gen, schema, exec);
if (exec.previousCommands().size() > 1) {
String previousCmd = exec.previousCommands()
.stream()
.skip(1)
.map(CommandDescription::commandString)
.collect(Collectors.joining(" "));
forkCmd.append(previousCmd);
}

forkCmd.append(")");
}
forkCmd.append(" | WHERE _fork == \"fork").append(branchToRetain).append("\" | DROP _fork");

return new CommandDescription(FORK, this, cmd, Map.of());
// System.out.println("Generated fork command: " + forkCmd);
return new CommandDescription(FORK, this, forkCmd.toString(), Map.of());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,15 @@ public class WhereGenerator implements CommandGenerator {
public static final String WHERE = "where";
public static final CommandGenerator INSTANCE = new WhereGenerator();

@Override
public CommandDescription generate(
List<CommandDescription> previousCommands,
List<EsqlQueryGenerator.Column> previousOutput,
QuerySchema schema
) {
public static String randomExpression(final int nConditions, List<EsqlQueryGenerator.Column> previousOutput) {
// TODO more complex conditions
StringBuilder result = new StringBuilder(" | where ");
int nConditions = randomIntBetween(1, 5);
var result = new StringBuilder();

for (int i = 0; i < nConditions; i++) {
String exp = EsqlQueryGenerator.booleanExpression(previousOutput);
if (exp == null) {
// cannot generate expressions, just skip
return EMPTY_DESCRIPTION;
// Cannot generate expressions, just skip.
return null;
}
if (i > 0) {
result.append(randomBoolean() ? " AND " : " OR ");
Expand All @@ -45,8 +40,20 @@ public CommandDescription generate(
result.append(exp);
}

String cmd = result.toString();
return new CommandDescription(WHERE, this, cmd, Map.of());
return result.toString();
}

@Override
public CommandDescription generate(
List<CommandDescription> previousCommands,
List<EsqlQueryGenerator.Column> previousOutput,
QuerySchema schema
) {
String expression = randomExpression(randomIntBetween(1, 5), previousOutput);
if (expression == null) {
return EMPTY_DESCRIPTION;
}
return new CommandDescription(WHERE, this, " | where " + expression, Map.of());
}

@Override
Expand Down