Skip to content

[8.19] Synthetic source: avoid storing multi fields of type text and match_only_text by default #129251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/129126.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 129126
summary: "Synthetic source: avoid storing multi fields of type text and `match_only_text`\
\ by default"
area: Mapping
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataContext;
Expand Down Expand Up @@ -101,12 +102,9 @@ public static class Builder extends FieldMapper.Builder {
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

private final TextParams.Analyzers analyzers;
private final boolean withinMultiField;

/**
 * Convenience constructor that delegates to the version-aware constructor,
 * passing {@link IndexVersion#current()} as the index-created version.
 */
public Builder(String name, IndexAnalyzers indexAnalyzers) {
this(name, IndexVersion.current(), indexAnalyzers);
}

public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) {
super(name);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
Expand All @@ -115,6 +113,7 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap,
indexCreatedVersion
);
this.withinMultiField = withinMultiField;
}

@Override
Expand All @@ -140,18 +139,21 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
/**
 * Builds the {@link MatchOnlyTextFieldMapper} for this builder.
 * <p>
 * The field type comes from {@code buildFieldType(context)}; the {@code storeSource} flag
 * handed to the mapper is derived from the index-created version, the synthetic source
 * mode of the context, and this builder's multi-field situation (see the branch below).
 *
 * @param context the mapper builder context; queried for {@code isSourceSynthetic()}
 * @return the newly constructed mapper
 */
@Override
public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
MatchOnlyTextFieldType tft = buildFieldType(context);
return new MatchOnlyTextFieldMapper(
leafName(),
Defaults.FIELD_TYPE,
tft,
builderParams(this, context),
context.isSourceSynthetic(),
this
);
final boolean storeSource;
// Indices created on or after MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19 only set
// storeSource when all three hold: synthetic source is on, this field is not itself within a multi field,
// and it has no synthetic-source-compatible keyword multi field.
if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19)) {
storeSource = context.isSourceSynthetic()
&& withinMultiField == false
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
} else {
// Indices created before that version keep following the synthetic source flag alone.
storeSource = context.isSourceSynthetic();
}
return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this);
}
}

/**
 * Type parser for this mapper. Each field is parsed into a {@link Builder} created from the
 * field name, the index-created version, the index analyzers and the parser context's
 * {@code isWithinMultiField()} flag.
 */
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
public static final TypeParser PARSER = new TypeParser(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
);

public static class MatchOnlyTextFieldType extends StringFieldType {

Expand Down Expand Up @@ -406,6 +408,7 @@ private String storedFieldNameForSyntheticSource() {
private final int positionIncrementGap;
private final boolean storeSource;
private final FieldType fieldType;
private final boolean withinMultiField;

private MatchOnlyTextFieldMapper(
String simpleName,
Expand All @@ -424,6 +427,7 @@ private MatchOnlyTextFieldMapper(
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
this.storeSource = storeSource;
this.withinMultiField = builder.withinMultiField;
}

@Override
Expand All @@ -433,7 +437,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {

/**
 * Returns a builder used for merging, created from this mapper's leaf name, index-created
 * version and index analyzers, then initialised from this mapper via {@code init(this)}.
 */
@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this);
// The mapper's own withinMultiField flag is handed to the merge builder.
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
Expand All @@ -46,8 +47,10 @@
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.core.Is.is;

public class MatchOnlyTextFieldMapperTests extends MapperTestCase {

Expand Down Expand Up @@ -255,4 +258,91 @@ public void testDocValuesLoadedFromSynthetic() throws IOException {
protected IngestScriptSupport ingestScriptSupport() {
throw new AssumptionViolatedException("not supported");
}

/**
 * With the index source mode set to {@code synthetic}, a top-level {@code match_only_text}
 * field named {@code name} must produce a {@code name} Lucene field that is not stored and a
 * {@code name._original} Lucene field that is stored.
 */
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
    var syntheticSettings = getIndexSettingsBuilder()
        .put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic")
        .build();
    var fieldMapping = mapping(b -> {
        b.startObject("name").field("type", "match_only_text").endObject();
    });

    DocumentMapper documentMapper = createMapperService(syntheticSettings, fieldMapping).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The field itself is indexed without being stored.
    List<IndexableField> indexedFields = parsed.rootDoc().getFields("name");
    IndexableFieldType indexedType = indexedFields.get(0).fieldType();
    assertThat(indexedType.stored(), is(false));

    // The companion "_original" field is the one that is stored.
    List<IndexableField> originalFields = parsed.rootDoc().getFields("name._original");
    IndexableFieldType originalType = originalFields.get(0).fieldType();
    assertThat(originalType.stored(), is(true));
}

/**
 * With the index source mode set to {@code synthetic}, a {@code match_only_text} field that
 * has a {@code keyword} multi field must produce a {@code name} Lucene field that is not
 * stored, and no {@code name._original} field at all.
 */
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
    var syntheticSettings = getIndexSettingsBuilder()
        .put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic")
        .build();
    var fieldMapping = mapping(b -> {
        b.startObject("name").field("type", "match_only_text");
        b.startObject("fields").startObject("keyword").field("type", "keyword").endObject().endObject();
        b.endObject();
    });

    DocumentMapper documentMapper = createMapperService(syntheticSettings, fieldMapping).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The text field itself is not stored.
    List<IndexableField> indexedFields = parsed.rootDoc().getFields("name");
    IndexableFieldType indexedType = indexedFields.get(0).fieldType();
    assertThat(indexedType.stored(), is(false));

    // No "_original" field is emitted when a keyword multi field is present.
    List<IndexableField> originalFields = parsed.rootDoc().getFields("name._original");
    assertThat(originalFields, empty());
}

/**
 * With the index source mode set to {@code synthetic}, a {@code match_only_text} field that is
 * itself a multi field ({@code name.text}) of a {@code keyword} field must not be stored, and
 * no {@code name.text._original} field may be emitted.
 */
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
    var syntheticSettings = getIndexSettingsBuilder()
        .put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic")
        .build();
    var fieldMapping = mapping(b -> {
        b.startObject("name").field("type", "keyword");
        b.startObject("fields").startObject("text").field("type", "match_only_text").endObject().endObject();
        b.endObject();
    });

    DocumentMapper documentMapper = createMapperService(syntheticSettings, fieldMapping).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The multi field is indexed without being stored.
    List<IndexableField> subFields = parsed.rootDoc().getFields("name.text");
    IndexableFieldType subFieldType = subFields.get(0).fieldType();
    assertThat(subFieldType.stored(), is(false));

    // No "_original" companion is written for the multi field.
    List<IndexableField> originalFields = parsed.rootDoc().getFields("name.text._original");
    assertThat(originalFields, empty());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ private static IndexVersion def(int id, Version luceneVersion) {
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ_BACKPORT_8_X = def(8_530_0_00, Version.LUCENE_9_12_1);
public static final IndexVersion SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X = def(8_531_0_00, Version.LUCENE_9_12_1);
public static final IndexVersion INDEX_INT_SORT_INT_TYPE_8_19 = def(8_532_0_00, Version.LUCENE_9_12_1);
// Index-created version checked by the text and match_only_text builders: from this version on, their
// synthetic source store defaults also require that the field is not within a multi field and has no
// synthetic-source-compatible keyword multi field.
public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19 = def(8_533_0_00, Version.LUCENE_9_12_1);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,11 +287,19 @@ public static class Builder extends FieldMapper.Builder {

final TextParams.Analyzers analyzers;

private final boolean withinMultiField;

/**
 * Convenience constructor that delegates to the full constructor with
 * {@link IndexVersion#current()} as the index-created version.
 */
public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled);
// withinMultiField is passed as false here.
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false);
}

public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
public Builder(
String name,
IndexVersion indexCreatedVersion,
IndexAnalyzers indexAnalyzers,
boolean isSyntheticSourceEnabled,
boolean withinMultiField
) {
super(name);

// If synthetic source is used we need to either store this field
Expand All @@ -300,10 +308,17 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
// storing the field without requiring users to explicitly set 'store'.
//
// If 'store' parameter was explicitly provided we'll reject the request.
this.store = Parameter.storeParam(
m -> ((TextFieldMapper) m).store,
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
);
// Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field
this.withinMultiField = withinMultiField;
this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> {
if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19)) {
return isSyntheticSourceEnabled
&& this.withinMultiField == false
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
} else {
return isSyntheticSourceEnabled;
}
});
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
Expand Down Expand Up @@ -484,7 +499,13 @@ public TextFieldMapper build(MapperBuilderContext context) {
private static final IndexVersion MINIMUM_COMPATIBILITY_VERSION = IndexVersion.fromId(5000099);

/**
 * Type parser for this mapper. Each field is parsed into a {@link Builder} created from the
 * field name, the index-created version, the index analyzers, whether the index settings use
 * synthetic source ({@code SourceFieldMapper.isSynthetic}) and the parser context's
 * {@code isWithinMultiField()} flag. {@code MINIMUM_COMPATIBILITY_VERSION} is passed as the
 * second {@code TypeParser} argument.
 */
public static final TypeParser PARSER = new TypeParser(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings())),
(n, c) -> new Builder(
n,
c.indexVersionCreated(),
c.getIndexAnalyzers(),
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
c.isWithinMultiField()
),
MINIMUM_COMPATIBILITY_VERSION
);

Expand Down Expand Up @@ -1307,6 +1328,7 @@ public Query existsQuery(SearchExecutionContext context) {
private final SubFieldInfo phraseFieldInfo;

private final boolean isSyntheticSourceEnabled;
private final boolean isWithinMultiField;

private TextFieldMapper(
String simpleName,
Expand Down Expand Up @@ -1340,6 +1362,7 @@ private TextFieldMapper(
this.freqFilter = builder.freqFilter.getValue();
this.fieldData = builder.fieldData.get();
this.isSyntheticSourceEnabled = builder.isSyntheticSourceEnabled;
this.isWithinMultiField = builder.withinMultiField;
}

@Override
Expand All @@ -1363,7 +1386,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {

/**
 * Returns a builder used for merging, created from this mapper's leaf name, index-created
 * version, index analyzers and synthetic source flag, then initialised from this mapper via
 * {@code init(this)}.
 */
@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled).init(this);
// The mapper's own isWithinMultiField flag is handed to the merge builder.
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,73 @@ public void testStoreParameterDefaults() throws IOException {
}
}

/**
 * With the index source mode set to {@code synthetic}, a top-level {@code text} field without
 * multi fields must be stored by default.
 */
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
    var syntheticSettings = getIndexSettingsBuilder()
        .put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic")
        .build();
    var fieldMapping = mapping(b -> {
        b.startObject("name").field("type", "text").endObject();
    });

    DocumentMapper documentMapper = createMapperService(syntheticSettings, fieldMapping).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The first Lucene field emitted for "name" must be marked as stored.
    List<IndexableField> nameFields = parsed.rootDoc().getFields("name");
    IndexableFieldType nameType = nameFields.get(0).fieldType();
    assertThat(nameType.stored(), is(true));
}

/**
 * With the index source mode set to {@code synthetic}, a {@code text} field that has a
 * {@code keyword} multi field must not be stored by default.
 */
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
    var syntheticSettings = getIndexSettingsBuilder()
        .put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic")
        .build();
    var fieldMapping = mapping(b -> {
        b.startObject("name").field("type", "text");
        b.startObject("fields").startObject("keyword").field("type", "keyword").endObject().endObject();
        b.endObject();
    });

    DocumentMapper documentMapper = createMapperService(syntheticSettings, fieldMapping).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The first Lucene field emitted for "name" must not be marked as stored.
    List<IndexableField> nameFields = parsed.rootDoc().getFields("name");
    IndexableFieldType nameType = nameFields.get(0).fieldType();
    assertThat(nameType.stored(), is(false));
}

/**
 * With the index source mode set to {@code synthetic}, a {@code text} field that is itself a
 * multi field ({@code name.text}) of a {@code keyword} field must not be stored by default.
 */
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
    var syntheticSettings = getIndexSettingsBuilder()
        .put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic")
        .build();
    var fieldMapping = mapping(b -> {
        b.startObject("name").field("type", "keyword");
        b.startObject("fields").startObject("text").field("type", "text").endObject().endObject();
        b.endObject();
    });

    DocumentMapper documentMapper = createMapperService(syntheticSettings, fieldMapping).documentMapper();
    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The first Lucene field emitted for the "name.text" multi field must not be marked as stored.
    List<IndexableField> subFields = parsed.rootDoc().getFields("name.text");
    IndexableFieldType subFieldType = subFields.get(0).fieldType();
    assertThat(subFieldType.stored(), is(false));
}

public void testBWCSerialization() throws IOException {
MapperService mapperService = createMapperService(fieldMapping(b -> {
b.field("type", "text");
Expand Down