Skip to content

Synthetic source: avoid storing multi fields of type text and match_only_text by default. #129126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 10, 2025
6 changes: 6 additions & 0 deletions docs/changelog/129126.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 129126
summary: "Synthetic source: avoid storing multi fields of type text and `match_only_text`\
\ by default"
area: Mapping
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataContext;
Expand Down Expand Up @@ -101,12 +102,9 @@ public static class Builder extends FieldMapper.Builder {
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

private final TextParams.Analyzers analyzers;
private final boolean withinMultiField;

public Builder(String name, IndexAnalyzers indexAnalyzers) {
this(name, IndexVersion.current(), indexAnalyzers);
}

public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) {
super(name);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
Expand All @@ -115,6 +113,7 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap,
indexCreatedVersion
);
this.withinMultiField = withinMultiField;
}

@Override
Expand All @@ -140,18 +139,21 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
@Override
public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
MatchOnlyTextFieldType tft = buildFieldType(context);
return new MatchOnlyTextFieldMapper(
leafName(),
Defaults.FIELD_TYPE,
tft,
builderParams(this, context),
context.isSourceSynthetic(),
this
);
final boolean storeSource;
if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) {
storeSource = context.isSourceSynthetic()
&& withinMultiField == false
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
} else {
storeSource = context.isSourceSynthetic();
}
return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this);
}
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
public static final TypeParser PARSER = new TypeParser(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
);

public static class MatchOnlyTextFieldType extends StringFieldType {

Expand Down Expand Up @@ -406,6 +408,7 @@ private String storedFieldNameForSyntheticSource() {
private final int positionIncrementGap;
private final boolean storeSource;
private final FieldType fieldType;
private final boolean withinMultiField;

private MatchOnlyTextFieldMapper(
String simpleName,
Expand All @@ -424,6 +427,7 @@ private MatchOnlyTextFieldMapper(
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
this.storeSource = storeSource;
this.withinMultiField = builder.withinMultiField;
}

@Override
Expand All @@ -433,7 +437,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this);
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
Expand All @@ -46,8 +47,10 @@
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.core.Is.is;

public class MatchOnlyTextFieldMapperTests extends MapperTestCase {

Expand Down Expand Up @@ -255,4 +258,91 @@ public void testDocValuesLoadedFromSynthetic() throws IOException {
protected IngestScriptSupport ingestScriptSupport() {
throw new AssumptionViolatedException("not supported");
}

public void testStoreParameterDefaultsSyntheticSource() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "match_only_text");
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);

{
List<IndexableField> fields = doc.rootDoc().getFields("name");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(false));
}
{
List<IndexableField> fields = doc.rootDoc().getFields("name._original");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(true));
}
}

public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "match_only_text");
b.startObject("fields");
b.startObject("keyword");
b.field("type", "keyword");
b.endObject();
b.endObject();
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);
{
List<IndexableField> fields = doc.rootDoc().getFields("name");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(false));
}
{
List<IndexableField> fields = doc.rootDoc().getFields("name._original");
assertThat(fields, empty());
}
}

public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "keyword");
b.startObject("fields");
b.startObject("text");
b.field("type", "match_only_text");
b.endObject();
b.endObject();
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);
{
List<IndexableField> fields = doc.rootDoc().getFields("name.text");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(false));
}
{
List<IndexableField> fields = doc.rootDoc().getFields("name.text._original");
assertThat(fields, empty());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion DEFAULT_TO_ACORN_HNSW_FILTER_HEURISTIC = def(9_026_0_00, Version.LUCENE_10_2_1);
public static final IndexVersion SEQ_NO_WITHOUT_POINTS = def(9_027_0_00, Version.LUCENE_10_2_1);
public static final IndexVersion INDEX_INT_SORT_INT_TYPE = def(9_028_0_00, Version.LUCENE_10_2_1);
public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED = def(9_029_0_00, Version.LUCENE_10_2_1);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,11 +287,19 @@ public static class Builder extends FieldMapper.Builder {

final TextParams.Analyzers analyzers;

private final boolean withinMultiField;

public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled);
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false);
}

public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
public Builder(
String name,
IndexVersion indexCreatedVersion,
IndexAnalyzers indexAnalyzers,
boolean isSyntheticSourceEnabled,
boolean withinMultiField
) {
super(name);

// If synthetic source is used we need to either store this field
Expand All @@ -300,10 +308,17 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
// storing the field without requiring users to explicitly set 'store'.
//
// If 'store' parameter was explicitly provided we'll reject the request.
this.store = Parameter.storeParam(
m -> ((TextFieldMapper) m).store,
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
);
// Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field
this.withinMultiField = withinMultiField;
this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> {
if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) {
return isSyntheticSourceEnabled
&& this.withinMultiField == false
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
} else {
return isSyntheticSourceEnabled;
}
});
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
Expand Down Expand Up @@ -482,7 +497,13 @@ public TextFieldMapper build(MapperBuilderContext context) {
}

public static final TypeParser PARSER = createTypeParserWithLegacySupport(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings()))
(n, c) -> new Builder(
n,
c.indexVersionCreated(),
c.getIndexAnalyzers(),
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
c.isWithinMultiField()
)
);

private static class PhraseWrappedAnalyzer extends AnalyzerWrapper {
Expand Down Expand Up @@ -1304,6 +1325,7 @@ public Query existsQuery(SearchExecutionContext context) {
private final SubFieldInfo phraseFieldInfo;

private final boolean isSyntheticSourceEnabled;
private final boolean isWithinMultiField;

private TextFieldMapper(
String simpleName,
Expand Down Expand Up @@ -1337,6 +1359,7 @@ private TextFieldMapper(
this.freqFilter = builder.freqFilter.getValue();
this.fieldData = builder.fieldData.get();
this.isSyntheticSourceEnabled = builder.isSyntheticSourceEnabled;
this.isWithinMultiField = builder.withinMultiField;
}

@Override
Expand All @@ -1360,7 +1383,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled).init(this);
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,73 @@ public void testStoreParameterDefaults() throws IOException {
}
}

public void testStoreParameterDefaultsSyntheticSource() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "text");
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);
List<IndexableField> fields = doc.rootDoc().getFields("name");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(true));
}

public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "text");
b.startObject("fields");
b.startObject("keyword");
b.field("type", "keyword");
b.endObject();
b.endObject();
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);
List<IndexableField> fields = doc.rootDoc().getFields("name");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(false));
}

public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "keyword");
b.startObject("fields");
b.startObject("text");
b.field("type", "text");
b.endObject();
b.endObject();
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);
List<IndexableField> fields = doc.rootDoc().getFields("name.text");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(false));
}

public void testBWCSerialization() throws IOException {
MapperService mapperService = createMapperService(fieldMapping(b -> {
b.field("type", "text");
Expand Down
Loading