Skip to content

Commit 76e935e

Browse files
authored
Fail shards early when we can detect a type missmatch (#79869)
Resolves #72276 Generally speaking, we can't detect field type mismatches between different shards until reduce time, which then causes us to fail the whole aggregation. There is an exception though when the user has specified a value type. Since the value type gets pushed out to all the shards, we can detect on the shard if the field type doesn't match the specified value type, and fail only that shard allowing for a partial result on the aggregation. In the case where the user supplies a script as well, we don't fail the shard, because it's possible the script changes the type (this was a pattern before runtime fields)
1 parent 256521e commit 76e935e

File tree

6 files changed

+409
-12
lines changed

6 files changed

+409
-12
lines changed

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/20_terms.yml

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1368,3 +1368,143 @@ huge size:
13681368
- match: { aggregations.str_terms.buckets.1.doc_count: 2 }
13691369
- match: { aggregations.str_terms.buckets.2.key: c }
13701370
- match: { aggregations.str_terms.buckets.2.doc_count: 3 }
1371+
1372+
---
1373+
Value type mismatch fails shard:
1374+
- skip:
1375+
version: " - 8.0.99"
1376+
reason: "Fixed in 8.1"
1377+
1378+
- do:
1379+
indices.create:
1380+
index: valuetype_test_1
1381+
body:
1382+
settings:
1383+
number_of_shards: 1
1384+
number_of_replicas: 0
1385+
mappings:
1386+
properties:
1387+
ip:
1388+
type: keyword
1389+
- do:
1390+
indices.create:
1391+
index: valuetype_test_2
1392+
body:
1393+
settings:
1394+
number_of_shards: 1
1395+
number_of_replicas: 0
1396+
mappings:
1397+
properties:
1398+
ip:
1399+
type: ip
1400+
1401+
- do:
1402+
bulk:
1403+
index: valuetype_test_1
1404+
refresh: true
1405+
body: |
1406+
{ "index": {} }
1407+
{ "ip": "192.168.7.1" }
1408+
{ "index": {} }
1409+
{ "ip": "192.168.7.2" }
1410+
{ "index": {} }
1411+
{ "ip": "192.168.7.3" }
1412+
1413+
- do:
1414+
bulk:
1415+
index: valuetype_test_2
1416+
refresh: true
1417+
body: |
1418+
{ "index": {} }
1419+
{ "ip": "127.0.0.1" }
1420+
{ "index": {} }
1421+
{ "ip": "192.168.0.1" }
1422+
{ "index": {} }
1423+
{ "ip": "192.168.0.2" }
1424+
{ "index": {} }
1425+
{ "ip": "192.168.0.3" }
1426+
- do:
1427+
search:
1428+
index: valuetype_test_1,valuetype_test_2
1429+
body:
1430+
size: 0
1431+
aggs:
1432+
str_terms:
1433+
terms:
1434+
field: ip
1435+
value_type: ip
1436+
1437+
- match: { _shards.failed: 1 }
1438+
- length: { aggregations.str_terms.buckets: 4 }
1439+
- match: { aggregations.str_terms.buckets.0.key: "127.0.0.1" }
1440+
- match: { aggregations.str_terms.buckets.0.doc_count: 1 }
1441+
- match: { aggregations.str_terms.buckets.1.key: "192.168.0.1" }
1442+
- match: { aggregations.str_terms.buckets.1.doc_count: 1 }
1443+
- match: { aggregations.str_terms.buckets.2.key: "192.168.0.2" }
1444+
- match: { aggregations.str_terms.buckets.2.doc_count: 1 }
1445+
- match: { aggregations.str_terms.buckets.3.key: "192.168.0.3" }
1446+
- match: { aggregations.str_terms.buckets.3.doc_count: 1 }
1447+
1448+
---
1449+
Value type mismatch fails shard with no docs:
1450+
- skip:
1451+
version: " - 8.0.99"
1452+
reason: "Fixed in 8.1"
1453+
1454+
- do:
1455+
indices.create:
1456+
index: valuetype_test_1
1457+
body:
1458+
settings:
1459+
number_of_shards: 1
1460+
number_of_replicas: 0
1461+
mappings:
1462+
properties:
1463+
ip:
1464+
type: keyword
1465+
- do:
1466+
indices.create:
1467+
index: valuetype_test_2
1468+
body:
1469+
settings:
1470+
number_of_shards: 1
1471+
number_of_replicas: 0
1472+
mappings:
1473+
properties:
1474+
ip:
1475+
type: ip
1476+
1477+
- do:
1478+
bulk:
1479+
index: valuetype_test_2
1480+
refresh: true
1481+
body: |
1482+
{ "index": {} }
1483+
{ "ip": "127.0.0.1" }
1484+
{ "index": {} }
1485+
{ "ip": "192.168.0.1" }
1486+
{ "index": {} }
1487+
{ "ip": "192.168.0.2" }
1488+
{ "index": {} }
1489+
{ "ip": "192.168.0.3" }
1490+
- do:
1491+
search:
1492+
index: valuetype_test_1,valuetype_test_2
1493+
body:
1494+
size: 0
1495+
aggs:
1496+
str_terms:
1497+
terms:
1498+
field: ip
1499+
value_type: ip
1500+
1501+
- match: { _shards.failed: 1 }
1502+
- length: { aggregations.str_terms.buckets: 4 }
1503+
- match: { aggregations.str_terms.buckets.0.key: "127.0.0.1" }
1504+
- match: { aggregations.str_terms.buckets.0.doc_count: 1 }
1505+
- match: { aggregations.str_terms.buckets.1.key: "192.168.0.1" }
1506+
- match: { aggregations.str_terms.buckets.1.doc_count: 1 }
1507+
- match: { aggregations.str_terms.buckets.2.key: "192.168.0.2" }
1508+
- match: { aggregations.str_terms.buckets.2.doc_count: 1 }
1509+
- match: { aggregations.str_terms.buckets.3.key: "192.168.0.3" }
1510+
- match: { aggregations.str_terms.buckets.3.doc_count: 1 }

server/src/main/java/org/elasticsearch/search/aggregations/support/FieldContext.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,8 @@ public MappedFieldType fieldType() {
4747
return fieldType;
4848
}
4949

50+
public String getTypeName() {
51+
return fieldType.typeName();
52+
}
53+
5054
}

server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,26 @@ private static ValuesSourceConfig internalResolve(
148148
if (valuesSourceType == null) {
149149
// We have a field, and the user didn't specify a type, so get the type from the field
150150
valuesSourceType = fieldResolver.getValuesSourceType(fieldContext, userValueTypeHint, defaultValueSourceType);
151-
}
151+
} else if (valuesSourceType != fieldResolver.getValuesSourceType(fieldContext, userValueTypeHint, defaultValueSourceType)
152+
&& script == null) {
153+
/*
154+
* This is the case where the user has specified the type they expect, but we found a field of a different type.
155+
* Usually this happens because of a mapping error, e.g. an older index mapped an IP address as a keyword. If
156+
* the aggregation proceeds, it will usually break during reduction and return no results. So instead, we fail the
157+
* shard with the conflict at this point, allowing the correctly mapped shards to return results with a partial
158+
* failure.
159+
*
160+
* Note that if a script is specified, the assumption is that the script adapts the field into the specified type,
161+
* and we allow the aggregation to continue.
162+
*/
163+
throw new IllegalArgumentException(
164+
"Field type ["
165+
+ fieldContext.getTypeName()
166+
+ "] is incompatible with specified value_type ["
167+
+ userValueTypeHint
168+
+ "]"
169+
);
170+
}
152171
}
153172
}
154173
if (valuesSourceType == null) {

server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/BinaryTermsAggregatorTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ public void testBadUserValueTypeHint() throws IOException {
124124
ValueType.NUMERIC // numeric type hint
125125
)
126126
);
127-
assertThat(e.getMessage(), equalTo("Expected numeric type on field [binary], but got [binary]"));
127+
assertThat(e.getMessage(), equalTo("Field type [binary] is incompatible with specified value_type [numeric]"));
128128
}
129129

130130
private void testSearchCase(

0 commit comments

Comments
 (0)