Skip to content

Commit b33490b

Browse files
authored
Extend system test to validate absence of _ignored (#1738)
* add test whether any docs contain _ignored * review debug output * list out fields * Update runner.go * update readme * Update runner.go * show sample docs * fix * shorten debug output * allow to skip fields * review comments
1 parent a213867 commit b33490b

File tree

3 files changed

+124
-5
lines changed

3 files changed

+124
-5
lines changed

docs/howto/system_testing.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,17 @@ elastic-package test system -v --tear-down
669669
elastic-package stack down -v
670670
```
671671

672+
### Detecting ignored fields
673+
674+
As part of the system test, `elastic-package` checks whether any indexed documents contain fields that Elasticsearch ignored at index time. Common causes are exceeding the configured field limit or keyword fields receiving values longer than `ignore_above`. You can learn more in the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-ignored-field.html).
675+
676+
In this case, `elastic-package test system` will fail with an error and print a sample of the affected documents. To fix the issue, check which fields were ignored along with their `ignored_field_values`, and adapt either the mapping or the ingest pipeline to accommodate the problematic values. If an ignored field can't be meaningfully mitigated, you can skip the check for it by listing the field under the `skip_ignored_fields` property in the system test config of the data stream:
677+
```
678+
# data_stream/<data stream name>/_dev/test/system/test-default-config.yml
679+
skip_ignored_fields:
680+
- field.to.ignore
681+
```
682+
672683
## Continuous Integration
673684

674685
`elastic-package` runs a set of system tests on some [dummy packages](https://github.com/elastic/elastic-package/tree/main/test/packages) to ensure its functionality works as expected. This allows testing changes that affect package testing within `elastic-package` before merging and releasing them.

internal/testrunner/runners/system/runner.go

Lines changed: 112 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,40 @@ const (
4343
testRunMaxID = 99999
4444
testRunMinID = 10000
4545

46-
allFieldsBody = `{"fields": ["*"]}`
47-
DevDeployDir = "_dev/deploy"
46+
checkFieldsBody = `{
47+
"fields": ["*"],
48+
"runtime_mappings": {
49+
"my_ignored": {
50+
"type": "keyword",
51+
"script": {
52+
"source": "for (def v : params['_fields']._ignored.values) { emit(v); }"
53+
}
54+
}
55+
},
56+
"aggs": {
57+
"all_ignored": {
58+
"filter": {
59+
"exists": {
60+
"field": "_ignored"
61+
}
62+
},
63+
"aggs": {
64+
"ignored_fields": {
65+
"terms": {
66+
"size": 100,
67+
"field": "my_ignored"
68+
}
69+
},
70+
"ignored_docs": {
71+
"top_hits": {
72+
"size": 5
73+
}
74+
}
75+
}
76+
}
77+
}
78+
}`
79+
DevDeployDir = "_dev/deploy"
4880
)
4981

5082
func init() {
@@ -678,8 +710,10 @@ func (r *runner) isSyntheticsEnabled(ctx context.Context, dataStream, componentT
678710
}
679711

680712
type hits struct {
681-
Source []common.MapStr `json:"_source"`
682-
Fields []common.MapStr `json:"fields"`
713+
Source []common.MapStr `json:"_source"`
714+
Fields []common.MapStr `json:"fields"`
715+
IgnoredFields []string
716+
DegradedDocs []common.MapStr
683717
}
684718

685719
func (h hits) getDocs(syntheticsEnabled bool) []common.MapStr {
@@ -700,7 +734,7 @@ func (r *runner) getDocs(ctx context.Context, dataStream string) (*hits, error)
700734
r.options.API.Search.WithSort("@timestamp:asc"),
701735
r.options.API.Search.WithSize(elasticsearchQuerySize),
702736
r.options.API.Search.WithSource("true"),
703-
r.options.API.Search.WithBody(strings.NewReader(allFieldsBody)),
737+
r.options.API.Search.WithBody(strings.NewReader(checkFieldsBody)),
704738
r.options.API.Search.WithIgnoreUnavailable(true),
705739
)
706740
if err != nil {
@@ -727,6 +761,21 @@ func (r *runner) getDocs(ctx context.Context, dataStream string) (*hits, error)
727761
Fields common.MapStr `json:"fields"`
728762
}
729763
}
764+
Aggregations struct {
765+
AllIgnored struct {
766+
DocCount int `json:"doc_count"`
767+
IgnoredFields struct {
768+
Buckets []struct {
769+
Key string `json:"key"`
770+
} `json:"buckets"`
771+
} `json:"ignored_fields"`
772+
IgnoredDocs struct {
773+
Hits struct {
774+
Hits []common.MapStr `json:"hits"`
775+
} `json:"hits"`
776+
} `json:"ignored_docs"`
777+
} `json:"all_ignored"`
778+
} `json:"aggregations"`
730779
Error *struct {
731780
Type string
732781
Reason string
@@ -751,6 +800,10 @@ func (r *runner) getDocs(ctx context.Context, dataStream string) (*hits, error)
751800
hits.Source = append(hits.Source, hit.Source)
752801
hits.Fields = append(hits.Fields, hit.Fields)
753802
}
803+
for _, bucket := range results.Aggregations.AllIgnored.IgnoredFields.Buckets {
804+
hits.IgnoredFields = append(hits.IgnoredFields, bucket.Key)
805+
}
806+
hits.DegradedDocs = results.Aggregations.AllIgnored.IgnoredDocs.Hits.Hits
754807

755808
return &hits, nil
756809
}
@@ -763,6 +816,8 @@ type scenarioTest struct {
763816
kibanaDataStream kibana.PackageDataStream
764817
syntheticEnabled bool
765818
docs []common.MapStr
819+
ignoredFields []string
820+
degradedDocs []common.MapStr
766821
agent agentdeployer.DeployedAgent
767822
startTestTime time.Time
768823
}
@@ -1107,6 +1162,8 @@ func (r *runner) prepareScenario(ctx context.Context, config *testConfig, svcInf
11071162
logger.Debugf("data stream %s has synthetics enabled: %t", scenario.dataStream, scenario.syntheticEnabled)
11081163

11091164
scenario.docs = hits.getDocs(scenario.syntheticEnabled)
1165+
scenario.ignoredFields = hits.IgnoredFields
1166+
scenario.degradedDocs = hits.DegradedDocs
11101167

11111168
if r.options.RunSetup {
11121169
opts := scenarioStateOpts{
@@ -1374,6 +1431,11 @@ func (r *runner) validateTestScenario(ctx context.Context, result *testrunner.Re
13741431
return result.WithError(err)
13751432
}
13761433

1434+
err = validateIgnoredFields(r.stackVersion.Number, scenario, config)
1435+
if err != nil {
1436+
return result.WithError(err)
1437+
}
1438+
13771439
docs := scenario.docs
13781440
if scenario.syntheticEnabled {
13791441
docs, err = fieldsValidator.SanitizeSyntheticSourceDocs(scenario.docs)
@@ -1934,6 +1996,51 @@ func validateFields(docs []common.MapStr, fieldsValidator *fields.Validator, dat
19341996
return nil
19351997
}
19361998

1999+
func validateIgnoredFields(stackVersionString string, scenario *scenarioTest, config *testConfig) error {
2000+
skipIgnoredFields := append([]string(nil), config.SkipIgnoredFields...)
2001+
stackVersion, err := semver.NewVersion(stackVersionString)
2002+
if err != nil {
2003+
return fmt.Errorf("failed to parse stack version: %w", err)
2004+
}
2005+
if stackVersion.LessThan(semver.MustParse("8.14.0")) {
2006+
// Pre 8.14 Elasticsearch commonly has event.original not mapped correctly, exclude from check: https://github.com/elastic/elasticsearch/pull/106714
2007+
skipIgnoredFields = append(skipIgnoredFields, "event.original")
2008+
}
2009+
2010+
ignoredFields := make([]string, 0, len(scenario.ignoredFields))
2011+
2012+
for _, field := range scenario.ignoredFields {
2013+
if !slices.Contains(skipIgnoredFields, field) {
2014+
ignoredFields = append(ignoredFields, field)
2015+
}
2016+
}
2017+
2018+
if len(ignoredFields) > 0 {
2019+
issues := make([]struct {
2020+
ID any `json:"_id"`
2021+
Timestamp any `json:"@timestamp,omitempty"`
2022+
IgnoredFields any `json:"ignored_field_values"`
2023+
}, len(scenario.degradedDocs))
2024+
for i, d := range scenario.degradedDocs {
2025+
issues[i].ID = d["_id"]
2026+
if source, ok := d["_source"].(map[string]any); ok {
2027+
if ts, ok := source["@timestamp"]; ok {
2028+
issues[i].Timestamp = ts
2029+
}
2030+
}
2031+
issues[i].IgnoredFields = d["ignored_field_values"]
2032+
}
2033+
degradedDocsJSON, err := json.MarshalIndent(issues, "", " ")
2034+
if err != nil {
2035+
return fmt.Errorf("failed to marshal degraded docs to JSON: %w", err)
2036+
}
2037+
2038+
return fmt.Errorf("found ignored fields in data stream %s: %v. Affected documents: %s", scenario.dataStream, ignoredFields, degradedDocsJSON)
2039+
}
2040+
2041+
return nil
2042+
}
2043+
19372044
func assertHitCount(expected int, docs []common.MapStr) (pass bool, message string) {
19382045
if expected != 0 {
19392046
observed := len(docs)

internal/testrunner/runners/system/test_config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ type testConfig struct {
3434
ServiceNotifySignal string `config:"service_notify_signal"` // Signal to send when the agent policy is applied.
3535
IgnoreServiceError bool `config:"ignore_service_error"`
3636
WaitForDataTimeout time.Duration `config:"wait_for_data_timeout"`
37+
SkipIgnoredFields []string `config:"skip_ignored_fields"`
3738

3839
Vars common.MapStr `config:"vars"`
3940
DataStream struct {

0 commit comments

Comments
 (0)