Implement basic field assertion (types, presence) (elastic#143)
* Add partial AWS package to test
* Implement field assertion
* Fix: check error.message
* Add partial SNS data stream (wildcard fields)
* Sort imports
* Validate document body
* Enable field validator in system runner
* More fixes for tests
* Multiline multierror
* Use custom multierror
* Fix: pass failure details from system runner
* Fix: return nil
* Still fighting with validation
* Fix: add missing field definitions
* Fix for geo_point
* Basic type verification
* Fix: invalid types
* Skip validation for generic fields
* Add comment to count=1
* Rename to fileInfo
* Rename to scalar
* Fix: typo
* Add continue
* Add comment about regexp
* Notice about skipValidation
* Link
* Rename to findElementDefinition
* Add comment
* Add missing fields
* More fixes
* Aggregate errors
* Fix: properly append errors
* Fix: unit tests
* Fix: unique values
* Test unique multierror
* Add more fields
* More fields
* Fix
* Consider some fields as skipped
* Fix
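The heart of this change is the new fields.Validator added in internal/fields/validator.go (shown below). As a rough sketch of how a test runner might drive it — the data stream path and the sample document here are illustrative, not taken from this commit, and internal/fields is only importable from inside elastic-package itself:

package main

import (
	"encoding/json"
	"fmt"
	"log"

	"github.com/elastic/elastic-package/internal/fields"
)

func main() {
	// Hypothetical data stream root; anything with a fields/ directory of YAML definitions works.
	validator, err := fields.CreateValidatorForDataStream("test/packages/aws/data_stream/sns")
	if err != nil {
		log.Fatalf("creating validator failed: %v", err)
	}

	// A document as it would come back from Elasticsearch; field names and values are made up.
	doc := json.RawMessage(`{"event": {"kind": "event"}, "aws": {"sns": {"message_id": "abc-123"}}}`)

	// ValidateDocumentBody returns a multierror.Error (a slice of errors) listing every
	// undefined or mistyped field; an empty result means the document matches the schema.
	if errs := validator.ValidateDocumentBody(doc); len(errs) > 0 {
		for _, e := range errs {
			fmt.Println("field validation error:", e)
		}
	}
}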
Showing 44 changed files with 5,035 additions and 563 deletions.
@@ -0,0 +1,217 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package fields

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"path/filepath"
	"reflect"
	"regexp"
	"strings"

	"github.com/elastic/elastic-package/internal/multierror"

	"github.com/pkg/errors"
	"gopkg.in/yaml.v3"

	"github.com/elastic/elastic-package/internal/common"
)

// Validator is responsible for fields validation.
type Validator struct {
	schema []fieldDefinition
}

// CreateValidatorForDataStream creates a validator for the given data stream.
func CreateValidatorForDataStream(dataStreamRootPath string) (*Validator, error) {
	fieldsDir := filepath.Join(dataStreamRootPath, "fields")
	fileInfos, err := ioutil.ReadDir(fieldsDir)
	if err != nil {
		return nil, errors.Wrapf(err, "reading directory with fields failed (path: %s)", fieldsDir)
	}

	var fields []fieldDefinition
	for _, fileInfo := range fileInfos {
		f := filepath.Join(fieldsDir, fileInfo.Name())
		body, err := ioutil.ReadFile(f)
		if err != nil {
			return nil, errors.Wrap(err, "reading fields file failed")
		}

		var u []fieldDefinition
		err = yaml.Unmarshal(body, &u)
		if err != nil {
			return nil, errors.Wrap(err, "unmarshalling field body failed")
		}
		fields = append(fields, u...)
	}
	return &Validator{
		schema: fields,
	}, nil
}

// ValidateDocumentBody validates the provided document body.
func (v *Validator) ValidateDocumentBody(body json.RawMessage) multierror.Error {
	var c common.MapStr
	err := json.Unmarshal(body, &c)
	if err != nil {
		var errs multierror.Error
		errs = append(errs, errors.Wrap(err, "unmarshalling document body failed"))
		return errs
	}

	errs := v.validateMapElement("", c)
	if len(errs) == 0 {
		return nil
	}
	return errs
}

// ValidateDocumentMap validates the provided document as common.MapStr.
func (v *Validator) ValidateDocumentMap(body common.MapStr) multierror.Error {
	errs := v.validateMapElement("", body)
	if len(errs) == 0 {
		return nil
	}
	return errs
}

func (v *Validator) validateMapElement(root string, elem common.MapStr) multierror.Error {
	var errs multierror.Error
	for name, val := range elem {
		key := strings.TrimLeft(root+"."+name, ".")

		switch val.(type) {
		case []map[string]interface{}:
			for _, m := range val.([]map[string]interface{}) {
				err := v.validateMapElement(key, m)
				if err != nil {
					errs = append(errs, err...)
				}
			}
		case map[string]interface{}:
			err := v.validateMapElement(key, val.(map[string]interface{}))
			if err != nil {
				errs = append(errs, err...)
			}
		default:
			err := v.validateScalarElement(key, val)
			if err != nil {
				errs = append(errs, err)
			}
		}
	}
	return errs
}

func (v *Validator) validateScalarElement(key string, val interface{}) error {
	if key == "" {
		return nil // root key is always valid
	}

	definition := findElementDefinition("", key, v.schema)
	if definition == nil && skipValidationForField(key) {
		return nil // generic field, let's skip validation for now
	}
	if definition == nil {
		return fmt.Errorf(`field "%s" is undefined`, key)
	}

	err := parseElementValue(key, *definition, val)
	if err != nil {
		return errors.Wrap(err, "parsing field value failed")
	}
	return nil
}

// skipValidationForField skips field validation (field presence) of special fields. The special fields are present
// in most (if not all) documents collected by the Elastic Agent, but aren't defined in any integration's `fields.yml` files.
// FIXME https://github.com/elastic/elastic-package/issues/147
func skipValidationForField(key string) bool {
	return isFieldFamilyMatching("agent", key) ||
		isFieldFamilyMatching("elastic_agent", key) ||
		isFieldFamilyMatching("cloud", key) || // too many common fields
		isFieldFamilyMatching("event", key) || // too many common fields
		isFieldFamilyMatching("host", key) || // too many common fields
		isFieldFamilyMatching("metricset", key) || // field is deprecated
		isFieldFamilyMatching("event.module", key) // field is deprecated
}

func isFieldFamilyMatching(family, key string) bool {
	return key == family || strings.HasPrefix(key, family+".")
}

func findElementDefinition(root, searchedKey string, fieldDefinitions []fieldDefinition) *fieldDefinition {
	for _, def := range fieldDefinitions {
		key := strings.TrimLeft(root+"."+def.Name, ".")
		if compareKeys(key, def, searchedKey) {
			return &def
		}

		if len(def.Fields) == 0 {
			continue
		}

		fd := findElementDefinition(key, searchedKey, def.Fields)
		if fd != nil {
			return fd
		}
	}
	return nil
}

func compareKeys(key string, def fieldDefinition, searchedKey string) bool {
	k := strings.ReplaceAll(key, ".", "\\.")
	k = strings.ReplaceAll(k, "*", "[^.]+")

	// Workaround for potential geo_point, as "lon" and "lat" fields are not present in field definitions.
	if def.Type == "geo_point" {
		k += "\\.(lon|lat)"
	}

	k = fmt.Sprintf("^%s$", k)
	matched, err := regexp.MatchString(k, searchedKey)
	if err != nil {
		panic(errors.Wrapf(err, "regexp built using the given field/key (%s) is invalid", k))
	}
	return matched
}

func parseElementValue(key string, definition fieldDefinition, val interface{}) error {
	val, ok := ensureSingleElementValue(val)
	if !ok {
		return nil // it's an array, but it's not possible to extract the single value.
	}

	var valid bool
	switch definition.Type {
	case "date", "ip", "constant_keyword", "keyword", "text":
		_, valid = val.(string)
	case "float", "long", "double":
		_, valid = val.(float64)
	default:
		valid = true // all other types are considered valid, not blocking validation
	}

	if !valid {
		return fmt.Errorf("field \"%s\" has invalid type, expected: %s, actual Go type: %s", key, definition.Type, reflect.TypeOf(val))
	}
	return nil
}

// ensureSingleElementValue extracts a single entity from a potential array, which is a valid field representation
// in Elasticsearch. For type assertion we need a single value.
func ensureSingleElementValue(val interface{}) (interface{}, bool) {
	arr, isArray := val.([]interface{})
	if !isArray {
		return val, true
	}
	if len(arr) > 0 {
		return arr[0], true
	}
	return nil, false // false: empty array, can't deduce single value type
}
@@ -0,0 +1,56 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package fields_test

import (
	"encoding/json"
	"io/ioutil"
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/elastic/elastic-package/internal/fields"
)

type results struct {
	Expected []json.RawMessage
}

func TestValidate_NoWildcardFields(t *testing.T) {
	validator, err := fields.CreateValidatorForDataStream("../../test/packages/aws/data_stream/elb_logs")
	require.NoError(t, err)
	require.NotNil(t, validator)

	f := readTestResults(t, "../../test/packages/aws/data_stream/elb_logs/_dev/test/pipeline/test-alb.log-expected.json")
	for _, e := range f.Expected {
		errs := validator.ValidateDocumentBody(e)
		require.Empty(t, errs)
	}
}

func TestValidate_WithWildcardFields(t *testing.T) {
	validator, err := fields.CreateValidatorForDataStream("../../test/packages/aws/data_stream/sns")
	require.NoError(t, err)
	require.NotNil(t, validator)

	e := readSampleEvent(t, "../../test/packages/aws/data_stream/sns/sample_event.json")
	errs := validator.ValidateDocumentBody(e)
	require.Empty(t, errs)
}

func readTestResults(t *testing.T, path string) (f results) {
	c, err := ioutil.ReadFile(path)
	require.NoError(t, err)

	err = json.Unmarshal(c, &f)
	require.NoError(t, err)
	return
}

func readSampleEvent(t *testing.T, path string) json.RawMessage {
	c, err := ioutil.ReadFile(path)
	require.NoError(t, err)
	return c
}