Skip to content

Commit

Permalink
Implement basic field assertion (types, presence) (elastic#143)
Browse files Browse the repository at this point in the history
* Add partially AWS package to test

* Implement field assertion

* Fix: check error.message

* Add partial SNS data stream (wildcard fields)

* Sort imports

* Validate document body

* Enable field validator in system runner

* More fixes for tests

* Multiline multierror

* Use custom multierror

* Fix: pass failure details from system runner

* Fix: return nil

* Still fighting with validation

* Fix: add missing field definitions

* Fix for geo_point

* Basic type verification

* Fix: invalid types

* Skip validation for generic fields

* Add comment to count=1

* Rename to fileInfo

* Rename to scalar

* Fix: typo

* Add continue

* Add comment about regexp

* Notice about skipValidation

* Link

* Rename to findElementDefinition

* Add comment

* Add missing fields

* More fixes

* Aggregate errors

* Fix: properly append errors

* Fix: unit tests

* Fix: unique values

* Test unique multierror

* Add more fields

* More fields

* Fix

* Consider some fields as skipped

* Fix
  • Loading branch information
mtojek authored Oct 27, 2020
1 parent 9feafcd commit 86e1f26
Show file tree
Hide file tree
Showing 44 changed files with 5,035 additions and 563 deletions.
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,19 @@ licenser:
gomod:
go mod tidy

test-go:
# -count=1 is included to invalidate the test cache. This way, if you run "make test-go" multiple times
# you will get fresh test results each time. For instance, changing the source of mocked packages
# does not invalidate the cache so having the -count=1 to invalidate the test cache is useful.
go test -v -count 1 ./...

test-stack-command:
./scripts/test-stack-command.sh

test-check-packages:
./scripts/test-check-packages.sh

test: test-stack-command test-check-packages
test: test-go test-stack-command test-check-packages

check-git-clean:
git update-index --really-refresh
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ require (
github.com/Masterminds/semver v1.5.0
github.com/aymerick/raymond v2.0.2+incompatible
github.com/cespare/xxhash/v2 v2.1.1
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f
github.com/elastic/go-elasticsearch/v7 v7.9.0
github.com/elastic/go-ucfg v0.8.3
github.com/elastic/package-spec/code/go v0.0.0-20201026084717-cdb8fbed8491
Expand All @@ -25,6 +24,7 @@ require (
github.com/pkg/errors v0.9.1
github.com/spf13/cobra v1.0.0
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.6.1
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a // indirect
golang.org/x/net v0.0.0-20201021035429-f5854403a974 // indirect
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 h1:BHsljHzVlRcyQhjrss6TZTdY2VfCqZPbv5k3iBFa2ZQ=
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/gliderlabs/ssh v0.2.2 h1:6zsha5zo/TWhRhwqCD3+EarCAgZ2yN28ipRnGPnwkI0=
github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
Expand Down
8 changes: 4 additions & 4 deletions internal/common/mapstr.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ import (
)

var (
// errKeyNotFound indicates that the specified key was not found.
errKeyNotFound = errors.New("key not found")
// ErrKeyNotFound indicates that the specified key was not found.
ErrKeyNotFound = errors.New("key not found")
)

// MapStr is a map[string]interface{} wrapper with utility methods for common
Expand All @@ -33,7 +33,7 @@ func (m MapStr) GetValue(key string) (interface{}, error) {
return nil, err
}
if !found {
return nil, errKeyNotFound
return nil, ErrKeyNotFound
}
return v, nil
}
Expand Down Expand Up @@ -120,7 +120,7 @@ func mapFind(
d = MapStr{}
data[k] = d
} else {
return "", nil, nil, false, errKeyNotFound
return "", nil, nil, false, ErrKeyNotFound
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package system
package fields

// Document corresponds to the logs or metrics event stored in the data stream.
type Document struct {
Error *struct {
Message string
}
type fieldDefinition struct {
Name string `yaml:"name"`
Type string `yaml:"type"`
Fields []fieldDefinition `yaml:"fields"`
}
217 changes: 217 additions & 0 deletions internal/fields/validate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package fields

import (
"encoding/json"
"fmt"
"io/ioutil"
"path/filepath"
"reflect"
"regexp"
"strings"

"github.com/elastic/elastic-package/internal/multierror"

"github.com/pkg/errors"
"gopkg.in/yaml.v3"

"github.com/elastic/elastic-package/internal/common"
)

// Validator is responsible for fields validation.
type Validator struct {
schema []fieldDefinition
}

// CreateValidatorForDataStream method creates a validator for the data stream.
func CreateValidatorForDataStream(dataStreamRootPath string) (*Validator, error) {
fieldsDir := filepath.Join(dataStreamRootPath, "fields")
fileInfos, err := ioutil.ReadDir(fieldsDir)
if err != nil {
return nil, errors.Wrapf(err, "reading directory with fields failed (path: %s)", fieldsDir)
}

var fields []fieldDefinition
for _, fileInfo := range fileInfos {
f := filepath.Join(fieldsDir, fileInfo.Name())
body, err := ioutil.ReadFile(f)
if err != nil {
return nil, errors.Wrap(err, "reading fields file failed")
}

var u []fieldDefinition
err = yaml.Unmarshal(body, &u)
if err != nil {
return nil, errors.Wrap(err, "unmarshalling field body failed")
}
fields = append(fields, u...)
}
return &Validator{
schema: fields,
}, nil
}

// ValidateDocumentBody validates the provided document body.
func (v *Validator) ValidateDocumentBody(body json.RawMessage) multierror.Error {
var c common.MapStr
err := json.Unmarshal(body, &c)
if err != nil {
var errs multierror.Error
errs = append(errs, errors.Wrap(err, "unmarshalling document body failed"))
return errs
}

errs := v.validateMapElement("", c)
if len(errs) == 0 {
return nil
}
return errs
}

// ValidateDocumentMap validates the provided document as common.MapStr.
func (v *Validator) ValidateDocumentMap(body common.MapStr) multierror.Error {
errs := v.validateMapElement("", body)
if len(errs) == 0 {
return nil
}
return errs
}

func (v *Validator) validateMapElement(root string, elem common.MapStr) multierror.Error {
var errs multierror.Error
for name, val := range elem {
key := strings.TrimLeft(root+"."+name, ".")

switch val.(type) {
case []map[string]interface{}:
for _, m := range val.([]map[string]interface{}) {
err := v.validateMapElement(key, m)
if err != nil {
errs = append(errs, err...)
}
}
case map[string]interface{}:
err := v.validateMapElement(key, val.(map[string]interface{}))
if err != nil {
errs = append(errs, err...)
}
default:
err := v.validateScalarElement(key, val)
if err != nil {
errs = append(errs, err)
}
}
}
return errs
}

func (v *Validator) validateScalarElement(key string, val interface{}) error {
if key == "" {
return nil // root key is always valid
}

definition := findElementDefinition("", key, v.schema)
if definition == nil && skipValidationForField(key) {
return nil // generic field, let's skip validation for now
}
if definition == nil {
return fmt.Errorf(`field "%s" is undefined`, key)
}

err := parseElementValue(key, *definition, val)
if err != nil {
return errors.Wrap(err, "parsing field value failed")
}
return nil
}

// skipValidationForField skips field validation (field presence) of special fields. The special fields are present
// in every (most?) documents collected by Elastic Agent, but aren't defined in any integration in `fields.yml` files.
// FIXME https://github.com/elastic/elastic-package/issues/147
func skipValidationForField(key string) bool {
return isFieldFamilyMatching("agent", key) ||
isFieldFamilyMatching("elastic_agent", key) ||
isFieldFamilyMatching("cloud", key) || // too many common fields
isFieldFamilyMatching("event", key) || // too many common fields
isFieldFamilyMatching("host", key) || // too many common fields
isFieldFamilyMatching("metricset", key) || // field is deprecated
isFieldFamilyMatching("event.module", key) // field is deprecated
}

func isFieldFamilyMatching(family, key string) bool {
return key == family || strings.HasPrefix(key, family+".")
}

func findElementDefinition(root, searchedKey string, fieldDefinitions []fieldDefinition) *fieldDefinition {
for _, def := range fieldDefinitions {
key := strings.TrimLeft(root+"."+def.Name, ".")
if compareKeys(key, def, searchedKey) {
return &def
}

if len(def.Fields) == 0 {
continue
}

fd := findElementDefinition(key, searchedKey, def.Fields)
if fd != nil {
return fd
}
}
return nil
}

func compareKeys(key string, def fieldDefinition, searchedKey string) bool {
k := strings.ReplaceAll(key, ".", "\\.")
k = strings.ReplaceAll(k, "*", "[^.]+")

// Workaround for potential geo_point, as "lon" and "lat" fields are not present in field definitions.
if def.Type == "geo_point" {
k += "\\.(lon|lat)"
}

k = fmt.Sprintf("^%s$", k)
matched, err := regexp.MatchString(k, searchedKey)
if err != nil {
panic(errors.Wrapf(err, "regexp built using the given field/key (%s) is invalid", k))
}
return matched
}

func parseElementValue(key string, definition fieldDefinition, val interface{}) error {
val, ok := ensureSingleElementValue(val)
if !ok {
return nil // it's an array, but it's not possible to extract the single value.
}

var valid bool
switch definition.Type {
case "date", "ip", "constant_keyword", "keyword", "text":
_, valid = val.(string)
case "float", "long", "double":
_, valid = val.(float64)
default:
valid = true // all other types are considered valid not blocking validation
}

if !valid {
return fmt.Errorf("field \"%s\" has invalid type, expected: %s, actual Go type: %s", key, definition.Type, reflect.TypeOf(val))
}
return nil
}

// ensureSingleElementValue extracts single entity from a potential array, which is a valid field representation
// in Elasticsearch. For type assertion we need a single value.
func ensureSingleElementValue(val interface{}) (interface{}, bool) {
arr, isArray := val.([]interface{})
if !isArray {
return val, true
}
if len(arr) > 0 {
return arr[0], true
}
return nil, false // false: empty array, can't deduce single value type
}
56 changes: 56 additions & 0 deletions internal/fields/validate_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package fields_test

import (
"encoding/json"
"io/ioutil"
"testing"

"github.com/stretchr/testify/require"

"github.com/elastic/elastic-package/internal/fields"
)

type results struct {
Expected []json.RawMessage
}

func TestValidate_NoWildcardFields(t *testing.T) {
validator, err := fields.CreateValidatorForDataStream("../../test/packages/aws/data_stream/elb_logs")
require.NoError(t, err)
require.NotNil(t, validator)

f := readTestResults(t, "../../test/packages/aws/data_stream/elb_logs/_dev/test/pipeline/test-alb.log-expected.json")
for _, e := range f.Expected {
errs := validator.ValidateDocumentBody(e)
require.Empty(t, errs)
}
}

func TestValidate_WithWildcardFields(t *testing.T) {
validator, err := fields.CreateValidatorForDataStream("../../test/packages/aws/data_stream/sns")
require.NoError(t, err)
require.NotNil(t, validator)

e := readSampleEvent(t, "../../test/packages/aws/data_stream/sns/sample_event.json")
errs := validator.ValidateDocumentBody(e)
require.Empty(t, errs)
}

func readTestResults(t *testing.T, path string) (f results) {
c, err := ioutil.ReadFile(path)
require.NoError(t, err)

err = json.Unmarshal(c, &f)
require.NoError(t, err)
return
}

func readSampleEvent(t *testing.T, path string) json.RawMessage {
c, err := ioutil.ReadFile(path)
require.NoError(t, err)
return c
}
Loading

0 comments on commit 86e1f26

Please sign in to comment.