Skip to content

Commit

Permalink
Add default_field option to fields.yml (elastic#14341)
Browse files Browse the repository at this point in the history
* Add default_field option to fields.yml

The number of fields in the Elasticsearch index template's `settings.index.query.default_field` option has grown over time, and is now greater than 1024 in Filebeat (Elastic licensed version). This causes queries to Elasticsearch to fail when a list of fields is not specified because there is a default limit of 1024 in Elasticsearch.

This adds a new setting to fields.yml called `default_field` whose value can be true/false (defaults to true). When true the text/keyword fields are added to the `default_field` list (as was the behavior before this change). And when set to false the field is omitted from the default_field list.

This adds a test for every beat to check if the default_field list contains more than 1000 fields. The limit is a little less than 1024 because `fields.*` is in the default_field list already and at query time that wildcard will be expanded and count toward the limit.

Fixes elastic#14262

* Exclude new zeek datasets from default_field list
  • Loading branch information
andrewkroh authored and jorgemarey committed Dec 13, 2019
1 parent 73febf6 commit 2eaf43e
Show file tree
Hide file tree
Showing 56 changed files with 431 additions and 23 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG-developer.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,5 @@ The list below covers the major changes between 7.0.0-rc2 and master only.
- Add checks for types and formats used in fields definitions in `fields.yml` files. {pull}13188[13188]
- Makefile included in generator copies files from beats repository using `git archive` instead of cp. {pull}13193[13193]
- Strip debug symbols from binaries to reduce binary sizes. {issue}12768[12768]
- Compare event by event in `testadata` framework to avoid sorting problems {pull}13747[13747]
- Added a `default_field` option to fields in fields.yml to offer a way to exclude fields from the default_field list. {issue}14262[14262] {pull}14341[14341]
5 changes: 5 additions & 0 deletions auditbeat/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"testing"

"github.com/elastic/beats/auditbeat/cmd"
"github.com/elastic/beats/libbeat/tests/system/template"
)

var systemTest *bool
Expand All @@ -41,3 +42,7 @@ func TestSystem(t *testing.T) {
main()
}
}

func TestTemplate(t *testing.T) {
template.TestTemplate(t, cmd.Name)
}
5 changes: 5 additions & 0 deletions filebeat/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"testing"

"github.com/elastic/beats/filebeat/cmd"
"github.com/elastic/beats/libbeat/tests/system/template"
)

var systemTest *bool
Expand All @@ -40,3 +41,7 @@ func TestSystem(t *testing.T) {
main()
}
}

func TestTemplate(t *testing.T) {
template.TestTemplate(t, cmd.Name)
}
5 changes: 5 additions & 0 deletions heartbeat/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"testing"

"github.com/elastic/beats/heartbeat/cmd"
"github.com/elastic/beats/libbeat/tests/system/template"
)

var systemTest *bool
Expand All @@ -40,3 +41,7 @@ func TestSystem(t *testing.T) {
main()
}
}

func TestTemplate(t *testing.T) {
template.TestTemplate(t, cmd.Name)
}
6 changes: 5 additions & 1 deletion journalbeat/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"testing"

"github.com/elastic/beats/journalbeat/cmd"
"github.com/elastic/beats/libbeat/tests/system/template"
)

var systemTest *bool
Expand All @@ -37,8 +38,11 @@ func init() {

// Test started when the test binary is started. Only calls main.
func TestSystem(t *testing.T) {

if *systemTest {
main()
}
}

func TestTemplate(t *testing.T) {
template.TestTemplate(t, cmd.Name)
}
6 changes: 6 additions & 0 deletions libbeat/libbeat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ package main
import (
"flag"
"testing"

"github.com/elastic/beats/libbeat/tests/system/template"
)

var systemTest *bool
Expand All @@ -37,3 +39,7 @@ func TestSystem(t *testing.T) {
main()
}
}

func TestTemplate(t *testing.T) {
template.TestTemplate(t, "mockbeat")
}
5 changes: 3 additions & 2 deletions libbeat/mapping/field.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,9 @@ type Field struct {
UrlTemplate []VersionizedString `config:"url_template"`
OpenLinkInCurrentTab *bool `config:"open_link_in_current_tab"`

Overwrite bool `config:"overwrite"`
Path string
Overwrite bool `config:"overwrite"`
DefaultField *bool `config:"default_field"`
Path string
}

// ObjectTypeCfg defines type and configuration of object attributes
Expand Down
42 changes: 27 additions & 15 deletions libbeat/template/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,27 @@ var (

const scalingFactorKey = "scalingFactor"

type fieldState struct {
DefaultField bool
Path string
}

// Process recursively processes the given fields and writes the template in the given output
func (p *Processor) Process(fields mapping.Fields, path string, output common.MapStr) error {
for _, field := range fields {
func (p *Processor) Process(fields mapping.Fields, state *fieldState, output common.MapStr) error {
if state == nil {
// Set the defaults.
state = &fieldState{DefaultField: true}
}

for _, field := range fields {
if field.Name == "" {
continue
}

field.Path = path
field.Path = state.Path
if field.DefaultField == nil {
field.DefaultField = &state.DefaultField
}
var indexMapping common.MapStr

switch field.Type {
Expand All @@ -69,12 +81,6 @@ func (p *Processor) Process(fields mapping.Fields, path string, output common.Ma
case "alias":
indexMapping = p.alias(&field)
case "group":
var newPath string
if path == "" {
newPath = field.Name
} else {
newPath = path + "." + field.Name
}
indexMapping = common.MapStr{}
if field.Dynamic.Value != nil {
indexMapping["dynamic"] = field.Dynamic.Value
Expand All @@ -93,17 +99,23 @@ func (p *Processor) Process(fields mapping.Fields, path string, output common.Ma
}
}

if err := p.Process(field.Fields, newPath, properties); err != nil {
groupState := &fieldState{Path: field.Name, DefaultField: *field.DefaultField}
if state.Path != "" {
groupState.Path = state.Path + "." + field.Name
}
if err := p.Process(field.Fields, groupState, properties); err != nil {
return err
}
indexMapping["properties"] = properties
default:
indexMapping = p.other(&field)
}

switch field.Type {
case "", "keyword", "text":
addToDefaultFields(&field)
if *field.DefaultField {
switch field.Type {
case "", "keyword", "text":
addToDefaultFields(&field)
}
}

if len(indexMapping) > 0 {
Expand Down Expand Up @@ -207,7 +219,7 @@ func (p *Processor) keyword(f *mapping.Field) common.MapStr {

if len(f.MultiFields) > 0 {
fields := common.MapStr{}
p.Process(f.MultiFields, "", fields)
p.Process(f.MultiFields, nil, fields)
property["fields"] = fields
}

Expand Down Expand Up @@ -243,7 +255,7 @@ func (p *Processor) text(f *mapping.Field) common.MapStr {

if len(f.MultiFields) > 0 {
fields := common.MapStr{}
p.Process(f.MultiFields, "", fields)
p.Process(f.MultiFields, nil, fields)
properties["fields"] = fields
}

Expand Down
82 changes: 80 additions & 2 deletions libbeat/template/processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ func TestPropertiesCombine(t *testing.T) {
}

p := Processor{EsVersion: *version}
err = p.Process(fields, "", output)
err = p.Process(fields, nil, output)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -570,7 +570,7 @@ func TestProcessNoName(t *testing.T) {
}

p := Processor{EsVersion: *version}
err = p.Process(fields, "", output)
err = p.Process(fields, nil, output)
if err != nil {
t.Fatal(err)
}
Expand All @@ -589,3 +589,81 @@ func TestProcessNoName(t *testing.T) {

assert.Equal(t, expectedOutput, output)
}

func TestProcessDefaultField(t *testing.T) {
// NOTE: This package stores global state. It must be cleared before this test.
defaultFields = nil

var (
enableDefaultField = true
disableDefaultField = false
)

fields := mapping.Fields{
// By default foo will be included in default_field.
mapping.Field{
Name: "foo",
Type: "keyword",
},
// bar is explicitly set to be included in default_field.
mapping.Field{
Name: "bar",
Type: "keyword",
DefaultField: &enableDefaultField,
},
// baz is explicitly set to be excluded from default_field.
mapping.Field{
Name: "baz",
Type: "keyword",
DefaultField: &disableDefaultField,
},
mapping.Field{
Name: "nested",
Type: "group",
DefaultField: &enableDefaultField,
Fields: mapping.Fields{
mapping.Field{
Name: "bar",
Type: "keyword",
},
},
},
// The nested group is disabled default_field but one of the children
// has explicitly requested to be included.
mapping.Field{
Name: "nested",
Type: "group",
DefaultField: &disableDefaultField,
Fields: mapping.Fields{
mapping.Field{
Name: "foo",
Type: "keyword",
DefaultField: &enableDefaultField,
},
mapping.Field{
Name: "baz",
Type: "keyword",
},
},
},
}

version, err := common.NewVersion("7.0.0")
if err != nil {
t.Fatal(err)
}

p := Processor{EsVersion: *version}
output := common.MapStr{}
if err = p.Process(fields, nil, output); err != nil {
t.Fatal(err)
}

assert.Len(t, defaultFields, 4)
assert.Contains(t, defaultFields,
"foo",
"bar",
"nested.foo",
"nested.bar",
)
}
2 changes: 1 addition & 1 deletion libbeat/template/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ func (t *Template) load(fields mapping.Fields) (common.MapStr, error) {
// Start processing at the root
properties := common.MapStr{}
processor := Processor{EsVersion: t.esVersion, Migration: t.migration}
if err := processor.Process(fields, "", properties); err != nil {
if err := processor.Process(fields, nil, properties); err != nil {
return nil, err
}
output := t.Generate(properties, dynamicTemplates)
Expand Down
86 changes: 86 additions & 0 deletions libbeat/tests/system/template/template.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package template

import (
"strings"
"testing"

"github.com/elastic/beats/libbeat/asset"
"github.com/elastic/beats/libbeat/common"
"github.com/elastic/beats/libbeat/template"
"github.com/elastic/beats/libbeat/version"
)

// MaxDefaultFieldLength is the limit on the number of default_field values
// allowed by the test. This is less that the 1024 limit of Elasticsearch to
// give a little room for custom fields and the expansion of `fields.*`.
const MaxDefaultFieldLength = 1000

// TestTemplate executes tests on the Beat's index template.
func TestTemplate(t *testing.T, beatName string) {
t.Run("default_field length", testTemplateDefaultFieldLength(beatName))
}

// testTemplateDefaultFieldLength constructs a template based on the embedded
// fields.yml data verifies that the length is less than 1000.
func testTemplateDefaultFieldLength(beatName string) func(*testing.T) {
return func(t *testing.T) {
// 7.0 is when default_field was introduced.
esVersion, err := common.NewVersion("7.0.0")
if err != nil {
t.Fatal(err)
}

// Generate a template based on the embedded fields.yml data.
tmpl, err := template.New(version.GetDefaultVersion(), beatName, *esVersion, template.TemplateConfig{}, false)
if err != nil {
t.Fatal(err)
}

fieldsBytes, err := asset.GetFields(beatName)
if err != nil {
t.Fatal("Failed to get embedded fields.yml asset data:", err)
}

fields, err := tmpl.LoadBytes(fieldsBytes)
if err != nil {
t.Fatal("Failed to load template bytes:", err)
}

templateMap := tmpl.Generate(fields, nil)

v, _ := templateMap.GetValue("settings.index.query.default_field")
defaultValue, ok := v.([]string)
if !ok {
t.Fatalf("settings.index.query.default_field value has an unexpected type: %T", v)
}

if len(defaultValue) > MaxDefaultFieldLength {
t.Fatalf("Too many fields (%d>%d) in %v index template"+
"settings.index.query.default_field for comfort. By default "+
"Elasticsearch has a limit of 1024 fields in a query so we need "+
"to keep the number of fields below 1024. Adding 'default_field: "+
"false' to fields or groups in a fields.yml can be used to "+
"reduce the number of text/keyword fields that end up in "+
"default_field.",
len(defaultValue), MaxDefaultFieldLength, strings.Title(beatName))
}
t.Logf("%v template has %d fields in default_field.", strings.Title(beatName), len(defaultValue))
}
}
Loading

0 comments on commit 2eaf43e

Please sign in to comment.