Header support for json_array_parser (#30814)
**Description:**

Adding a feature following
#30644.
This feature allows the `json_array_parser` operator to accept a comma-delimited
header; for every json array it parses, it outputs a map containing
the header fields as keys, with the matching values parsed
from the input json array.

This feature was added mainly for performance reasons; from a
functional POV, it is mostly equivalent to chaining the two operators:
`json_array_parser -> assign_keys`
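
For illustration, a minimal sketch of the two roughly equivalent pipelines; the `assign_keys` field names below are assumptions for illustration only, not taken from this PR:

```yaml
# Chained form: parse the json array, then assign keys to its elements
# (the assign_keys configuration shown here is illustrative)
- type: json_array_parser
  parse_to: body
- type: assign_keys
  field: body
  keys: [origin, sev, message]

# New single-operator form using the header option
- type: json_array_parser
  parse_to: body
  header: origin,sev,message
```
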
**Link to tracking Issue:** #30321

**Testing:**

- unit tests
- End-to-end tests: used generated traffic on a running otel collector that's using the
parser, and verified that the data in the end table is as expected and that
performance looks good.

**Documentation:**

- [json_array_parser.md](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/58cc91ca30eabbd35c074d79db8630fc474164d9/pkg/stanza/docs/operators/json_array_parser.md)
RoeiDimi authored Jan 31, 2024
1 parent ba3d660 commit f70c8a3
Showing 6 changed files with 198 additions and 34 deletions.
16 changes: 16 additions & 0 deletions .chloggen/add_jarray_parser_header.yaml
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/stanza

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add support for an optional `header` configuration in the json array parser.

# One or more tracking issues related to the change
issues: [30321]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
43 changes: 42 additions & 1 deletion pkg/stanza/docs/operators/json_array_parser.md
@@ -47,8 +47,9 @@ More information on json arrays can be found [here](https://json-schema.org/unde
|--------------------|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------|
| `id` | `json_array_parser` | A unique identifier for the operator. |
| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries. |
| `header` | optional | A string of comma-delimited field names. When a header is set, the output will be a map containing the header fields as keys and the parsed input json array fields as matching values |
| `parse_from` | `body` | The [field](../types/field.md) from which the value will be parsed. |
| `parse_to` | required. can be one of `body` or a nested field inside `body`, `attributes` or `resource` (ie `attributes.parsed`) | The [field](../types/field.md) to which the value will be parsed. |
| `parse_to` | required. Can be one of `body` or a nested field inside `body`, `attributes` or `resource` (e.g. `attributes.parsed`). When a header is used, `attributes` is also valid | The [field](../types/field.md) to which the value will be parsed. |
| `on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](../types/on_error.md). |
| `timestamp` | `nil` | An optional [timestamp](../types/timestamp.md) block which will parse a timestamp field before passing the entry to the output operator. |
| `severity` | `nil` | An optional [severity](../types/severity.md) block which will parse a severity field before passing the entry to the output operator. |
@@ -124,6 +125,46 @@ Configuration:
}
```

</td>
</tr>
</table>

#### Parse the field `body` with a json array parser and a header into attributes

Configuration:

```yaml
- type: json_array_parser
parse_to: attributes
header: origin,sev,message,isBool
```
<table>
<tr><td> Input Entry </td> <td> Output Entry </td></tr>
<tr>
<td>
```json
{
"body": "[1,\"debug\",\"Debug Message\", true]"
}
```

</td>
<td>

```json
{
"body": "[1,\"debug\",\"Debug Message\", true]",
"attributes": {
"origin": 1,
"sev": "debug",
"message": "Debug Message",
"isBool": true,
}
}
```

</td>
</tr>
</table>
9 changes: 9 additions & 0 deletions pkg/stanza/operator/parser/jsonarray/config_test.go
@@ -53,6 +53,15 @@ func TestConfig(t *testing.T) {
return p
}(),
},
{
Name: "parse_with_header_as_attributes",
Expect: func() *Config {
p := NewConfig()
p.ParseTo = entry.RootableField{Field: entry.NewAttributeField()}
p.Header = "A,B,C"
return p
}(),
},
},
}.Run(t)
}
123 changes: 90 additions & 33 deletions pkg/stanza/operator/parser/jsonarray/json_array_parser.go
@@ -6,6 +6,7 @@ import (
"context"
"errors"
"fmt"
"strings"

"github.com/valyala/fastjson"
"go.opentelemetry.io/collector/featuregate"
@@ -17,6 +18,7 @@ import (
)

const operatorType = "json_array_parser"
const headerDelimiter = ","

var jsonArrayParserFeatureGate = featuregate.GlobalRegistry().MustRegister(
"logs.jsonParserArray",
@@ -46,6 +48,7 @@ func NewConfigWithID(operatorID string) *Config {
// Config is the configuration of a json array parser operator.
type Config struct {
helper.ParserConfig `mapstructure:",squash"`
Header string `mapstructure:"header"`
}

// Build will build a json array parser operator.
@@ -55,59 +58,113 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) {
return nil, err
}

if c.Header != "" {
return &Parser{
ParserOperator: parserOperator,
parse: generateParseToMapFunc(new(fastjson.ParserPool), strings.Split(c.Header, headerDelimiter)),
}, nil
}

return &Parser{
ParserOperator: parserOperator,
pool: new(fastjson.ParserPool),
parse: generateParseToArrayFunc(new(fastjson.ParserPool)),
}, nil
}

// Parser is an operator that parses json array in an entry.
type Parser struct {
helper.ParserOperator
pool *fastjson.ParserPool
parse parseFunc
}

type parseFunc func(any) (any, error)

// Process will parse an entry for json array.
func (r *Parser) Process(ctx context.Context, e *entry.Entry) error {
return r.ParserOperator.ProcessWith(ctx, e, r.parse)
}

func (r *Parser) parse(value any) (any, error) {
jArrayLine, err := valueAsString(value)
if err != nil {
return nil, err
}
func generateParseToArrayFunc(pool *fastjson.ParserPool) parseFunc {
return func(value any) (any, error) {
jArrayLine, err := valueAsString(value)
if err != nil {
return nil, err
}

p := r.pool.Get()
v, err := p.Parse(jArrayLine)
r.pool.Put(p)
if err != nil {
return nil, errors.New("failed to parse entry")
}
p := pool.Get()
v, err := p.Parse(jArrayLine)
pool.Put(p)
if err != nil {
return nil, errors.New("failed to parse entry")
}

jArray := v.GetArray() // a is a []*Value slice
parsedValues := make([]any, len(jArray))
for i := range jArray {
switch jArray[i].Type() {
case fastjson.TypeNumber:
parsedValues[i] = jArray[i].GetInt64()
case fastjson.TypeString:
parsedValues[i] = string(jArray[i].GetStringBytes())
case fastjson.TypeTrue:
parsedValues[i] = true
case fastjson.TypeFalse:
parsedValues[i] = false
case fastjson.TypeNull:
parsedValues[i] = nil
case fastjson.TypeObject:
// Nested objects handled as a string since this parser doesn't support nested headers
parsedValues[i] = jArray[i].String()
default:
return nil, errors.New("failed to parse entry: " + string(jArray[i].MarshalTo(nil)))
jArray := v.GetArray() // jArray is a []*fastjson.Value slice
parsedValues := make([]any, len(jArray))
for i := range jArray {
switch jArray[i].Type() {
case fastjson.TypeNumber:
parsedValues[i] = jArray[i].GetInt64()
case fastjson.TypeString:
parsedValues[i] = string(jArray[i].GetStringBytes())
case fastjson.TypeTrue:
parsedValues[i] = true
case fastjson.TypeFalse:
parsedValues[i] = false
case fastjson.TypeNull:
parsedValues[i] = nil
case fastjson.TypeObject:
// Nested objects handled as a string since this parser doesn't support nested headers
parsedValues[i] = jArray[i].String()
default:
return nil, errors.New("failed to parse entry: " + string(jArray[i].MarshalTo(nil)))
}
}

return parsedValues, nil
}
}

func generateParseToMapFunc(pool *fastjson.ParserPool, header []string) parseFunc {
return func(value any) (any, error) {
jArrayLine, err := valueAsString(value)
if err != nil {
return nil, err
}

p := pool.Get()
v, err := p.Parse(jArrayLine)
pool.Put(p)
if err != nil {
return nil, errors.New("failed to parse entry")
}

return parsedValues, nil
jArray := v.GetArray() // jArray is a []*fastjson.Value slice
if len(header) != len(jArray) {
return nil, fmt.Errorf("wrong number of fields: expected %d, found %d", len(header), len(jArray))
}
parsedValues := make(map[string]any, len(jArray))
for i := range jArray {
switch jArray[i].Type() {
case fastjson.TypeNumber:
parsedValues[header[i]] = jArray[i].GetInt64()
case fastjson.TypeString:
parsedValues[header[i]] = string(jArray[i].GetStringBytes())
case fastjson.TypeTrue:
parsedValues[header[i]] = true
case fastjson.TypeFalse:
parsedValues[header[i]] = false
case fastjson.TypeNull:
parsedValues[header[i]] = nil
case fastjson.TypeObject:
// Nested objects handled as a string since this parser doesn't support nested headers
parsedValues[header[i]] = jArray[i].String()
default:
return nil, errors.New("failed to parse entry: " + string(jArray[i].MarshalTo(nil)))
}
}

return parsedValues, nil
}
}

// valueAsString interprets the given value as a string.
37 changes: 37 additions & 0 deletions pkg/stanza/operator/parser/jsonarray/json_array_parser_test.go
@@ -36,6 +36,17 @@ func TestParserInvalidType(t *testing.T) {
require.Contains(t, err.Error(), "type '[]int' cannot be parsed as json array")
}

func TestParserByteFailureHeadersMismatch(t *testing.T) {
cfg := NewConfigWithID("test")
cfg.Header = "name,sev,msg"
op, err := cfg.Build(testutil.Logger(t))
require.NoError(t, err)
parser := op.(*Parser)
_, err = parser.parse("[\"stanza\",\"INFO\",\"started agent\", 42, true]")
require.Error(t, err)
require.Contains(t, err.Error(), "wrong number of fields: expected 3, found 5")
}

func TestParserJarray(t *testing.T) {
cases := []struct {
name string
@@ -193,6 +204,32 @@ func TestParserJarray(t *testing.T) {
false,
false,
},
{
"parse-as-attributes-with-header",
func(p *Config) {
p.ParseTo = entry.RootableField{Field: entry.NewAttributeField()}
p.Header = "origin,sev,message,count,isBool"
},
[]entry.Entry{
{
Body: "[\"stanza\",\"INFO\",\"started agent\", 42, true]",
},
},
[]entry.Entry{
{
Body: "[\"stanza\",\"INFO\",\"started agent\", 42, true]",
Attributes: map[string]any{
"origin": "stanza",
"sev": "INFO",
"message": "started agent",
"count": int64(42),
"isBool": true,
},
},
},
false,
false,
},
}

for _, tc := range cases {
4 changes: 4 additions & 0 deletions pkg/stanza/operator/parser/jsonarray/testdata/config.yaml
@@ -12,3 +12,7 @@ parse_to_body:
parse_to_resource:
type: json_array_parser
parse_to: resource.output
parse_with_header_as_attributes:
type: json_array_parser
parse_to: attributes
header: A,B,C
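
As a usage illustration (not part of this change), a minimal sketch of the new test configuration used inside a collector's filelog receiver; the log path below is hypothetical:

```yaml
receivers:
  filelog:
    include: [/var/log/app.log]   # hypothetical path, for illustration only
    operators:
      - type: json_array_parser
        parse_to: attributes
        header: A,B,C
```
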
