Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(otelarrowreceiver): add admission control to otlp path #35021

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
fb4bbe8
add admission control to otlp path
moh-osman3 Sep 5, 2024
9706583
support deprecated option + changelog + unit tests
moh-osman3 Sep 6, 2024
35d9d09
rename
moh-osman3 Sep 6, 2024
b3feccf
replace statement?
moh-osman3 Sep 6, 2024
36fc7b2
lint
moh-osman3 Sep 6, 2024
75788fd
lint
moh-osman3 Sep 6, 2024
0944757
make crosslink?
moh-osman3 Sep 6, 2024
6eeee4b
lint again
moh-osman3 Sep 6, 2024
692484f
address review feedback
moh-osman3 Sep 6, 2024
2dcdd8e
fix tests
moh-osman3 Sep 6, 2024
4e4a4ed
lint
moh-osman3 Sep 6, 2024
4202afd
add yaml
moh-osman3 Sep 6, 2024
2d26469
fix test after rebase
moh-osman3 Sep 10, 2024
d80fff4
fix test
moh-osman3 Sep 10, 2024
58c525e
change admission config level and name
moh-osman3 Sep 10, 2024
1fae410
gofmt
moh-osman3 Sep 10, 2024
57f9d56
add nil check
moh-osman3 Sep 10, 2024
c04c5bd
rebase + review feedback
moh-osman3 Sep 12, 2024
3469d9a
review feedback
moh-osman3 Sep 17, 2024
c5791ae
indent chlog?
moh-osman3 Sep 17, 2024
3a878ac
update readme link
moh-osman3 Sep 17, 2024
f5f170b
move log warning to newOtelArrowReceiver
moh-osman3 Sep 18, 2024
c748c5e
move mutation to Unmarshal
jmacd Sep 20, 2024
73217bf
Merge branch 'main' of github.com:open-telemetry/opentelemetry-collec…
jmacd Sep 20, 2024
62e8f82
tidy
jmacd Sep 20, 2024
91c1ddc
Merge branch 'main' of github.com:open-telemetry/opentelemetry-collec…
jmacd Sep 23, 2024
3510b3a
Merge branch 'main' of github.com:open-telemetry/opentelemetry-collec…
jmacd Sep 23, 2024
411d5f7
comments
jmacd Sep 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .chloggen/admission-control-for-otlp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement
crobert-1 marked this conversation as resolved.
Show resolved Hide resolved

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: otelarrowreceiver

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add admission control in the otelarrow receiver's standard otlp data path.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [35021]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
42 changes: 34 additions & 8 deletions receiver/otelarrowreceiver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,30 @@ type Protocols struct {
Arrow ArrowConfig `mapstructure:"arrow"`
}

type AdmissionConfig struct {
crobert-1 marked this conversation as resolved.
Show resolved Hide resolved
// RequestLimitMiB limits the number of requests that are received by the stream based on
// request size information available. Request size is used to control how much traffic we admit
// for processing, but does not control how much memory is used during request processing.
RequestLimitMiB uint64 `mapstructure:"request_limit_mib"`
jmacd marked this conversation as resolved.
Show resolved Hide resolved

// WaiterLimit is the limit on the number of waiters waiting to be processed and consumed.
// This is a dimension of memory limiting to ensure waiters are not consuming an
// unexpectedly large amount of memory in the arrow receiver.
WaiterLimit int64 `mapstructure:"waiter_limit"`
}

// ArrowConfig support configuring the Arrow receiver.
type ArrowConfig struct {
// MemoryLimitMiB is the size of a shared memory region used
// by all Arrow streams, in MiB. When too much load is
// passing through, they will see ResourceExhausted errors.
MemoryLimitMiB uint64 `mapstructure:"memory_limit_mib"`

// AdmissionLimitMiB limits the number of requests that are received by the stream based on
// request size information available. Request size is used to control how much traffic we admit
// for processing, but does not control how much memory is used during request processing.
AdmissionLimitMiB uint64 `mapstructure:"admission_limit_mib"`
// Deprecated: This field is no longer supported, use cfg.Admission.RequestLimitMiB instead.
DeprecatedAdmissionLimitMiB uint64 `mapstructure:"admission_limit_mib"`

// WaiterLimit is the limit on the number of waiters waiting to be processed and consumed.
// This is a dimension of memory limiting to ensure waiters are not consuming an
// unexpectedly large amount of memory in the arrow receiver.
WaiterLimit int64 `mapstructure:"waiter_limit"`
// Deprecated: This field is no longer supported, use cfg.Admission.WaiterLimit instead.
DeprecatedWaiterLimit int64 `mapstructure:"waiter_limit"`

// Zstd settings apply to OTel-Arrow use of gRPC specifically.
Zstd zstd.DecoderConfig `mapstructure:"zstd"`
Expand All @@ -43,6 +51,8 @@ type ArrowConfig struct {
type Config struct {
// Protocols is the configuration for gRPC and Arrow.
Protocols `mapstructure:"protocols"`
// Admission is the configuration for controlling amount of request memory entering the receiver.
Admission AdmissionConfig `mapstructure:"admission"`
}

var _ component.Config = (*Config)(nil)
Expand All @@ -54,3 +64,19 @@ func (cfg *ArrowConfig) Validate() error {
}
return nil
}

func (cfg *Config) Validate() error {
crobert-1 marked this conversation as resolved.
Show resolved Hide resolved
if err := cfg.GRPC.Validate(); err != nil {
return err
}
if err := cfg.Arrow.Validate(); err != nil {
return err
}
if cfg.Arrow.DeprecatedAdmissionLimitMiB != 0 {
cfg.Admission.RequestLimitMiB = cfg.Arrow.DeprecatedAdmissionLimitMiB
crobert-1 marked this conversation as resolved.
Show resolved Hide resolved
}
if cfg.Arrow.DeprecatedWaiterLimit != 0 {
cfg.Admission.WaiterLimit = cfg.Arrow.DeprecatedWaiterLimit
}
return nil
}
40 changes: 34 additions & 6 deletions receiver/otelarrowreceiver/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,41 @@ func TestUnmarshalConfig(t *testing.T) {
},
},
Arrow: ArrowConfig{
MemoryLimitMiB: 123,
AdmissionLimitMiB: 80,
WaiterLimit: 100,
MemoryLimitMiB: 123,
},
},
Admission: AdmissionConfig{
RequestLimitMiB: 80,
WaiterLimit: 100,
},
}, cfg)

}

// Tests that a deprecated config validation sets RequestLimitMiB and WaiterLimit in the correct config block.
func TestValidateDeprecatedConfig(t *testing.T) {
cm, err := confmaptest.LoadConf(filepath.Join("testdata", "deprecated.yaml"))
require.NoError(t, err)
cfg := &Config{}
assert.NoError(t, cm.Unmarshal(cfg))
assert.NoError(t, cfg.Validate())
assert.Equal(t,
&Config{
Protocols: Protocols{
Arrow: ArrowConfig{
MemoryLimitMiB: 123,
DeprecatedAdmissionLimitMiB: 80,
DeprecatedWaiterLimit: 100,
},
},
Admission: AdmissionConfig{
// cfg.Validate should now set these fields.
RequestLimitMiB: 80,
WaiterLimit: 100,
},
}, cfg)
}

func TestUnmarshalConfigUnix(t *testing.T) {
cm, err := confmaptest.LoadConf(filepath.Join("testdata", "uds.yaml"))
require.NoError(t, err)
Expand All @@ -103,11 +129,13 @@ func TestUnmarshalConfigUnix(t *testing.T) {
ReadBufferSize: 512 * 1024,
},
Arrow: ArrowConfig{
MemoryLimitMiB: defaultMemoryLimitMiB,
AdmissionLimitMiB: defaultAdmissionLimitMiB,
WaiterLimit: defaultWaiterLimit,
MemoryLimitMiB: defaultMemoryLimitMiB,
},
},
Admission: AdmissionConfig{
RequestLimitMiB: defaultRequestLimitMiB,
WaiterLimit: defaultWaiterLimit,
},
}, cfg)
}

Expand Down
14 changes: 8 additions & 6 deletions receiver/otelarrowreceiver/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ import (
const (
defaultGRPCEndpoint = "0.0.0.0:4317"

defaultMemoryLimitMiB = 128
defaultAdmissionLimitMiB = defaultMemoryLimitMiB / 2
defaultWaiterLimit = 1000
defaultMemoryLimitMiB = 128
defaultRequestLimitMiB = 128
defaultWaiterLimit = 1000
)

// NewFactory creates a new OTel-Arrow receiver factory.
Expand All @@ -47,11 +47,13 @@ func createDefaultConfig() component.Config {
ReadBufferSize: 512 * 1024,
},
Arrow: ArrowConfig{
MemoryLimitMiB: defaultMemoryLimitMiB,
AdmissionLimitMiB: defaultAdmissionLimitMiB,
WaiterLimit: defaultWaiterLimit,
MemoryLimitMiB: defaultMemoryLimitMiB,
},
},
Admission: AdmissionConfig{
RequestLimitMiB: defaultRequestLimitMiB,
WaiterLimit: defaultWaiterLimit,
},
}
}

Expand Down
26 changes: 24 additions & 2 deletions receiver/otelarrowreceiver/internal/logs/otlp.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ import (
"context"

"go.opentelemetry.io/collector/consumer"
"go.opentelemetry.io/collector/pdata/plog"
"go.opentelemetry.io/collector/pdata/plog/plogotlp"
"go.opentelemetry.io/collector/receiver/receiverhelper"
"go.uber.org/zap"

"github.com/open-telemetry/opentelemetry-collector-contrib/internal/otelarrow/admission"
)

const dataFormatProtobuf = "protobuf"
Expand All @@ -18,13 +22,19 @@ type Receiver struct {
plogotlp.UnimplementedGRPCServer
nextConsumer consumer.Logs
obsrecv *receiverhelper.ObsReport
boundedQueue *admission.BoundedQueue
sizer *plog.ProtoMarshaler
logger *zap.Logger
}

// New creates a new Receiver reference.
func New(nextConsumer consumer.Logs, obsrecv *receiverhelper.ObsReport) *Receiver {
func New(logger *zap.Logger, nextConsumer consumer.Logs, obsrecv *receiverhelper.ObsReport, bq *admission.BoundedQueue) *Receiver {
return &Receiver{
nextConsumer: nextConsumer,
obsrecv: obsrecv,
boundedQueue: bq,
sizer: &plog.ProtoMarshaler{},
logger: logger,
}
}

Expand All @@ -37,7 +47,19 @@ func (r *Receiver) Export(ctx context.Context, req plogotlp.ExportRequest) (plog
}

ctx = r.obsrecv.StartLogsOp(ctx)
err := r.nextConsumer.ConsumeLogs(ctx, ld)

sizeBytes := int64(r.sizer.LogsSize(req.Logs()))
err := r.boundedQueue.Acquire(ctx, sizeBytes)
if err != nil {
return plogotlp.NewExportResponse(), err
}
defer func() {
if releaseErr := r.boundedQueue.Release(sizeBytes); releaseErr != nil {
r.logger.Error("Error releasing bytes from semaphore", zap.Error(releaseErr))
}
}()

err = r.nextConsumer.ConsumeLogs(ctx, ld)
r.obsrecv.EndLogsOp(ctx, dataFormatProtobuf, numSpans, err)

return plogotlp.NewExportResponse(), err
Expand Down
71 changes: 70 additions & 1 deletion receiver/otelarrowreceiver/internal/logs/otlp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ import (
"context"
"errors"
"net"
"sync"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand All @@ -17,10 +19,20 @@ import (
"go.opentelemetry.io/collector/pdata/plog/plogotlp"
"go.opentelemetry.io/collector/receiver/receiverhelper"
"go.opentelemetry.io/collector/receiver/receivertest"
"go.opentelemetry.io/otel/trace/noop"
"go.uber.org/multierr"
"go.uber.org/zap"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"

"github.com/open-telemetry/opentelemetry-collector-contrib/internal/otelarrow/admission"
"github.com/open-telemetry/opentelemetry-collector-contrib/internal/otelarrow/testdata"
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/otelarrowreceiver/internal/testconsumer"
)

const (
maxWaiters = 10
maxBytes = int64(250)
)

func TestExport(t *testing.T) {
Expand Down Expand Up @@ -57,6 +69,61 @@ func TestExport_ErrorConsumer(t *testing.T) {
assert.Equal(t, plogotlp.ExportResponse{}, resp)
}

func TestExport_AdmissionLimitBytesExceeded(t *testing.T) {
ld := testdata.GenerateLogs(10)
logSink := new(consumertest.LogsSink)
req := plogotlp.NewExportRequestFromLogs(ld)

logClient := makeLogsServiceClient(t, logSink)
resp, err := logClient.Export(context.Background(), req)
assert.EqualError(t, err, "rpc error: code = Unknown desc = rejecting request, request size larger than configured limit")
assert.Equal(t, plogotlp.ExportResponse{}, resp)
}

func TestExport_TooManyWaiters(t *testing.T) {
bc := testconsumer.NewBlockingConsumer()

logsClient := makeLogsServiceClient(t, bc)
bg := context.Background()
var errs, err error
ld := testdata.GenerateLogs(1)
req := plogotlp.NewExportRequestFromLogs(ld)
var mtx sync.Mutex
numResponses := 0
// Send request that will acquire all of the semaphores bytes and block.
go func() {
_, err = logsClient.Export(bg, req)
mtx.Lock()
errs = multierr.Append(errs, err)
numResponses++
mtx.Unlock()
}()

for i := 0; i < maxWaiters+1; i++ {
go func() {
_, err := logsClient.Export(bg, req)
mtx.Lock()
errs = multierr.Append(errs, err)
numResponses++
mtx.Unlock()
}()
}

// sleep so all async requests are blocked on semaphore Acquire.
time.Sleep(1 * time.Second)

// unblock and wait for errors to be returned and written.
bc.Unblock()
assert.Eventually(t, func() bool {
mtx.Lock()
defer mtx.Unlock()
errSlice := multierr.Errors(errs)
return numResponses == maxWaiters+2 && len(errSlice) == 1
}, 3*time.Second, 10*time.Millisecond)

assert.ErrorContains(t, errs, "too many waiters")
}

func makeLogsServiceClient(t *testing.T, lc consumer.Logs) plogotlp.GRPCClient {
addr := otlpReceiverOnGRPCServer(t, lc)
cc, err := grpc.NewClient(addr.String(), grpc.WithTransportCredentials(insecure.NewCredentials()))
Expand Down Expand Up @@ -84,7 +151,9 @@ func otlpReceiverOnGRPCServer(t *testing.T, lc consumer.Logs) net.Addr {
ReceiverCreateSettings: set,
})
require.NoError(t, err)
r := New(lc, obsrecv)

bq := admission.NewBoundedQueue(noop.NewTracerProvider(), maxBytes, maxWaiters)
r := New(zap.NewNop(), lc, obsrecv, bq)
// Now run it as a gRPC server
srv := grpc.NewServer()
plogotlp.RegisterGRPCServer(srv, r)
Expand Down
Loading
Loading