Skip to content

Commit

Permalink
feat(new sink): Initial azure_monitor_logs sink (vectordotdev#2811)
Browse files Browse the repository at this point in the history
Solves vectordotdev#1808
Will add validations for configuration and unit tests

Co-authored-by: Luke Steensen <luke.steensen@gmail.com>
Co-authored-by: James Turnbull <james@lovedthanlost.net>
Signed-off-by: Brian Menges <brian.menges@anaplan.com>
  • Loading branch information
3 people authored and Brian Menges committed Dec 9, 2020
1 parent eee89cc commit 110a83e
Show file tree
Hide file tree
Showing 6 changed files with 854 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .meta/_partials/descriptions/_azure_monitor.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Shared description of Azure Monitor, defined as a single `description` key
# so it can be rendered into component metadata. Each line-ending backslash
# folds the newline, so the value is one continuous paragraph.
description = """\
[Azure Monitor][urls.azure_monitor] is a service in Azure that provides \
performance and availability monitoring for applications and services in Azure, other \
cloud environments, or on-premises. Azure Monitor collects data from multiple sources into a \
common data platform where it can be analyzed for trends and anomalies.\
"""
69 changes: 69 additions & 0 deletions .meta/sinks/azure_monitor_logs.toml.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Documentation metadata for the `azure_monitor_logs` sink (flagged as beta).
[sinks.azure_monitor_logs]
title = "Azure Monitor Logs"
noun = "Azure Monitor Logs"
beta = true
common = false
# Pulls in the shared Azure Monitor `description` key from the partial.
<%= render("_partials/descriptions/_azure_monitor.toml") %>
delivery_guarantee = "at_least_once"
egress_method = "batching"
features = [
"Send logs to Azure Monitor.",
"Batch data to maximize throughput.",
"Automatically retry failed requests, with backoff.",
"Buffer your data in-memory or on-disk for performance and durability.",
]
function_category = "transmit"
healthcheck = true
# This sink accepts `log` events only.
input_types = ["log"]
service_providers = ["Azure"]
write_to_description = "[Azure Monitor's][urls.azure_monitor] logs via the [REST endpoint][urls.azure_monitor_logs_endpoints]"
requirements = {}

# Shared option partials for this sink: component, batch, buffer, and encoding
# options.
<%= render("_partials/fields/_component_options.toml", type: "sink", name: "azure_monitor_logs") %>
# NOTE(review): `max_bytes: 30000000` is passed here, but config/vector.spec.toml
# documents `batch.max_bytes` with a default of 5242880 for this sink. Confirm
# which value matches the sink's actual default.
<%= render("_partials/fields/_batch_options.toml", namespace: "sinks.azure_monitor_logs.options", common: false, max_bytes: 30000000, max_events: nil, timeout_secs: 1) %>
<%= render("_partials/fields/_buffer_options.toml", namespace: "sinks.azure_monitor_logs.options") %>
# Supported codecs are limited to `json` and `text`.
<%= render(
"_partials/fields/_encoding_options.toml",
namespace: "sinks.azure_monitor_logs.options",
encodings: ["json", "text"]
) %>

# Required option identifying the target Log Analytics workspace; the examples
# are GUID-formatted IDs.
[sinks.azure_monitor_logs.options.customer_id]
type = "string"
common = true
examples = ["5ce893d9-2c32-4b6c-91a9-b0887c2de2d6", "97ce69d9-b4be-4241-8dbd-d265edcf06c4"]
required = true
description = "The [unique identifier](https://docs.microsoft.com/en-us/azure/azure-monitor/platform/data-collector-api#request-uri-parameters) for the Log Analytics workspace."

# Required workspace key. This is a credential: the first example shows
# supplying it through an environment variable rather than inline.
# NOTE(review): the second example resembles a real base64-encoded key;
# confirm it is a placeholder and not a live secret.
[sinks.azure_monitor_logs.options.shared_key]
type = "string"
common = true
examples = ["${AZURE_MONITOR_SHARED_KEY_ENV_VAR}", "SERsIYhgMVlJB6uPsq49gCxNiruf6v0vhMYE+lfzbSGcXjdViZdV/e5pEMTYtw9f8SkVLf4LFlLCc2KxtRZfCA=="]
required = true
description = "The [primary or the secondary key](https://docs.microsoft.com/en-us/azure/azure-monitor/platform/data-collector-api#authorization) for the Log Analytics workspace."

# Required record type for submitted data.
# NOTE(review): the character and length constraint in the description is
# documentation only here; the commit message says configuration validation
# is still to be added. Confirm whether the sink enforces it.
[sinks.azure_monitor_logs.options.log_type]
type = "string"
common = true
examples = ["MyTableName", "MyRecordType"]
required = true
description = "The [record type of the data that is being submitted](https://docs.microsoft.com/en-us/azure/azure-monitor/platform/data-collector-api#request-headers). Can only contain letters, numbers, and underscore (_), and may not exceed 100 characters."

# Optional option (`required = false`). The examples are full resource ID
# paths beginning with `/subscriptions/<subscription-id>/resourceGroups/...`.
[sinks.azure_monitor_logs.options.azure_resource_id]
type = "string"
common = true
examples = ["/subscriptions/11111111-1111-1111-1111-111111111111/resourceGroups/otherResourceGroup/providers/Microsoft.Storage/storageAccounts/examplestorage", "/subscriptions/11111111-1111-1111-1111-111111111111/resourceGroups/examplegroup/providers/Microsoft.SQL/servers/serverName/databases/databaseName"]
required = false
description = "[Resource ID](https://docs.microsoft.com/en-us/azure/azure-monitor/platform/data-collector-api#request-headers) of the Azure resource the data should be associated with."

# TLS connector options for this sink.
# NOTE(review): `enabled_default: true` is passed here, but
# config/vector.spec.toml documents `tls.enabled` with "default: false" for
# this sink. Confirm which is correct.
<%= render(
"_partials/fields/_tls_connector_options.toml",
namespace: "sinks.azure_monitor_logs.options",
can_enable: true,
enabled_default: true,
can_verify_certificate: true,
can_verify_hostname: true
) %>
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ sinks = [
"sinks-aws_kinesis_firehose",
"sinks-aws_kinesis_streams",
"sinks-aws_s3",
"sinks-azure_monitor_logs",
"sinks-blackhole",
"sinks-clickhouse",
"sinks-console",
Expand Down Expand Up @@ -400,6 +401,7 @@ sinks-aws_cloudwatch_metrics = ["rusoto_core", "rusoto_credential", "rusoto_sign
sinks-aws_kinesis_firehose = ["rusoto_core", "rusoto_credential", "rusoto_signature", "rusoto_sts", "rusoto_firehose"]
sinks-aws_kinesis_streams = ["rusoto_core", "rusoto_credential", "rusoto_signature", "rusoto_sts", "rusoto_kinesis"]
sinks-aws_s3 = ["bytesize", "rusoto_core", "rusoto_credential", "rusoto_signature", "rusoto_sts", "rusoto_s3"]
sinks-azure_monitor_logs = ["bytesize"]
sinks-blackhole = []
sinks-clickhouse = ["bytesize"]
sinks-console = []
Expand Down
294 changes: 294 additions & 0 deletions config/vector.spec.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4260,6 +4260,300 @@ require('custom_module')
# * type: string
Tag1 = "Value1"

# Batches `log` events to Azure Monitor via the HTTP Data Collector API.
#
# NOTE(review): keys that appear more than once in this section (for example
# `customer_id`) list alternative example values; a real configuration would
# set each key only once.
[sinks.azure_monitor_logs]
#
# General
#

# The unique identifier for the Log Analytics workspace.
#
# * required
# * type: string
customer_id = "5ce893d9-2c32-4b6c-91a9-b0887c2de2d6"
customer_id = "97ce69d9-b4be-4241-8dbd-d265edcf06c4"

# The primary or the secondary key for the Log Analytics workspace.
#
# * required
# * type: string
shared_key = "${AZURE_MONITOR_SHARED_KEY_ENV_VAR}"
shared_key = "SERsIYhgMVlJB6uPsq49gCxNiruf6v0vhMYE+lfzbSGcXjdViZdV/e5pEMTYtw9f8SkVLf4LFlLCc2KxtRZfCA=="

# The record type of the data that is being submitted. Can only contain letters,
# numbers, and underscore (_), and may not exceed 100 characters.
#
# * required
# * type: string
log_type = "MyTableName"
log_type = "MyRecordType"

# Resource ID of the Azure resource the data should be associated with.
#
# * optional
# * no default
# * type: string
azure_resource_id = "/subscriptions/11111111-1111-1111-1111-111111111111/resourceGroups/otherResourceGroup/providers/Microsoft.Storage/storageAccounts/examplestorage"
azure_resource_id = "/subscriptions/11111111-1111-1111-1111-111111111111/resourceGroups/examplegroup/providers/Microsoft.SQL/servers/serverName/databases/databaseName"

# Enables/disables the sink healthcheck upon start.
#
# * optional
# * default: true
# * type: bool
healthcheck = true
healthcheck = false

# A list of upstream source or transform IDs. See configuration for more info.
#
# * required
# * type: [string]
inputs = ["my-source-or-transform-id"]

# The component type. This is a required field that tells Vector which
# component to use. The value _must_ be `azure_monitor_logs`.
#
# * required
# * type: string
# * must be: "azure_monitor_logs"
type = "azure_monitor_logs"

# Batch flush thresholds: size in bytes, size in events, and maximum age.
[sinks.azure_monitor_logs.batch]
# The maximum size of a batch, in bytes, before it is flushed.
#
# NOTE(review): the sink metadata (.meta/sinks/azure_monitor_logs.toml.erb)
# passes `max_bytes: 30000000`, which disagrees with the default listed here.
# Confirm which value is correct.
#
# * optional
# * default: 5242880
# * type: uint
# * unit: bytes
max_bytes = 5242880

# The maximum size of a batch, in events, before it is flushed.
#
# * optional
# * no default
# * type: uint
# * unit: events
max_events = 1000

# The maximum age of a batch before it is flushed.
#
# * optional
# * default: 1
# * type: uint
# * unit: seconds
timeout_secs = 1

#
# Buffer
#

# Buffer settings: capacity limits, storage type, and the behavior when the
# buffer becomes full.
[sinks.azure_monitor_logs.buffer]
# The maximum number of events allowed in the buffer.
#
# * optional
# * default: 500
# * type: uint
# * unit: events
# * relevant when type = "memory"
max_events = 500

# The maximum size of the buffer on the disk. Only required for disk buffers,
# as the last bullet states.
#
# * required
# * type: uint
# * unit: bytes
# * required when type = "disk"
max_size = 104900000

# The buffer's type and storage mechanism.
#
# * optional
# * default: "memory"
# * type: string
# * enum: "memory" or "disk"
type = "memory"
type = "disk"

# The behavior when the buffer becomes full.
#
# * optional
# * default: "block"
# * type: string
# * enum: "block" or "drop_newest"
when_full = "block"
when_full = "drop_newest"

#
# Request
#

# Request settings: concurrency, rate limiting, retries, and timeout.
[sinks.azure_monitor_logs.request]
# The maximum number of in-flight requests allowed at any given time. If this
# is not set, this limit will vary continuously based on the timing and
# contents of responses received from the remote service.
#
# * optional
# * no default
# * type: uint
# * unit: requests
in_flight_limit = 5

# The time window, in seconds, used for the `rate_limit_num` option.
#
# * optional
# * default: 1
# * type: uint
# * unit: seconds
rate_limit_duration_secs = 1

# The maximum number of requests allowed within the `rate_limit_duration_secs`
# time window.
#
# * optional
# * default: 5
# * type: uint
rate_limit_num = 5

# The maximum number of retries to make for failed requests. The default, for
# all intents and purposes, represents an infinite number of retries.
#
# * optional
# * default: 18446744073709551615
# * type: uint
retry_attempts = 18446744073709551615

# The amount of time to wait before attempting the first retry for a failed
# request. Once the first retry has failed, the Fibonacci sequence will be
# used to select future backoffs.
#
# * optional
# * default: 1
# * type: uint
# * unit: seconds
retry_initial_backoff_secs = 1

# The maximum amount of time, in seconds, to wait between retries.
#
# * optional
# * default: 10
# * type: uint
# * unit: seconds
retry_max_duration_secs = 10

# The maximum time a request can take before being aborted. It is highly
# recommended that you do not lower this value below the service's internal
# timeout, as this could create orphaned requests, pile on retries, and result
# in duplicate data downstream.
#
# * optional
# * default: 60
# * type: uint
# * unit: seconds
timeout_secs = 60

#
# Encoding
#

# Encoding settings: codec, field filtering, and timestamp format.
[sinks.azure_monitor_logs.encoding]
# The encoding codec used to serialize the events before outputting.
#
# * required
# * type: string
# * enum: "json" or "text"
codec = "json"
codec = "text"

# Prevent the sink from encoding the specified fields.
#
# * optional
# * no default
# * type: [string]
except_fields = ["timestamp", "message", "host"]

# Limit the sink to only encoding the specified fields.
#
# * optional
# * no default
# * type: [string]
only_fields = ["timestamp", "message", "host"]

# How to format event timestamps.
#
# * optional
# * default: "rfc3339"
# * type: string
# * enum: "rfc3339" or "unix"
timestamp_format = "rfc3339"
timestamp_format = "unix"

#
# TLS
#

# TLS settings: CA and client certificate material, plus verification
# toggles.
[sinks.azure_monitor_logs.tls]
# Absolute path to an additional CA certificate file, in DER or PEM format
# (X.509), or an inline CA certificate in PEM format.
#
# * optional
# * no default
# * type: string
ca_file = "/path/to/certificate_authority.crt"

# Absolute path to a certificate file used to identify this connection, in DER
# or PEM format (X.509) or PKCS#12, or an inline certificate in PEM format. If
# this is set and is not a PKCS#12 archive, `key_file` must also be set.
#
# * optional
# * no default
# * type: string
crt_file = "/path/to/host_certificate.crt"

# Enable TLS during connections to the remote.
#
# NOTE(review): the sink metadata (.meta/sinks/azure_monitor_logs.toml.erb)
# passes `enabled_default: true`, which disagrees with the default listed
# here. Confirm which is correct.
#
# * optional
# * default: false
# * type: bool
enabled = false
enabled = true

# Absolute path to a private key file used to identify this connection, in DER
# or PEM format (PKCS#8), or an inline private key in PEM format. If this is
# set, `crt_file` must also be set.
#
# * optional
# * no default
# * type: string
key_file = "/path/to/host_certificate.key"

# Pass phrase used to unlock the encrypted key file. This has no effect unless
# `key_file` is set.
#
# * optional
# * no default
# * type: string
key_pass = "${KEY_PASS_ENV_VAR}"
key_pass = "PassWord1"

# If `true` (the default), Vector will validate the TLS certificate of the
# remote host.
#
# * optional
# * default: true
# * type: bool
verify_certificate = true
verify_certificate = false

# If `true` (the default), Vector will validate the configured remote host name
# against the remote host's TLS certificate. Do NOT set this to `false` unless
# you understand the risks of not verifying the remote hostname.
#
# * optional
# * default: true
# * type: bool
verify_hostname = true
verify_hostname = false

# Streams `log` and `metric` events to a blackhole that simply discards data, designed for testing and benchmarking purposes.
[sinks.blackhole]
# A list of upstream source or transform IDs. See configuration for more info.
Expand Down
Loading

0 comments on commit 110a83e

Please sign in to comment.