Merge branch 'master' into sk/terraform-changes
sivakg2000 authored Apr 3, 2020
2 parents 6de91ba + a9c0f06 commit 5285e32
Showing 17 changed files with 387 additions and 240 deletions.
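Reader's note: the Terraform changes below appear to follow the Terraform 0.12 expression syntax — interpolation-only "${...}" quoting, the list() function, and un-indexed references to count-based resources give way to bare expressions, [...] literals, and explicit [0] indexes. A minimal before/after sketch of the pattern, using a hypothetical aws_sqs_queue.example resource rather than one from this repository:

variable "enable_example" {
  type    = bool
  default = false
}

// Conditionally-created resource: 0.11 needed count = "${var.enable_example ? 1 : 0}"
resource "aws_sqs_queue" "example" {
  count = var.enable_example ? 1 : 0
  name  = "example-queue"
}

locals {
  // 0.11 idiom (see the hashicorp/terraform#11574 comment in cloudwatch_dashboard.tf below):
  //   "${element(concat(aws_sqs_queue.example.*.name, list("")), 0)}"
  // 0.12 equivalent, with list() replaced by a [] literal:
  queue_name = element(concat(aws_sqs_queue.example.*.name, [""]), 0)
}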
2 changes: 1 addition & 1 deletion requirements.txt
@@ -38,7 +38,7 @@ protobuf==3.6.1
pycparser==2.19
pyfakefs==3.5.7
Pygments==2.3.1
pyhcl==0.3.10
pyhcl==0.4.0
pylint==2.2.2
pyOpenSSL==19.0.0
pyparsing==2.3.1
65 changes: 48 additions & 17 deletions terraform/cloudwatch_dashboard.tf
@@ -27,6 +27,7 @@ locals {
}
EOF


yara_rules = <<EOF
{
"type": "metric",
@@ -45,6 +46,7 @@ EOF
}
EOF


analyzed_binaries = <<EOF
{
"type": "metric",
@@ -61,6 +63,7 @@ EOF
}
EOF


sqs_analyzer = <<EOF
{
"type": "metric",
@@ -78,6 +81,7 @@ EOF
}
EOF


sqs_analyzer_age = <<EOF
{
"type": "metric",
@@ -108,11 +112,12 @@ EOF
}
EOF


// Due to https://github.com/hashicorp/terraform/issues/11574, both ternary branches are always
// computed, so we have to use this special idiom (same as modules/lambda/outputs.tf).
downloader_function_name = "${module.binaryalert_downloader.function_name}"
downloader_function_name = module.binaryalert_downloader.function_name

downloader_queue_name = "${element(concat(aws_sqs_queue.downloader_queue.*.name, list("")), 0)}"
downloader_queue_name = element(concat(aws_sqs_queue.downloader_queue.*.name, [""]), 0)

sqs_downloader = <<EOF
{
@@ -131,6 +136,7 @@ EOF
}
EOF


sqs_downloader_age = <<EOF
{
"type": "metric",
@@ -149,23 +155,37 @@ EOF
"horizontal": [
{
"label": "Max",
"value": "${element(concat(aws_sqs_queue.downloader_queue.*.message_retention_seconds, list("")), 0)}"
"value": "${element(
concat(
aws_sqs_queue.downloader_queue.*.message_retention_seconds,
[""],
),
0,
)}"
},
{
"label": "Alarm",
"value": "${element(concat(aws_cloudwatch_metric_alarm.downloader_sqs_age.*.threshold, list("")), 0)}"
"value": "${element(
concat(
aws_cloudwatch_metric_alarm.downloader_sqs_age.*.threshold,
[""],
),
0,
)}"
}
]
}
}
}
EOF

downloader = <<EOF

downloader = <<EOF
,[".", ".", ".", "${local.downloader_function_name}", {"label": "Downloader"}]
EOF

lambda_invocations = <<EOF

lambda_invocations = <<EOF
{
"type": "metric",
"width": 12,
@@ -185,7 +205,8 @@ EOF
}
EOF

max_lambda_duration = <<EOF

max_lambda_duration = <<EOF
{
"type": "metric",
"width": 12,
@@ -213,7 +234,8 @@ EOF
}
EOF

lambda_errors = <<EOF

lambda_errors = <<EOF
{
"type": "metric",
"width": 12,
@@ -233,7 +255,8 @@ EOF
}
EOF

lambda_throttles = <<EOF

lambda_throttles = <<EOF
{
"type": "metric",
"width": 12,
@@ -253,7 +276,8 @@ EOF
}
EOF

s3_download_latency = <<EOF

s3_download_latency = <<EOF
{
"type": "metric",
"width": 12,
@@ -269,7 +293,8 @@ EOF
}
EOF

sns_publications = <<EOF

sns_publications = <<EOF
{
"type": "metric",
"width": 12,
@@ -289,11 +314,13 @@ EOF
}
EOF

downloader_logs = <<EOF

downloader_logs = <<EOF
,[".", ".", ".", "/aws/lambda/${local.downloader_function_name}", {"label": "Downloader"}]
EOF

log_bytes = <<EOF

log_bytes = <<EOF
{
"type": "metric",
"width": 12,
@@ -314,7 +341,8 @@ EOF
}
EOF

dashboard_body_without_downloader = <<EOF

dashboard_body_without_downloader = <<EOF
{
"widgets": [
${local.s3_bucket_stats}, ${local.yara_rules},
@@ -327,7 +355,8 @@ EOF
}
EOF

dashboard_body_with_downloader = <<EOF

dashboard_body_with_downloader = <<EOF
{
"widgets": [
${local.s3_bucket_stats}, ${local.yara_rules},
@@ -341,13 +370,15 @@ EOF
}
EOF

dashboard_body = "${var.enable_carbon_black_downloader ? local.dashboard_body_with_downloader : local.dashboard_body_without_downloader}"

dashboard_body = var.enable_carbon_black_downloader ? local.dashboard_body_with_downloader : local.dashboard_body_without_downloader
}

resource "aws_cloudwatch_dashboard" "binaryalert" {
dashboard_name = "BinaryAlert"

// Terraform automatically converts numbers to strings when putting them in a list.
// We have to strip quotes around numbers, so that {"value": "123"} turns into {"value": 123}
dashboard_body = "${replace(local.dashboard_body, "/\"([0-9]+)\"/", "$1")}"
dashboard_body = replace(local.dashboard_body, "/\"([0-9]+)\"/", "$1")
}
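As an illustrative aside (not part of this commit): the /regex/ form of replace() used above matches any quoted run of digits, and the $1 backreference re-emits the digits without their surrounding quotes. The local names below are hypothetical:

locals {
  // What the dashboard interpolation produces: the threshold arrives as a quoted string.
  raw_widget = "{\"label\": \"Alarm\", \"value\": \"123\"}"

  // Same regex as in aws_cloudwatch_dashboard.binaryalert above;
  // yields {"label": "Alarm", "value": 123}
  fixed_widget = replace(local.raw_widget, "/\"([0-9]+)\"/", "$1")
}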

71 changes: 44 additions & 27 deletions terraform/cloudwatch_metric_alarm.tf
@@ -2,7 +2,13 @@

locals {
// Use the existing SNS alarm topic if specified, otherwise use the created one
alarm_target = "${element(concat(aws_sns_topic.metric_alarms.*.arn, list(var.metric_alarm_sns_topic_arn)), 0)}"
alarm_target = element(
concat(
aws_sns_topic.metric_alarms.*.arn,
[var.metric_alarm_sns_topic_arn],
),
0,
)
}

// The production BinaryAlert analyzer is not analyzing binaries.
@@ -15,17 +21,19 @@ ${module.binaryalert_analyzer.function_name} is not analyzing any binaries!
- Binaries may not be arriving in the S3 bucket.
EOF


namespace = "BinaryAlert"
metric_name = "AnalyzedBinaries"
statistic = "Sum"

// No binaries analyzed for a while.
comparison_operator = "LessThanOrEqualToThreshold"
threshold = 0
period = "${format("%d", var.expected_analysis_frequency_minutes * 60)}"
evaluation_periods = 1
alarm_actions = ["${local.alarm_target}"]
insufficient_data_actions = ["${local.alarm_target}"]
comparison_operator = "LessThanOrEqualToThreshold"
threshold = 0
period = format("%d", var.expected_analysis_frequency_minutes * 60)
evaluation_periods = 1

alarm_actions = [local.alarm_target]
insufficient_data_actions = [local.alarm_target]
}

// The analyzer SQS queue is falling behind.
@@ -39,48 +47,52 @@ messages are reaching 75% of the queue retention and may be expired soon.
- Consider raising the retention period for this queue
EOF


namespace = "AWS/SQS"
metric_name = "ApproximateAgeOfOldestMessage"
statistic = "Minimum"

dimensions = {
QueueName = "${aws_sqs_queue.analyzer_queue.name}"
QueueName = aws_sqs_queue.analyzer_queue.name
}

comparison_operator = "GreaterThanThreshold"
threshold = "${format("%d", ceil(var.analyze_queue_retention_secs * 0.75))}"
period = 60
evaluation_periods = 10
alarm_actions = ["${local.alarm_target}"]
insufficient_data_actions = ["${local.alarm_target}"]
comparison_operator = "GreaterThanThreshold"
threshold = format("%d", ceil(var.analyze_queue_retention_secs * 0.75))
period = 60
evaluation_periods = 10

alarm_actions = [local.alarm_target]
insufficient_data_actions = [local.alarm_target]
}

// The downloader SQS queue is falling behind.
resource "aws_cloudwatch_metric_alarm" "downloader_sqs_age" {
count = "${var.enable_carbon_black_downloader ? 1 : 0}"
alarm_name = "${aws_sqs_queue.downloader_queue.name}_old_age"
count = var.enable_carbon_black_downloader ? 1 : 0
alarm_name = "${aws_sqs_queue.downloader_queue[0].name}_old_age"

alarm_description = <<EOF
The queue ${aws_sqs_queue.downloader_queue.name} is not being processed quickly enough:
The queue ${aws_sqs_queue.downloader_queue[0].name} is not being processed quickly enough:
messages are reaching 75% of the queue retention and may be expired soon.
- Consider increasing the lambda_download_concurrency_limit to process more events
- Consider raising the retention period for this queue
EOF


namespace = "AWS/SQS"
metric_name = "ApproximateAgeOfOldestMessage"
statistic = "Minimum"

dimensions = {
QueueName = "${aws_sqs_queue.downloader_queue.name}"
QueueName = aws_sqs_queue.downloader_queue[0].name
}

comparison_operator = "GreaterThanThreshold"
threshold = "${format("%d", ceil(var.download_queue_retention_secs * 0.75))}"
period = 60
evaluation_periods = 10
alarm_actions = ["${local.alarm_target}"]
insufficient_data_actions = ["${local.alarm_target}"]
comparison_operator = "GreaterThanThreshold"
threshold = format("%d", ceil(var.download_queue_retention_secs * 0.75))
period = 60
evaluation_periods = 10

alarm_actions = [local.alarm_target]
insufficient_data_actions = [local.alarm_target]
}

// There are very few YARA rules.
@@ -92,6 +104,7 @@ The number of YARA rules in BinaryAlert is surprisingly low.
Check if a recent deploy accidentally removed most YARA rules.
EOF


namespace = "BinaryAlert"
metric_name = "YaraRules"
statistic = "Maximum"
@@ -101,7 +114,8 @@ EOF
threshold = 5
period = 300
evaluation_periods = 1
alarm_actions = ["${local.alarm_target}"]

alarm_actions = [local.alarm_target]
}

// Dynamo requests are being throttled.
@@ -117,17 +131,20 @@ Read or write requests to the BinaryAlert DynamoDB table are being throttled.
- If this is normal/expected behavior, increase the dynamo_read_capacity in the BinaryAlert config.
EOF


namespace = "AWS/DynamoDB"
metric_name = "ThrottledRequests"
statistic = "Sum"

dimensions = {
TableName = "${aws_dynamodb_table.binaryalert_yara_matches.name}"
TableName = aws_dynamodb_table.binaryalert_yara_matches.name
}

comparison_operator = "GreaterThanThreshold"
threshold = 0
period = 60
evaluation_periods = 1
alarm_actions = ["${local.alarm_target}"]

alarm_actions = [local.alarm_target]
}

9 changes: 5 additions & 4 deletions terraform/dynamo.tf
@@ -3,8 +3,8 @@ resource "aws_dynamodb_table" "binaryalert_yara_matches" {
name = "${var.name_prefix}_binaryalert_matches"
hash_key = "SHA256"
range_key = "AnalyzerVersion"
read_capacity = "${var.dynamo_read_capacity}"
write_capacity = "${var.dynamo_write_capacity}"
read_capacity = var.dynamo_read_capacity
write_capacity = var.dynamo_write_capacity

// Only attributes used as hash/range keys are defined here.
attribute {
@@ -22,7 +22,8 @@ resource "aws_dynamodb_table" "binaryalert_yara_matches" {
enabled = true
}

tags {
Name = "${var.tagged_name}"
tags = {
Name = var.tagged_name
}
}
