Skip to content

Commit

Permalink
examples: athena terraform (gravitational#29895)
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiaszheller authored Aug 10, 2023
1 parent e32248c commit 597ae4d
Show file tree
Hide file tree
Showing 2 changed files with 325 additions and 0 deletions.
280 changes: 280 additions & 0 deletions examples/athena/athena.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
provider "aws" {
region = var.aws_region
}

data "aws_caller_identity" "current" {}

resource "aws_kms_key" "audit_key" {
description = "KMS key for Athena audit log"
enable_key_rotation = true
}

resource "aws_kms_key_policy" "audit_key_policy" {
key_id = aws_kms_key.audit_key.id
policy = jsonencode({
Statement = [
{
Action = [
"kms:*"
]
Effect = "Allow"
Principal = {
AWS = data.aws_caller_identity.current.account_id
}
Resource = "*"
Sid = "Default Policy"
},
{
Action = [
"kms:GenerateDataKey",
"kms:Decrypt"
]
Effect = "Allow"
Principal = {
Service = "sns.amazonaws.com"
}
Resource = "*"
Sid = "SnsUsage"
Condition = {
StringEquals = {
"aws:SourceAccount" = data.aws_caller_identity.current.account_id
}
ArnLike = {
"aws:SourceArn" : aws_sns_topic.audit_topic.arn
}
}
},
]
Version = "2012-10-17"
})
}

resource "aws_kms_alias" "audit_key_alias" {
name = "alias/${var.kms_key_alias}"
target_key_id = aws_kms_key.audit_key.key_id
}

resource "aws_sns_topic" "audit_topic" {
name = var.sns_topic_name
kms_master_key_id = aws_kms_key.audit_key.arn
}

resource "aws_sqs_queue" "audit_queue_dlq" {
name = var.sqs_dlq_name
kms_master_key_id = aws_kms_key.audit_key.arn
kms_data_key_reuse_period_seconds = 300
message_retention_seconds = 604800 // 7 days which is three days longer than default 4 of sqs queue
}

resource "aws_sqs_queue" "audit_queue" {
name = var.sqs_queue_name
kms_master_key_id = aws_kms_key.audit_key.arn
kms_data_key_reuse_period_seconds = 300

redrive_policy = jsonencode({
deadLetterTargetArn = aws_sqs_queue.audit_queue_dlq.arn
maxReceiveCount = var.max_receive_count
})
}

resource "aws_sns_topic_subscription" "audit_sqs_target" {
topic_arn = aws_sns_topic.audit_topic.arn
protocol = "sqs"
endpoint = aws_sqs_queue.audit_queue.arn
raw_message_delivery = true
}

data "aws_iam_policy_document" "audit_policy" {
statement {
actions = [
"SQS:SendMessage",
]
effect = "Allow"
principals {
type = "Service"
identifiers = ["sns.amazonaws.com"]
}
resources = [aws_sqs_queue.audit_queue.arn]
condition {
test = "ArnEquals"
variable = "aws:SourceArn"
values = [aws_sns_topic.audit_topic.arn]
}
}
}

resource "aws_sqs_queue_policy" "audit_policy" {
queue_url = aws_sqs_queue.audit_queue.url
policy = data.aws_iam_policy_document.audit_policy.json
}

resource "aws_s3_bucket" "long_term_storage" {
bucket = var.long_term_bucket_name
force_destroy = true
# On production we recommend enabling object lock to provide deletion protection.
object_lock_enabled = false
}

resource "aws_s3_bucket_server_side_encryption_configuration" "long_term_storage" {
bucket = aws_s3_bucket.long_term_storage.id
rule {
apply_server_side_encryption_by_default {
kms_master_key_id = aws_kms_key.audit_key.arn
sse_algorithm = "aws:kms"
}
}
}

resource "aws_s3_bucket_ownership_controls" "long_term_storage" {
bucket = aws_s3_bucket.long_term_storage.id
rule {
object_ownership = "BucketOwnerEnforced"
}
}

resource "aws_s3_bucket_versioning" "long_term_storage" {
bucket = aws_s3_bucket.long_term_storage.id
versioning_configuration {
status = "Enabled"
}
}

resource "aws_s3_bucket_public_access_block" "long_term_storage" {
bucket = aws_s3_bucket.long_term_storage.id
block_public_acls = true
block_public_policy = true
ignore_public_acls = true
restrict_public_buckets = true
}

resource "aws_s3_bucket" "transient_storage" {
bucket = var.transient_bucket_name
force_destroy = true
# On production we recommend enabling lifecycle configuration to clean transient data.
}

resource "aws_s3_bucket_server_side_encryption_configuration" "transient_storage" {
bucket = aws_s3_bucket.transient_storage.id
rule {
apply_server_side_encryption_by_default {
kms_master_key_id = aws_kms_key.audit_key.arn
sse_algorithm = "aws:kms"
}
}
}

resource "aws_s3_bucket_ownership_controls" "transient_storage" {
bucket = aws_s3_bucket.transient_storage.id
rule {
object_ownership = "BucketOwnerEnforced"
}
}

resource "aws_s3_bucket_versioning" "transient_storage" {
bucket = aws_s3_bucket.transient_storage.id
versioning_configuration {
status = "Enabled"
}
}

resource "aws_s3_bucket_public_access_block" "transient_storage" {
bucket = aws_s3_bucket.transient_storage.id
block_public_acls = true
block_public_policy = true
ignore_public_acls = true
restrict_public_buckets = true
}

resource "aws_glue_catalog_database" "audit_db" {
name = var.database_name
}

resource "aws_glue_catalog_table" "audit_table" {
name = var.table_name
database_name = aws_glue_catalog_database.audit_db.name
table_type = "EXTERNAL_TABLE"
parameters = {
"EXTERNAL" = "TRUE",
"projection.enabled" = "true",
"projection.event_date.type" = "date",
"projection.event_date.format" = "yyyy-MM-dd",
"projection.event_date.interval" = "1",
"projection.event_date.interval.unit" = "DAYS",
"projection.event_date.range" = "NOW-4YEARS,NOW",
"storage.location.template" = format("s3://%s/events/$${event_date}/", aws_s3_bucket.long_term_storage.bucket)
"classification" = "parquet"
"classification" = "parquet",
"parquet.compression" = "SNAPPY",
}
storage_descriptor {
location = format("s3://%s", aws_s3_bucket.long_term_storage.bucket)
input_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
output_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"
ser_de_info {
name = "example"
parameters = { "serialization.format" = "1" }
serialization_library = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
}
columns {
name = "uid"
type = "string"
}
columns {
name = "session_id"
type = "string"
}
columns {
name = "event_type"
type = "string"
}
columns {
name = "event_time"
type = "timestamp"
}
columns {
name = "event_data"
type = "string"
}
columns {
name = "user"
type = "string"
}
}
partition_keys {
name = "event_date"
type = "date"
}
}

resource "aws_athena_workgroup" "workgroup" {
name = var.workgroup
force_destroy = true
configuration {
engine_version {
selected_engine_version = "Athena engine version 3"
}
result_configuration {
output_location = format("s3://%s/results", aws_s3_bucket.transient_storage.bucket)
encryption_configuration {
encryption_option = "SSE_KMS"
kms_key_arn = aws_kms_key.audit_key.arn
}
}
}
}


output "athena_url" {
value = format("athena://%s.%s?%s",
aws_glue_catalog_database.audit_db.name,
aws_glue_catalog_table.audit_table.name,
join("&", [
format("topicArn=%s", aws_sns_topic.audit_topic.arn),
format("largeEventsS3=s3://%s/large_payloads", aws_s3_bucket.transient_storage.bucket),
format("locationS3=s3://%s/events", aws_s3_bucket.long_term_storage.bucket),
format("workgroup=%s", aws_athena_workgroup.workgroup.name),
format("queueURL=%s", aws_sqs_queue.audit_queue.url),
format("queryResultsS3=s3://%s/query_results", aws_s3_bucket.transient_storage.bucket),
])
)
}
45 changes: 45 additions & 0 deletions examples/athena/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
variable "aws_region" {
description = "AWS region"
default = "us-west-2"
}

variable "sns_topic_name" {
description = "Name of the SNS topic used for publishing audit events"
}

variable "sqs_queue_name" {
description = "Name of the SQS queue used for subscription for audit events topic"
}

variable "sqs_dlq_name" {
description = "Name of the SQS Dead-Letter Queue used for handling unprocessable events"
}

variable "max_receive_count" {
description = "Number of times a message can be received before it is sent to the DLQ"
default = 10
}

variable "kms_key_alias" {
description = "The alias of a custom KMS key"
}

variable "long_term_bucket_name" {
description = "Name of the long term storage bucket used for storing audit events"
}

variable "transient_bucket_name" {
description = "Name of the transient storage bucket used for storing query results and large events payloads"
}

variable "database_name" {
description = "Name of Glue database"
}

variable "table_name" {
description = "Name of Glue table"
}

variable "workgroup" {
description = "Name of Athena workgroup"
}

0 comments on commit 597ae4d

Please sign in to comment.