diff --git a/infra/gcp/clusters/projects/kubernetes-public/aaa/12-iam.tf b/infra/gcp/clusters/projects/kubernetes-public/aaa/12-iam.tf deleted file mode 100644 index 97d7ff3259a..00000000000 --- a/infra/gcp/clusters/projects/kubernetes-public/aaa/12-iam.tf +++ /dev/null @@ -1,26 +0,0 @@ -/* -Ensure google groups defined in 'members' have read-only access to -all monitoring data and configurations. -Detailed list of permissions: https://cloud.google.com/monitoring/access-control#roles -*/ -resource "google_project_iam_binding" "readonlymonitoringbinding" { - project = data.google_project.project.id - role = "roles/monitoring.viewer" - - members = [ - "group:gke-security-groups@kubernetes.io", - ] -} - -/* -Ensure google groups defined in 'members' have read-only access to logs. -Detailed list of permissions: https://cloud.google.com/logging/docs/access-control#permissions_and_roles -*/ -resource "google_project_iam_binding" "readonlyloggingbinding" { - project = data.google_project.project.id - role = "roles/logging.privateLogViewer" - - members = [ - "group:gke-security-groups@kubernetes.io", - ] -} diff --git a/infra/gcp/ensure-main-project.sh b/infra/gcp/ensure-main-project.sh index 9859cabe311..66cfab4abac 100755 --- a/infra/gcp/ensure-main-project.sh +++ b/infra/gcp/ensure-main-project.sh @@ -34,249 +34,347 @@ if [ $# != 0 ]; then fi # The GCP project name. -PROJECT="kubernetes-public" +readonly PROJECT="kubernetes-public" # The BigQuery dataset for billing data. -BQ_BILLING_DATASET="kubernetes_public_billing" +readonly BQ_BILLING_DATASET="kubernetes_public_billing" # The BigQuery admins group. -BQ_ADMINS_GROUP="k8s-infra-bigquery-admins@kubernetes.io" +readonly BQ_ADMINS_GROUP="k8s-infra-bigquery-admins@kubernetes.io" # The cluster admins group. -CLUSTER_ADMINS_GROUP="k8s-infra-cluster-admins@kubernetes.io" +readonly CLUSTER_ADMINS_GROUP="k8s-infra-cluster-admins@kubernetes.io" # The accounting group. -ACCOUNTING_GROUP="k8s-infra-gcp-accounting@kubernetes.io" +readonly ACCOUNTING_GROUP="k8s-infra-gcp-accounting@kubernetes.io" # The GCS bucket which hold terraform state for clusters -LEGACY_CLUSTER_TERRAFORM_BUCKET="k8s-infra-clusters-terraform" +readonly LEGACY_CLUSTER_TERRAFORM_BUCKET="k8s-infra-clusters-terraform" # The GKE security groups group -CLUSTER_USERS_GROUP="gke-security-groups@kubernetes.io" +readonly CLUSTER_USERS_GROUP="gke-security-groups@kubernetes.io" # The DNS admins group. 
-DNS_GROUP="k8s-infra-dns-admins@kubernetes.io" +readonly DNS_GROUP="k8s-infra-dns-admins@kubernetes.io" -color 6 "Ensuring project exists: ${PROJECT}" -ensure_project "${PROJECT}" - -# Enable APIs we know we need -apis=( - bigquery-json.googleapis.com - compute.googleapis.com - container.googleapis.com - dns.googleapis.com - logging.googleapis.com - monitoring.googleapis.com - oslogin.googleapis.com - secretmanager.googleapis.com - storage-component.googleapis.com -) -ensure_only_services "${PROJECT}" "${apis[@]}" - -# buckets to hold terraform state +# GCS buckets to hold terraform state +# # - since we are using uniform bucket level access (ubla), each bucket should # represent a logical group of access, with org admins given storage.admin # for break-glass purposes # - the legacy bucket (k8s-infra-clusters-terraform) assumed the same set of -# users should have access to all gke clusters +# users should have access to all gke clusters (~all terraform-based infra) # - new bucket schema is "k8s-infra-tf-{folder}[-{suffix}]" where: # - folder: intended GCP folder for GCP projects managed by this terraform, # access level is ~owners of folder # - suffix: some subset of resources contained somewhere underneath folder, # access level is ~editors of those resources # - entry syntax is "bucket_name:owners_group" (: is invalid bucket name char) -terraform_state_bucket_entries=( - "${LEGACY_CLUSTER_TERRAFORM_BUCKET}:${CLUSTER_ADMINS_GROUP}" - k8s-infra-tf-aws:k8s-infra-aws-admins@kubernetes.io - k8s-infra-tf-prow-clusters:k8s-infra-prow-oncall@kubernetes.io - k8s-infra-tf-public-clusters:"${CLUSTER_ADMINS_GROUP}" - k8s-infra-tf-sandbox-ii:k8s-infra-ii-coop@kubernetes.io +readonly TERRAFORM_STATE_BUCKET_ENTRIES=( + "${LEGACY_CLUSTER_TERRAFORM_BUCKET}:${CLUSTER_ADMINS_GROUP}" + k8s-infra-tf-aws:k8s-infra-aws-admins@kubernetes.io + k8s-infra-tf-prow-clusters:k8s-infra-prow-oncall@kubernetes.io + k8s-infra-tf-public-clusters:"${CLUSTER_ADMINS_GROUP}" + k8s-infra-tf-sandbox-ii:k8s-infra-ii-coop@kubernetes.io ) -color 6 "Ensuring terraform state buckets exist with correct permissions" -for entry in "${terraform_state_bucket_entries[@]}"; do - bucket="gs://$(echo "${entry}" | cut -d: -f1)" - owners="$(echo "${entry}" | cut -d: -f2-)" - color 6 "Ensuring '${bucket}' exists as private with owners '${owners}'"; ( - ensure_private_gcs_bucket "${PROJECT}" "${bucket}" - empower_group_to_admin_gcs_bucket "${owners}" "${bucket}" - ensure_gcs_role_binding "${bucket}" "group:k8s-infra-gcp-org-admins@kubernetes.io" "roles/storage.admin" - ) 2>&1 | indent -done 2>&1 | indent - -color 6 "Empowering BigQuery admins" -ensure_project_role_binding \ - "${PROJECT}" \ - "group:${BQ_ADMINS_GROUP}" \ - "roles/bigquery.admin" - -color 6 "Empowering cluster admins" -# TODO: this can also be a custom role -cluster_admin_roles=( - roles/compute.viewer - roles/container.admin - roles/compute.loadBalancerAdmin - $(custom_org_role_name iam.serviceAccountLister) + +# The services we explicitly want enabled for the main project +# +# NOTE: Expected services include dependencies of these services, which may be +# more than direct dependencies. Do we care to statically encode the +# graph here? 
ensure_only_services dynamically computes the set of +# expected services +readonly MAIN_PROJECT_SERVICES=( + # billing data gets exported to bigquery + bigquery.googleapis.com + # GKE clusters are hosted in this project + container.googleapis.com + # DNS zones are managed in this project + dns.googleapis.com + # We look at logs in this project (e.g. from GKE) + logging.googleapis.com + # We look at monitoring dashboards in this project + monitoring.googleapis.com + # Secrets are hosted in this project + secretmanager.googleapis.com + # GCS buckets are hosted in this project + storage-component.googleapis.com ) -for role in "${cluster_admin_roles[@]}"; do - ensure_project_role_binding "${PROJECT}" "group:${CLUSTER_ADMINS_GROUP}" "${role}" -done -# TODO(spiffxp): remove when bindings for custom project role are gone -ensure_removed_project_role_binding "${PROJECT}" "group:${CLUSTER_ADMINS_GROUP}" "$(custom_project_role_name "${PROJECT}" ServiceAccountLister)" -ensure_removed_custom_project_iam_role "${PROJECT}" "ServiceAccountLister" - - -color 6 "Empowering cluster users" -ensure_project_role_binding \ - "${PROJECT}" \ - "group:${CLUSTER_USERS_GROUP}" \ - "roles/container.clusterViewer" - -color 6 "Empowering GCP accounting" -ensure_project_role_binding \ - "${PROJECT}" \ - "group:${ACCOUNTING_GROUP}" \ - "roles/bigquery.jobUser" - -color 6 "Ensuring the k8s-infra-gcp-auditor serviceaccount exists" -ensure_service_account \ - "${PROJECT}" \ - "k8s-infra-gcp-auditor" \ - "Grants readonly access to org resources" - -color 6 "Empowering k8s-infra-gcp-auditor serviceaccount to be used on trusted build cluster" -empower_ksa_to_svcacct \ - "k8s-infra-prow-build-trusted.svc.id.goog[test-pods/k8s-infra-gcp-auditor]" \ - "${PROJECT}" \ - $(svc_acct_email "${PROJECT}" "k8s-infra-gcp-auditor") -# TODO(spiffxp): delete this binding -empower_ksa_to_svcacct \ - "kubernetes-public.svc.id.goog[test-pods/k8s-infra-gcp-auditor]" \ - "${PROJECT}" \ - $(svc_acct_email "${PROJECT}" "k8s-infra-gcp-auditor") - -color 6 "Ensuring the k8s-infra-dns-updater serviceaccount exists" -ensure_service_account \ - "${PROJECT}" \ - "k8s-infra-dns-updater" \ - "k8s-infra dns updater" - -color 6 "Empowering k8s-infra-dns-updater serviceaccount to be used on build cluster" -empower_ksa_to_svcacct \ - "k8s-infra-prow-build-trusted.svc.id.goog[test-pods/k8s-infra-dns-updater]" \ - "${PROJECT}" \ - "$(svc_acct_email "${PROJECT}" "k8s-infra-dns-updater")" - -color 6 "Empowering ${DNS_GROUP}" -color 6 "Empowering BigQuery admins" -ensure_project_role_binding \ - "${PROJECT}" \ - "group:${DNS_GROUP}" \ - "roles/dns.admin" - -# Monitoring -MONITORING_SVCACCT_NAME="$(svc_acct_email "${PROJECT}" \ - "k8s-infra-monitoring-viewer")" - -color 6 "Ensuring the k8s-infra-monitoring-viewer serviceaccount exists" -ensure_service_account \ - "${PROJECT}" \ - "k8s-infra-monitoring-viewer" \ - "k8s-infra monitoring viewer" - -color 6 "Empowering k8s-infra-monitoring-viewer serviceaccount to be used on the 'aaa' cluster inside the 'monitoring' namespace" -empower_ksa_to_svcacct \ - "kubernetes-public.svc.id.goog[monitoring/k8s-infra-monitoring-viewer]" \ - "${PROJECT}" \ - "${MONITORING_SVCACCT_NAME}" - -color 6 "Empowering service account ${MONITORING_SVCACCT_NAME}" -ensure_project_role_binding \ - "${PROJECT}" \ - "serviceAccount:${MONITORING_SVCACCT_NAME}" \ - "roles/monitoring.viewer" - -# Kubernetes External Secrets -color 6 -n "Ensure the kubernetes-external-secrets serviceaccount exists" -ensure_service_account \ - "${PROJECT}" \ - 
"kubernetes-external-secrets" \ - "Kubernetes External Secrets Service Account" - -color 6 -n "Empowering kubernetes-external-secrets serviceaccount to be used on aaa cluster" -empower_ksa_to_svcacct \ - "kubernetes-public.svc.id.goog[kubernetes-external-secrets/kubernetes-external-secrets]" \ - "${PROJECT}" \ - "$(svc_acct_email "${PROJECT}" "kubernetes-external-secrets")" - -color 6 "Empowering service account kubernetes-external-services to access payloads of the secrets" -ensure_project_role_binding "${PROJECT}" \ - "serviceAccount:$(svc_acct_email "${PROJECT}" "kubernetes-external-secrets")" \ - roles/secretmanager.secretAccessor - -# Bootstrap DNS zones -ensure_dns_zone "${PROJECT}" "k8s-io" "k8s.io" -ensure_dns_zone "${PROJECT}" "kubernetes-io" "kubernetes.io" -ensure_dns_zone "${PROJECT}" "x-k8s-io" "x-k8s.io" -ensure_dns_zone "${PROJECT}" "k8s-e2e-com" "k8s-e2e.com" -ensure_dns_zone "${PROJECT}" "canary-k8s-io" "canary.k8s.io" -ensure_dns_zone "${PROJECT}" "canary-kubernetes-io" "canary.kubernetes.io" -ensure_dns_zone "${PROJECT}" "canary-x-k8s-io" "canary.x-k8s.io" -ensure_dns_zone "${PROJECT}" "canary-k8s-e2e-com" "canary.k8s-e2e.com" - -color 6 "Creating the BigQuery dataset for billing data" -if ! bq --project_id "${PROJECT}" ls "${BQ_BILLING_DATASET}" >/dev/null 2>&1; then - bq --project_id "${PROJECT}" mk "${BQ_BILLING_DATASET}" -fi -color 6 "Setting BigQuery permissions" +# Create a GCP service account intended for use by GKE cluster workloads +# $1: The GCP project hosting the service account (e.g. k8s-infra-foo) +# $2: The name of the GCP service account (e.g. k8s-infra-doer) +# $3: The IAM role the service account will have on the project (e.g. roles/foo.bar) +# $4: The GCP project hosting GKE cluster(s) that will use this service account (e.g. k8s-infra-foo-clusters) +# $5: The K8s namespace hosting the service account that will use this GCP SA (e.g. my-app-ns) +# +# e.g. 
the above allows pods running as my-app-ns/k8s-infra-doer in clusters in +# k8s-infra-foo-clusters to act as k8s-infra-doer@k8s-infra-foo.iam.gserviceaccount.com, +# using the permissions granted by roles/foo.bar on k8s-infra-foo +function ensure_workload_identity_serviceaccount() { + if [ $# -lt 5 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ] || [ -z "$5" ]; then + echo "${FUNCNAME[0]}(project, name, role, cluster_project, cluster_namespace) requires 5 arguments" >&2 + return 1 + fi + local serviceaccount_project="${1}" + local serviceaccount_name="${2}" + local serviceaccount_email + serviceaccount_email="$(svc_acct_email "${serviceaccount_project}" "${serviceaccount_name}")" + local serviceaccount_description="${serviceaccount_name}" + local serviceaccount_role="${3}" + local cluster_project="${4}" + local cluster_namespace="${5}" + + color 6 "Ensuring serviceaccount ${serviceaccount_email} exists" + ensure_service_account "${serviceaccount_project}" "${serviceaccount_name}" "${serviceaccount_description}" + + color 6 "Empowering ${serviceaccount_email} with role ${serviceaccount_role} in project ${serviceaccount_project}" + ensure_project_role_binding \ + "${serviceaccount_project}" \ + "serviceAccount:${serviceaccount_email}" \ + "${serviceaccount_role}" + + local clusters="clusters in project ${cluster_project}" + local k8s_sa="KSA ${cluster_namespace}/${serviceaccount_name}" + local gcp_sa="GCP SA ${serviceaccount_email}" + + color 6 "Empowering ${clusters} to run workloads as ${k8s_sa} to act as ${gcp_sa}" + empower_gke_for_serviceaccount "${cluster_project}" "${cluster_namespace}" "${serviceaccount_email}" +} + +# +# main functions +# -# Merge existing permissions with the ones we need to exist. We merge -# permissions because: -# * The full list is large and has stuff that is inherited listed in it -# * All of our other IAM binding logic calls are additive +function ensure_terraform_state_buckets() { + if [ $# -ne 1 ] || [ -z "$1" ]; then + echo "${FUNCNAME[0]}(gcp_project) requires 1 argument" >&2 + return 1 + fi + local project="${1}" + + for entry in "${TERRAFORM_STATE_BUCKET_ENTRIES[@]}"; do + bucket="gs://$(echo "${entry}" | cut -d: -f1)" + owners="$(echo "${entry}" | cut -d: -f2-)" + color 6 "Ensuring '${bucket}' exists as private with owners '${owners}'"; ( + ensure_private_gcs_bucket "${project}" "${bucket}" + empower_group_to_admin_gcs_bucket "${owners}" "${bucket}" + ensure_gcs_role_binding "${bucket}" "group:k8s-infra-gcp-org-admins@kubernetes.io" "admin" + ) 2>&1 | indent + done +} -CUR=${TMPDIR}/k8s-infra-bq-access.before.json -bq show --format=prettyjson "${PROJECT}":"${BQ_BILLING_DATASET}" > "${CUR}" +function empower_cluster_admins_and_users() { + if [ $# -ne 1 ] || [ -z "$1" ]; then + echo "${FUNCNAME[0]}(gcp_project) requires 1 argument" >&2 + return 1 + fi + local project="${1}" + + color 6 "Empowering cluster admins" + # TODO(spiffxp): make this a custom role + cluster_admin_roles=( + roles/compute.viewer + roles/container.admin + roles/compute.loadBalancerAdmin + "$(custom_org_role_name iam.serviceAccountLister)" + ) + for role in "${cluster_admin_roles[@]}"; do + ensure_project_role_binding "${project}" "group:${CLUSTER_ADMINS_GROUP}" "${role}" + done + # TODO(spiffxp): remove when these bindings have been removed + removed_cluster_admin_roles=( + "$(custom_project_role_name "${project}" ServiceAccountLister)" + ) + for role in "${removed_cluster_admin_roles[@]}"; do + ensure_removed_project_role_binding "${project}" 
"group:${CLUSTER_ADMINS_GROUP}" "${role}" + done + # TODO(spiffxp): ensure this is removed when the binding(s) using it have been removed + ensure_removed_custom_project_iam_role "${project}" "ServiceAccountLister" + + color 6 "Empowering cluster users" + cluster_user_roles=( + roles/container.clusterViewer + roles/logging.privateLogViewer + roles/monitoring.viewer + ) + for role in "${cluster_user_roles[@]}"; do + ensure_project_role_binding "${project}" "group:${CLUSTER_USERS_GROUP}" "${role}" + done +} -ENSURE=${TMPDIR}/k8s-infra-bq-access.ensure.json -cat > "${ENSURE}" << __EOF__ -{ - "access": [ - { - "groupByEmail": "${ACCOUNTING_GROUP}", - "role": "READER" - }, - { - "groupByEmail": "${ACCOUNTING_GROUP}", - "role": "roles/bigquery.metadataViewer" - }, + +function ensure_dns() { + if [ $# -ne 1 ] || [ -z "$1" ]; then + echo "${FUNCNAME[0]}(gcp_project) requires 1 argument" >&2 + return 1 + fi + local project="${1}" + + local domains=( + k8s.io + kubernetes.io + x-k8s.io + k8s-e2e.com + ) + + color 6 "Empowering ${DNS_GROUP}" + ensure_project_role_binding \ + "${project}" \ + "group:${DNS_GROUP}" \ + "roles/dns.admin" + + # Bootstrap DNS zones + for domain in "${domains[@]}"; do + # canary domain for each domain, e.g. k8s.io and canary.k8s.io + for prefix in "" "canary."; do + zone="${prefix}${domain}" + name="${zone//./-}" + color 6 "Ensuring DNS zone ${zone}" + ensure_dns_zone "${project}" "${name}" "${zone}" 2>&1 | indent + done + done +} + +function ensure_billing_bigquery() { + if [ $# -ne 1 ] || [ -z "$1" ]; then + echo "${FUNCNAME[0]}(gcp_project) requires 1 argument" >&2 + return 1 + fi + local project="${1}" + + color 6 "Empowering BigQuery admins" + ensure_project_role_binding \ + "${project}" \ + "group:${BQ_ADMINS_GROUP}" \ + "roles/bigquery.admin" + + color 6 "Empowering GCP accounting" + ensure_project_role_binding \ + "${project}" \ + "group:${ACCOUNTING_GROUP}" \ + "roles/bigquery.jobUser" + + color 6 "Creating the BigQuery dataset for billing data" + if ! bq --project_id "${project}" ls "${BQ_BILLING_DATASET}" >/dev/null 2>&1; then + bq --project_id "${project}" mk "${BQ_BILLING_DATASET}" + fi + + color 6 "Setting BigQuery permissions" + # Merge existing permissions with the ones we need to exist. We merge + # permissions because: + # * The full list is large and has stuff that is inherited listed in it + # * All of our other IAM binding logic calls are additive + local before="${TMPDIR}/k8s-infra-bq-access.before.json" + local ensure="${TMPDIR}/k8s-infra-bq-access.ensure.json" + local final="${TMPDIR}/k8s-infra-bq-access.final.json" + + bq show --format=prettyjson "${project}":"${BQ_BILLING_DATASET}" > "${before}" + + cat > "${ensure}" < "${final}" + + bq update --source "${final}" "${project}":"${BQ_BILLING_DATASET}" + + color 4 "To enable billing export, a human must log in to the cloud" + color 4 -n "console. Go to " + color 6 -n "Billing" + color 4 -n "; " + color 6 -n "Billing export" + color 4 " and export to BigQuery" + color 4 -n "in project " + color 6 -n "${project}" + color 4 -n " dataset " + color 6 -n "${BQ_BILLING_DATASET}" + color 4 " ." + echo + color 4 "Press enter to acknowledge" + read -rs } -__EOF__ - -FINAL=${TMPDIR}/k8s-infra-bq-access.final.json -jq -s '.[0].access + .[1].access | { access: . }' "${CUR}" "${ENSURE}" > "${FINAL}" - -bq update --source "${FINAL}" "${PROJECT}":"${BQ_BILLING_DATASET}" - -color 4 "To enable billing export, a human must log in to the cloud" -color 4 -n "console. 
Go to " -color 6 -n "Billing" -color 4 -n "; " -color 6 -n "Billing export" -color 4 " and export to BigQuery" -color 4 -n "in project " -color 6 -n "${PROJECT}" -color 4 -n " dataset " -color 6 -n "${BQ_BILLING_DATASET}" -color 4 " ." -echo -color 4 "Press enter to acknowledge" -read -s - -color 6 "Done" + +function ensure_main_project() { + if [ $# -ne 1 ] || [ -z "$1" ]; then + echo "${FUNCNAME[0]}(gcp_project) requires 1 argument" >&2 + return 1 + fi + local project="${1}" + + color 6 "Ensuring project exists: ${project}" + ensure_project "${project}" 2>&1 | indent + + color 6 "Ensuring main project services enabled for: ${project}" + # TODO: there are many services that appear to have been enabled piecemeal + # and it's not yet clear which ones we actually rely upon, so this is + # not being run with K8S_INFRA_ENSURE_ONLY_SERVICES_WILL_FORCE_DISABLE + ensure_only_services "${project}" "${MAIN_PROJECT_SERVICES[@]}" 2>&1 | indent + + color 6 "Ensuring terraform state buckets exist with correct permissions in: ${project}" + ensure_terraform_state_buckets "${project}" 2>&1 | indent + + color 6 "Empowering cluster users and admins for clusters in: ${project}" + empower_cluster_admins_and_users "${project}" 2>&1 | indent + + color 6 "Ensuring specific workload identity serviceaccounts exist in: ${project}"; ( + local svcacct_args cluster_args + + color 6 "Ensuring GCP Auditor serviceaccount" + # roles/viewer on kubernetes-public is a bootstrap; the true purpose + # is custom role audit.viewer on the kubernetes.io org, but that is + # handled by ensure-organization.sh + svcacct_args=("${project}" "k8s-infra-gcp-auditor" "roles/viewer") + cluster_args=("k8s-infra-prow-build-trusted" "test-pods") + ensure_workload_identity_serviceaccount "${svcacct_args[@]}" "${cluster_args[@]}" 2>&1 | indent + + # TODO(spiffxp): remove once this binding has been deleted + local gcp_auditor_email + gcp_auditor_email=$(svc_acct_email "${project}" "k8s-infra-gcp-auditor") + color 6 "Ensuring removed workload identity on kubernetes-public/test-pods for ${gcp_auditor_email}" + unempower_gke_for_serviceaccount \ + "kubernetes-public" \ + "test-pods" \ + "${gcp_auditor_email}" 2>&1 | indent + + color 6 "Ensuring DNS Updater serviceaccount" + svcacct_args=("${project}" "k8s-infra-dns-updater" "roles/dns.admin") + cluster_args=("k8s-infra-prow-build-trusted" "test-pods") + ensure_workload_identity_serviceaccount "${svcacct_args[@]}" "${cluster_args[@]}" 2>&1 | indent + + color 6 "Ensuring Monitoring Viewer serviceaccount" + svcacct_args=("${project}" "k8s-infra-monitoring-viewer" "roles/monitoring.viewer") + cluster_args=("${project}" "monitoring") + ensure_workload_identity_serviceaccount "${svcacct_args[@]}" "${cluster_args[@]}" 2>&1 | indent + + color 6 "Ensuring Kubernetes External Secrets serviceaccount" + svcacct_args=("${project}" "kubernetes-external-secrets" "roles/secretmanager.secretAccessor") + cluster_args=("${project}" "kubernetes-external-secrets") + ensure_workload_identity_serviceaccount "${svcacct_args[@]}" "${cluster_args[@]}" 2>&1 | indent + ) 2>&1 | indent + + color 6 "Ensuring DNS is configured in: ${project}" + ensure_dns "${project}" 2>&1 | indent + + color 6 "Ensuring biquery configured for billing and access by appropriate groups in: ${project}" + ensure_billing_bigquery "${project}" 2>&1 | indent + + color 6 "Done" +} + +ensure_main_project "${PROJECT}" diff --git a/infra/gcp/ensure-organization.sh b/infra/gcp/ensure-organization.sh index f43e7809565..00d22f17da8 100755 --- 
a/infra/gcp/ensure-organization.sh +++ b/infra/gcp/ensure-organization.sh @@ -43,50 +43,97 @@ org_roles=( iam.serviceAccountLister ) -old_org_roles=() +removed_org_roles=() -color 6 "Ensuring organization custom roles exist" -( +org_role_bindings=( + # empower k8s-infra-org-admins@ + # NOTE: roles/owner has too many permissions to aggregate into a custom role, + # and some services automatically add bindings for roles/owner, e.g. + # https://cloud.google.com/storage/docs/access-control/iam-roles#basic-roles-intrinsic + "group:k8s-infra-gcp-org-admins@kubernetes.io:roles/owner" + "group:k8s-infra-gcp-org-admins@kubernetes.io:$(custom_org_role_name "organization.admin")" + + # empower k8s-infra-prow-oncall@ to use GCP Console to navigate to their projects + "group:k8s-infra-prow-oncall@kubernetes.io:roles/browser" + + # TODO: not sure this is required for GKE Google Group RBAC, is this proxy + # for "let people running apps in aaa use GCP console to navigate" ? + "group:gke-security-groups@kubernetes.io:roles/browser" + + # TODO: what is the purpose of this role? + "group:k8s-infra-gcp-accounting@kubernetes.io:$(custom_org_role_name "CustomRole")" + + # empower k8s-infra-gcp-auditors@ and equivalent service-account + "group:k8s-infra-gcp-auditors@kubernetes.io:$(custom_org_role_name "audit.viewer")" + "serviceAccount:$(svc_acct_email "kubernetes-public" "k8s-infra-gcp-auditor"):$(custom_org_role_name "audit.viewer")" +) + +removed_org_role_bindings=() + +function ensure_org_roles() { for role in "${org_roles[@]}"; do - color 6 "Ensuring organization custom role ${role}" - ensure_custom_org_iam_role_from_file "${role}" "${SCRIPT_DIR}/roles/${role}.yaml" + color 6 "Ensuring organization custom role ${role}" + ensure_custom_org_iam_role_from_file "${role}" "${SCRIPT_DIR}/roles/${role}.yaml" + done +} + +function ensure_removed_org_roles() { + for role in "${removed_org_roles[@]}"; do + color 6 "Ensuring removed organization custom role ${role}" + ensure_removed_custom_org_iam_role "${role}" + done +} + +function ensure_org_role_bindings() { + for binding in "${org_role_bindings[@]}"; do + principal_type="$(echo "${binding}" | cut -d: -f1)" + principal_email="$(echo "${binding}" | cut -d: -f2)" + principal="$(echo "${binding}" | cut -d: -f1-2)" + role="$(echo "${binding}" | cut -d: -f3-)" + + # avoid dependency-cycles by skipping, e.g. + # - serviceaccount bound in this script, created by ensure-main-project.sh + # - custom org roles used by ensure-main-project.sh, created by this script + # - so allow this to run to completion before ensure-main-project.sh, and + # accept that bootstrapping means: run this, then that, then this again + case ${principal_type} in + serviceAccount) + if ! 
gcloud iam service-accounts describe "${principal_email}" >/dev/null 2>&1; then + color 2 "Skipping ${principal} bound to ${role}: principal does not exist" + continue + fi + ;; + group|user) ;; # TODO: need access to Directory API + esac + + color 6 "Ensuring ${principal} bound to ${role}" + ensure_org_role_binding "${principal}" "${role}" 2>&1 | indent done -) 2>&1 | indent - -color 6 "Ensuring organization IAM bindings exist" -( - # k8s-infra-prow-oncall@kubernetes.io should be able to browse org resources - ensure_org_role_binding "group:k8s-infra-prow-oncall@kubernetes.io" "roles/browser" - - # TODO: this already exists, but seems overprivileged for a group that is about - # access to the "aaa" cluster in "kubernetes-public" - ensure_org_role_binding "group:gke-security-groups@kubernetes.io" "roles/browser" - - # k8s-infra-gcp-accounting@ - ensure_org_role_binding "group:k8s-infra-gcp-accounting@kubernetes.io" "$(custom_org_role_name "CustomRole")" - - # k8s-infra-gcp-auditors@ - ensure_org_role_binding "group:k8s-infra-gcp-auditors@kubernetes.io" "$(custom_org_role_name "audit.viewer")" - - # k8s-infra-org-admins@ - # roles/owner has too many permissions to aggregate into a custom role, - # and some services (e.g. storage) add bindings based on membership in it - ensure_org_role_binding "group:k8s-infra-gcp-org-admins@kubernetes.io" "roles/owner" - # everything org admins need beyond roles/owner to manage the org - ensure_org_role_binding "group:k8s-infra-gcp-org-admins@kubernetes.io" "$(custom_org_role_name "organization.admin")" -) 2>&1 | indent - -color 6 "Ensuring removed organization IAM bindings do not exist" -( - color 6 "No bindings to remove" -) 2>&1 | indent - -color 6 "Ensuring removed organization custom roles do not exist" -( - for role in "${old_org_roles[@]}"; do - color 6 "Ensuring removed organization custom role ${role}" - ensure_removed_custom_org_iam_role "${role}" +} + +function ensure_removed_org_role_bindings() { + for binding in "${removed_org_role_bindings[@]}"; do + principal="$(echo "${binding}" | cut -d: -f1-2)" + role="$(echo "${binding}" | cut -d: -f3-)" + color 6 "Ensuring ${principal} bound to ${role}" + ensure_removed_org_role_binding "${principal}" "${role}" 2>&1 | indent done -) 2>&1 | indent +} + +function main() { + color 6 "Ensuring organization custom roles exist" + ensure_org_roles 2>&1 | indent + + color 6 "Ensuring organization IAM bindings exist" + ensure_org_role_bindings 2>&1 | indent + + color 6 "Ensuring removed organization IAM bindings do not exist" + ensure_removed_org_role_bindings 2>&1 | indent + + color 6 "Ensuring removed organization custom roles do not exist" + ensure_removed_org_roles 2>&1 | indent + + color 6 "All done!" +} -color 6 "All done!" +main diff --git a/infra/gcp/lib.sh b/infra/gcp/lib.sh index 06b5e92466b..de23afb23ce 100644 --- a/infra/gcp/lib.sh +++ b/infra/gcp/lib.sh @@ -490,6 +490,67 @@ function empower_ksa_to_svcacct() { ensure_serviceaccount_role_binding "${gcp_svcacct}" "serviceAccount:${ksa_scope}" "roles/iam.workloadIdentityUser" } +# Allow GKE clusters in the given GCP project to run workloads using a +# Kubernetes service account in the given namepsace to act as the given +# GCP service account via Workload Identity when the name of the Kubernetes +# service account matches the optionally provided name if given, or the +# name of the GCP service account. +# +# ref: https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity +# +# $1: The GCP project that hosts the GKE clusters (e.g. 
k8s-infra-foo-clusters) +# $2: The K8s namespace that hosts the Kubernetes service account (e.g. my-app-ns) +# $3: The GCP service account to be bound (e.g. k8s-infra-doer@k8s-infra-foo.iam.gserviceaccount.com) +# [$4]: Optional: The Kubernetes service account name (e.g. my-app-doer; default: k8s-infra-doer) +# +# e.g. the above allows pods running as my-app-ns/my-app-doer in clusters in +# k8s-infra-foo-clusters to act as k8s-infra-doer@k8s-infra-foo.iam.gserviceaccount.com +function empower_gke_for_serviceaccount() { + if [ $# -lt 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then + echo "${FUNCNAME[0]}(gcp_project, k8s_namespace, gcp_sa_email, [k8s_sa_name]) requires at least 3 arguments" >&2 + return 1 + fi + + local gke_project="$1" + local k8s_namespace="$2" + local gcp_sa_email="${3}" + local k8s_sa_name="${4:-""}" + if [ -z "${k8s_sa_name}" ]; then + k8s_sa_name="$(echo "${gcp_sa_email}" | cut -d@ -f1)" + fi + + local principal="serviceAccount:${gke_project}.svc.id.goog[${k8s_namespace}/${k8s_sa_name}]" + + ensure_serviceaccount_role_binding "${gcp_sa_email}" "${principal}" "roles/iam.workloadIdentityUser" +} + +# Prevent clusters in the given GCP project from running workloads using a +# Kubernetes service account in the given namespace to act as the given +# GCP service account. aka the opposite of empower_gke_for_serviceaccount +# +# $1: The GCP project that hosts the GKE clusters (e.g. k8s-infra-foo-clusters) +# $2: The K8s namespace that hosts the Kubernetes service account (e.g. my-app-ns) +# $3: The GCP service account to be unbound (e.g. k8s-infra-doer@k8s-infra-foo.iam.gserviceaccount.com) +# [$4]: Optional: The Kubernetes service account name (e.g. my-app-doer; default: k8s-infra-doer) +function unempower_gke_for_serviceaccount() { + if [ $# -lt 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then + echo "${FUNCNAME[0]}(gcp_project, k8s_namespace, gcp_sa_email, [k8s_sa_name]) requires at least 3 arguments" >&2 + return 1 + fi + + local gke_project="$1" + local k8s_namespace="$2" + local gcp_sa_email="${3}" + local k8s_sa_name="${4:-""}" + if [ -z "${k8s_sa_name}" ]; then + k8s_sa_name="$(echo "${gcp_sa_email}" | cut -d@ -f1)" + fi + + local principal="serviceAccount:${gke_project}.svc.id.goog[${k8s_namespace}/${k8s_sa_name}]" + + ensure_removed_serviceaccount_role_binding "${gcp_sa_email}" "${principal}" "roles/iam.workloadIdentityUser" +} + # Ensure that a global ip address exists, creating one if needed # $1 The GCP project # $2 The address name (e.g. foo-ingress), IPv6 addresses must have a "-v6" suffix diff --git a/infra/gcp/lib_gcs.sh b/infra/gcp/lib_gcs.sh index db8337e545e..9c72f42f51c 100644 --- a/infra/gcp/lib_gcs.sh +++ b/infra/gcp/lib_gcs.sh @@ -68,7 +68,27 @@ function ensure_private_gcs_bucket() { local bucket="$2" _ensure_gcs_bucket "${project}" "${bucket}" - gsutil iam ch -d allUsers "${bucket}" + ensure_removed_gcs_role_binding "${bucket}" "allUsers" "objectViewer" + # roles/storage.legacyBucketOwner contains storage.buckets.setIamPolicy, + # but not storage.objects.get. This means someone with roles/editor on the + # project could grant themselves access to read bucket contents that they + # aren't supposed to be able to read. + # + # Given that roles/editor has no *.setIamPolicy permissions for other + # service resources, this seems like a security gap that should be closed. + # + # Ideally we would do this in _ensure_gcs_bucket. 
However, removing this + # role means removing other (possibly needed) permissions that may be used + # by GCP service agent service accounts (e.g. App Engine, GCR, GCE): + # - storage.buckets.get + # - storage.multipartUploads.(abort|create|list|listParts) + # - storage.objects.(create|delete|list) + # + # So until we have time to research what buckets those service agents + # specifically need access to, we'll leave them alone and constrain this + # policy to "private" gcs buckets that are currently only used by humans + # to store terraform state containing potentially sensitive info + ensure_removed_gcs_role_binding "${bucket}" "projectEditor:${project}" "legacyBucketOwner" } # Sets the web policy on the bucket, including a default index.html page @@ -181,20 +201,38 @@ ensure_gcs_role_binding() { local principal="${2}" local role="${3}" - local before="${TMPDIR}/gcs-bind.before.yaml" - local after="${TMPDIR}/gcs-bind.after.yaml" + # use json for gsutil iam set, yaml for easier diffing + local before_json="${TMPDIR}/gsutil-iam-get.before.json" + local after_json="${TMPDIR}/gsutil-iam-get.after.json" + local before_yaml="${TMPDIR}/gcs-role-binding.before.yaml" + local after_yaml="${TMPDIR}/gcs-role-binding.after.yaml" - gsutil iam get "${bucket}" | yq -y | _format_iam_policy > "${before}" + gsutil iam get "${bucket}" > "${before_json}" + <"${before_json}" yq -y | _format_iam_policy > "${before_yaml}" - # `gsutil iam ch` is idempotent, but avoid calling if we can, to reduce output noise - if ! <"${before}" yq --exit-status \ + # Avoid calling `gsutil iam set` if we can, to reduce output noise + if ! <"${before_yaml}" yq --exit-status \ ".[] | select(contains({role: \"${role}\", member: \"${principal}\"}))" \ >/dev/null; then - gsutil iam ch "${principal}:${role}" "${bucket}" - gsutil iam get "${bucket}" | yq -y | _format_iam_policy > "${after}" - - diff_colorized "${before}" "${after}" + # add the binding, then merge with existing bindings + <"${before_json}" yq --arg role "${role}" --arg principal "${principal}" \ + '.bindings |= ( + . + [{ + members: [$principal], + role: ("roles/storage." 
+ $role) + }] + | group_by(.role) + | map({ + members: map(.members) | flatten | unique, + role: .[0].role + }) + )' > "${after_json}" + + gsutil iam set "${after_json}" "${bucket}" + <"${after_json}" yq -y | _format_iam_policy > "${after_yaml}" + + diff_colorized "${before_yaml}" "${after_yaml}" fi } @@ -213,20 +251,35 @@ ensure_removed_gcs_role_binding() { local principal="${2}" local role="${3}" - local before="${TMPDIR}/gcs-bind.before.yaml" - local after="${TMPDIR}/gcs-bind.after.yaml" + # use json for gsutil iam set, yaml for easier diffing + local before_json="${TMPDIR}/gsutil-iam-get.before.json" + local after_json="${TMPDIR}/gsutil-iam-get.after.json" + local before_yaml="${TMPDIR}/gcs-role-binding.before.yaml" + local after_yaml="${TMPDIR}/gcs-role-binding.after.yaml" - gsutil iam get "${bucket}" | yq -y | _format_iam_policy > "${before}" + gsutil iam get "${bucket}" > "${before_json}" + <"${before_json}" yq -y | _format_iam_policy > "${before_yaml}" - # `gsutil iam ch` is idempotent, but avoid calling if we can, to reduce output noise - if <"${before}" yq --exit-status \ + # Avoid calling `gsutil iam set` if we can, to reduce output noise + if <"${before_yaml}" yq --exit-status \ ".[] | select(contains({role: \"${role}\", member: \"${principal}\"}))" \ >/dev/null; then - gsutil iam ch -d "${principal}:${role}" "${bucket}" - gsutil iam get "${bucket}" | yq -y | _format_iam_policy > "${after}" - - diff_colorized "${before}" "${after}" + # remove member from role if it exists; gcs deletes bindings with no + # members, so we don't need to bother pruning them here + <"${before_json}" yq --arg role "${role}" --arg principal "${principal}" \ + '.bindings |= map( + if .role == ("roles/storage." + $role) then + .members -= [$principal] + else + . + end + )' > "${after_json}" + + gsutil iam set "${after_json}" "${bucket}" + <"${after_json}" yq -y | _format_iam_policy > "${after_yaml}" + + diff_colorized "${before_yaml}" "${after_yaml}" fi } diff --git a/infra/gcp/lib_iam.sh b/infra/gcp/lib_iam.sh index 1c547114841..43b5961a20b 100644 --- a/infra/gcp/lib_iam.sh +++ b/infra/gcp/lib_iam.sh @@ -216,8 +216,8 @@ function ensure_secret_role_binding() { # $2: The principal (e.g. "group:k8s-infra-foo@kubernetes.io") # $3: The role name (e.g. "roles/storage.objectAdmin") function ensure_serviceaccount_role_binding() { - if [ ! $# -eq 3 -o -z "$1" -o -z "$2" -o -z "$3" ]; then - echo "ensure_project_role_binding(serviceaccount, principal, role) requires 3 arguments" >&2 + if [ ! $# -eq 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then + echo "${FUNCNAME[0]}(serviceaccount, principal, role) requires 3 arguments" >&2 return 1 fi @@ -225,7 +225,15 @@ function ensure_serviceaccount_role_binding() { local principal="${2}" local role="${3}" - _ensure_resource_role_binding "iam service-accounts" "${serviceaccount}" "${principal}" "${role}" + # When running without `--project foo`, gcloud iam service-accounts... 
+ # - add-iam-policy-binding: expects 'projects/p/serviceAccounts/email' + # - get-iam-policy: rejects the above, expects 'numeric id' or 'email' + # + # So, parse out project, to allow use of 'email' for read/write + local project + project="$(gcloud iam service-accounts describe "${serviceaccount}" --format="value(name)" | cut -d/ -f2)" + + _ensure_resource_role_binding "iam service-accounts" "${serviceaccount}" "${principal}" "${role}" "${project}" } # Ensure that IAM binding has been removed from organization @@ -287,8 +295,8 @@ function ensure_removed_secret_role_binding() { # $2: The principal (e.g. "group:k8s-infra-foo@kubernetes.io") # $3: The role name (e.g. "roles/storage.objectAdmin") function ensure_removed_serviceaccount_role_binding() { - if [ ! $# -eq 3 -o -z "$1" -o -z "$2" -o -z "$3" ]; then - echo "ensure_removed_serviceaccount_role_binding(serviceaccount, principal, role) requires 3 arguments" >&2 + if [ ! $# -eq 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then + echo "${FUNCNAME[0]}(serviceaccount, principal, role) requires 3 arguments" >&2 return 1 fi @@ -296,7 +304,15 @@ function ensure_removed_serviceaccount_role_binding() { local principal="${2}" local role="${3}" - _ensure_removed_resource_role_binding "iam service-accounts" "${serviceaccount}" "${principal}" "${role}" + # When running without `--project foo`, gcloud iam service-accounts... + # - remove-iam-policy-binding: expects 'projects/p/serviceAccounts/email' + # - get-iam-policy: rejects the above, expects 'numeric id' or 'email' + # + # So, parse out project, to allow use of 'email' for read/write + local project + project="$(gcloud iam service-accounts describe "${serviceaccount}" --format="value(name)" | cut -d/ -f2)" + + _ensure_removed_resource_role_binding "iam service-accounts" "${serviceaccount}" "${principal}" "${role}" "${project}" } # Ensure that custom IAM role exists in scope and in sync with definition in file @@ -411,9 +427,10 @@ function _format_iam_policy() { # $2: The id of the resource (e.g. "k8s-infra-foo", "12345") # $3: The principal (e.g. "group:k8s-infra-foo@kubernetes.io") # $4: The role name (e.g. "roles/foo.bar") +# [$5]: (Optional) the id of the project hosting the resource (e.g. "k8s-infra-foo") function _ensure_resource_role_binding() { - if [ ! $# -eq 4 -o -z "$1" -o -z "$2" -o -z "$3" -o -z "$4" ]; then - echo "_ensure_resource_role_binding(resource, id, principal, role) requires 4 arguments" >&2 + if [ $# -lt 4 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ]; then + echo "${FUNCNAME[0]}(resource, id, principal, role, [project]) requires at least 4 arguments" >&2 return 1 fi @@ -421,13 +438,19 @@ function _ensure_resource_role_binding() { local id="${2}" local principal="${3}" local role="${4}" + local project="${5:-""}" + + local flags=() + if [ -n "${project}" ]; then + flags+=(--project "${project}") + fi local before="${TMPDIR}/iam-bind.before.yaml" local after="${TMPDIR}/iam-bind.after.yaml" # intentionally word-split resource, e.g. 
iam service-accounts # shellcheck disable=SC2086 - gcloud ${resource} get-iam-policy "${id}" | _format_iam_policy >"${before}" + gcloud ${resource} get-iam-policy "${id}" "${flags[@]}" | _format_iam_policy >"${before}" # `gcloud add-iam-policy-binding` is idempotent, # but avoid calling if we can, to reduce output noise @@ -441,6 +464,7 @@ function _ensure_resource_role_binding() { ${resource} add-iam-policy-binding "${id}" \ --member "${principal}" \ --role "${role}" \ + "${flags[@]}" \ | _format_iam_policy >"${after}" diff_colorized "${before}" "${after}" @@ -453,9 +477,10 @@ function _ensure_resource_role_binding() { # $2: The id of the resource (e.g. "k8s-infra-foo", "12345") # $3: The principal (e.g. "group:k8s-infra-foo@kubernetes.io") # $4: The role name (e.g. "roles/foo.bar") +# [$5]: (Optional) the id of the project hosting the resource (e.g. "k8s-infra-foo") function _ensure_removed_resource_role_binding() { - if [ ! $# -eq 4 -o -z "$1" -o -z "$2" -o -z "$3" -o -z "$4" ]; then - echo "_ensure_removed_resource_role_binding(resource, id, principal, role) requires 4 arguments" >&2 + if [ $# -lt 4 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ]; then + echo "${FUNCNAME[0]}(resource, id, principal, role, [project]) requires at least 4 arguments" >&2 return 1 fi @@ -463,13 +488,19 @@ function _ensure_removed_resource_role_binding() { local id="${2}" local principal="${3}" local role="${4}" + local project="${5:-""}" + + local flags=() + if [ -n "${project}" ]; then + flags+=(--project "${project}") + fi local before="${TMPDIR}/iam-bind.before.txt" local after="${TMPDIR}/iam-bind.after.txt" # intentionally word-split resource, e.g. iam service-accounts # shellcheck disable=SC2086 - gcloud ${resource} get-iam-policy "${id}" | _format_iam_policy >"${before}" + gcloud ${resource} get-iam-policy "${id}" "${flags[@]}" | _format_iam_policy >"${before}" # `gcloud remove-iam-policy-binding` errors if binding doesn't exist, # so avoid calling if we can, to reduce output noise @@ -483,6 +514,7 @@ function _ensure_removed_resource_role_binding() { ${resource} remove-iam-policy-binding "${id}" \ --member "${principal}" \ --role "${role}" \ + "${flags[@]}" \ | _format_iam_policy >"${after}" diff_colorized "${before}" "${after}" diff --git a/infra/gcp/lib_util.sh b/infra/gcp/lib_util.sh index 5014a5f0e63..08639f43458 100644 --- a/infra/gcp/lib_util.sh +++ b/infra/gcp/lib_util.sh @@ -36,9 +36,8 @@ function _nocolor() { # $1: The color code (numeric, see `tput setf`) # $2+: The things to print function color() { - _color "$1" - shift - echo "$*$(_nocolor)" + _color "$1"; shift + echo "${@}" "$(_nocolor)" } # ensure_gnu_sed
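
A usage sketch of the new ensure_workload_identity_serviceaccount helper, using the illustrative placeholder names from its own comment block (k8s-infra-foo, k8s-infra-doer, roles/foo.bar, k8s-infra-foo-clusters, and my-app-ns are not real infrastructure):

    # hypothetical invocation; all names are illustrative placeholders
    ensure_workload_identity_serviceaccount \
        "k8s-infra-foo" "k8s-infra-doer" "roles/foo.bar" \
        "k8s-infra-foo-clusters" "my-app-ns"

    # ...which is roughly equivalent to (color/log output omitted):
    ensure_service_account "k8s-infra-foo" "k8s-infra-doer" "k8s-infra-doer"
    ensure_project_role_binding "k8s-infra-foo" \
        "serviceAccount:k8s-infra-doer@k8s-infra-foo.iam.gserviceaccount.com" \
        "roles/foo.bar"
    empower_gke_for_serviceaccount "k8s-infra-foo-clusters" "my-app-ns" \
        "k8s-infra-doer@k8s-infra-foo.iam.gserviceaccount.com"

The final call binds roles/iam.workloadIdentityUser on the GCP service account to the member serviceAccount:k8s-infra-foo-clusters.svc.id.goog[my-app-ns/k8s-infra-doer]. The Kubernetes-side half of Workload Identity (annotating the KSA, typically with iam.gke.io/gcp-service-account) is handled outside these scripts.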
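
For reference, the zone-name derivation in ensure_dns (name="${zone//./-}") reproduces the managed-zone names the old script hard-coded; for the k8s.io domain, the loop issues the same two calls as before:

    ensure_dns_zone "kubernetes-public" "k8s-io" "k8s.io"
    ensure_dns_zone "kubernetes-public" "canary-k8s-io" "canary.k8s.io"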
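
Also worth noting for review: ensure_gcs_role_binding and ensure_removed_gcs_role_binding now take the short storage role name and prepend "roles/storage." themselves, which is why ensure_terraform_state_buckets passes "admin" where the old code passed "roles/storage.admin", and why ensure_private_gcs_bucket passes "objectViewer" and "legacyBucketOwner" to the removal helper. A sketch of the effect (the binding below is illustrative; existing members are preserved by the group_by/unique merge):

    # ensure_gcs_role_binding "gs://k8s-infra-tf-aws" \
    #     "group:k8s-infra-gcp-org-admins@kubernetes.io" "admin"
    # merges this binding into the bucket policy before `gsutil iam set`:
    #   { "role": "roles/storage.admin",
    #     "members": ["group:k8s-infra-gcp-org-admins@kubernetes.io", ...existing members...] }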