From ed3df5b5c56f5e9bebc1d866f83225343f252b6c Mon Sep 17 00:00:00 2001 From: Aaron Crickenberger Date: Wed, 5 May 2021 03:24:22 -0400 Subject: [PATCH 1/9] infra/gcp/gcs: use gsutil iam set instead of ch `gsutil ch` specifically doesn't support convenience bindings (e.g. a role binding with "projectEditors:project-foo" as a member that will expand to all members with "roles/editor" on project-foo) as it goes against the principle of least privilege Annoyingly, this is the case not just for new bindings, but also attempts to remove existing bindings. Instead, we must follow a read-modify-write pattern with `gsutil iam get` and `gsutil iam set` This was really only needed for ensure_removed_gcs_binding but for parity I redid ensure_gcs_binding to follow the same pattern. If this turns out to be wildly slower than before, we should revert for the common case of adding new bindings. --- infra/gcp/lib_gcs.sh | 71 ++++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 19 deletions(-) diff --git a/infra/gcp/lib_gcs.sh b/infra/gcp/lib_gcs.sh index db8337e545e..ab5ee8439a4 100644 --- a/infra/gcp/lib_gcs.sh +++ b/infra/gcp/lib_gcs.sh @@ -68,7 +68,7 @@ function ensure_private_gcs_bucket() { local bucket="$2" _ensure_gcs_bucket "${project}" "${bucket}" - gsutil iam ch -d allUsers "${bucket}" + ensure_removed_gcs_role_binding "${bucket}" "allUsers" "objectViewer" } # Sets the web policy on the bucket, including a default index.html page @@ -181,20 +181,38 @@ ensure_gcs_role_binding() { local principal="${2}" local role="${3}" - local before="${TMPDIR}/gcs-bind.before.yaml" - local after="${TMPDIR}/gcs-bind.after.yaml" + # use json for gsutil iam set, yaml for easier diffing + local before_json="${TMPDIR}/gsutil-iam-get.before.json" + local after_json="${TMPDIR}/gsutil-iam-get.after.json" + local before_yaml="${TMPDIR}/gcs-role-binding.before.yaml" + local after_yaml="${TMPDIR}/gcs-role-binding.after.yaml" - gsutil iam get "${bucket}" | yq -y | 
_format_iam_policy > "${before}" + gsutil iam get "${bucket}" > "${before_json}" + <"${before_json}" yq -y | _format_iam_policy > "${before_yaml}" - # `gsutil iam ch` is idempotent, but avoid calling if we can, to reduce output noise - if ! <"${before}" yq --exit-status \ + # Avoid calling `gsutil iam set` if we can, to reduce output noise + if ! <"${before_yaml}" yq --exit-status \ ".[] | select(contains({role: \"${role}\", member: \"${principal}\"}))" \ >/dev/null; then - gsutil iam ch "${principal}:${role}" "${bucket}" - gsutil iam get "${bucket}" | yq -y | _format_iam_policy > "${after}" - - diff_colorized "${before}" "${after}" + # add the binding, then merge with existing bindings + <"${before_json}" yq --arg role "${role}" --arg principal "${principal}" \ + '.bindings |= ( + . + [{ + members: [$principal], + role: ("roles/storage." + $role) + }] + | group_by(.role) + | map({ + members: map(.members) | flatten | unique, + role: .[0].role + }) + )' > "${after_json}" + + gsutil iam set "${after_json}" "${bucket}" + <"${after_json}" yq -y | _format_iam_policy > "${after_yaml}" + + diff_colorized "${before_yaml}" "${after_yaml}" fi } @@ -213,20 +231,35 @@ ensure_removed_gcs_role_binding() { local principal="${2}" local role="${3}" - local before="${TMPDIR}/gcs-bind.before.yaml" - local after="${TMPDIR}/gcs-bind.after.yaml" + # use json for gsutil iam set, yaml for easier diffing + local before_json="${TMPDIR}/gsutil-iam-get.before.json" + local after_json="${TMPDIR}/gsutil-iam-get.after.json" + local before_yaml="${TMPDIR}/gcs-role-binding.before.yaml" + local after_yaml="${TMPDIR}/gcs-role-binding.after.yaml" - gsutil iam get "${bucket}" | yq -y | _format_iam_policy > "${before}" + gsutil iam get "${bucket}" > "${before_json}" + <"${before_json}" yq -y | _format_iam_policy > "${before_yaml}" - # `gsutil iam ch` is idempotent, but avoid calling if we can, to reduce output noise - if <"${before}" yq --exit-status \ + # Avoid calling `gsutil iam set` if we can, to 
reduce output noise + if <"${before_yaml}" yq --exit-status \ ".[] | select(contains({role: \"${role}\", member: \"${principal}\"}))" \ >/dev/null; then - gsutil iam ch -d "${principal}:${role}" "${bucket}" - gsutil iam get "${bucket}" | yq -y | _format_iam_policy > "${after}" - - diff_colorized "${before}" "${after}" + # remove member from role if it exists; gcs deletes bindings with no + # members, so we don't need to bother pruning them here + <"${before_json}" yq --arg role "${role}" --arg principal "${principal}" \ + '.bindings |= map( + if .role == ("roles/storage." + $role) then + .members -= [$principal] + else + . + end + )' > "${after_json}" + + gsutil iam set "${after_json}" "${bucket}" + <"${after_json}" yq -y | _format_iam_policy > "${after_yaml}" + + diff_colorized "${before_yaml}" "${after_yaml}" fi } From 818e40b7e6adfc1cf0840be50fcaf697915aa106 Mon Sep 17 00:00:00 2001 From: Aaron Crickenberger Date: Wed, 28 Apr 2021 16:26:49 -0400 Subject: [PATCH 2/9] infra/gcp/gcs: revoke projectEditor access to private gcs buckets roles/storage.legacyBucketOwner contains storage.buckets.setIamPolicy, but not storage.objects.get. This means someone with roles/editor on the project could grant themselves access to read bucket contents that they aren't supposed to be able to read. Given that roles/editor has no *.setIamPolicy permissions for other service resources, this seems like a security gap that should be closed. Ideally we would do this in _ensure_gcs_bucket. However, removing this role means removing other (possibly needed) permissions that may be used by GCP service agent service accounts (e.g. 
App Engine, GCR, GCE): - storage.buckets.get - storage.multipartUploads.(abort|create|list|listParts) - storage.objects.(create|delete|list) So until we have time to research what buckets those service agents specifically need access to, we'll leave them alone and constrain this policy to "private" gcs buckets that are currently only used by humans to store terraform state containing potentially sensitive info --- infra/gcp/lib_gcs.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/infra/gcp/lib_gcs.sh b/infra/gcp/lib_gcs.sh index ab5ee8439a4..9c72f42f51c 100644 --- a/infra/gcp/lib_gcs.sh +++ b/infra/gcp/lib_gcs.sh @@ -69,6 +69,26 @@ function ensure_private_gcs_bucket() { _ensure_gcs_bucket "${project}" "${bucket}" ensure_removed_gcs_role_binding "${bucket}" "allUsers" "objectViewer" + # roles/storage.legacyBucketOwner contains storage.buckets.setIamPolicy, + # but not storage.objects.get. This means someone with roles/editor on the + # project could grant themselves access to read bucket contents that they + # aren't supposed to be able to read. + # + # Given that roles/editor has no *.setIamPolicy permissions for other + # service resources, this seems like a security gap that should be closed. + # + # Ideally we would do this in _ensure_gcs_bucket. However, removing this + # role means removing other (possibly needed) permissions that may be used + # by GCP service agent service accounts (e.g. 
App Engine, GCR, GCE): + # - storage.buckets.get + # - storage.multipartUploads.(abort|create|list|listParts) + # - storage.objects.(create|delete|list) + # + # So until we have time to research what buckets those service agents + # specifically need access to, we'll leave them alone and constrain this + # policy to "private" gcs buckets that are currently only used by humans + # to store terraform state containing potentially sensitive info + ensure_removed_gcs_role_binding "${bucket}" "projectEditor:${project}" "legacyBucketOwner" } # Sets the web policy on the bucket, including a default index.html page From 2605f593db053da80ce3f8c556b0f7a28afcd4dc Mon Sep 17 00:00:00 2001 From: Aaron Crickenberger Date: Wed, 5 May 2021 03:21:41 -0400 Subject: [PATCH 3/9] infra/gcp/main: fix typo in gcs storage binding ensure_gcs_role_binding prepends "roles/storage." to roles given to it, just like `gsutil ch` effectively does when its results are viewed through `gsutil iam get` So pass "admin" to end up binding "roles/storage.admin" --- infra/gcp/ensure-main-project.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/gcp/ensure-main-project.sh b/infra/gcp/ensure-main-project.sh index 9859cabe311..721cccdde9b 100755 --- a/infra/gcp/ensure-main-project.sh +++ b/infra/gcp/ensure-main-project.sh @@ -100,7 +100,7 @@ for entry in "${terraform_state_bucket_entries[@]}"; do color 6 "Ensuring '${bucket}' exists as private with owners '${owners}'"; ( ensure_private_gcs_bucket "${PROJECT}" "${bucket}" empower_group_to_admin_gcs_bucket "${owners}" "${bucket}" - ensure_gcs_role_binding "${bucket}" "group:k8s-infra-gcp-org-admins@kubernetes.io" "roles/storage.admin" + ensure_gcs_role_binding "${bucket}" "group:k8s-infra-gcp-org-admins@kubernetes.io" "admin" ) 2>&1 | indent done 2>&1 | indent From 92c08290b07bd91c8a5023aff8087b1b630b066d Mon Sep 17 00:00:00 2001 From: Aaron Crickenberger Date: Mon, 19 Apr 2021 21:33:40 -0400 Subject: [PATCH 4/9] infra/gcp/util: fix 
color Basically full circle back to linusa's original fix to color. The "$*" syntax was hiding args from echo, e.g. color 6 -n "foo" came out as `echo "-n foo"`, which was messing up some of the output from ensure-main-project.sh The original "${@}$(_nocolor)" was failing shellcheck SC2145, which cautions that "${@}$x" can lead to unspecified behavior. Since the intent here is to ensure "$(_nocolor)" is the last thing echo prints, pass it explicitly as another argument: "${@}" "$(_nocolor)" --- infra/gcp/lib_util.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/infra/gcp/lib_util.sh b/infra/gcp/lib_util.sh index 5014a5f0e63..08639f43458 100644 --- a/infra/gcp/lib_util.sh +++ b/infra/gcp/lib_util.sh @@ -36,9 +36,8 @@ function _nocolor() { # $1: The color code (numeric, see `tput setf`) # $2+: The things to print function color() { - _color "$1" - shift - echo "$*$(_nocolor)" + _color "$1"; shift + echo "${@}" "$(_nocolor)" } # ensure_gnu_sed From bd0dee0fb9e2abb31a8a5fb03cb9df833d8835fb Mon Sep 17 00:00:00 2001 From: Aaron Crickenberger Date: Wed, 5 May 2021 11:27:11 -0400 Subject: [PATCH 5/9] infra/gcp/iam: use --project for serviceaccount bindings Either ensure_serviceaccount_role_binding never worked, or something has changed recently. For whatever reason, when running without `--project foo`, it is impossible to use a consistent resource identifier for subcommands of `gcloud iam service-accounts`: - add-iam-policy-binding: expects 'projects/p/serviceAccounts/email' - get-iam-policy: rejects the above, expects 'numeric id' or 'email' Apparently, for add-iam-policy-binding, the project hosting the service-account is _not_ parsed out of its email. 
Instead, the project will come from: - the fully specified name `projects/foo/serviceAccounts/email` - `--project foo` if given - `gcloud config get-value core/project` otherwise ref: https://cloud.google.com/sdk/gcloud/reference/iam/service-accounts/add-iam-policy-binding#SERVICE_ACCOUNT So to allow use of a consistent identifier (email) while reading or updating service-account iam-policy, update _ensure_resource_role_binding to optionally accept a project argument. Then parse out and pass the project for service-accounts only (since they are thus far the only resource to require this). Ditto all of the above for remove-iam-policy-binding, _ensure_removed_resource_role_binding. I ran into this because I intentionally leave `core/project` set to a personal project, to ensure I have to be explicit about making changes to shared projects. When I ran ensure-main-project.sh it couldn't find the kubernetes-public service accounts in my personal project when trying to modify workload identity bindings on those service accounts. --- infra/gcp/lib_iam.sh | 56 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/infra/gcp/lib_iam.sh b/infra/gcp/lib_iam.sh index 1c547114841..43b5961a20b 100644 --- a/infra/gcp/lib_iam.sh +++ b/infra/gcp/lib_iam.sh @@ -216,8 +216,8 @@ function ensure_secret_role_binding() { # $2: The principal (e.g. "group:k8s-infra-foo@kubernetes.io") # $3: The role name (e.g. "roles/storage.objectAdmin") function ensure_serviceaccount_role_binding() { - if [ ! $# -eq 3 -o -z "$1" -o -z "$2" -o -z "$3" ]; then - echo "ensure_project_role_binding(serviceaccount, principal, role) requires 3 arguments" >&2 + if [ ! 
$# -eq 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then + echo "${FUNCNAME[0]}(serviceaccount, principal, role) requires 3 arguments" >&2 return 1 fi @@ -225,7 +225,15 @@ function ensure_serviceaccount_role_binding() { local principal="${2}" local role="${3}" - _ensure_resource_role_binding "iam service-accounts" "${serviceaccount}" "${principal}" "${role}" + # When running without `--project foo`, gcloud iam service-accounts... + # - add-iam-policy-binding: expects 'projects/p/serviceAccounts/email' + # - get-iam-policy: rejects the above, expects 'numeric id' or 'email' + # + # So, parse out project, to allow use of 'email' for read/write + local project + project="$(gcloud iam service-accounts describe "${serviceaccount}" --format="value(name)" | cut -d/ -f2)" + + _ensure_resource_role_binding "iam service-accounts" "${serviceaccount}" "${principal}" "${role}" "${project}" } # Ensure that IAM binding has been removed from organization @@ -287,8 +295,8 @@ function ensure_removed_secret_role_binding() { # $2: The principal (e.g. "group:k8s-infra-foo@kubernetes.io") # $3: The role name (e.g. "roles/storage.objectAdmin") function ensure_removed_serviceaccount_role_binding() { - if [ ! $# -eq 3 -o -z "$1" -o -z "$2" -o -z "$3" ]; then - echo "ensure_removed_serviceaccount_role_binding(serviceaccount, principal, role) requires 3 arguments" >&2 + if [ ! $# -eq 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then + echo "${FUNCNAME[0]}(serviceaccount, principal, role) requires 3 arguments" >&2 return 1 fi @@ -296,7 +304,15 @@ function ensure_removed_serviceaccount_role_binding() { local principal="${2}" local role="${3}" - _ensure_removed_resource_role_binding "iam service-accounts" "${serviceaccount}" "${principal}" "${role}" + # When running without `--project foo`, gcloud iam service-accounts... 
+ # - remove-iam-policy-binding: expects 'projects/p/serviceAccounts/email' + # - get-iam-policy: rejects the above, expects 'numeric id' or 'email' + # + # So, parse out project, to allow use of 'email' for read/write + local project + project="$(gcloud iam service-accounts describe "${serviceaccount}" --format="value(name)" | cut -d/ -f2)" + + _ensure_removed_resource_role_binding "iam service-accounts" "${serviceaccount}" "${principal}" "${role}" "${project}" } # Ensure that custom IAM role exists in scope and in sync with definition in file @@ -411,9 +427,10 @@ function _format_iam_policy() { # $2: The id of the resource (e.g. "k8s-infra-foo", "12345") # $3: The principal (e.g. "group:k8s-infra-foo@kubernetes.io") # $4: The role name (e.g. "roles/foo.bar") +# [$5]: (Optional) the id of the project hosting the resource (e.g. "k8s-infra-foo") function _ensure_resource_role_binding() { - if [ ! $# -eq 4 -o -z "$1" -o -z "$2" -o -z "$3" -o -z "$4" ]; then - echo "_ensure_resource_role_binding(resource, id, principal, role) requires 4 arguments" >&2 + if [ $# -lt 4 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ]; then + echo "${FUNCNAME[0]}(resource, id, principal, role, [project]) requires at least 4 arguments" >&2 return 1 fi @@ -421,13 +438,19 @@ function _ensure_resource_role_binding() { local id="${2}" local principal="${3}" local role="${4}" + local project="${5:-""}" + + local flags=() + if [ -n "${project}" ]; then + flags+=(--project "${project}") + fi local before="${TMPDIR}/iam-bind.before.yaml" local after="${TMPDIR}/iam-bind.after.yaml" # intentionally word-split resource, e.g. 
iam service-accounts # shellcheck disable=SC2086 - gcloud ${resource} get-iam-policy "${id}" | _format_iam_policy >"${before}" + gcloud ${resource} get-iam-policy "${id}" "${flags[@]}" | _format_iam_policy >"${before}" # `gcloud add-iam-policy-binding` is idempotent, # but avoid calling if we can, to reduce output noise @@ -441,6 +464,7 @@ function _ensure_resource_role_binding() { ${resource} add-iam-policy-binding "${id}" \ --member "${principal}" \ --role "${role}" \ + "${flags[@]}" \ | _format_iam_policy >"${after}" diff_colorized "${before}" "${after}" @@ -453,9 +477,10 @@ function _ensure_resource_role_binding() { # $2: The id of the resource (e.g. "k8s-infra-foo", "12345") # $3: The principal (e.g. "group:k8s-infra-foo@kubernetes.io") # $4: The role name (e.g. "roles/foo.bar") +# [$5]: (Optional) the id of the project hosting the resource (e.g. "k8s-infra-foo") function _ensure_removed_resource_role_binding() { - if [ ! $# -eq 4 -o -z "$1" -o -z "$2" -o -z "$3" -o -z "$4" ]; then - echo "_ensure_removed_resource_role_binding(resource, id, principal, role) requires 4 arguments" >&2 + if [ $# -lt 4 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ]; then + echo "${FUNCNAME[0]}(resource, id, principal, role, [project]) requires at least 4 arguments" >&2 return 1 fi @@ -463,13 +488,19 @@ function _ensure_removed_resource_role_binding() { local id="${2}" local principal="${3}" local role="${4}" + local project="${5:-""}" + + local flags=() + if [ -n "${project}" ]; then + flags+=(--project "${project}") + fi local before="${TMPDIR}/iam-bind.before.txt" local after="${TMPDIR}/iam-bind.after.txt" # intentionally word-split resource, e.g. 
iam service-accounts # shellcheck disable=SC2086 - gcloud ${resource} get-iam-policy "${id}" | _format_iam_policy >"${before}" + gcloud ${resource} get-iam-policy "${id}" "${flags[@]}" | _format_iam_policy >"${before}" # `gcloud remove-iam-policy-binding` errors if binding doesn't exist, # so avoid calling if we can, to reduce output noise @@ -483,6 +514,7 @@ function _ensure_removed_resource_role_binding() { ${resource} remove-iam-policy-binding "${id}" \ --member "${principal}" \ --role "${role}" \ + "${flags[@]}" \ | _format_iam_policy >"${after}" diff_colorized "${before}" "${after}" From 6ef8a6c5167bc185b7c4e116c36e635e932e53df Mon Sep 17 00:00:00 2001 From: Aaron Crickenberger Date: Tue, 20 Apr 2021 20:05:05 -0400 Subject: [PATCH 6/9] infa/gcp/lib: add empower_gke_for_serviceaccount As an alternative to empower_ksa_to_svcacct that reduces boilerplate on the caller. These functions may better belong in lib_iam, and perhaps a better name is empower_serviceaccount_for_gke, but this is good enough for now. Can revisit whenever we feel like replacing uses of empower_ksa_to_svcacct --- infra/gcp/lib.sh | 61 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/infra/gcp/lib.sh b/infra/gcp/lib.sh index 06b5e92466b..de23afb23ce 100644 --- a/infra/gcp/lib.sh +++ b/infra/gcp/lib.sh @@ -490,6 +490,67 @@ function empower_ksa_to_svcacct() { ensure_serviceaccount_role_binding "${gcp_svcacct}" "serviceAccount:${ksa_scope}" "roles/iam.workloadIdentityUser" } +# Allow GKE clusters in the given GCP project to run workloads using a +# Kubernetes service account in the given namepsace to act as the given +# GCP service account via Workload Identity when the name of the Kubernetes +# service account matches the optionally provided name if given, or the +# name of the GCP service account. +# +# ref: https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity +# +# $1: The GCP project that hosts the GKE clusters (e.g. 
k8s-infra-foo-clusters) +# $2: The K8s namespace that hosts the Kubernetes service account (e.g. my-app-ns) +# $3: The GCP service account to be bound (e.g. k8s-infra-doer@k8s-infra-foo.iam.gserviceaccount.com) +# [$4]: Optional: The Kubernetes service account name (e.g. my-app-doer; default: k8s-infra-doer) +# +# e.g. the above allows pods running as my-app-ns/my-app-doer in clusters in +# k8s-infra-foo-clusters to act as k8s-infra-doer@k8s-infra-foo.iam.gserviceaccount.com +function empower_gke_for_serviceaccount() { + if [ $# -lt 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then + echo "${FUNCNAME[0]}(gcp_project, k8s_namespace, gcp_sa_email, [k8s_sa_name]) requires at least 3 arguments" >&2 + return 1 + fi + + local gke_project="$1" + local k8s_namespace="$2" + local gcp_sa_email="${3}" + local k8s_sa_name="${4:-""}" + if [ -z "${k8s_sa_name}" ]; then + k8s_sa_name="$(echo "${gcp_sa_email}" | cut -d@ -f1)" + fi + + local principal="serviceAccount:${gke_project}.svc.id.goog[${k8s_namespace}/${k8s_sa_name}]" + + ensure_serviceaccount_role_binding "${gcp_sa_email}" "${principal}" "roles/iam.workloadIdentityUser" +} + +# Prevent clusters in the given GCP project from running workloads using a +# Kubernetes service account in the given namespace to act as the given +# GCP service account. aka the opposite of empower_gke_for_serviceaccount +# +# $1: The GCP project that hosts the GKE clusters (e.g. k8s-infra-foo-clusters) +# $2: The K8s namespace that hosts the Kubernetes service account (e.g. my-app-ns) +# $3: The GCP service account to be unbound (e.g. k8s-infra-doer@k8s-infra-foo.iam.gserviceaccount.com) +# [$4]: Optional: The Kubernetes service account name (e.g. 
my-app-doer; default: k8s-infra-doer) +function unempower_gke_for_serviceaccount() { + if [ $# -lt 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then + echo "${FUNCNAME[0]}(gcp_project, k8s_namespace, gcp_sa_email, [k8s_sa_name]) requires at least 3 arguments" >&2 + return 1 + fi + + local gke_project="$1" + local k8s_namespace="$2" + local gcp_sa_email="${3}" + local k8s_sa_name="${4:-""}" + if [ -z "${k8s_sa_name}" ]; then + k8s_sa_name="$(echo "${gcp_sa_email}" | cut -d@ -f1)" + fi + + local principal="serviceAccount:${gke_project}.svc.id.goog[${k8s_namespace}/${k8s_sa_name}]" + + ensure_removed_serviceaccount_role_binding "${gcp_sa_email}" "${principal}" "roles/iam.workloadIdentityUser" +} + # Ensure that a global ip address exists, creating one if needed # $1 The GCP project # $2 The address name (e.g. foo-ingress), IPv6 addresses must have a "-v6" suffix From 939ebc5d9bd70865a3a6238a7ecf88d824d6773b Mon Sep 17 00:00:00 2001 From: Aaron Crickenberger Date: Fri, 23 Apr 2021 15:44:41 -0400 Subject: [PATCH 7/9] infra/gcp/org: redo iam binding for org this was motivated by ensuring that serviceaccounts are bound to their needed roles directly instead of via group membership - refactor blocks into functions - use array of principal:role bindings - add support for binding serviceaccounts to roles, but skipping if the serviceaccount doesn't exist (to avoid dependency cycles) --- infra/gcp/ensure-organization.sh | 131 +++++++++++++++++++++---------- 1 file changed, 89 insertions(+), 42 deletions(-) diff --git a/infra/gcp/ensure-organization.sh b/infra/gcp/ensure-organization.sh index f43e7809565..00d22f17da8 100755 --- a/infra/gcp/ensure-organization.sh +++ b/infra/gcp/ensure-organization.sh @@ -43,50 +43,97 @@ org_roles=( iam.serviceAccountLister ) -old_org_roles=() +removed_org_roles=() -color 6 "Ensuring organization custom roles exist" -( +org_role_bindings=( + # empower k8s-infra-org-admins@ + # NOTE: roles/owner has too many permissions to aggregate into a 
custom role, + # and some services automatically add bindings for roles/owner, e.g. + # https://cloud.google.com/storage/docs/access-control/iam-roles#basic-roles-intrinsic + "group:k8s-infra-gcp-org-admins@kubernetes.io:roles/owner" + "group:k8s-infra-gcp-org-admins@kubernetes.io:$(custom_org_role_name "organization.admin")" + + # empower k8s-infra-prow-oncall@ to use GCP Console to navigate to their projects + "group:k8s-infra-prow-oncall@kubernetes.io:roles/browser" + + # TODO: not sure this is required for GKE Google Group RBAC, is this proxy + # for "let people running apps in aaa use GCP console to navigate" ? + "group:gke-security-groups@kubernetes.io:roles/browser" + + # TODO: what is the purpose of this role? + "group:k8s-infra-gcp-accounting@kubernetes.io:$(custom_org_role_name "CustomRole")" + + # empower k8s-infra-gcp-auditors@ and equivalent service-account + "group:k8s-infra-gcp-auditors@kubernetes.io:$(custom_org_role_name "audit.viewer")" + "serviceAccount:$(svc_acct_email "kubernetes-public" "k8s-infra-gcp-auditor"):$(custom_org_role_name "audit.viewer")" +) + +removed_org_role_bindings=() + +function ensure_org_roles() { for role in "${org_roles[@]}"; do - color 6 "Ensuring organization custom role ${role}" - ensure_custom_org_iam_role_from_file "${role}" "${SCRIPT_DIR}/roles/${role}.yaml" + color 6 "Ensuring organization custom role ${role}" + ensure_custom_org_iam_role_from_file "${role}" "${SCRIPT_DIR}/roles/${role}.yaml" + done +} + +function ensure_removed_org_roles() { + for role in "${removed_org_roles[@]}"; do + color 6 "Ensuring removed organization custom role ${role}" + ensure_removed_custom_org_iam_role "${role}" + done +} + +function ensure_org_role_bindings() { + for binding in "${org_role_bindings[@]}"; do + principal_type="$(echo "${binding}" | cut -d: -f1)" + principal_email="$(echo "${binding}" | cut -d: -f2)" + principal="$(echo "${binding}" | cut -d: -f1-2)" + role="$(echo "${binding}" | cut -d: -f3-)" + + # avoid 
dependency-cycles by skipping, e.g. + # - serviceaccount bound in this script, created by ensure-main-project.sh + # - custom org roles used by ensure-main-project.sh, created by this script + # - so allow this to run to completion before ensure-main-project.sh, and + # accept that bootstrapping means: run this, then that, then this again + case ${principal_type} in + serviceAccount) + if ! gcloud iam service-accounts describe "${principal_email}" >/dev/null 2>&1; then + color 2 "Skipping ${principal} bound to ${role}: principal does not exist" + continue + fi + ;; + group|user) ;; # TODO: need access to Directory API + esac + + color 6 "Ensuring ${principal} bound to ${role}" + ensure_org_role_binding "${principal}" "${role}" 2>&1 | indent done -) 2>&1 | indent - -color 6 "Ensuring organization IAM bindings exist" -( - # k8s-infra-prow-oncall@kubernetes.io should be able to browse org resources - ensure_org_role_binding "group:k8s-infra-prow-oncall@kubernetes.io" "roles/browser" - - # TODO: this already exists, but seems overprivileged for a group that is about - # access to the "aaa" cluster in "kubernetes-public" - ensure_org_role_binding "group:gke-security-groups@kubernetes.io" "roles/browser" - - # k8s-infra-gcp-accounting@ - ensure_org_role_binding "group:k8s-infra-gcp-accounting@kubernetes.io" "$(custom_org_role_name "CustomRole")" - - # k8s-infra-gcp-auditors@ - ensure_org_role_binding "group:k8s-infra-gcp-auditors@kubernetes.io" "$(custom_org_role_name "audit.viewer")" - - # k8s-infra-org-admins@ - # roles/owner has too many permissions to aggregate into a custom role, - # and some services (e.g. 
storage) add bindings based on membership in it - ensure_org_role_binding "group:k8s-infra-gcp-org-admins@kubernetes.io" "roles/owner" - # everything org admins need beyond roles/owner to manage the org - ensure_org_role_binding "group:k8s-infra-gcp-org-admins@kubernetes.io" "$(custom_org_role_name "organization.admin")" -) 2>&1 | indent - -color 6 "Ensuring removed organization IAM bindings do not exist" -( - color 6 "No bindings to remove" -) 2>&1 | indent - -color 6 "Ensuring removed organization custom roles do not exist" -( - for role in "${old_org_roles[@]}"; do - color 6 "Ensuring removed organization custom role ${role}" - ensure_removed_custom_org_iam_role "${role}" +} + +function ensure_removed_org_role_bindings() { + for binding in "${removed_org_role_bindings[@]}"; do + principal="$(echo "${binding}" | cut -d: -f1-2)" + role="$(echo "${binding}" | cut -d: -f3-)" + color 6 "Ensuring ${principal} bound to ${role}" + ensure_removed_org_role_binding "${principal}" "${role}" 2>&1 | indent done -) 2>&1 | indent +} + +function main() { + color 6 "Ensuring organization custom roles exist" + ensure_org_roles 2>&1 | indent + + color 6 "Ensuring organization IAM bindings exist" + ensure_org_role_bindings 2>&1 | indent + + color 6 "Ensuring removed organization IAM bindings do not exist" + ensure_removed_org_role_bindings 2>&1 | indent + + color 6 "Ensuring removed organization custom roles do not exist" + ensure_removed_org_roles 2>&1 | indent + + color 6 "All done!" +} -color 6 "All done!" 
+main From 306fd98947d8148f54ae8200e06c27779ad70c10 Mon Sep 17 00:00:00 2001 From: Aaron Crickenberger Date: Sat, 24 Apr 2021 12:44:21 -0400 Subject: [PATCH 8/9] infra/gcp/main: refactor into functions specifically: - refactored by: - grouping blocks of code into functions - parameterizing those functions on PROJECT - adding ensure_workload_identity_serviceaccount to handle the sundry single-role service accounts intended for use by GKE clusters - prune main_project_services down to just what we directly use - drop compute: direct dep of container - drop oslogin: direct dep of compute - swap bigquery-json for biquery: the former doesn't exist - add comments explaning each of the services we do use - fix a bunch of shellcheck nits - add some TODO(spiffxp) lines for role or binding removal as followup --- infra/gcp/ensure-main-project.sh | 518 ++++++++++++++++++------------- 1 file changed, 306 insertions(+), 212 deletions(-) diff --git a/infra/gcp/ensure-main-project.sh b/infra/gcp/ensure-main-project.sh index 721cccdde9b..5c27e314d87 100755 --- a/infra/gcp/ensure-main-project.sh +++ b/infra/gcp/ensure-main-project.sh @@ -34,249 +34,343 @@ if [ $# != 0 ]; then fi # The GCP project name. -PROJECT="kubernetes-public" +readonly PROJECT="kubernetes-public" # The BigQuery dataset for billing data. -BQ_BILLING_DATASET="kubernetes_public_billing" +readonly BQ_BILLING_DATASET="kubernetes_public_billing" # The BigQuery admins group. -BQ_ADMINS_GROUP="k8s-infra-bigquery-admins@kubernetes.io" +readonly BQ_ADMINS_GROUP="k8s-infra-bigquery-admins@kubernetes.io" # The cluster admins group. -CLUSTER_ADMINS_GROUP="k8s-infra-cluster-admins@kubernetes.io" +readonly CLUSTER_ADMINS_GROUP="k8s-infra-cluster-admins@kubernetes.io" # The accounting group. 
-ACCOUNTING_GROUP="k8s-infra-gcp-accounting@kubernetes.io" +readonly ACCOUNTING_GROUP="k8s-infra-gcp-accounting@kubernetes.io" # The GCS bucket which hold terraform state for clusters -LEGACY_CLUSTER_TERRAFORM_BUCKET="k8s-infra-clusters-terraform" +readonly LEGACY_CLUSTER_TERRAFORM_BUCKET="k8s-infra-clusters-terraform" # The GKE security groups group -CLUSTER_USERS_GROUP="gke-security-groups@kubernetes.io" +readonly CLUSTER_USERS_GROUP="gke-security-groups@kubernetes.io" # The DNS admins group. -DNS_GROUP="k8s-infra-dns-admins@kubernetes.io" +readonly DNS_GROUP="k8s-infra-dns-admins@kubernetes.io" -color 6 "Ensuring project exists: ${PROJECT}" -ensure_project "${PROJECT}" - -# Enable APIs we know we need -apis=( - bigquery-json.googleapis.com - compute.googleapis.com - container.googleapis.com - dns.googleapis.com - logging.googleapis.com - monitoring.googleapis.com - oslogin.googleapis.com - secretmanager.googleapis.com - storage-component.googleapis.com -) -ensure_only_services "${PROJECT}" "${apis[@]}" - -# buckets to hold terraform state +# GCS buckets to hold terraform state +# # - since we are using uniform bucket level access (ubla), each bucket should # represent a logical group of access, with org admins given storage.admin # for break-glass purposes # - the legacy bucket (k8s-infra-clusters-terraform) assumed the same set of -# users should have access to all gke clusters +# users should have access to all gke clusters (~all terraform-based infra) # - new bucket schema is "k8s-infra-tf-{folder}[-{suffix}]" where: # - folder: intended GCP folder for GCP projects managed by this terraform, # access level is ~owners of folder # - suffix: some subset of resources contained somewhere underneath folder, # access level is ~editors of those resources # - entry syntax is "bucket_name:owners_group" (: is invalid bucket name char) -terraform_state_bucket_entries=( - "${LEGACY_CLUSTER_TERRAFORM_BUCKET}:${CLUSTER_ADMINS_GROUP}" - 
k8s-infra-tf-aws:k8s-infra-aws-admins@kubernetes.io - k8s-infra-tf-prow-clusters:k8s-infra-prow-oncall@kubernetes.io - k8s-infra-tf-public-clusters:"${CLUSTER_ADMINS_GROUP}" - k8s-infra-tf-sandbox-ii:k8s-infra-ii-coop@kubernetes.io +readonly TERRAFORM_STATE_BUCKET_ENTRIES=( + "${LEGACY_CLUSTER_TERRAFORM_BUCKET}:${CLUSTER_ADMINS_GROUP}" + k8s-infra-tf-aws:k8s-infra-aws-admins@kubernetes.io + k8s-infra-tf-prow-clusters:k8s-infra-prow-oncall@kubernetes.io + k8s-infra-tf-public-clusters:"${CLUSTER_ADMINS_GROUP}" + k8s-infra-tf-sandbox-ii:k8s-infra-ii-coop@kubernetes.io ) -color 6 "Ensuring terraform state buckets exist with correct permissions" -for entry in "${terraform_state_bucket_entries[@]}"; do - bucket="gs://$(echo "${entry}" | cut -d: -f1)" - owners="$(echo "${entry}" | cut -d: -f2-)" - color 6 "Ensuring '${bucket}' exists as private with owners '${owners}'"; ( - ensure_private_gcs_bucket "${PROJECT}" "${bucket}" - empower_group_to_admin_gcs_bucket "${owners}" "${bucket}" - ensure_gcs_role_binding "${bucket}" "group:k8s-infra-gcp-org-admins@kubernetes.io" "admin" - ) 2>&1 | indent -done 2>&1 | indent - -color 6 "Empowering BigQuery admins" -ensure_project_role_binding \ - "${PROJECT}" \ - "group:${BQ_ADMINS_GROUP}" \ - "roles/bigquery.admin" - -color 6 "Empowering cluster admins" -# TODO: this can also be a custom role -cluster_admin_roles=( - roles/compute.viewer - roles/container.admin - roles/compute.loadBalancerAdmin - $(custom_org_role_name iam.serviceAccountLister) + +# The services we explicitly want enabled for the main project +# +# NOTE: Expected services include dependencies of these services, which may be +# more than direct dependencies. Do we care to statically encode the +# graph here? 
ensure_only_services dynamically computes the set of +# expected services +readonly MAIN_PROJECT_SERVICES=( + # billing data gets exported to bigquery + bigquery.googleapis.com + # GKE clusters are hosted in this project + container.googleapis.com + # DNS zones are managed in this project + dns.googleapis.com + # We look at logs in this project (e.g. from GKE) + logging.googleapis.com + # We look at monitoring dashboards in this project + monitoring.googleapis.com + # Secrets are hosted in this project + secretmanager.googleapis.com + # GCS buckets are hosted in this project + storage-component.googleapis.com ) -for role in "${cluster_admin_roles[@]}"; do - ensure_project_role_binding "${PROJECT}" "group:${CLUSTER_ADMINS_GROUP}" "${role}" -done -# TODO(spiffxp): remove when bindings for custom project role are gone -ensure_removed_project_role_binding "${PROJECT}" "group:${CLUSTER_ADMINS_GROUP}" "$(custom_project_role_name "${PROJECT}" ServiceAccountLister)" -ensure_removed_custom_project_iam_role "${PROJECT}" "ServiceAccountLister" - - -color 6 "Empowering cluster users" -ensure_project_role_binding \ - "${PROJECT}" \ - "group:${CLUSTER_USERS_GROUP}" \ - "roles/container.clusterViewer" - -color 6 "Empowering GCP accounting" -ensure_project_role_binding \ - "${PROJECT}" \ - "group:${ACCOUNTING_GROUP}" \ - "roles/bigquery.jobUser" - -color 6 "Ensuring the k8s-infra-gcp-auditor serviceaccount exists" -ensure_service_account \ - "${PROJECT}" \ - "k8s-infra-gcp-auditor" \ - "Grants readonly access to org resources" - -color 6 "Empowering k8s-infra-gcp-auditor serviceaccount to be used on trusted build cluster" -empower_ksa_to_svcacct \ - "k8s-infra-prow-build-trusted.svc.id.goog[test-pods/k8s-infra-gcp-auditor]" \ - "${PROJECT}" \ - $(svc_acct_email "${PROJECT}" "k8s-infra-gcp-auditor") -# TODO(spiffxp): delete this binding -empower_ksa_to_svcacct \ - "kubernetes-public.svc.id.goog[test-pods/k8s-infra-gcp-auditor]" \ - "${PROJECT}" \ - $(svc_acct_email "${PROJECT}" 
"k8s-infra-gcp-auditor") - -color 6 "Ensuring the k8s-infra-dns-updater serviceaccount exists" -ensure_service_account \ - "${PROJECT}" \ - "k8s-infra-dns-updater" \ - "k8s-infra dns updater" - -color 6 "Empowering k8s-infra-dns-updater serviceaccount to be used on build cluster" -empower_ksa_to_svcacct \ - "k8s-infra-prow-build-trusted.svc.id.goog[test-pods/k8s-infra-dns-updater]" \ - "${PROJECT}" \ - "$(svc_acct_email "${PROJECT}" "k8s-infra-dns-updater")" - -color 6 "Empowering ${DNS_GROUP}" -color 6 "Empowering BigQuery admins" -ensure_project_role_binding \ - "${PROJECT}" \ - "group:${DNS_GROUP}" \ - "roles/dns.admin" - -# Monitoring -MONITORING_SVCACCT_NAME="$(svc_acct_email "${PROJECT}" \ - "k8s-infra-monitoring-viewer")" - -color 6 "Ensuring the k8s-infra-monitoring-viewer serviceaccount exists" -ensure_service_account \ - "${PROJECT}" \ - "k8s-infra-monitoring-viewer" \ - "k8s-infra monitoring viewer" - -color 6 "Empowering k8s-infra-monitoring-viewer serviceaccount to be used on the 'aaa' cluster inside the 'monitoring' namespace" -empower_ksa_to_svcacct \ - "kubernetes-public.svc.id.goog[monitoring/k8s-infra-monitoring-viewer]" \ - "${PROJECT}" \ - "${MONITORING_SVCACCT_NAME}" - -color 6 "Empowering service account ${MONITORING_SVCACCT_NAME}" -ensure_project_role_binding \ - "${PROJECT}" \ - "serviceAccount:${MONITORING_SVCACCT_NAME}" \ - "roles/monitoring.viewer" - -# Kubernetes External Secrets -color 6 -n "Ensure the kubernetes-external-secrets serviceaccount exists" -ensure_service_account \ - "${PROJECT}" \ - "kubernetes-external-secrets" \ - "Kubernetes External Secrets Service Account" - -color 6 -n "Empowering kubernetes-external-secrets serviceaccount to be used on aaa cluster" -empower_ksa_to_svcacct \ - "kubernetes-public.svc.id.goog[kubernetes-external-secrets/kubernetes-external-secrets]" \ - "${PROJECT}" \ - "$(svc_acct_email "${PROJECT}" "kubernetes-external-secrets")" - -color 6 "Empowering service account kubernetes-external-services to 
access payloads of the secrets" -ensure_project_role_binding "${PROJECT}" \ - "serviceAccount:$(svc_acct_email "${PROJECT}" "kubernetes-external-secrets")" \ - roles/secretmanager.secretAccessor - -# Bootstrap DNS zones -ensure_dns_zone "${PROJECT}" "k8s-io" "k8s.io" -ensure_dns_zone "${PROJECT}" "kubernetes-io" "kubernetes.io" -ensure_dns_zone "${PROJECT}" "x-k8s-io" "x-k8s.io" -ensure_dns_zone "${PROJECT}" "k8s-e2e-com" "k8s-e2e.com" -ensure_dns_zone "${PROJECT}" "canary-k8s-io" "canary.k8s.io" -ensure_dns_zone "${PROJECT}" "canary-kubernetes-io" "canary.kubernetes.io" -ensure_dns_zone "${PROJECT}" "canary-x-k8s-io" "canary.x-k8s.io" -ensure_dns_zone "${PROJECT}" "canary-k8s-e2e-com" "canary.k8s-e2e.com" - -color 6 "Creating the BigQuery dataset for billing data" -if ! bq --project_id "${PROJECT}" ls "${BQ_BILLING_DATASET}" >/dev/null 2>&1; then - bq --project_id "${PROJECT}" mk "${BQ_BILLING_DATASET}" -fi -color 6 "Setting BigQuery permissions" +# Create a GCP service account intended for use by GKE cluster workloads +# $1: The GCP project hosting the service account (e.g. k8s-infra-foo) +# $2: The name of the GCP service account (e.g. k8s-infra-doer) +# $3: The IAM role the service account will have on the project (e.g. roles/foo.bar) +# $4: The GCP project hosting GKE cluster(s) that will use this service account (e.g. k8s-infra-foo-clusters) +# $5: The K8s namespace hosting the service account that will use this GCP SA (e.g. my-app-ns) +# +# e.g. 
the above allows pods running as my-app-ns/k8s-infra-doer in clusters in +# k8s-infra-foo-clusters to act as k8s-infra-doer@k8s-infra-foo.iam.gserviceaccount.com, +# using the permissions granted by roles/foo.bar on k8s-infra-foo +function ensure_workload_identity_serviceaccount() { + if [ $# -lt 5 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ] || [ -z "$5" ]; then + echo "${FUNCNAME[0]}(project, name, role, cluster_project, cluster_namespace) requires 5 arguments" >&2 + return 1 + fi + local serviceaccount_project="${1}" + local serviceaccount_name="${2}" + local serviceaccount_email + serviceaccount_email="$(svc_acct_email "${serviceaccount_project}" "${serviceaccount_name}")" + local serviceaccount_description="${serviceaccount_name}" + local serviceaccount_role="${3}" + local cluster_project="${4}" + local cluster_namespace="${5}" + + color 6 "Ensuring serviceaccount ${serviceaccount_email} exists" + ensure_service_account "${serviceaccount_project}" "${serviceaccount_name}" "${serviceaccount_description}" + + color 6 "Empowering ${serviceaccount_email} with role ${serviceaccount_role} in project ${serviceaccount_project}" + ensure_project_role_binding \ + "${serviceaccount_project}" \ + "serviceAccount:${serviceaccount_email}" \ + "${serviceaccount_role}" + + local clusters="clusters in project ${cluster_project}" + local k8s_sa="KSA ${cluster_namespace}/${serviceaccount_name}" + local gcp_sa="GCP SA ${serviceaccount_email}" + + color 6 "Empowering ${clusters} to run workloads as ${k8s_sa} to act as ${gcp_sa}" + empower_gke_for_serviceaccount "${cluster_project}" "${cluster_namespace}" "${serviceaccount_email}" +} + +# +# main functions +# -# Merge existing permissions with the ones we need to exist. 
We merge -# permissions because: -# * The full list is large and has stuff that is inherited listed in it -# * All of our other IAM binding logic calls are additive +function ensure_terraform_state_buckets() { + if [ $# -ne 1 ] || [ -z "$1" ]; then + echo "${FUNCNAME[0]}(gcp_project) requires 1 argument" >&2 + return 1 + fi + local project="${1}" + + for entry in "${TERRAFORM_STATE_BUCKET_ENTRIES[@]}"; do + bucket="gs://$(echo "${entry}" | cut -d: -f1)" + owners="$(echo "${entry}" | cut -d: -f2-)" + color 6 "Ensuring '${bucket}' exists as private with owners '${owners}'"; ( + ensure_private_gcs_bucket "${project}" "${bucket}" + empower_group_to_admin_gcs_bucket "${owners}" "${bucket}" + ensure_gcs_role_binding "${bucket}" "group:k8s-infra-gcp-org-admins@kubernetes.io" "admin" + ) 2>&1 | indent + done +} -CUR=${TMPDIR}/k8s-infra-bq-access.before.json -bq show --format=prettyjson "${PROJECT}":"${BQ_BILLING_DATASET}" > "${CUR}" +function empower_cluster_admins_and_users() { + if [ $# -ne 1 ] || [ -z "$1" ]; then + echo "${FUNCNAME[0]}(gcp_project) requires 1 argument" >&2 + return 1 + fi + local project="${1}" + + color 6 "Empowering cluster admins" + # TODO(spiffxp): make this a custom role + cluster_admin_roles=( + roles/compute.viewer + roles/container.admin + roles/compute.loadBalancerAdmin + "$(custom_org_role_name iam.serviceAccountLister)" + ) + for role in "${cluster_admin_roles[@]}"; do + ensure_project_role_binding "${project}" "group:${CLUSTER_ADMINS_GROUP}" "${role}" + done + # TODO(spiffxp): remove when these bindings have been removed + removed_cluster_admin_roles=( + "$(custom_project_role_name "${project}" ServiceAccountLister)" + ) + for role in "${removed_cluster_admin_roles[@]}"; do + ensure_removed_project_role_binding "${project}" "group:${CLUSTER_ADMINS_GROUP}" "${role}" + done + # TODO(spiffxp): ensure this is removed when the binding(s) using it have been removed + ensure_removed_custom_project_iam_role "${project}" "ServiceAccountLister" + + 
color 6 "Empowering cluster users" + ensure_project_role_binding \ + "${project}" \ + "group:${CLUSTER_USERS_GROUP}" \ + "roles/container.clusterViewer" +} -ENSURE=${TMPDIR}/k8s-infra-bq-access.ensure.json -cat > "${ENSURE}" << __EOF__ -{ - "access": [ - { - "groupByEmail": "${ACCOUNTING_GROUP}", - "role": "READER" - }, - { - "groupByEmail": "${ACCOUNTING_GROUP}", - "role": "roles/bigquery.metadataViewer" - }, + +function ensure_dns() { + if [ $# -ne 1 ] || [ -z "$1" ]; then + echo "${FUNCNAME[0]}(gcp_project) requires 1 argument" >&2 + return 1 + fi + local project="${1}" + + local domains=( + k8s.io + kubernetes.io + x-k8s.io + k8s-e2e.com + ) + + color 6 "Empowering ${DNS_GROUP}" + ensure_project_role_binding \ + "${project}" \ + "group:${DNS_GROUP}" \ + "roles/dns.admin" + + # Bootstrap DNS zones + for domain in "${domains[@]}"; do + # canary domain for each domain, e.g. k8s.io and canary.k8s.io + for prefix in "" "canary."; do + zone="${prefix}${domain}" + name="${zone//./-}" + color 6 "Ensuring DNS zone ${zone}" + ensure_dns_zone "${project}" "${name}" "${zone}" 2>&1 | indent + done + done +} + +function ensure_billing_bigquery() { + if [ $# -ne 1 ] || [ -z "$1" ]; then + echo "${FUNCNAME[0]}(gcp_project) requires 1 argument" >&2 + return 1 + fi + local project="${1}" + + color 6 "Empowering BigQuery admins" + ensure_project_role_binding \ + "${project}" \ + "group:${BQ_ADMINS_GROUP}" \ + "roles/bigquery.admin" + + color 6 "Empowering GCP accounting" + ensure_project_role_binding \ + "${project}" \ + "group:${ACCOUNTING_GROUP}" \ + "roles/bigquery.jobUser" + + color 6 "Creating the BigQuery dataset for billing data" + if ! bq --project_id "${project}" ls "${BQ_BILLING_DATASET}" >/dev/null 2>&1; then + bq --project_id "${project}" mk "${BQ_BILLING_DATASET}" + fi + + color 6 "Setting BigQuery permissions" + # Merge existing permissions with the ones we need to exist. 
We merge + # permissions because: + # * The full list is large and has stuff that is inherited listed in it + # * All of our other IAM binding logic calls are additive + local before="${TMPDIR}/k8s-infra-bq-access.before.json" + local ensure="${TMPDIR}/k8s-infra-bq-access.ensure.json" + local final="${TMPDIR}/k8s-infra-bq-access.final.json" + + bq show --format=prettyjson "${project}":"${BQ_BILLING_DATASET}" > "${before}" + + cat > "${ensure}" < "${final}" + + bq update --source "${final}" "${project}":"${BQ_BILLING_DATASET}" + + color 4 "To enable billing export, a human must log in to the cloud" + color 4 -n "console. Go to " + color 6 -n "Billing" + color 4 -n "; " + color 6 -n "Billing export" + color 4 " and export to BigQuery" + color 4 -n "in project " + color 6 -n "${project}" + color 4 -n " dataset " + color 6 -n "${BQ_BILLING_DATASET}" + color 4 " ." + echo + color 4 "Press enter to acknowledge" + read -rs } -__EOF__ - -FINAL=${TMPDIR}/k8s-infra-bq-access.final.json -jq -s '.[0].access + .[1].access | { access: . }' "${CUR}" "${ENSURE}" > "${FINAL}" - -bq update --source "${FINAL}" "${PROJECT}":"${BQ_BILLING_DATASET}" - -color 4 "To enable billing export, a human must log in to the cloud" -color 4 -n "console. Go to " -color 6 -n "Billing" -color 4 -n "; " -color 6 -n "Billing export" -color 4 " and export to BigQuery" -color 4 -n "in project " -color 6 -n "${PROJECT}" -color 4 -n " dataset " -color 6 -n "${BQ_BILLING_DATASET}" -color 4 " ." 
-echo -color 4 "Press enter to acknowledge" -read -s - -color 6 "Done" + +function ensure_main_project() { + if [ $# -ne 1 ] || [ -z "$1" ]; then + echo "${FUNCNAME[0]}(gcp_project) requires 1 argument" >&2 + return 1 + fi + local project="${1}" + + color 6 "Ensuring project exists: ${project}" + ensure_project "${project}" 2>&1 | indent + + color 6 "Ensuring main project services enabled for: ${project}" + # TODO: there are many services that appear to have been enabled piecemeal + # and it's not yet clear which ones we actually rely upon, so this is + # not being run with K8S_INFRA_ENSURE_ONLY_SERVICES_WILL_FORCE_DISABLE + ensure_only_services "${project}" "${MAIN_PROJECT_SERVICES[@]}" 2>&1 | indent + + color 6 "Ensuring terraform state buckets exist with correct permissions in: ${project}" + ensure_terraform_state_buckets "${project}" 2>&1 | indent + + color 6 "Empowering cluster users and admins for clusters in: ${project}" + empower_cluster_admins_and_users "${project}" 2>&1 | indent + + color 6 "Ensuring specific workload identity serviceaccounts exist in: ${project}"; ( + local svcacct_args cluster_args + + color 6 "Ensuring GCP Auditor serviceaccount" + # roles/viewer on kubernetes-public is a bootstrap; the true purpose + # is custom role audit.viewer on the kubernetes.io org, but that is + # handled by ensure-organization.sh + svcacct_args=("${project}" "k8s-infra-gcp-auditor" "roles/viewer") + cluster_args=("k8s-infra-prow-build-trusted" "test-pods") + ensure_workload_identity_serviceaccount "${svcacct_args[@]}" "${cluster_args[@]}" 2>&1 | indent + + # TODO(spiffxp): remove once this binding has been deleted + local gcp_auditor_email + gcp_auditor_email=$(svc_acct_email "${project}" "k8s-infra-gcp-auditor") + color 6 "Ensuring removed workload identity on kubernetes-public/test-pods for ${gcp_auditor_email}" + unempower_gke_for_serviceaccount \ + "kubernetes-public" \ + "test-pods" \ + "${gcp_auditor_email}" 2>&1 | indent + + color 6 "Ensuring DNS 
Updater serviceaccount" + svcacct_args=("${project}" "k8s-infra-dns-updater" "roles/dns.admin") + cluster_args=("k8s-infra-prow-build-trusted" "test-pods") + ensure_workload_identity_serviceaccount "${svcacct_args[@]}" "${cluster_args[@]}" 2>&1 | indent + + color 6 "Ensuring Monitoring Viewer serviceaccount" + svcacct_args=("${project}" "k8s-infra-monitoring-viewer" "roles/monitoring.viewer") + cluster_args=("${project}" "monitoring") + ensure_workload_identity_serviceaccount "${svcacct_args[@]}" "${cluster_args[@]}" 2>&1 | indent + + color 6 "Ensuring Kubernetes External Secrets serviceaccount" + svcacct_args=("${project}" "kubernetes-external-secrets" "roles/secretmanager.secretAccessor") + cluster_args=("${project}" "kubernetes-external-secrets") + ensure_workload_identity_serviceaccount "${svcacct_args[@]}" "${cluster_args[@]}" 2>&1 | indent + ) 2>&1 | indent + + color 6 "Ensuring DNS is configured in: ${project}" + ensure_dns "${project}" 2>&1 | indent + + color 6 "Ensuring bigquery configured for billing and access by appropriate groups in: ${project}" + ensure_billing_bigquery "${project}" 2>&1 | indent + + color 6 "Done" +} + +ensure_main_project "${PROJECT}" From 4f628cf7197618271be67616e85242051bc339e0 Mon Sep 17 00:00:00 2001 From: Aaron Crickenberger Date: Wed, 5 May 2021 14:43:02 -0400 Subject: [PATCH 9/9] clusters/aaa: move some iam bindings to infra/gcp `terraform apply` and `./infra/gcp/ensure-main-project.sh` were getting in a fight over authoritative IAM bindings for the kubernetes-public project IMO terraform shouldn't be setting authoritative bindings for a project unless it's also managing the project. Whereas in this case, the module in question is just for managing a GKE cluster within the project. The infra/gcp/ensure-main-project.sh script is the source of truth about the configuration of the kubernetes-public project. 
I opted to move the bindings over to ensure-main-project.sh instead of updating the terraform module to non-authoritatively add IAM members, since they were related to a group that is managed outside of terraform vs. service accounts that were created by terraform. As part of this commit I preemptively ran `terraform state rm` for the two resources that were removed, so that terraform won't try to destroy them --- .../projects/kubernetes-public/aaa/12-iam.tf | 26 ------------------- infra/gcp/ensure-main-project.sh | 12 ++++++--- 2 files changed, 8 insertions(+), 30 deletions(-) delete mode 100644 infra/gcp/clusters/projects/kubernetes-public/aaa/12-iam.tf diff --git a/infra/gcp/clusters/projects/kubernetes-public/aaa/12-iam.tf b/infra/gcp/clusters/projects/kubernetes-public/aaa/12-iam.tf deleted file mode 100644 index 97d7ff3259a..00000000000 --- a/infra/gcp/clusters/projects/kubernetes-public/aaa/12-iam.tf +++ /dev/null @@ -1,26 +0,0 @@ -/* -Ensure google groups defined in 'members' have read-only access to -all monitoring data and configurations. -Detailed list of permissions: https://cloud.google.com/monitoring/access-control#roles -*/ -resource "google_project_iam_binding" "readonlymonitoringbinding" { - project = data.google_project.project.id - role = "roles/monitoring.viewer" - - members = [ - "group:gke-security-groups@kubernetes.io", - ] -} - -/* -Ensure google groups defined in 'members' have read-only access to logs. 
-Detailed list of permissions: https://cloud.google.com/logging/docs/access-control#permissions_and_roles -*/ -resource "google_project_iam_binding" "readonlyloggingbinding" { - project = data.google_project.project.id - role = "roles/logging.privateLogViewer" - - members = [ - "group:gke-security-groups@kubernetes.io", - ] -} diff --git a/infra/gcp/ensure-main-project.sh b/infra/gcp/ensure-main-project.sh index 5c27e314d87..66cfab4abac 100755 --- a/infra/gcp/ensure-main-project.sh +++ b/infra/gcp/ensure-main-project.sh @@ -193,10 +193,14 @@ function empower_cluster_admins_and_users() { ensure_removed_custom_project_iam_role "${project}" "ServiceAccountLister" color 6 "Empowering cluster users" - ensure_project_role_binding \ - "${project}" \ - "group:${CLUSTER_USERS_GROUP}" \ - "roles/container.clusterViewer" + cluster_user_roles=( + roles/container.clusterViewer + roles/logging.privateLogViewer + roles/monitoring.viewer + ) + for role in "${cluster_user_roles[@]}"; do + ensure_project_role_binding "${project}" "group:${CLUSTER_USERS_GROUP}" "${role}" + done }