Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Component] Add quota check in diagnose me component. #3062

Merged
merged 6 commits into from
Feb 13, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
282 changes: 172 additions & 110 deletions components/diagnostics/diagnose_me/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import NamedTuple
from typing import Any, List, NamedTuple, Optional


def run_diagnose_me(
bucket: str, execution_mode: str, project_id:str , target_apis: str
bucket: str,
execution_mode: str,
project_id: str,
target_apis: str,
quota_check: Optional[List[Any]] = None,
) -> NamedTuple('Outputs', [('bucket', str), ('project_id', str)]):
""" Performs environment verification specific to this pipeline.
""" Performs environment verification specific to this pipeline.

args:
bucket:
Expand All @@ -29,134 +33,192 @@ def run_diagnose_me(
If set to HALT_ON_ERROR will case any error to raise an exception.
This is intended to stop the data processing of a pipeline. Can set
to False to only report Errors/Warnings.
project_id:
GCP project ID which is assumed to be accessible from the pod.
numerology marked this conversation as resolved.
Show resolved Hide resolved
target_apis:
String consisting of a comma separated list of apis to be verified.
quota_check:
List of entries describing how much quota is required. Each entry
has three fields: region, metric and quota_needed. All
string-typed.
Raises:
RuntimeError: If configuration is not setup properly and
HALT_ON_ERROR flag is set.
"""

# Installing pip3 and kfp, since the base image 'google/cloud-sdk:279.0.0'
# does not come with pip3 pre-installed.
import subprocess
subprocess.run(
['curl', 'https://bootstrap.pypa.io/get-pip.py', '-o', 'get-pip.py'],
capture_output=True)
subprocess.run(['apt-get', 'install', 'python3-distutils', '--yes'],
capture_output=True)
subprocess.run(['python3', 'get-pip.py'], capture_output=True)
subprocess.run(['python3', '-m', 'pip', 'install', 'kfp>=0.1.31', '--quiet'],
capture_output=True)

import sys
from typing import List, Text
import os
from kfp.cli.diagnose_me import gcp
from kfp.cli.diagnose_me import utility
import json as json_library

config_error_observed = False

# Get the project ID
# from project configuration
project_config = gcp.get_gcp_configuration(
gcp.Commands.GET_GCLOUD_DEFAULT, human_readable=False)
if not project_config.has_error:
auth_project_id = project_config.parsed_output['core']['project']
print('GCP credentials are configured with access to project: %s ...\n' % (project_id))
print('Following account(s) are active under this pipeline:\n')
subprocess.run(['gcloud', 'auth', 'list','--format','json'])
print('\n')
# Installing pip3 and kfp, since the base image 'google/cloud-sdk:279.0.0'
# does not come with pip3 pre-installed.
import subprocess
subprocess.run([
'curl', 'https://bootstrap.pypa.io/get-pip.py', '-o', 'get-pip.py'
],
capture_output=True)
subprocess.run(['apt-get', 'install', 'python3-distutils', '--yes'],
capture_output=True)
subprocess.run(['python3', 'get-pip.py'], capture_output=True)
subprocess.run(['python3', '-m', 'pip', 'install', 'kfp>=0.1.31', '--quiet'],
capture_output=True)

import sys
from kfp.cli.diagnose_me import gcp

config_error_observed = False

quota_list = gcp.get_gcp_configuration(
gcp.Commands.GET_QUOTAS, human_readable=False
)
quota_dict = {} # Mapping from region to dict[metric, available]
numerology marked this conversation as resolved.
Show resolved Hide resolved
for region_quota in quota_list:
quota_dict[region_quota['name']] = {}
for quota in region_quota['quotas']:
quota_dict[region_quota['name']][quota['metric']
] = quota['limit'] - quota['usage']

quota_check = [] or quota_check
for single_check in quota_check:
if single_check['region'] not in quota_dict:
print(
'Regional quota for %s does not exist in current project' %
(single_check['region'])
)
config_error_observed = True
else:
if quota_dict[single_check['region']][single_check['metric']
] < single_check['quota_needed']:
print(
'Project configuration is not accessible with error %s\n' %
(project_config.stderr),
file=sys.stderr)
'Insufficient quota observed for %s at %s: %s is needed but only %s is available'
% (
single_check['metric'], single_check['region'],
str(single_check['quota_needed']
), str(quota_dict[single_check['region']][single_check['metric']])
)
)
config_error_observed = True

if auth_project_id != project_id:
print(
'User provided project ID %s does not match the configuration %s\n' %
(project_id,auth_project_id), file=sys.stderr)
config_error_observed = True
# Get the project ID
# from project configuration
project_config = gcp.get_gcp_configuration(
gcp.Commands.GET_GCLOUD_DEFAULT, human_readable=False
)
if not project_config.has_error:
auth_project_id = project_config.parsed_output['core']['project']
print(
'GCP credentials are configured with access to project: %s ...\n' %
(project_id)
)
print('Following account(s) are active under this pipeline:\n')
subprocess.run(['gcloud', 'auth', 'list', '--format', 'json'])
print('\n')
else:
print(
'Project configuration is not accessible with error %s\n' %
(project_config.stderr),
file=sys.stderr
)
config_error_observed = True

# Get project buckets
get_project_bucket_results = gcp.get_gcp_configuration(
gcp.Commands.GET_STORAGE_BUCKETS, human_readable=False)
if auth_project_id != project_id:
print(
'User provided project ID %s does not match the configuration %s\n' %
(project_id, auth_project_id),
file=sys.stderr
)
config_error_observed = True

# Get project buckets
get_project_bucket_results = gcp.get_gcp_configuration(
gcp.Commands.GET_STORAGE_BUCKETS, human_readable=False
)

if get_project_bucket_results.has_error:
print(
'could not retrieve project buckets with error: %s' %
(get_project_bucket_results.stderr),
file=sys.stderr
)
config_error_observed = True

if get_project_bucket_results.has_error:
print(
'could not retrieve project buckets with error: %s' %
(get_project_bucket_results.stderr),
file=sys.stderr)
config_error_observed = True
# Get the root of the user provided bucket i.e. gs://root.
bucket_root = '/'.join(bucket.split('/')[0:3])

# Get the root of the user provided bucket i.e. gs://root.
bucket_root = '/'.join(bucket.split('/')[0:3])
print(
'Checking to see if the provided GCS bucket\n %s\nis accessible ...\n' %
(bucket)
)

if bucket_root in get_project_bucket_results.json_output:
print(
'Checking to see if the provided GCS bucket\n %s\nis accessible ...\n' %
(bucket))
'Provided bucket \n %s\nis accessible within the project\n %s\n' %
(bucket, project_id)
)

if bucket_root in get_project_bucket_results.json_output:
print('Provided bucket \n %s\nis accessible within the project\n %s\n' %
(bucket, project_id))

else:
print('Could not find the bucket %s in project %s' % (bucket, project_id) +
'Please verify that you have provided the correct GCS bucket name.\n' +
'Only the following buckets are visible in this project:\n%s' %
(get_project_bucket_results.parsed_output),
file=sys.stderr)
config_error_observed = True

# Verify APIs that are required are enabled
api_config_results = gcp.get_gcp_configuration(gcp.Commands.GET_APIS)
else:
print(
'Could not find the bucket %s in project %s' % (bucket, project_id) +
'Please verify that you have provided the correct GCS bucket name.\n' +
'Only the following buckets are visible in this project:\n%s' %
(get_project_bucket_results.parsed_output),
file=sys.stderr
)
config_error_observed = True

api_status = {}
# Verify APIs that are required are enabled
api_config_results = gcp.get_gcp_configuration(gcp.Commands.GET_APIS)

if api_config_results.has_error:
print('could not retrieve API status with error: %s' %
(api_config_results.stderr),file=sys.stderr)
config_error_observed = True
api_status = {}

print('Checking APIs status ...')
for item in api_config_results.parsed_output:
api_status[item['config']['name']] = item['state']
# printing the results in stdout for logging purposes
print('%s %s' % (item['config']['name'], item['state']))

# Check if target apis are enabled
api_check_results = True
for api in target_apis.replace(' ', '').split(','):
if 'ENABLED' != api_status.get(api, 'DISABLED'):
api_check_results = False
print(
'API \"%s\" is not accessible or not enabled. To enable this api go to ' % (api) +
'https://console.cloud.google.com/apis/library/%s?project=%s' %
(api, project_id),file=sys.stderr)
config_error_observed = True

if 'HALT_ON_ERROR' in execution_mode and config_error_observed:
raise RuntimeError('There was an error in your environment configuration.\n'+
'Note that resolving such issues generally require a deep knowledge of Kubernetes.\n'+
'\n'+
'We highly recommend that you recreate the cluster and check "Allow access ..." \n'+
'checkbox during cluster creation to have the cluster configured automatically.\n'+
'For more information on this and other troubleshooting instructions refer to\n'+
'our troubleshooting guide.\n'+
'\n'+
'If you have intentionally modified the cluster configuration, you may\n'+
'bypass this error by removing the execution_mode HALT_ON_ERROR flag.\n')

return (project_id, bucket)
if api_config_results.has_error:
print(
'could not retrieve API status with error: %s' %
(api_config_results.stderr),
file=sys.stderr
)
config_error_observed = True

print('Checking APIs status ...')
for item in api_config_results.parsed_output:
api_status[item['config']['name']] = item['state']
# printing the results in stdout for logging purposes
print('%s %s' % (item['config']['name'], item['state']))

# Check if target apis are enabled
api_check_results = True
for api in target_apis.replace(' ', '').split(','):
if 'ENABLED' != api_status.get(api, 'DISABLED'):
api_check_results = False
print(
'API \"%s\" is not accessible or not enabled. To enable this api go to '
% (api) +
'https://console.cloud.google.com/apis/library/%s?project=%s' %
(api, project_id),
file=sys.stderr
)
config_error_observed = True

if 'HALT_ON_ERROR' in execution_mode and config_error_observed:
raise RuntimeError(
'There was an error in your environment configuration.\n' +
'Note that resolving such issues generally require a deep knowledge of Kubernetes.\n'
+ '\n' +
'We highly recommend that you recreate the cluster and check "Allow access ..." \n'
+
'checkbox during cluster creation to have the cluster configured automatically.\n'
+
'For more information on this and other troubleshooting instructions refer to\n'
+ 'our troubleshooting guide.\n' + '\n' +
'If you have intentionally modified the cluster configuration, you may\n'
+
'bypass this error by removing the execution_mode HALT_ON_ERROR flag.\n'
)

return (project_id, bucket)


if __name__ == '__main__':
import kfp.components as comp
import kfp.components as comp

comp.func_to_container_op(
run_diagnose_me,
base_image='google/cloud-sdk:279.0.0',
output_component_file='component.yaml',
)
comp.func_to_container_op(
run_diagnose_me,
base_image='google/cloud-sdk:279.0.0',
output_component_file='component.yaml',
)
Loading