[Databricks E2E] Refactor code and log messages to stderr #962

Merged

Changes from 3 commits
1 change: 1 addition & 0 deletions e2e_samples/parking_sensors/.devcontainer/Dockerfile
@@ -22,6 +22,7 @@ RUN az config set extension.use_dynamic_install=yes_without_prompt
RUN az config set extension.dynamic_install_allow_preview=true
RUN az extension add --name application-insights
RUN az extension add --name azure-devops
RUN az bicep upgrade

# Install jq & makepasswd for some frequently used utilities
RUN apt-get update \
6 changes: 3 additions & 3 deletions e2e_samples/parking_sensors/deploy.sh
@@ -69,11 +69,11 @@ DEV_DATAFACTORY_NAME=$DEV_DATAFACTORY_NAME \

####

print_style "DEPLOYMENT SUCCESSFUL
log "DEPLOYMENT SUCCESSFUL
Details of the deployment can be found in local .env.* files.\n\n" "success"

print_style "IMPORTANT:
log "IMPORTANT:
This script has updated your local Azure Pipeline YAML definitions to point to your Github repo.
ACTION REQUIRED: Commit and push up these changes to your Github repo before proceeding.\n\n" "warning"

echo "See README > Setup and Deployment for more details and next steps."
log "See README > Setup and Deployment for more details and next steps."
39 changes: 20 additions & 19 deletions e2e_samples/parking_sensors/scripts/clean_up.sh
@@ -22,6 +22,7 @@ set -o nounset
# set -o xtrace # For debugging

. ./scripts/common.sh
. ./scripts/init_environment.sh
. ./scripts/verify_prerequisites.sh

###################
@@ -35,77 +36,77 @@ delete_all(){
local prefix=$1
local DEPLOYMENT_ID=${2:-}

echo "!! WARNING: !!"
echo "THIS SCRIPT WILL DELETE RESOURCES PREFIXED WITH $prefix AND HAVING DEPLOYMENT_ID $DEPLOYMENT_ID!!"
log "!! WARNING: !!" "danger"
log "THIS SCRIPT WILL DELETE RESOURCES PREFIXED WITH $prefix AND HAVING DEPLOYMENT_ID $DEPLOYMENT_ID!!" "danger"

printf "\nDEVOPS PIPELINES:\n"
log "\nDEVOPS PIPELINES:\n"
az pipelines list -o tsv --only-show-errors --query "[?contains(name,'$prefix')].name"

printf "\nDEVOPS VARIABLE GROUPS:\n"
log "\nDEVOPS VARIABLE GROUPS:\n"
az pipelines variable-group list -o tsv --only-show-errors --query "[?contains(name, '$prefix')].name"

printf "\nDEVOPS SERVICE CONNECTIONS:\n"
log "\nDEVOPS SERVICE CONNECTIONS:\n"
az devops service-endpoint list -o tsv --only-show-errors --query "[?contains(name, '$prefix')].name"

printf "\nENTRA SERVICE PRINCIPALS:\n"
log "\nENTRA SERVICE PRINCIPALS:\n"
if [[ -z $DEPLOYMENT_ID ]]
then
az ad sp list -o tsv --show-mine --query "[?contains(appDisplayName,'$prefix')].displayName"
else
az ad sp list -o tsv --show-mine --query "[?contains(appDisplayName,'$prefix') && contains(appDisplayName,'$DEPLOYMENT_ID')].displayName"
fi

printf "\nRESOURCE GROUPS:\n"
log "\nRESOURCE GROUPS:\n"
if [[ -z $DEPLOYMENT_ID ]]
then
az group list -o tsv --query "[?contains(name,'$prefix') && ! contains(name,'dbw')].name"
else
az group list -o tsv --query "[?contains(name,'$prefix-$DEPLOYMENT_ID') && ! contains(name,'dbw')].name"
fi

printf "\nEND OF SUMMARY\n"
log "\nEND OF SUMMARY\n"

read -r -p "Do you wish to DELETE above? [y/N] " response
case "$response" in
[yY][eE][sS]|[yY])
echo "Delete pipelines that start with '$prefix' in name..."
log "Deleting pipelines that start with '$prefix' in name..."
[[ -n $prefix ]] &&
az pipelines list -o tsv |
{ grep "$prefix" || true; } |
awk '{print $4}' |
xargs -r -I % az pipelines delete --id % --yes

echo "Delete variable groups that start with '$prefix' in name..."
log "Deleting variable groups that start with '$prefix' in name..."
[[ -n $prefix ]] &&
az pipelines variable-group list -o tsv --query "[?contains(name, '$prefix')].id" |
xargs -r -I % az pipelines variable-group delete --id % --yes

echo "Delete service connections that start with '$prefix' in name..."
log "Deleting service connections that start with '$prefix' in name..."
[[ -n $prefix ]] &&
az devops service-endpoint list -o tsv --query "[?contains(name, '$prefix')].id" |
xargs -r -I % az devops service-endpoint delete --id % --yes

if [[ -z $DEPLOYMENT_ID ]]
then
echo "Delete service principal that contain '$prefix' in name, created by yourself..."
log "Deleting service principal that contain '$prefix' in name, created by yourself..."
[[ -n $prefix ]] &&
az ad sp list --query "[?contains(appDisplayName,'$prefix')].appId" -o tsv --show-mine |
xargs -r -I % az ad sp delete --id %
else
echo "Delete service principal that contain '$prefix' and $DEPLOYMENT_ID in name, created by yourself..."
log "Deleting service principal that contain '$prefix' and $DEPLOYMENT_ID in name, created by yourself..."
[[ -n $prefix ]] &&
az ad sp list --query "[?contains(appDisplayName,'$prefix') && contains(appDisplayName,'$DEPLOYMENT_ID')].appId" -o tsv --show-mine |
xargs -r -I % az ad sp delete --id %
fi

if [[ -z $DEPLOYMENT_ID ]]
then
echo "Delete resource group that comtain '$prefix' in name..."
log "Deleting resource groups that comtain '$prefix' in name..."
[[ -n $prefix ]] &&
az group list --query "[?contains(name,'$prefix') && ! contains(name,'dbw')].name" -o tsv |
xargs -I % az group delete --verbose --name % -y
else
echo "Delete resource group that contain '$prefix-$DEPLOYMENT_ID' in name..."
log "Deleting resource groups that contain '$prefix-$DEPLOYMENT_ID' in name..."
[[ -n $prefix ]] &&
az group list --query "[?contains(name,'$prefix-$DEPLOYMENT_ID') && ! contains(name,'dbw')].name" -o tsv |
xargs -I % az group delete --verbose --name % -y
@@ -120,19 +121,19 @@ delete_all(){

if [[ -z "$DEPLOYMENT_ID" ]]
then
echo "No deployment id [DEPLOYMENT_ID] specified. You will only be able to delete by prefix $prefix..."
log "No deployment id [DEPLOYMENT_ID] specified. You will only be able to delete by prefix $prefix..."
response=3
else
read -r -p "Do you wish to DELETE by"$'\n'" 1) ONLY BY PREFIX ($prefix)?"$'\n'" 2) PREFIX ($prefix) AND DEPLOYMENT_ID ($DEPLOYMENT_ID)?"$'\n'" Choose 1 or 2: " response
fi

case "$response" in
1)
echo "Delete by prefix..."
log "Deleting by prefix..."
delete_all $prefix
;;
2)
echo "Delete by deployment id..."
log "Deleting by deployment id..."
delete_all $prefix $DEPLOYMENT_ID
;;
3)
@@ -147,7 +148,7 @@ case "$response" in
esac
;;
*)
echo "Invalid choice. Exiting..."
log "Invalid choice. Exiting..." "warning"
exit
;;
esac
102 changes: 79 additions & 23 deletions e2e_samples/parking_sensors/scripts/common.sh
@@ -14,52 +14,73 @@ random_str() {
}

print_style () {
case "$2" in
"info")
COLOR="96m"
;;
"success")
COLOR="92m"
;;
"warning")
COLOR="93m"
;;
"danger")
COLOR="91m"
;;
"action")
COLOR="32m"
;;
*)
COLOR="0m"
;;
esac

if [ "$2" == "info" ] ; then
COLOR="96m";
elif [ "$2" == "success" ] ; then
COLOR="92m";
elif [ "$2" == "warning" ] ; then
COLOR="93m";
elif [ "$2" == "danger" ] ; then
COLOR="91m";
else #default color
COLOR="0m";
fi

STARTCOLOR="\e[$COLOR";
ENDCOLOR="\e[0m";
STARTCOLOR="\e[$COLOR"
ENDCOLOR="\e[0m"

printf "$STARTCOLOR%b$ENDCOLOR" "$1";
printf "$STARTCOLOR%b$ENDCOLOR" "$1"
}

log() {
# Prints a message to stderr. If a second argument is provided,
# it is used as the style of the message.
# Usage: log "message" "style"
# Example: log "Hello, World!" "info"
local message=$1
local style=${2:-}

deletePipelineIfExists() {
declare pipeline_name=$1
full_pipeline_name=$PROJECT-$pipeline_name
if [[ -z "$style" ]]; then
echo -e "$(print_style "$message" "default")" >&2
else
echo -e "$(print_style "$message" "$style")" >&2
fi
}
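
A quick sketch of the behavior this buys: because log writes to stderr, callers can capture a helper's stdout with command substitution without log lines leaking into the captured value. The helper below is illustrative only (get_sample_value is hypothetical, not part of this PR):

get_sample_value() {
    log "Computing sample value..." "info"  # hypothetical helper; message goes to stderr, not captured
    echo "42"                               # result goes to stdout and is captured by the caller
}
value=$(get_sample_value)                   # value == "42"; the log line still prints to the terminal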

delete_azdo_pipeline_if_exists() {
declare full_pipeline_name=$1

## If a pipeline with this name already exists, delete it.

pipeline_output=$(az pipelines list --query "[?name=='$full_pipeline_name']" --output json)
pipeline_id=$(echo "$pipeline_output" | jq -r '.[0].id')

if [[ -z "$pipeline_id" || "$pipeline_id" == "null" ]]; then
echo "Pipeline $full_pipeline_name does not exist.Creating..."
log "No Deployment pipeline with name $full_pipeline_name found."
else
az pipelines delete --id "$pipeline_id" --yes 1>/dev/null
echo "Deleted existing pipeline: $full_pipeline_name (Pipeline ID: $pipeline_id)"

log "Deleted existing pipeline: $full_pipeline_name (Pipeline ID: $pipeline_id)"
fi
}

createPipeline ()
create_azdo_pipeline ()
{
declare pipeline_name=$1
declare pipeline_description=$2
full_pipeline_name=$PROJECT-$pipeline_name

delete_azdo_pipeline_if_exists "$full_pipeline_name"
log "Creating deployment pipeline: $full_pipeline_name"


pipeline_id=$(az pipelines create \
--name "$full_pipeline_name" \
--description "$pipeline_description" \
@@ -70,5 +91,40 @@ createPipeline ()
--skip-first-run true \
--output json | jq -r '.id')
echo "$pipeline_id"
}
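
Because the log messages above go to stderr, the only thing create_azdo_pipeline emits on stdout is the new pipeline's id, which callers can capture directly. A minimal usage sketch, assuming az devops defaults and the variables used inside the helper (e.g. $PROJECT) are already set; the name and description are examples:

pipeline_id=$(create_azdo_pipeline "cd-release" "Example deployment pipeline")  # example arguments
log "Created pipeline with id: ${pipeline_id}"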

databricks_cluster_exists () {
declare cluster_name="$1"
declare cluster=$(databricks clusters list | tr -s " " | cut -d" " -f2 | grep ^${cluster_name}$)
if [[ -n $cluster ]]; then
return 0; # cluster exists
else
return 1; # cluster does not exist
fi
}


create_adf_linked_service () {
declare name=$1
log "Creating ADF LinkedService: $name"
adfLsUrl="${adfFactoryBaseUrl}/linkedservices/${name}?api-version=${apiVersion}"
az rest --method put --uri "$adfLsUrl" --body @"${ADF_DIR}"/linkedService/"${name}".json
}
create_adf_dataset () {
declare name=$1
log "Creating ADF Dataset: $name"
adfDsUrl="${adfFactoryBaseUrl}/datasets/${name}?api-version=${apiVersion}"
az rest --method put --uri "$adfDsUrl" --body @"${ADF_DIR}"/dataset/"${name}".json
}
create_adf_pipeline () {
declare name=$1
log "Creating ADF Pipeline: $name"
adfPUrl="${adfFactoryBaseUrl}/pipelines/${name}?api-version=${apiVersion}"
az rest --method put --uri "$adfPUrl" --body @"${ADF_DIR}"/pipeline/"${name}".json
}
create_adf_trigger () {
declare name=$1
log "Creating ADF Trigger: $name"
adfTUrl="${adfFactoryBaseUrl}/triggers/${name}?api-version=${apiVersion}"
az rest --method put --uri "$adfTUrl" --body @"${ADF_DIR}"/trigger/"${name}".json
}
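
The four ADF helpers share one pattern: PUT the JSON definition of the same name from ${ADF_DIR} against the factory's REST endpoint. A sketch of a caller, assuming adfFactoryBaseUrl, apiVersion, and ADF_DIR are exported by the surrounding script; apart from Ls_AzureDatabricks_01 (which appears in this PR), the names are illustrative:

create_adf_linked_service "Ls_AzureDatabricks_01"  # reads ${ADF_DIR}/linkedService/Ls_AzureDatabricks_01.json
create_adf_dataset "Ds_Example"                    # illustrative name
create_adf_pipeline "P_Example_Ingest"             # illustrative name
create_adf_trigger "Tr_Example_Schedule"           # illustrative name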
38 changes: 15 additions & 23 deletions e2e_samples/parking_sensors/scripts/configure_databricks.sh
@@ -29,34 +29,26 @@ set -o nounset
# KEYVAULT_DNS_NAME
# USER_NAME

cluster_exists () {
declare cluster_name="$1"
declare cluster=$(databricks clusters list | tr -s " " | cut -d" " -f2 | grep ^${cluster_name}$)
if [[ -n $cluster ]]; then
return 0; # cluster exists
else
return 1; # cluster does not exists
fi
}
. ./scripts/common.sh

echo "Configuring Databricks workspace."
log "Configuring Databricks workspace."

# Create secret scope, if not exists
scope_name="storage_scope"
if [[ ! -z $(databricks secrets list-scopes | grep "$scope_name") ]]; then
# Delete existing scope
# NOTE: Need to recreate every time to ensure idempotent deployment. Rerunning the deployment overrides KeyVault permissions.
echo "Scope already exists, re-creating secrets scope: $scope_name"
log "Scope already exists, re-creating secrets scope: $scope_name"
databricks secrets delete-scope "$scope_name"
fi

# Create secret scope
databricks secrets create-scope --json "{\"scope\": \"$scope_name\", \"scope_backend_type\": \"AZURE_KEYVAULT\", \"backend_azure_keyvault\": { \"resource_id\": \"$KEYVAULT_RESOURCE_ID\", \"dns_name\": \"$KEYVAULT_DNS_NAME\" } }"

# Upload notebooks
echo "Uploading notebooks..."
log "Uploading notebooks..."
databricks_folder_name="/Workspace/Users/${USER_NAME,,}"
echo "databricks_folder_name: ${databricks_folder_name}"
log "databricks_folder_name: ${databricks_folder_name}"

databricks workspace import "$databricks_folder_name/00_setup.py" --file "./databricks/notebooks/00_setup.py" --format SOURCE --language PYTHON --overwrite
databricks workspace import "$databricks_folder_name/01_explore.py" --file "./databricks/notebooks/01_explore.py" --format SOURCE --language PYTHON --overwrite
@@ -67,25 +59,25 @@ databricks workspace import "$databricks_folder_name/03_transform.py" --file "./
# cluster.config.json file needs to refer to one of the SKUs available in your region
# az vm list-skus --location <LOCATION> --all --output table
cluster_config="./databricks/config/cluster.config.json"
echo "Creating an interactive cluster using config in $cluster_config..."
log "Creating an interactive cluster using config in $cluster_config..."
cluster_name=$(cat "$cluster_config" | jq -r ".cluster_name")
if cluster_exists "$cluster_name"; then
echo "Cluster ${cluster_name} already exists!"
if databricks_cluster_exists "$cluster_name"; then
log "Cluster ${cluster_name} already exists! Skipping creation..." "info"
else
echo "Creating cluster ${cluster_name}..."
log "Creating cluster ${cluster_name}..."
databricks clusters create --json "@$cluster_config"
fi

cluster_id=$(databricks clusters list --output JSON | jq -r '.[]|select(.default_tags.ClusterName == "ddo_cluster")|.cluster_id')
echo "Cluster ID:" $cluster_id
log "Cluster ID:" $cluster_id

adfTempDir=.tmp/adf
mkdir -p $adfTempDir && cp -a adf/ .tmp/
tmpfile=.tmpfile
adfLsDir=$adfTempDir/linkedService
jq --arg databricksExistingClusterId "$cluster_id" '.properties.typeProperties.existingClusterId = $databricksExistingClusterId' $adfLsDir/Ls_AzureDatabricks_01.json > "$tmpfile" && mv "$tmpfile" $adfLsDir/Ls_AzureDatabricks_01.json

echo "Uploading libs TO dbfs..."
log "Uploading libs TO dbfs..."
databricks fs cp --recursive --overwrite "./databricks/libs/ddo_transform-localdev-py2.py3-none-any.whl" "dbfs:/ddo_transform-localdev-py2.py3-none-any.whl"

# Create JSON file for library installation
@@ -105,9 +97,9 @@ EOF
databricks libraries install --json @$json_file

# Create a job to set up the workspace
echo "Creates a job to setup the workspace..."
log "Creating a job to setup the workspace..."
notebook_path="${databricks_folder_name}/00_setup.py"
echo "notebook_path: ${notebook_path}"
log "notebook_path: ${notebook_path}"
json_file_config="./databricks/config/job.setup.config.json"
cat <<EOF > $json_file_config
{
@@ -127,10 +119,10 @@ cat <<EOF > $json_file_config
EOF

job_id=$(databricks jobs create --json @$json_file_config | jq -r ".job_id")
echo "Job ID:" $job_id
log "Job ID:" $job_id

databricks jobs run-now --json "{\"job_id\":$job_id, \"notebook_params\": {\"PYSPARK_PYTHON\": \"/databricks/python3/bin/python3\", \"MOUNT_DATA_PATH\": \"/mnt/datalake\", \"MOUNT_DATA_CONTAINER\": \"datalake\", \"DATABASE\": \"datalake\"}}"
# Upload libs -- for initial dev package
# Needs to run AFTER mounting dbfs:/mnt/datalake in setup workspace
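
jobs run-now returns as soon as the run is submitted, so a caller that must wait for the mount before uploading could poll the run state. A hedged sketch, assuming the installed Databricks CLI exposes jobs get-run (verify against your CLI version); in practice the run id would be captured from the run-now call above:

run_id=$(databricks jobs run-now --json "{\"job_id\":$job_id}" | jq -r ".run_id")
while true; do
    # life_cycle_state per the Jobs API: PENDING, RUNNING, TERMINATING, TERMINATED, SKIPPED, INTERNAL_ERROR
    state=$(databricks jobs get-run "$run_id" | jq -r ".state.life_cycle_state")
    [[ "$state" == "TERMINATED" || "$state" == "SKIPPED" || "$state" == "INTERNAL_ERROR" ]] && break
    log "Waiting for setup job run ${run_id} (state: ${state})..."
    sleep 15
done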

echo "Completed configuring databricks."
log "Completed configuring databricks." "success"