Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Anti-pattern Recognition tool to the Optimization Scripts #397

Merged
merged 9 commits into from
Mar 25, 2024
Next Next commit
adding anti pattern recognition step to optimization scripts
  • Loading branch information
franklinWhaite committed Mar 24, 2024
commit 9b1dea1e595c295387eb6f01f00e97e202c1bb38
21 changes: 21 additions & 0 deletions scripts/optimization/anti_pattern_recoginition_tool_tables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- Destination table for the anti-pattern recognition tool's results.
-- CREATE OR REPLACE means any rows from a previous run are discarded
-- whenever this script is executed.
CREATE OR REPLACE TABLE optimization_workshop.antipattern_output_table
(
  job_id            STRING,     -- joined against query_hash by the follow-up UPDATE script
  user_email        STRING,
  query             STRING,
  -- One (name, description) entry per recommendation.
  recommendation    ARRAY<STRUCT<name STRING, description STRING>>,
  slot_hours        FLOAT64,
  optimized_sql     STRING,
  process_timestamp TIMESTAMP
);

-- Input view for the anti-pattern recognition tool: exposes the 1000 query
-- hashes with the highest avg_total_slots, each paired with the text of the
-- first job in its top_10_jobs array.
--
-- The source table is referenced relative to the default project (like every
-- other statement in these scripts) rather than a hard-coded project id, so
-- the view resolves in whichever project the scripts are run against.
CREATE OR REPLACE VIEW optimization_workshop.antipattern_tool_input_view AS
SELECT
  query_hash AS id,
  -- SAFE_OFFSET yields NULL instead of an error when top_10_jobs is empty.
  top_10_jobs[SAFE_OFFSET(0)].query_text AS query
FROM
  optimization_workshop.queries_grouped_by_hash
ORDER BY
  avg_total_slots DESC
LIMIT
  1000
;
26 changes: 26 additions & 0 deletions scripts/optimization/run_all_scripts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,29 @@ done
# actively_read_tables_with_partitioning_clustering_info.sql
bq query ${bq_flags} <table_read_patterns.sql
bq query ${bq_flags} <actively_read_tables_with_partitioning_clustering_info.sql &


# Run setup for anti pattern recognition tool (creates the output table and
# the input view the tool reads from).
bq query ${bq_flags} <anti_pattern_recoginition_tool_tables.sql

# Emulated try/catch. The steps are chained with && because a plain { ...; }
# group only reports the exit status of its LAST command: without the chain a
# failed clone/build/run would be ignored and later steps would still execute.
{ # try

  ## build anti-pattern recognition tool locally
  git clone https://github.com/GoogleCloudPlatform/bigquery-antipattern-recognition.git &&
  (cd bigquery-antipattern-recognition && mvn clean package jib:dockerBuild -DskipTests) &&

  ## run anti-pattern recognition tool against the input view
  # Assignment and export are split so a gcloud failure is not masked by
  # export's own (always zero) exit status.
  PROJECT_ID=$(gcloud config get-value project) &&
  export PROJECT_ID &&
  docker run -i bigquery-antipattern-recognition \
    --input_bq_table "${PROJECT_ID}.optimization_workshop.antipattern_tool_input_view" \
    --output_table "${PROJECT_ID}.optimization_workshop.antipattern_output_table" &&

  # write anti pattern output to queries by hash table
  bq query ${bq_flags} <update_queries_by_hash_w_anti_patterns.sql

} || { # catch
  echo 'Error: could not run Anti-pattern Recognition Tool. Try using GCP Cloud Shell https://cloud.google.com/shell/docs/launching-cloud-shell'
}

# Clean up anti pattern recognition tool (runs whether or not the steps above succeeded)
rm -rf bigquery-antipattern-recognition
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Copies the anti-pattern tool's recommendations onto queries_grouped_by_hash.

-- IF NOT EXISTS keeps the script re-runnable against a table that already
-- received the column in an earlier run.
ALTER TABLE optimization_workshop.queries_grouped_by_hash
ADD COLUMN IF NOT EXISTS recommendation ARRAY<STRUCT<name STRING, description STRING>>;

-- BigQuery raises a runtime error when a target row joins to more than one
-- row of the FROM source, so the output table is first reduced to the most
-- recent row per job_id.
-- NOTE(review): job_id is matched against query_hash here, which presumes the
-- tool writes the input view's `id` into job_id -- confirm against the tool.
UPDATE optimization_workshop.queries_grouped_by_hash AS grouped
SET grouped.recommendation = latest.recommendation
FROM (
  SELECT
    job_id,
    recommendation
  FROM optimization_workshop.antipattern_output_table
  WHERE TRUE  -- no-op filter; accompanies QUALIFY for compatibility
  QUALIFY ROW_NUMBER() OVER (PARTITION BY job_id ORDER BY process_timestamp DESC) = 1
) AS latest
WHERE grouped.query_hash = latest.job_id;