@@ -67,12 +67,34 @@ jobs:
67
67
run : curl -sSL https://install.python-poetry.org | python3 -
68
68
shell : bash
69
69
- name : Install dependencies
70
- run : poetry install
71
70
shell : bash
71
+ run : |
72
+ # Install root-level dependencies
73
+ poetry install
74
+ # Create a virtual environment for the PySpark project and install its dependencies
75
+ python3 -m venv "pyspark/.venv"
76
+ pyspark/.venv/bin/pip install -r pyspark/requirements.txt
77
+ ls -la pyspark/.venv/bin
72
78
- name : Run migrations from db_migrations directory
73
79
working-directory : ./db_migrations
74
80
run : poetry run alembic upgrade head
75
81
shell : bash
82
+ - name : Check PySpark Data Assets
83
+ uses : gabledata/cicd/github-actions/check-data-assets@python_path
84
+ with :
85
+ # Provide API key and endpoint secrets
86
+ gable-api-key : ${{secrets.GABLE_API_KEY_UNSTABLE_SANDBOX}}
87
+ gable-api-endpoint : ${{secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX}}
88
+ allow-gable-pre-release : true
89
+ python-path : pyspark/.venv/bin/python
90
+ # Options passed to the data asset check for the PySpark project.
91
+ # Written as a multiline string: one option per line, with a trailing backslash
92
+ # continuing onto the next line.
93
+ data-asset-options : |
94
+ --source-type pyspark \
95
+ --project-root ${{ github.workspace }}/pyspark \
96
+ --csv-schema-file ${{ github.workspace }}/pyspark/schemas.csv \
97
+ --spark-job-entrypoint 'job.py --final_output_table pnw_bookings_30_days' --debug
76
98
- name : Check Protobuf Data Assets
77
99
uses : gabledata/cicd/github-actions/check-data-assets@latest
78
100
with :
0 commit comments