
Commit 5529cf4

Check PySpark
1 parent dc9dd9d commit 5529cf4

2 files changed, +24 -2 lines changed

.github/workflows/check-contracts-and-assets.yml (+23, -1)
@@ -67,12 +67,34 @@ jobs:
         run: curl -sSL https://install.python-poetry.org | python3 -
         shell: bash
       - name: Install dependencies
-        run: poetry install
         shell: bash
+        run: |
+          # Install root level dependencies
+          poetry install
+          # Create a virtual environment for the PySpark project and install its dependencies
+          python3 -m venv "pyspark/.venv"
+          pyspark/.venv/bin/pip install -r pyspark/requirements.txt
+          ls -la pyspark/.venv/bin
       - name: Run migrations from db_migrations directory
         working-directory: ./db_migrations
         run: poetry run alembic upgrade head
         shell: bash
+      - name: Check PySpark Data Assets
+        uses: gabledata/cicd/github-actions/check-data-assets@python_path
+        with:
+          # Provide API key and endpoint secrets
+          gable-api-key: ${{secrets.GABLE_API_KEY_UNSTABLE_SANDBOX}}
+          gable-api-endpoint: ${{secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX}}
+          allow-gable-pre-release: true
+          python-path: pyspark/.venv/bin/python
+          # Options passed to the data asset check. Can either be
+          # specified as a space separated list, or as
+          # a multiline string
+          data-asset-options: |
+            --source-type pyspark \
+            --project-root ${{ github.workspace }}/pyspark \
+            --csv-schema-file ${{ github.workspace }}/pyspark/schemas.csv \
+            --spark-job-entrypoint 'job.py --final_output_table pnw_bookings_30_days' --debug
       - name: Check Protobuf Data Assets
         uses: gabledata/cicd/github-actions/check-data-assets@latest
         with:
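
The new install step builds a dedicated virtual environment for the PySpark project and only sanity-checks it with ls -la. A slightly stronger check, shown here purely as a hypothetical sketch and not part of this commit, would confirm that the interpreter later handed to the Gable action via python-path can actually import PySpark:

# Hypothetical sanity check, not part of this commit: confirm that the interpreter
# handed to the Gable action via `python-path` can import PySpark.
import subprocess

VENV_PYTHON = "pyspark/.venv/bin/python"  # created by the install step above

result = subprocess.run(
    [VENV_PYTHON, "-c", "import pyspark; print(pyspark.__version__)"],
    capture_output=True,
    text=True,
    check=True,  # raises CalledProcessError if PySpark is missing from the venv
)
print(f"PySpark {result.stdout.strip()} available via {VENV_PYTHON}")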

.github/workflows/publish-contracts-and-assets.yml (+1, -1)
@@ -111,7 +111,7 @@ jobs:
             --source-type pyspark \
             --project-root ${{ github.workspace }}/pyspark \
             --csv-schema-file ${{ github.workspace }}/pyspark/schemas.csv \
-            --spark-job-entrypoint 'job.py --final_output_table pnw_bookings_30_days' --trace
+            --spark-job-entrypoint 'job.py --final_output_table pnw_bookings_30_days' --debug
       - name: Register Protobuf Data Assets
         uses: gabledata/cicd/github-actions/register-data-assets@latest
         with:
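
Both workflows point the Gable action at the same entrypoint string, 'job.py --final_output_table pnw_bookings_30_days'; the job itself is not included in this commit. As a purely hypothetical illustration, assuming an argparse-based CLI that writes its result to the table named by --final_output_table, the entrypoint might look roughly like this:

# Hypothetical sketch of pyspark/job.py; the real entrypoint is not shown in this commit.
# The source table and query below are placeholders.
import argparse

from pyspark.sql import SparkSession


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--final_output_table", required=True)
    args = parser.parse_args()

    spark = SparkSession.builder.appName("pnw_bookings_30_days").getOrCreate()

    # Placeholder transformation: the real job would read and join its actual sources here.
    bookings = spark.sql(
        "SELECT * FROM bookings WHERE booking_date >= date_sub(current_date(), 30)"
    )

    # Write the result to the table named on the command line.
    bookings.write.mode("overwrite").saveAsTable(args.final_output_table)

    spark.stop()


if __name__ == "__main__":
    main()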
