test json diff workflow (#14806)

* add cross repo dispatch to trigger indexer processor integ tests
* convert to bash script for readability
* temp to check new file behavior
* add check if workflow run exists
* remove testing txn
* skip auth if dispatch isn't needed
aptos-labs · Oct 8, 2024 · 8bb94b7 · 8bb94b7
1 parent 67f7ee6
commit 8bb94b7
Show file tree

Hide file tree

Showing 3 changed files with 381 additions and 0 deletions.
diff --git a/.github/workflows/indexer-processor-testing.yaml b/.github/workflows/indexer-processor-testing.yaml
@@ -0,0 +1,148 @@
+
+name: Trigger Processor Tests on JSON Change
+
+on:
+  workflow_dispatch:
+  pull_request:  # Trigger on PR-level events
+    branches:
+      - main
+
+# the required permissions to request the ID token
+permissions:
+  id-token: write  # This is required for GCP authentication
+  contents: read   # Ensure the workflow has access to repository contents
+
+jobs:
+  dispatch_event:
+    runs-on: runs-on,cpu=64,family=c7,hdd=500,image=aptos-ubuntu-x64,run-id=${{ github.run_id }}
+
+    steps:
+      - name: Checkout the repository
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.ref }}
+
+      - name: Set up Rust
+        uses: aptos-labs/aptos-core/.github/actions/rust-setup@main
+        with:
+          GIT_CREDENTIALS: ${{ secrets.GIT_CREDENTIALS }}
+
+      # Install necessary system dependencies (-y: the runner has no TTY to answer apt's confirmation prompt)
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential libssl-dev pkg-config
+
+      # Ensure Rust is updated
+      - name: Update Rust toolchain
+        run: rustup update
+
+      - name: Run CLI to Generate JSON Files
+        run: |
+          cd ecosystem/indexer-grpc/indexer-transaction-generator
+          cargo run -- --testing-folder ./example_tests --output-folder ../indexer-test-transactions/new_json_transactions
+
+      - name: Install jq
+        run: sudo apt-get install -y jq  # Ensure jq is installed for JSON processing; -y because CI cannot answer apt's prompt
+
+      # TODO: improve this step to be easily maintainable and extensible
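+      # Write cleaned_<name>.json copies of the original and new scripted JSON files with the fields listed below removed, so the compare step can diff them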
+      - name: Prepare and Clean JSON Files
+        run: |
+          cd ecosystem/indexer-grpc/indexer-test-transactions
+
+          for folder in json_transactions/scripted_transactions new_json_transactions/scripted_transactions; do
+            for file in $folder/*.json; do
+              echo "Processing $file..."
+              base_file=$(basename "$file")
+
+              jq 'del(.timestamp,
+                      .version,
+                      .info.hash,
+                      .info.stateChangeHash,
+                      .info.accumulatorRootHash,
+                      .info.changes[].writeResource.stateKeyHash,
+                      .info.changes[].writeResource.type.address,
+                      .info.changes[].writeResource.address,
+                      .info.changes[].writeTableItem.stateKeyHash,
+                      .info.changes[].writeTableItem.data.key,
+                      .info.changes[].writeTableItem.data.value,
+                      .epoch,
+                      .blockHeight,
+                      .sizeInfo,
+                      .user.request.sender,
+                      .user.request.expirationTimestampSecs.seconds,
+                      .user.request.signature.ed25519.publicKey,
+                      .user.request.signature.ed25519.signature)
+                  | (.info.changes[].writeResource.data |=
+                  if type == "string" then
+                    (fromjson
+                     | del(.authentication_key)
+                     | walk(if type == "object" and has("addr") then del(.addr) else . end)
+                     | tostring)
+                  else . end)' "$file" > "$folder/cleaned_$base_file"
+            done
+          done
+
+      - name: Compare JSON Files Across Multiple Folders
+        id: diff_check
+        run: |
+          . scripts/indexer_test_txns_compare_and_diff.sh
+
+      - name: Handle New Files and Differences
+        run: |
+          echo "Checking outputs from diff_check step..."
+          echo "New file found: ${{ steps.diff_check.outputs.new_file_found }}"
+          echo "Diff found: ${{ steps.diff_check.outputs.diff_found }}"
+          
+          if [ "${{ steps.diff_check.outputs.new_file_found }}" == "true" ]; then
+            echo "New JSON files detected:"
+            echo "${{ steps.diff_check.outputs.new_files }}"  # Print all new files with paths
+            exit 0  # End this step successfully; the auth/dispatch/poll steps below are skipped when new_file_found is true
+          elif [ "${{ steps.diff_check.outputs.diff_found }}" == "true" ]; then
+            echo "Differences detected. Proceeding with dispatch event."
+            echo "Modified files:"
+            echo "${{ steps.diff_check.outputs.modified_files }}"  # Print modified files with paths
+          else
+            echo "No differences or new files detected."
+            exit 0  # Nothing to dispatch: neither new files nor differences were found
+          fi
+
+      - id: auth
+        if: steps.diff_check.outputs.diff_found == 'true' && steps.diff_check.outputs.new_file_found == 'false'
+        uses: "google-github-actions/auth@v2"
+        with:
+          workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
+          service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }}
+
+      - name: Log active service account email
+        if: steps.diff_check.outputs.diff_found == 'true' && steps.diff_check.outputs.new_file_found == 'false'
+        run: |
+          gcloud auth list --filter=status:ACTIVE --format="value(account)"
+
+      - id: 'secrets'
+        if: steps.diff_check.outputs.diff_found == 'true' && steps.diff_check.outputs.new_file_found == 'false'
+        uses: 'google-github-actions/get-secretmanager-secrets@v2'
+        with:
+          secrets: |-
+            token:aptos-ci/github-actions-repository-dispatch
+
+      # Conditionally Dispatch Event to Processor Repo if Differences Found
+      - name: Dispatch Event to Processor Repo
+        if: steps.diff_check.outputs.diff_found == 'true' && steps.diff_check.outputs.new_file_found == 'false'
+        uses: peter-evans/repository-dispatch@v3.0.0
+        with:
+          TOKEN: '${{ steps.secrets.outputs.token }}'
+          repository: 'aptos-labs/aptos-indexer-processors'
+          event-type: 'test-txn-json-change-detected'
+          client-payload: '{"commit_hash": "${{ github.sha }}"}'
+
+      # Poll Processor Repo for Workflow Run Status and Memorize Run ID to check the job status
+      - name: Poll for Workflow Run and Wait for Job Completion
+        if: steps.diff_check.outputs.diff_found == 'true' && steps.diff_check.outputs.new_file_found == 'false'
+        id: poll_status
+        run: |
+          . scripts/indexer_processor_tests_status_poll.sh
+        env:
+          GITHUB_TOKEN: ${{ steps.secrets.outputs.token }}  # Pass the correct GitHub token
+          GITHUB_SHA: ${{ github.sha }}
diff --git a/scripts/indexer_processor_tests_status_poll.sh b/scripts/indexer_processor_tests_status_poll.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+
+# Poll for Workflow Run and Wait for Job Completion
+
+# Unique identifier for the run (commit_hash or UUID passed via event payload)
+UUID="${GITHUB_SHA}"
+
+# If a run_id is already known, use it directly to check the run status
+if [ -f ".cached_run_id" ]; then
+  run_id=$(cat .cached_run_id)
+  echo "Using cached run_id: $run_id"
+else
+  echo "Polling for the workflow run with UUID: $UUID."
+
+  attempts=0
+  max_attempts=5  # Number of attempts to find the run_id
+  sleep_interval=30  # Time to wait between attempts (in seconds)
+
+  while [ $attempts -lt $max_attempts ]; do
+    echo "Polling for the workflow run. Attempt $((attempts+1)) of $max_attempts..."
+
+    # Get the workflow runs for the repository
+    response=$(curl -s -H "Authorization: Bearer ${GITHUB_TOKEN}" \
+    "https://api.github.com/repos/aptos-labs/aptos-indexer-processors/actions/runs?event=repository_dispatch&branch=main")
+
+    # Check if the workflow_runs array exists
+    workflow_runs=$(echo "$response" | jq -r '.workflow_runs')
+    if [ "$workflow_runs" == "null" ] || [ -z "$workflow_runs" ]; then
+      echo "No workflow runs found. Response from GitHub API:"
+      echo "$response"  # Output the raw response for debugging
+      echo "Retrying in $sleep_interval seconds..."
+      attempts=$((attempts + 1))
+      sleep $sleep_interval
+      continue
+    fi
+
+    # Match the commit hash against the run name (passed via --arg, null names tolerated); keep only the first match so run_id is never multi-line
+    run_id=$(echo "$workflow_runs" | jq -r --arg uuid "$UUID" '[.[] | select((.name // "") | test($uuid)) | .id] | .[0] // empty')
+
+    if [ -n "$run_id" ]; then
+      echo "Found workflow run with ID: $run_id"
+      echo "$run_id" > .cached_run_id  # Save the run_id to cache
+      break
+    else
+      echo "No matching workflow run found yet. Retrying in $sleep_interval seconds..."
+      attempts=$((attempts + 1))
+      sleep $sleep_interval
+    fi
+  done
+fi
+
+# If we still don't have a run_id, exit the job
+if [ -z "$run_id" ]; then
+  echo "Workflow run not found after $max_attempts attempts. Exiting."
+  exit 1
+fi
+
+# Now that we have the run_id (cached or newly found), proceed to poll job status
+jobs_url="https://api.github.com/repos/aptos-labs/aptos-indexer-processors/actions/runs/${run_id}/jobs"
+
+# Poll the job status until completion
+job_completed=false
+max_job_attempts=20  # Adjust based on how long you expect the job to run
+job_attempts=0
+sleep_interval=60  # Adjust polling interval as needed
+
+while [ "$job_completed" == false ] && [ $job_attempts -lt $max_job_attempts ]; do
+  echo "Polling for job status. Attempt $((job_attempts+1)) of $max_job_attempts..."
+  jobs_response=$(curl -s -H "Authorization: Bearer ${GITHUB_TOKEN}" "$jobs_url")
+
+  # Check if the jobs array exists
+  jobs=$(echo "$jobs_response" | jq -r '.jobs')
+  if [ "$jobs" == "null" ] || [ -z "$jobs" ]; then
+    echo "No jobs found in the workflow run. Response from GitHub API:"
+    echo "$jobs_response"  # Output the raw response for debugging
+    exit 1
+  fi
+
+  # Loop through the jobs and check their status
+  for job in $(echo "$jobs" | jq -r '.[] | @base64'); do
+    _jq() {
+      echo "${job}" | base64 --decode | jq -r "${1}"
+    }
+
+    job_name=$(_jq '.name')
+    job_id=$(_jq '.id')
+    job_status=$(_jq '.status')
+    job_conclusion=$(_jq '.conclusion')
+
+    echo "Checking job: $job_name (Job ID: $job_id)"
+    echo "Job status: $job_status"
+    echo "Job conclusion: $job_conclusion"
+
+    # Check if the job has completed
+    if [ "$job_status" == "completed" ]; then
+      job_completed=true
+      if [ "$job_conclusion" == "success" ]; then
+        echo "Job completed successfully!"
+        exit 0  # Exit with success
+      else
+        echo "Job failed!"
+        exit 1  # Exit with failure
+      fi
+    fi
+  done
+
+  # Sleep before the next polling attempt
+  echo "Job is still in progress. Waiting $sleep_interval seconds before polling again..."
+  sleep $sleep_interval
+  job_attempts=$((job_attempts + 1))
+done
+
+# If the job hasn't completed within the allowed attempts, exit with an error
+if [ "$job_completed" == false ]; then
+  echo "Job did not complete within the expected time. Exiting with failure."
+  exit 1
+fi
diff --git a/scripts/indexer_test_txns_compare_and_diff.sh b/scripts/indexer_test_txns_compare_and_diff.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+# Compare one generated JSON file with its original and record the result in the globals diff_found/modified_files (content differs) or new_file_found/new_files (no original exists)
+compare_and_diff() {
+  local generated_file=$1  # path of the newly generated file
+  local original_file=$2  # path where the matching original file is expected
+
+  echo "Modified file path: $generated_file"
+  echo "Original file path: $original_file"
+
+  if [ -f "$original_file" ]; then
+    echo "Original file exists, comparing with modified file."
+    # Run diff and capture the output; `|| true` because diff exits non-zero when the files differ
+    diff_output=$(diff -u "$original_file" "$generated_file" || true)
+
+    if [ -n "$diff_output" ]; then
+      echo "Differences found in $generated_file"
+      diff_found=true
+      modified_files="${modified_files}${generated_file}\n"  # Append the path of the modified file, followed by a literal \n separator
+      echo "Diff output:"
+      echo "$diff_output"
+    else
+      echo "No differences found for $generated_file."
+    fi
+  else
+    echo "New file detected: $generated_file (no corresponding original file found)"
+    new_file_found=true
+    new_files="${new_files}${generated_file}\n"  # Append the path of the new file, followed by a literal \n separator
+
+    # Treat as new file, but still run a diff (compare with /dev/null) so the whole content is logged
+    diff_output=$(diff -u /dev/null "$generated_file" || true)
+    if [ -n "$diff_output" ]; then
+      echo "New file with diff found in $generated_file"
+      echo "Diff output for new file:"
+      echo "$diff_output"
+    fi
+  fi
+}
+
+
+# Initialize the flags
+diff_found=false
+new_file_found=false
+new_files=""
+modified_files=""
+
+cd ecosystem/indexer-grpc/indexer-test-transactions || exit 1
+
+echo "Starting comparison between new and original JSON files."
+
+# Check if the new_json_transactions folder exists
+if [ ! -d "new_json_transactions" ]; then
+  echo "Directory new_json_transactions does not exist. Exiting."
+  exit 1
+fi
+
+# Loop over all subdirectories inside new_json_transactions and compare each generated file with its original
+for folder in new_json_transactions/*; do
+  if [ -d "$folder" ]; then  # Ensure it's a directory
+    echo "Processing folder: $folder"
+
+    # Check if the folder is for imported transactions
+    if [[ "$folder" == *"imported_"* ]]; then
+      # For imported transactions, process every .json file; no 'cleaned_' prefix is required
+      for file in "$folder"/*.json; do
+        if [ -f "$file" ]; then
+          echo "Processing imported file: $file"
+          base_file=$(basename "$file" .json)
+          original_file="../indexer-test-transactions/json_transactions/$(basename "$folder")/${base_file}.json"
+          compare_and_diff "$file" "$original_file"
+        fi
+      done
+    else
+      # For scripted transactions, only process files that are prefixed with 'cleaned_'
+      for file in "$folder"/cleaned_*.json; do
+        if [ -f "$file" ]; then
+          echo "Processing scripted file: $file"
+          base_file=$(basename "$file" .json)
+          original_file="../indexer-test-transactions/json_transactions/$(basename "$folder")/${base_file}.json"
+          compare_and_diff "$file" "$original_file"
+        fi
+      done
+    fi
+  else
+    echo "Folder $folder is not a valid directory."
+  fi
+done
+
+# Print all new files if found
+if [ "$new_file_found" = "true" ] && [ -n "$new_files" ]; then
+  echo "New files detected:"
+  echo -e "$new_files"
+else
+  echo "No new files detected."
+fi
+
+# Print all modified files if found
+if [ "$diff_found" = "true" ] && [ -n "$modified_files" ]; then
+  echo "Modified files detected:"
+  echo -e "$modified_files"
+else
+  echo "No modified files detected."
+fi
+
+# Debugging logs before setting outputs
+echo "diff_found=$diff_found"
+echo "new_file_found=$new_file_found"
+echo "new_files=$new_files"
+echo "modified_files=$modified_files"
+
+# Set output flags
+echo "diff_found=$diff_found" >> $GITHUB_OUTPUT
+echo "new_file_found=$new_file_found" >> $GITHUB_OUTPUT
+echo "new_files=$new_files" >> $GITHUB_OUTPUT  # Store new files as output
+echo "modified_files=$modified_files" >> $GITHUB_OUTPUT  # Store modified files as output
+echo "Comparison completed."