Skip to content

Commit

Permalink
divide to batches
Browse files Browse the repository at this point in the history
  • Loading branch information
msmouse committed Sep 14, 2024
1 parent f516aa9 commit 21e0776
Showing 1 changed file with 29 additions and 112 deletions.
141 changes: 29 additions & 112 deletions .github/workflows/workflow-run-replay-verify.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,10 @@ jobs:
prepare:
runs-on: ${{ inputs.RUNS_ON }}
outputs:
ranges: ${{ steps.gen-jobs.outputs.ranges }}
ranges0: ${{ steps.gen-jobs.outputs.ranges0 }}
ranges1: ${{ steps.gen-jobs.outputs.ranges1 }}
ranges2: ${{ steps.gen-jobs.outputs.ranges2 }}
ranges3: ${{ steps.gen-jobs.outputs.ranges3 }}
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand All @@ -94,7 +97,7 @@ jobs:
# copy the binary to the root of the repo and cache it there, because rust-setup calls a cache-rust action
# which cleans up the target directory in its post action
path: aptos-debugger
key: aptos-debugger-${{ inputs.GIT_SHA || github.sha }}
key: alden-hack-0914 #aptos-debugger-${{ inputs.GIT_SHA || github.sha }}

- name: Prepare for build if not cached
if: steps.cache-aptos-debugger-binary.outputs.cache-hit != 'true'
Expand Down Expand Up @@ -144,132 +147,46 @@ jobs:
# Generates the replay-verify job ranges as JSON file(s), then publishes the file
# contents as step outputs by appending `name=value` lines to $GITHUB_OUTPUT.
# NOTE(review): not every option line below ends with a `\` continuation. A line that
# follows one without `\` is run by the shell as a separate command instead of being
# passed to gen-replay-verify-jobs — confirm every option line except the last has `\`.
# NOTE(review): `name=$(cat file)` assumes each JSON file is a single line; multi-line
# values need the delimiter form of $GITHUB_OUTPUT — confirm the generator's output shape.
./aptos-debugger aptos-db gen-replay-verify-jobs \
--metadata-cache-dir ./metadata_cache \
--command-adapter-config $BACKUP_CONFIG_TEMPLATE_PATH \
--output-json-file job_ranges.json \
--start-version $HISTORY_START
--start-version $HISTORY_START \
--output-json-file job_ranges.0.json
--output-json-file job_ranges.1.json
--output-json-file job_ranges.2.json
--output-json-file job_ranges.3.json
echo "ranges=$(cat job_ranges.json)" >> $GITHUB_OUTPUT
cat job_ranges.json | jq || true
echo "ranges0=$(cat job_ranges.0.json)" >> $GITHUB_OUTPUT
echo "ranges1=$(cat job_ranges.1.json)" >> $GITHUB_OUTPUT
echo "ranges2=$(cat job_ranges.2.json)" >> $GITHUB_OUTPUT
echo "ranges3=$(cat job_ranges.3.json)" >> $GITHUB_OUTPUT
# Saves the file at inputs.BACKUP_CONFIG_TEMPLATE_PATH to the Actions cache under a key
# scoped to this workflow run (github.run_id). The "Load cached backup storage config"
# step restores it using the same key.
- name: Cache backup storage config so the replay jobs don't need to checkout entire repo
uses: actions/cache/save@v4
with:
path: ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }}
key: backup-config-${{ github.run_id }}

replay-verify:
replay-verify-batch:
needs: prepare
timeout-minutes: ${{ inputs.TIMEOUT_MINUTES || 180 }}
runs-on: ${{ inputs.RUNS_ON }}
strategy:
fail-fast: false
max-parallel: 200
matrix:
range: ${{ fromJson(needs.prepare.outputs.ranges) }}
# One matrix entry per batch of job ranges published by the `prepare` job.
# Job-level `strategy` can read the `needs` context but not `steps`, so the outputs
# are referenced through `needs.prepare`. Block style also keeps the `${{ }}` braces
# out of a YAML flow sequence, where `{`/`}` are structural characters.
batch:
  - ${{ needs.prepare.outputs.ranges0 }}
  - ${{ needs.prepare.outputs.ranges1 }}
  - ${{ needs.prepare.outputs.ranges2 }}
  - ${{ needs.prepare.outputs.ranges3 }}
steps:
# Splits the whitespace-separated matrix entry into individual step outputs
# (name, begin, end, desc) so later steps can read them via steps.parse-job.outputs.*.
- name: Parse job - ${{ matrix.range }}
  id: parse-job
  shell: bash
  run: |
    # The last variable receives the remainder of the line, so `desc` may contain spaces.
    # (Previously misspelled `sesc`, which left $desc unset and the `desc` output empty.)
    read -r name begin end desc <<< "${{ matrix.range }}"
    echo "name=$name" >> "$GITHUB_OUTPUT"
    echo "begin=$begin" >> "$GITHUB_OUTPUT"
    echo "end=$end" >> "$GITHUB_OUTPUT"
    echo "desc=$desc" >> "$GITHUB_OUTPUT"
# Restores the `aptos-debugger` path from the Actions cache. The key is derived from
# inputs.GIT_SHA, falling back to github.sha when that input is empty.
# fail-on-cache-miss makes the step fail instead of continuing without the binary.
- name: Load cached aptos-debugger binary
uses: actions/cache/restore@v4
with:
path: aptos-debugger
key: aptos-debugger-${{ inputs.GIT_SHA || github.sha }}
fail-on-cache-miss: true

# Restores the `metadata_cache` directory from the Actions cache, keyed by the
# BUCKET and SUB_DIR inputs; the step fails if no cache entry is found.
# NOTE(review): the key ends with a trailing `-`; presumably the saved key carries a
# suffix after it — confirm against the step that saves this cache.
- name: Load cached backup storage metadata cache dir
uses: actions/cache/restore@v4
with:
path: metadata_cache
key: metadata-cache-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-
fail-on-cache-miss: true

# Restores the backup storage config file saved earlier in this workflow run by the
# "Cache backup storage config ..." step (same `backup-config-<run_id>` key);
# the step fails if the cache entry is missing.
- name: Load cached backup storage config
uses: actions/cache/restore@v4
with:
path: ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }}
key: backup-config-${{ github.run_id }}
fail-on-cache-miss: true

# Authenticates to Google Cloud with google-github-actions/auth, taking the workload
# identity provider and the service account email from repository secrets.
- id: auth
uses: "google-github-actions/auth@v2"
with:
workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }}

- name: Install GCloud SDK
uses: "google-github-actions/setup-gcloud@v2"
# Invokes the replay-verify-batch workflow pinned to the
# `0911-alden-use-gcloud-on-base-image` ref, passing the inputs listed under `with:`.
# NOTE(review): this is a list item under `steps:`, but the `uses:` target is a workflow
# file (.github/workflows/*.yaml). GitHub Actions calls reusable workflows at job level
# (`jobs.<job_id>.uses`), and `secrets: inherit` is a job-level key — confirm whether
# this needs to become its own job (without `runs-on`/`steps`) to be accepted.
- name: Call replay-verify-batch workflow
uses: aptos-labs/aptos-core/.github/workflows/workflow-run-replay-verify-batch.yaml@0911-alden-use-gcloud-on-base-image
secrets: inherit
with:
version: ">= 418.0.0"
install_components: "kubectl,gke-gcloud-auth-plugin"

- name: phase 1 - restore snapshot, with retries
env:
RANGES_JSON: ${{ matrix.batch }}
GIT_SHA: ${{ inputs.GIT_SHA }}
BUCKET: ${{ inputs.BUCKET }}
SUB_DIR: ${{ inputs.SUB_DIR }}
HISTORY_START: ${{ inputs.HISTORY_START || '0' }}
TXNS_TO_SKIP: ${{ inputs.TXNS_TO_SKIP }}
HISTORY_START: ${{ inputs.HISTORY_START }}
BACKUP_CONFIG_TEMPLATE_PATH: ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }}
run: |
# Run replay-verify up to four times (attempts 0..3), sleeping 10s * attempt number
# before each retry. The script exits 0 on the first successful attempt.
# NOTE(review): --start-version and --end-version are both `begin` in this phase;
# presumably intentional so only the snapshot is restored — confirm.
for try in {0..3}
do
  if [ $try -gt 0 ]; then
    SLEEP=$((10 * $try))
    echo "sleeping for $SLEEP seconds before retry #$try" >&2
    sleep $SLEEP
  fi
  ./aptos-debugger aptos-db replay-verify \
    --metadata-cache-dir ./metadata_cache \
    --command-adapter-config $BACKUP_CONFIG_TEMPLATE_PATH \
    --txns-to-skip $TXNS_TO_SKIP \
    --start-version ${{ steps.parse-job.outputs.begin }} \
    --end-version ${{ steps.parse-job.outputs.begin }} \
    --lazy-quit \
    --enable-storage-sharding \
    --target-db-dir db \
    --concurrent-downloads 8 \
    --replay-concurrency-level 8 \
    && exit 0 || true # exit 0 if successful, otherwise retry
done
# Every attempt failed. (`exit(1)` is not valid shell syntax; the builtin takes its
# status as a plain argument.)
exit 1
# Replays and verifies transactions in [begin, end] as parsed by the parse-job step,
# writing to the `db` directory. Retries up to three times with a growing back-off.
- name: phase 2 - replay-verify transactions, with retries
  env:
    BUCKET: ${{ inputs.BUCKET }}
    SUB_DIR: ${{ inputs.SUB_DIR }}
    HISTORY_START: ${{ inputs.HISTORY_START || '0' }}
    TXNS_TO_SKIP: ${{ inputs.TXNS_TO_SKIP }}
    BACKUP_CONFIG_TEMPLATE_PATH: ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }}
  run: |
    # Attempts 0..3; sleep 10s * attempt number before each retry.
    for try in {0..3}
    do
      if [ $try -gt 0 ]; then
        SLEEP=$((10 * $try))
        echo "sleeping for $SLEEP seconds before retry #$try" >&2
        sleep $SLEEP
      fi
      ./aptos-debugger aptos-db replay-verify \
        --metadata-cache-dir ./metadata_cache \
        --command-adapter-config $BACKUP_CONFIG_TEMPLATE_PATH \
        --txns-to-skip $TXNS_TO_SKIP \
        --start-version ${{ steps.parse-job.outputs.begin }} \
        --end-version ${{ steps.parse-job.outputs.end }} \
        --lazy-quit \
        --enable-storage-sharding \
        --target-db-dir db \
        --concurrent-downloads 8 \
        --replay-concurrency-level 8 \
        && exit 0 || true # exit 0 if successful, otherwise retry
    done
    # Every attempt failed. (`exit(1)` is not valid shell syntax; the builtin takes its
    # status as a plain argument.)
    exit 1
RUNS_ON: ${{ inputs.RUNS_ON }}
TIMEOUT_MINUTES: ${{ inputs.TIMEOUT_MINUTES }}

0 comments on commit 21e0776

Please sign in to comment.