diff --git a/CHANGES.md b/CHANGES.md index 513c159d..1ed8d175 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,21 @@ development source code and as such may not be routinely kept up to date. # __NEXT__ +## Improvements + +* Snakemake's per-input/output file metadata (stored in `.snakemake/metadata/`) + is now downloaded from AWS Batch builds by default. Like file modification + times (mtimes), which are already preserved from the remote build, this + additional metadata is used by Snakemake to track when inputs have changed + and when it should regenerate outputs. The metadata is also used in + [Snakemake report generation](https://snakemake.readthedocs.io/en/v8.14.0/snakefiles/reporting.html#rendering-reports) + and can be useful for gathering ad-hoc workflow statistics. + + The runtime image used must be at least `nextstrain/base:build-20240617T235011Z` + for these Snakemake metadata files to be available for download from the AWS + Batch job. + ([#374](https://github.com/nextstrain/cli/pull/374)) + # 8.4.0 (29 May 2024) diff --git a/nextstrain/cli/runner/aws_batch/s3.py b/nextstrain/cli/runner/aws_batch/s3.py index 02343e1f..7d07fb9f 100644 --- a/nextstrain/cli/runner/aws_batch/s3.py +++ b/nextstrain/cli/runner/aws_batch/s3.py @@ -119,8 +119,12 @@ def download_workdir(remote_workdir: S3Object, workdir: Path, patterns: List[str ]) included = path_matcher([ - # But we do want the Snakemake logs to come over. + # But we do want the Snakemake logs to come over… ".snakemake/log/", + + # …and the input/output metadata Snakemake tracks (akin to mtimes, + # which we also preserve). + ".snakemake/metadata/", ]) if patterns: