nextstrain · joverlee521 · Mar 7, 2024 · Feb 26, 2024
diff --git a/ingest/README.md b/ingest/README.md
@@ -44,6 +44,26 @@ inputs/outputs should be relative to the ingest directory.
 Modules are all [included](https://snakemake.readthedocs.io/en/stable/snakefiles/modularization.html#includes)
 in the main Snakefile in the order that they are expected to run.
 
+### Nextclade
+
+Nextstrain is pushing to standardize ingest workflows with Nextclade runs to include Nextclade outputs in our publicly
+hosted data. However, creating a Nextclade dataset that does not already exist requires curated data as input, so the
+Nextclade steps are optional here.
+
+If Nextclade config values are included, the Nextclade rules will create the final metadata TSV by joining the Nextclade
+output with the metadata. If Nextclade configs are not included, we rename the subset metadata TSV to the final metadata TSV.
+
+To run Nextclade rules, include the `defaults/nextclade_config.yaml` config file with:
+
+```
+nextstrain build ingest --configfile defaults/nextclade_config.yaml
+```
+
+> [!TIP]
+> If the Nextclade dataset is stable and you always want to run the Nextclade rules as part of ingest, we recommend
+> moving the Nextclade-related config parameters from the `defaults/nextclade_config.yaml` file to the default config file
+> `defaults/config.yaml`.
+
 ## Build configs
 
 The build-configs directory contains custom configs and rules that override and/or

diff --git a/ingest/Snakefile b/ingest/Snakefile
@@ -39,6 +39,8 @@ include: "rules/curate.smk"
 # final metadata TSV by joining the Nextclade output with the metadata.
 # If Nextclade configs are not included, we rename the subset metadata TSV
 # to the final metadata TSV.
+# To run nextclade.smk rules, include the `defaults/nextclade_config.yaml`
+# config file with `nextstrain build ingest --configfile defaults/nextclade_config.yaml`.
 if "nextclade" in config:
 
     include: "rules/nextclade.smk"

diff --git a/ingest/defaults/config.yaml b/ingest/defaults/config.yaml
@@ -115,18 +115,3 @@ curate:
     "abbr_authors",
     "institution",
   ]
-
-
-# Nextclade parameters to include if you are running Nextclade as a part of your ingest workflow
-# Note that this requires a Nextclade dataset to already exist for your pathogen.
-# Remove the following parameters if you do not plan to run Nextclade.
-nextclade:
-  # The name of the Nextclade dataset to use for running nextclade.
-  # Run `nextclade dataset list` to get a full list of available Nextclade datasets
-  dataset_name: ""
-  # Path to the mapping for renaming Nextclade output columns
-  # The path should be relative to the ingest directory
-  field_map: "config/nextclade_field_map.tsv"
-  # This is the ID field you would use to match the Nextclade output with the record metadata.
-  # This should be the new name that you have defined in your field map.
-  id_field: "seqName"
diff --git a/ingest/defaults/nextclade_config.yaml b/ingest/defaults/nextclade_config.yaml
@@ -0,0 +1,12 @@
+# Nextclade parameters to include if you are running Nextclade as a part of your ingest workflow
+# Note that this requires a Nextclade dataset to already exist for your pathogen.
+nextclade:
+  # The name of the Nextclade dataset to use for running Nextclade.
+  # Run `nextclade dataset list` to get a full list of available Nextclade datasets
+  dataset_name: ""
+  # Path to the mapping for renaming Nextclade output columns
+  # The path should be relative to the ingest directory
+  field_map: "config/nextclade_field_map.tsv"
+  # This is the ID field you would use to match the Nextclade output with the record metadata.
+  # This should be the new name that you have defined in your field map.
+  id_field: "seqName"