EBIvariation · apriltuesday · Nov 6, 2025 · Sep 9, 2025 · Sep 9, 2025 · Sep 11, 2025
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -4,7 +4,7 @@ on:
   push:
     branches: [ main ]
   pull_request:
-    branches: [ main ]
+    branches: [ main, add-validation-tasks ]
 
 jobs:
   build:

diff --git a/eva_sub_cli/executables/cli.py b/eva_sub_cli/executables/cli.py
@@ -1,24 +1,23 @@
 import sys
 
-import eva_sub_cli
-from eva_sub_cli.exceptions.metadata_template_version_exception import MetadataTemplateVersionException, \
-    MetadataTemplateVersionNotFoundException
-from eva_sub_cli.exceptions.submission_not_found_exception import SubmissionNotFoundException
-from eva_sub_cli.exceptions.submission_status_exception import SubmissionStatusException
-
 if not sys.warnoptions:
     import warnings
-
     warnings.simplefilter("ignore")
 
 import logging
 import os
 from argparse import ArgumentParser
 from ebi_eva_common_pyutils.logger import logging_config
 
+import eva_sub_cli
 from eva_sub_cli import orchestrator
-from eva_sub_cli.orchestrator import VALIDATE, SUBMIT, DOCKER, NATIVE
+from eva_sub_cli.exceptions.metadata_template_version_exception import MetadataTemplateVersionException, \
+    MetadataTemplateVersionNotFoundException
+from eva_sub_cli.exceptions.submission_status_exception import SubmissionStatusException
+from eva_sub_cli.exceptions.submission_not_found_exception import SubmissionNotFoundException
 from eva_sub_cli.file_utils import is_submission_dir_writable, DirLockError, DirLock
+from eva_sub_cli.orchestrator import VALIDATE, SUBMIT, DOCKER, NATIVE
+from eva_sub_cli.validators.validator import ALL_VALIDATION_TASKS
 
 
 def validate_command_line_arguments(args, argparser):
@@ -93,6 +92,9 @@ def parse_args(cmd_line_args):
                            help='Select a task to perform (default SUBMIT). VALIDATE will run the validation'
                                 ' regardless of the outcome of previous runs. SUBMIT will run validate only if'
                                 ' the validation was not performed successfully before and then run the submission.')
+    argparser.add_argument('--validation_tasks', nargs='+', choices=ALL_VALIDATION_TASKS, default=ALL_VALIDATION_TASKS,
+                           type=str.lower, help='Select only a subset of the validation tasks to run. Note that all '
+                                                'tasks need to be successful for the validation to pass')
     argparser.add_argument('--executor', choices=[DOCKER, NATIVE], default=NATIVE, type=str.lower,
                            help='Select the execution type for running validation (default native)')
     credential_group = argparser.add_argument_group('Credentials', 'Specify the ENA Webin credentials you want to use '
@@ -125,7 +127,6 @@ def main():
 
     try:
         # lock the submission directory
-
         with DirLock(os.path.join(args.submission_dir)) as lock:
             # Create the log file
             logging_config.add_file_handler(os.path.join(args.submission_dir, 'eva_submission.log'), logging.DEBUG)

diff --git a/eva_sub_cli/jinja_templates/html/file_validation.html b/eva_sub_cli/jinja_templates/html/file_validation.html
@@ -1,6 +1,6 @@
 
 {% macro file_validation_report(validation_results, file_name) -%}
-    {% for check_type, check_per_file in validation_results.items() %}
+    {% for check_type, check_per_file in validation_results.items() if check_type not in ["trim_down", "version"] %}
         {% set result = check_per_file.get(file_name, {}) %}
         {% if check_type == "assembly_check" %}
             {{ assembly_check(result) }}

diff --git a/eva_sub_cli/jinja_templates/html/report.html b/eva_sub_cli/jinja_templates/html/report.html
@@ -6,6 +6,13 @@
 {% from 'metadata_validation.html' import metadata_validation_report %}
 {% from 'shallow_validation.html' import optional_shallow_validation_report %}
 
+{% macro validation_not_run_yet_message(text="Process not run yet") %}{# Renders a single info-styled row (class "report-section info") showing `text`; used as the placeholder for a report section whose check has no run_status. #}
+    {% set icon = "&#9208;" %}   {# pause symbol ⏸ #}
+    {% set row_class = "report-section info" %}
+    {% set expand_icon = "" %}{# kept empty: no expand arrow is shown for this row #}
+    <div class='{{ row_class }}'><span class="expand_icon">{{ expand_icon }}</span> {{ icon }} {{ text }}</div>
+{% endmacro %}
+
 <html lang="EN">
 <head>
     <meta charset="UTF-8">
@@ -63,7 +70,12 @@ <h2>Metadata validation results</h2>
         Ensures that required fields are present and values are formatted correctly.
         For requirements, please refer to the <a href="https://www.ebi.ac.uk/eva/?Submit-Data" target=”_blank”>EVA website</a>.
     </div>
-    {{ metadata_validation_report(validation_results) }}
+    {% set run_status = validation_results.get('metadata_check', {}).get('run_status', '') %}
+    {% if run_status %}
+        {{ metadata_validation_report(validation_results) }}
+    {% else %}
+        {{ validation_not_run_yet_message() }}
+    {% endif %}
 </section>
 
 <section>
@@ -72,20 +84,31 @@ <h2>VCF validation results</h2>
         Checks whether each file is compliant with the <a href="http://samtools.github.io/hts-specs/VCFv4.4.pdf" target=”_blank”>VCF specification</a>.
         Also checks whether the variants' reference alleles match against the reference assembly.
     </div>
-    {% for file_name in vcf_files %}
-        {% if file_name != "pass"%}
-        <h3>{{ file_name }}</h3>
-        {{ file_validation_report(validation_results, file_name) }}
-        {% endif %}
-    {% endfor %}
+    {% set run_status = validation_results.get('vcf_check', {}).get('run_status', '') %}
+    {% if run_status %}
+        {% for file_name in vcf_files %}
+            {% if file_name != "pass"%}
+            <h3>{{ file_name }}</h3>
+            {{ file_validation_report(validation_results, file_name) }}
+            {% endif %}
+        {% endfor %}
+    {% else %}
+        {{ validation_not_run_yet_message() }}
+    {% endif %}
 </section>
 
 <section>
     <h2>Sample name concordance check</h2>
     <div class="description">
         Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
     </div>
-    {{ sample_name_check_report(validation_results)}}
+    {% set run_status = validation_results.get('sample_check', {}).get('run_status', '') %}
+    {% if run_status %}
+        {{ sample_name_check_report(validation_results)}}
+    {% else %}
+        {{ validation_not_run_yet_message() }}
+
+    {% endif %}
 </section>
 
 <section>
@@ -94,12 +117,17 @@ <h2>Reference genome INSDC check</h2>
         Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC.
         Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
     </div>
-    {% for file_name in fasta_files %}
-        {% if file_name != "pass"%}
-        <h3>{{ file_name }}</h3>
-        {{ fasta_check_report(validation_results, file_name) }}
-        {% endif %}
-    {% endfor %}
+    {% set run_status = validation_results.get('fasta_check', {}).get('run_status', '') %}
+    {% if run_status %}
+       {% for file_name in fasta_files %}
+            {% if file_name != "pass"%}
+            <h3>{{ file_name }}</h3>
+            {{ fasta_check_report(validation_results, file_name) }}
+            {% endif %}
+        {% endfor %}
+    {% else %}
+        {{ validation_not_run_yet_message() }}
+    {% endif %}
 </section>
 
 <script>

diff --git a/eva_sub_cli/jinja_templates/html/shallow_validation.html b/eva_sub_cli/jinja_templates/html/shallow_validation.html
@@ -2,7 +2,7 @@
 {% macro optional_shallow_validation_report(validation_results) -%}
     {% set results = validation_results.get('shallow_validation', {}) %}
 
-    {% if results.get('required') %}
+    {% if validation_results.get('trim_down') %}
     <section>
         <div class="report-section fail collapsible"> <span class="expand_icon">&#9654;</span>
             &#10060; <b>You requested to run the shallow validation, please run full validation before submitting the data</b>

diff --git a/eva_sub_cli/jinja_templates/text/file_validation.txt b/eva_sub_cli/jinja_templates/text/file_validation.txt
@@ -1,6 +1,6 @@
 
 {% macro file_validation_report(validation_results, file_name) -%}
-    {% for check_type, check_per_file in validation_results.items() %}
+    {% for check_type, check_per_file in validation_results.items() if check_type not in ["trim_down", "version"] %}
         {% set result = check_per_file.get(file_name, {}) %}
         {% if check_type == "assembly_check" %}
             {{ assembly_check(result) }}

diff --git a/eva_sub_cli/jinja_templates/text/report.txt b/eva_sub_cli/jinja_templates/text/report.txt
@@ -5,6 +5,11 @@
 {% from 'metadata_validation.txt' import metadata_validation_report %}
 {% from 'shallow_validation.txt' import optional_shallow_validation_report %}
 
+{% macro validation_not_run_yet_message(text="Process not run yet") -%}{# Plain-text placeholder line (icon followed by `text`) for a report section whose check has no run_status. -#}
+{% set icon = "\u23F8" %}{# U+23F8, pause symbol ⏸ #}
+{{ icon }} {{ text }}
+{%- endmacro %}
+
 VALIDATION REPORT
 eva-sub-cli v{{cli_version}}
 
@@ -23,37 +28,56 @@ METADATA VALIDATION RESULTS
 Ensures that required fields are present and values are formatted correctly.
 For requirements, please refer to the EVA website (https://www.ebi.ac.uk/eva/?Submit-Data).
 
-{{ metadata_validation_report(validation_results) }}
-
+{% set run_status = validation_results.get('metadata_check', {}).get('run_status', '') %}
+{% if run_status %}
+    {{ metadata_validation_report(validation_results) }}
+{% else %}
+    {{ validation_not_run_yet_message() }}
+{% endif %}
 -
 
 VCF VALIDATION RESULTS
 Checks whether each file is compliant with the VCF specification (http://samtools.github.io/hts-specs/VCFv4.4.pdf).
 Also checks whether the variants' reference alleles match against the reference assembly.
 
+{% set run_status = validation_results.get('vcf_check', {}).get('run_status', '') %}
+{% if run_status %}
 {% for file_name in vcf_files %}
 {% if file_name != "pass"%}
-	{{ file_name }}
-	{{ file_validation_report(validation_results, file_name) }}
+    {{ file_name }}
+    {{ file_validation_report(validation_results, file_name) }}
 {% endif %}
 {% endfor %}
+{% else %}
+    {{ validation_not_run_yet_message() }}
+{% endif %}
 
 -
 
 SAMPLE NAME CONCORDANCE CHECK
 Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
 
-{{ sample_name_check_report(validation_results)}}
+{% set run_status = validation_results.get('sample_check', {}).get('run_status', '') %}
+{% if run_status %}
+    {{ sample_name_check_report(validation_results) }}
+{% else %}
+    {{ validation_not_run_yet_message() }}
+{% endif %}
 
 -
 
 REFERENCE GENOME INSDC CHECK
 Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC.
 Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
 
+{% set run_status = validation_results.get('fasta_check', {}).get('run_status', '') %}
+{% if run_status %}
 {% for file_name in fasta_files %}
 {% if file_name != "pass"%}
-	{{ file_name }}
-	{{ fasta_check_report(validation_results, file_name) }}
+    {{ file_name }}
+    {{ fasta_check_report(validation_results, file_name) }}
 {% endif %}
 {% endfor %}
+{% else %}
+    {{ validation_not_run_yet_message() }}
+{% endif %}
diff --git a/eva_sub_cli/jinja_templates/text/shallow_validation.txt b/eva_sub_cli/jinja_templates/text/shallow_validation.txt
@@ -1,7 +1,7 @@
 {% macro optional_shallow_validation_report(validation_results) -%}
 
 {% set results = validation_results.get('shallow_validation', {}) %}
-{% if results.get('required') %}
+{% if validation_results.get('trim_down') %}
 
 {{ "\u274C" }} You requested to run the shallow validation, please run full validation before submitting the data
 {% for vcf_file in results.get('metrics') %}

diff --git a/eva_sub_cli/nextflow/validation.nf b/eva_sub_cli/nextflow/validation.nf
@@ -7,6 +7,7 @@ def helpMessage() {
     Validate a set of VCF files and metadata to check if they are valid to be submitted to EVA.
 
     Inputs:
+            --tasks                     Which validation tasks to run
             --vcf_files_mapping         csv file with the mappings for vcf files, fasta and assembly report
             --output_dir                output_directory where the reports will be output
             --metadata_json             Json file describing the project, analysis, samples and files
@@ -15,6 +16,14 @@ def helpMessage() {
     """
 }
 
+// Values from validators.validator.ALL_VALIDATION_TASKS
+VCF_CHECK = 'vcf_check'
+ASSEMBLY_CHECK = 'assembly_check'
+METADATA_CHECK = 'metadata_check'
+SAMPLE_CHECK = 'sample_check'
+
+// Default to running all tasks
+params.tasks = [VCF_CHECK, ASSEMBLY_CHECK, METADATA_CHECK, SAMPLE_CHECK]
 params.vcf_files_mapping = null
 params.output_dir = null
 params.metadata_json = null
@@ -59,7 +68,7 @@ conversion_configuration = "${schema_dir}/$params.conversion_configuration_name"
 
 
 def joinBasePath(path) {
-    if (path){
+    if (path) {
         return params.base_dir + '/' + path
     }
     return 'NO_FILE'
@@ -76,36 +85,41 @@ workflow {
             file(joinBasePath(row.fasta)),
             file(joinBasePath(row.report))
         )}
-    if (params.shallow_validation){
+    if (params.shallow_validation) {
         // create a smaller vcf and fasta then replace the channel
         trim_down_vcf(vcf_and_ref_ch)
         vcf_and_ref_ch = trim_down_vcf.out.vcf_and_ref
     }
     vcf_files = vcf_and_ref_ch.map{row -> row[0]}
     fasta_to_vcfs = vcf_and_ref_ch.map{row -> tuple(row[1], row[0])}.groupTuple(by:0)
-    // VCF checks
-    check_vcf_valid(vcf_and_ref_ch)
-    check_vcf_reference(vcf_and_ref_ch)
-
-    generate_file_size_and_md5_digests(vcf_files)
-    collect_file_size_and_md5(generate_file_size_and_md5_digests.out.file_size_and_digest_info.collect())
-
 
     // Metadata conversion
     if (params.metadata_xlsx && !params.metadata_json){
-        convert_xlsx_2_json(joinBasePath(params.metadata_xlsx))
-        metadata_json = convert_xlsx_2_json.out.metadata_json
-    } else {
-        metadata_json = joinBasePath(params.metadata_json)
-    }
-    if (metadata_json) {
-        // Metadata checks and concordance checks
-        metadata_json_validation(metadata_json)
-        metadata_semantic_check(metadata_json)
-        sample_name_concordance(metadata_json, vcf_files.collect())
+		convert_xlsx_2_json(joinBasePath(params.metadata_xlsx))
+		metadata_json = convert_xlsx_2_json.out.metadata_json
+	} else {
+		metadata_json = joinBasePath(params.metadata_json)
+	}
+	// File size and MD5
+	generate_file_size_and_md5_digests(vcf_files)
+	collect_file_size_and_md5(generate_file_size_and_md5_digests.out.file_size_and_digest_info.collect())
+
+	// Task-specific processing
+    if (params.tasks.contains(VCF_CHECK)) {
+        check_vcf_valid(vcf_and_ref_ch)
         evidence_type_check(metadata_json, vcf_files.collect())
-        insdc_checker(metadata_json, fasta_to_vcfs)
-    }
+	}
+	if (params.tasks.contains(ASSEMBLY_CHECK)) {
+		check_vcf_reference(vcf_and_ref_ch)
+		insdc_checker(metadata_json, fasta_to_vcfs)
+	}
+	if (params.tasks.contains(METADATA_CHECK)) {
+		metadata_json_validation(metadata_json)
+		metadata_semantic_check(metadata_json)
+	}
+	if (params.tasks.contains(SAMPLE_CHECK)) {
+		sample_name_concordance(metadata_json, vcf_files.collect())
+	}
 }
 
 

diff --git a/eva_sub_cli/orchestrator.py b/eva_sub_cli/orchestrator.py
@@ -28,7 +28,7 @@
 from eva_sub_cli.utils import get_project_title_from_ena
 from eva_sub_cli.validators.docker_validator import DockerValidator
 from eva_sub_cli.validators.native_validator import NativeValidator
-from eva_sub_cli.validators.validator import READY_FOR_SUBMISSION_TO_EVA
+from eva_sub_cli.validators.validator import READY_FOR_SUBMISSION_TO_EVA, ALL_VALIDATION_TASKS
 
 VALIDATE = 'validate'
 SUBMIT = 'submit'
@@ -279,8 +279,8 @@ def check_validation_required(tasks, sub_config, username=None, password=None):
 
 
 def orchestrate_process(submission_dir, vcf_files, reference_fasta, metadata_json, metadata_xlsx,
-                        tasks, executor, username=None, password=None, shallow_validation=False, nextflow_config=None,
-                        **kwargs):
+                        tasks, executor, validation_tasks=ALL_VALIDATION_TASKS, username=None, password=None,
+                        shallow_validation=False, nextflow_config=None, **kwargs):
     # load config
     config_file_path = os.path.join(submission_dir, SUB_CLI_CONFIG_FILE)
     sub_config = WritableConfig(config_file_path, version=__version__)
@@ -309,13 +309,14 @@ def orchestrate_process(submission_dir, vcf_files, reference_fasta, metadata_jso
     if VALIDATE in tasks:
         if executor == DOCKER:
             validator = DockerValidator(vcf_files_mapping, submission_dir, project_title, metadata_json, metadata_xlsx,
-                                        metadata_xlsx_version, shallow_validation=shallow_validation,
-                                        submission_config=sub_config)
+                                        metadata_xlsx_version, validation_tasks=validation_tasks,
+                                        shallow_validation=shallow_validation, submission_config=sub_config)
         # default to native execution
         else:
             validator = NativeValidator(vcf_files_mapping, submission_dir, project_title, metadata_json, metadata_xlsx,
-                                        metadata_xlsx_version, shallow_validation=shallow_validation,
-                                        submission_config=sub_config, nextflow_config=nextflow_config)
+                                        metadata_xlsx_version, validation_tasks=validation_tasks,
+                                        shallow_validation=shallow_validation, submission_config=sub_config,
+                                        nextflow_config=nextflow_config)
         with validator:
             validator.validate_and_report()
             if not metadata_json: