Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
branches: [ main, add-validation-tasks ]

jobs:
build:
Expand Down
19 changes: 10 additions & 9 deletions eva_sub_cli/executables/cli.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,23 @@
import sys

import eva_sub_cli
from eva_sub_cli.exceptions.metadata_template_version_exception import MetadataTemplateVersionException, \
MetadataTemplateVersionNotFoundException
from eva_sub_cli.exceptions.submission_not_found_exception import SubmissionNotFoundException
from eva_sub_cli.exceptions.submission_status_exception import SubmissionStatusException

if not sys.warnoptions:
import warnings

warnings.simplefilter("ignore")

import logging
import os
from argparse import ArgumentParser
from ebi_eva_common_pyutils.logger import logging_config

import eva_sub_cli
from eva_sub_cli import orchestrator
from eva_sub_cli.orchestrator import VALIDATE, SUBMIT, DOCKER, NATIVE
from eva_sub_cli.exceptions.metadata_template_version_exception import MetadataTemplateVersionException, \
MetadataTemplateVersionNotFoundException
from eva_sub_cli.exceptions.submission_status_exception import SubmissionStatusException
from eva_sub_cli.exceptions.submission_not_found_exception import SubmissionNotFoundException
from eva_sub_cli.file_utils import is_submission_dir_writable, DirLockError, DirLock
from eva_sub_cli.orchestrator import VALIDATE, SUBMIT, DOCKER, NATIVE
from eva_sub_cli.validators.validator import ALL_VALIDATION_TASKS


def validate_command_line_arguments(args, argparser):
Expand Down Expand Up @@ -93,6 +92,9 @@ def parse_args(cmd_line_args):
help='Select a task to perform (default SUBMIT). VALIDATE will run the validation'
' regardless of the outcome of previous runs. SUBMIT will run validate only if'
' the validation was not performed successfully before and then run the submission.')
argparser.add_argument('--validation_tasks', nargs='+', choices=ALL_VALIDATION_TASKS, default=ALL_VALIDATION_TASKS,
type=str.lower, help='Select only a subset of the validation tasks to run. Note that all '
'tasks need to be successful for the validation to pass')
argparser.add_argument('--executor', choices=[DOCKER, NATIVE], default=NATIVE, type=str.lower,
help='Select the execution type for running validation (default native)')
credential_group = argparser.add_argument_group('Credentials', 'Specify the ENA Webin credentials you want to use '
Expand Down Expand Up @@ -125,7 +127,6 @@ def main():

try:
# lock the submission directory

with DirLock(os.path.join(args.submission_dir)) as lock:
# Create the log file
logging_config.add_file_handler(os.path.join(args.submission_dir, 'eva_submission.log'), logging.DEBUG)
Expand Down
2 changes: 1 addition & 1 deletion eva_sub_cli/jinja_templates/html/file_validation.html
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

{% macro file_validation_report(validation_results, file_name) -%}
{% for check_type, check_per_file in validation_results.items() %}
{% for check_type, check_per_file in validation_results.items() if check_type not in ["trim_down", "version"] %}
{% set result = check_per_file.get(file_name, {}) %}
{% if check_type == "assembly_check" %}
{{ assembly_check(result) }}
Expand Down
56 changes: 42 additions & 14 deletions eva_sub_cli/jinja_templates/html/report.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@
{% from 'metadata_validation.html' import metadata_validation_report %}
{% from 'shallow_validation.html' import optional_shallow_validation_report %}

{% macro validation_not_run_yet_message(text="Process not run yet") %}
{#- Renders a single report row (CSS classes "report-section info") containing an empty expand-icon span, the pause icon and `text`. #}
{% set icon = "⏸" %} {# pause symbol ⏸ #}
{% set row_class = "report-section info" %}
{% set expand_icon = "" %}
<div class='{{ row_class }}'><span class="expand_icon">{{ expand_icon }}</span> {{ icon }} {{ text }}</div>
{% endmacro %}

<html lang="EN">
<head>
<meta charset="UTF-8">
Expand Down Expand Up @@ -63,7 +70,12 @@ <h2>Metadata validation results</h2>
Ensures that required fields are present and values are formatted correctly.
For requirements, please refer to the <a href="https://www.ebi.ac.uk/eva/?Submit-Data" target="_blank">EVA website</a>.
</div>
{{ metadata_validation_report(validation_results) }}
{% set run_status = validation_results.get('metadata_check', {}).get('run_status', '') %}
{% if run_status %}
{{ metadata_validation_report(validation_results) }}
{% else %}
{{ validation_not_run_yet_message() }}
{% endif %}
</section>

<section>
Expand All @@ -72,20 +84,31 @@ <h2>VCF validation results</h2>
Checks whether each file is compliant with the <a href="http://samtools.github.io/hts-specs/VCFv4.4.pdf" target="_blank">VCF specification</a>.
Also checks whether the variants' reference alleles match against the reference assembly.
</div>
{% for file_name in vcf_files %}
{% if file_name != "pass"%}
<h3>{{ file_name }}</h3>
{{ file_validation_report(validation_results, file_name) }}
{% endif %}
{% endfor %}
{% set run_status = validation_results.get('vcf_check', {}).get('run_status', '') %}
{% if run_status %}
{% for file_name in vcf_files %}
{% if file_name != "pass"%}
<h3>{{ file_name }}</h3>
{{ file_validation_report(validation_results, file_name) }}
{% endif %}
{% endfor %}
{% else %}
{{ validation_not_run_yet_message() }}
{% endif %}
</section>

<section>
<h2>Sample name concordance check</h2>
<div class="description">
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
</div>
{{ sample_name_check_report(validation_results)}}
{% set run_status = validation_results.get('sample_check', {}).get('run_status', '') %}
{% if run_status %}
{{ sample_name_check_report(validation_results)}}
{% else %}
{{ validation_not_run_yet_message() }}

{% endif %}
</section>

<section>
Expand All @@ -94,12 +117,17 @@ <h2>Reference genome INSDC check</h2>
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC.
Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
</div>
{% for file_name in fasta_files %}
{% if file_name != "pass"%}
<h3>{{ file_name }}</h3>
{{ fasta_check_report(validation_results, file_name) }}
{% endif %}
{% endfor %}
{% set run_status = validation_results.get('fasta_check', {}).get('run_status', '') %}
{% if run_status %}
{% for file_name in fasta_files %}
{% if file_name != "pass"%}
<h3>{{ file_name }}</h3>
{{ fasta_check_report(validation_results, file_name) }}
{% endif %}
{% endfor %}
{% else %}
{{ validation_not_run_yet_message() }}
{% endif %}
</section>

<script>
Expand Down
2 changes: 1 addition & 1 deletion eva_sub_cli/jinja_templates/html/shallow_validation.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{% macro optional_shallow_validation_report(validation_results) -%}
{% set results = validation_results.get('shallow_validation', {}) %}

{% if results.get('required') %}
{% if validation_results.get('trim_down') %}
<section>
<div class="report-section fail collapsible"> <span class="expand_icon">&#9654;</span>
&#10060; <b>You requested to run the shallow validation, please run full validation before submitting the data</b>
Expand Down
2 changes: 1 addition & 1 deletion eva_sub_cli/jinja_templates/text/file_validation.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

{% macro file_validation_report(validation_results, file_name) -%}
{% for check_type, check_per_file in validation_results.items() %}
{% for check_type, check_per_file in validation_results.items() if check_type not in ["trim_down", "version"] %}
{% set result = check_per_file.get(file_name, {}) %}
{% if check_type == "assembly_check" %}
{{ assembly_check(result) }}
Expand Down
38 changes: 31 additions & 7 deletions eva_sub_cli/jinja_templates/text/report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
{% from 'metadata_validation.txt' import metadata_validation_report %}
{% from 'shallow_validation.txt' import optional_shallow_validation_report %}

{% macro validation_not_run_yet_message(text="Process not run yet") -%}
{# Outputs the pause icon (U+23F8) followed by a space and `text`; the "-" markers on the macro tags strip the whitespace around the output. -#}
{% set icon = "\u23F8" %}
{{ icon }} {{ text }}
{%- endmacro %}

VALIDATION REPORT
eva-sub-cli v{{cli_version}}

Expand All @@ -23,37 +28,56 @@ METADATA VALIDATION RESULTS
Ensures that required fields are present and values are formatted correctly.
For requirements, please refer to the EVA website (https://www.ebi.ac.uk/eva/?Submit-Data).

{{ metadata_validation_report(validation_results) }}

{% set run_status = validation_results.get('metadata_check', {}).get('run_status', '') %}
{% if run_status %}
{{ metadata_validation_report(validation_results) }}
{% else %}
{{ validation_not_run_yet_message() }}
{% endif %}
-

VCF VALIDATION RESULTS
Checks whether each file is compliant with the VCF specification (http://samtools.github.io/hts-specs/VCFv4.4.pdf).
Also checks whether the variants' reference alleles match against the reference assembly.

{% set run_status = validation_results.get('vcf_check', {}).get('run_status', '') %}
{% if run_status %}
{% for file_name in vcf_files %}
{% if file_name != "pass"%}
{{ file_name }}
{{ file_validation_report(validation_results, file_name) }}
{{ file_name }}
{{ file_validation_report(validation_results, file_name) }}
{% endif %}
{% endfor %}
{% else %}
{{ validation_not_run_yet_message() }}
{% endif %}

-

SAMPLE NAME CONCORDANCE CHECK
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.

{{ sample_name_check_report(validation_results)}}
{% set run_status = validation_results.get('sample_check', {}).get('run_status', '') %}
{% if run_status %}
{{ sample_name_check_report(validation_results) }}
{% else %}
{{ validation_not_run_yet_message() }}
{% endif %}

-

REFERENCE GENOME INSDC CHECK
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC.
Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.

{% set run_status = validation_results.get('fasta_check', {}).get('run_status', '') %}
{% if run_status %}
{% for file_name in fasta_files %}
{% if file_name != "pass"%}
{{ file_name }}
{{ fasta_check_report(validation_results, file_name) }}
{{ file_name }}
{{ fasta_check_report(validation_results, file_name) }}
{% endif %}
{% endfor %}
{% else %}
{{ validation_not_run_yet_message() }}
{% endif %}
2 changes: 1 addition & 1 deletion eva_sub_cli/jinja_templates/text/shallow_validation.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{% macro optional_shallow_validation_report(validation_results) -%}

{% set results = validation_results.get('shallow_validation', {}) %}
{% if results.get('required') %}
{% if validation_results.get('trim_down') %}

{{ "\u274C" }} You requested to run the shallow validation, please run full validation before submitting the data
{% for vcf_file in results.get('metrics') %}
Expand Down
56 changes: 35 additions & 21 deletions eva_sub_cli/nextflow/validation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ def helpMessage() {
Validate a set of VCF files and metadata to check if they are valid to be submitted to EVA.

Inputs:
--tasks Which validation tasks to run
--vcf_files_mapping csv file with the mappings for vcf files, fasta and assembly report
--output_dir output_directory where the reports will be output
--metadata_json Json file describing the project, analysis, samples and files
Expand All @@ -15,6 +16,14 @@ def helpMessage() {
"""
}

// Values from validators.validator.ALL_VALIDATION_TASKS
VCF_CHECK = 'vcf_check'
ASSEMBLY_CHECK = 'assembly_check'
METADATA_CHECK = 'metadata_check'
SAMPLE_CHECK = 'sample_check'

// Default to running all tasks
params.tasks = [VCF_CHECK, ASSEMBLY_CHECK, METADATA_CHECK, SAMPLE_CHECK]
params.vcf_files_mapping = null
params.output_dir = null
params.metadata_json = null
Expand Down Expand Up @@ -59,7 +68,7 @@ conversion_configuration = "${schema_dir}/$params.conversion_configuration_name"


def joinBasePath(path) {
if (path){
if (path) {
return params.base_dir + '/' + path
}
return 'NO_FILE'
Expand All @@ -76,36 +85,41 @@ workflow {
file(joinBasePath(row.fasta)),
file(joinBasePath(row.report))
)}
if (params.shallow_validation){
if (params.shallow_validation) {
// create a smaller vcf and fasta then replace the channel
trim_down_vcf(vcf_and_ref_ch)
vcf_and_ref_ch = trim_down_vcf.out.vcf_and_ref
}
vcf_files = vcf_and_ref_ch.map{row -> row[0]}
fasta_to_vcfs = vcf_and_ref_ch.map{row -> tuple(row[1], row[0])}.groupTuple(by:0)
// VCF checks
check_vcf_valid(vcf_and_ref_ch)
check_vcf_reference(vcf_and_ref_ch)

generate_file_size_and_md5_digests(vcf_files)
collect_file_size_and_md5(generate_file_size_and_md5_digests.out.file_size_and_digest_info.collect())


// Metadata conversion
if (params.metadata_xlsx && !params.metadata_json){
convert_xlsx_2_json(joinBasePath(params.metadata_xlsx))
metadata_json = convert_xlsx_2_json.out.metadata_json
} else {
metadata_json = joinBasePath(params.metadata_json)
}
if (metadata_json) {
// Metadata checks and concordance checks
metadata_json_validation(metadata_json)
metadata_semantic_check(metadata_json)
sample_name_concordance(metadata_json, vcf_files.collect())
convert_xlsx_2_json(joinBasePath(params.metadata_xlsx))
metadata_json = convert_xlsx_2_json.out.metadata_json
} else {
metadata_json = joinBasePath(params.metadata_json)
}
// File size and MD5
generate_file_size_and_md5_digests(vcf_files)
collect_file_size_and_md5(generate_file_size_and_md5_digests.out.file_size_and_digest_info.collect())

// Task-specific processing
if (params.tasks.contains(VCF_CHECK)) {
check_vcf_valid(vcf_and_ref_ch)
evidence_type_check(metadata_json, vcf_files.collect())
insdc_checker(metadata_json, fasta_to_vcfs)
}
}
if (params.tasks.contains(ASSEMBLY_CHECK)) {
check_vcf_reference(vcf_and_ref_ch)
insdc_checker(metadata_json, fasta_to_vcfs)
}
if (params.tasks.contains(METADATA_CHECK)) {
metadata_json_validation(metadata_json)
metadata_semantic_check(metadata_json)
}
if (params.tasks.contains(SAMPLE_CHECK)) {
sample_name_concordance(metadata_json, vcf_files.collect())
}
}


Expand Down
15 changes: 8 additions & 7 deletions eva_sub_cli/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from eva_sub_cli.utils import get_project_title_from_ena
from eva_sub_cli.validators.docker_validator import DockerValidator
from eva_sub_cli.validators.native_validator import NativeValidator
from eva_sub_cli.validators.validator import READY_FOR_SUBMISSION_TO_EVA
from eva_sub_cli.validators.validator import READY_FOR_SUBMISSION_TO_EVA, ALL_VALIDATION_TASKS

VALIDATE = 'validate'
SUBMIT = 'submit'
Expand Down Expand Up @@ -279,8 +279,8 @@ def check_validation_required(tasks, sub_config, username=None, password=None):


def orchestrate_process(submission_dir, vcf_files, reference_fasta, metadata_json, metadata_xlsx,
tasks, executor, username=None, password=None, shallow_validation=False, nextflow_config=None,
**kwargs):
tasks, executor, validation_tasks=ALL_VALIDATION_TASKS, username=None, password=None,
shallow_validation=False, nextflow_config=None, **kwargs):
# load config
config_file_path = os.path.join(submission_dir, SUB_CLI_CONFIG_FILE)
sub_config = WritableConfig(config_file_path, version=__version__)
Expand Down Expand Up @@ -309,13 +309,14 @@ def orchestrate_process(submission_dir, vcf_files, reference_fasta, metadata_jso
if VALIDATE in tasks:
if executor == DOCKER:
validator = DockerValidator(vcf_files_mapping, submission_dir, project_title, metadata_json, metadata_xlsx,
metadata_xlsx_version, shallow_validation=shallow_validation,
submission_config=sub_config)
metadata_xlsx_version, validation_tasks=validation_tasks,
shallow_validation=shallow_validation, submission_config=sub_config)
# default to native execution
else:
validator = NativeValidator(vcf_files_mapping, submission_dir, project_title, metadata_json, metadata_xlsx,
metadata_xlsx_version, shallow_validation=shallow_validation,
submission_config=sub_config, nextflow_config=nextflow_config)
metadata_xlsx_version, validation_tasks=validation_tasks,
shallow_validation=shallow_validation, submission_config=sub_config,
nextflow_config=nextflow_config)
with validator:
validator.validate_and_report()
if not metadata_json:
Expand Down
Loading