Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@ <h2>Metadata validation results</h2>
<section>
<h2>VCF validation results</h2>
<div class="description">
Checks whether each file is compliant with the <a href="http://samtools.github.io/hts-specs/VCFv4.3.pdf" target=”_blank”>VCF specification</a>.
Checks whether each file is compliant with the <a href="http://samtools.github.io/hts-specs/VCFv4.4.pdf" target="_blank">VCF specification</a>.
Also checks whether the variants' reference alleles match against the reference assembly.
</div>
{% for file_name in vcf_files %}
{% if file_name!= "pass"%}
{% if file_name != "pass"%}
<h3>{{ file_name }}</h3>
{{ file_validation_report(validation_results, file_name) }}
{% endif %}
Expand All @@ -95,7 +95,7 @@ <h2>Reference genome INSDC check</h2>
Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
</div>
{% for file_name in fasta_files %}
{% if file_name!= "pass"%}
{% if file_name != "pass"%}
<h3>{{ file_name }}</h3>
{{ fasta_check_report(validation_results, file_name) }}
{% endif %}
Expand Down
46 changes: 46 additions & 0 deletions eva_sub_cli/jinja_templates/text/fasta_check.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{#
  fasta_check_report(validation_results, file_name)

  Plain-text summary of the reference-genome checks for one FASTA file.
  Reads validation_results['fasta_check'][file_name] and renders, in order:
    1. a warning (with the error message) when 'connection_error' is set;
    2. a check mark when 'all_insdc' is truthy; otherwise a cross, the 'report_path', and the
       first 10 entries of 'sequences' whose 'insdc' value is falsy (sequence name + Refget md5);
    3. only when the key 'possible_assemblies' is present: one status line saying whether the
       assembly accession in the metadata is compatible, not compatible, or absent (the key
       'metadata_assembly_compatible' is missing). For the last two cases the report path, the
       accession found in the metadata and the FASTA-compatible accession(s) are listed too.

  NOTE(review): 'associated_analyses' is read without a default before being joined; confirm it
  is always present whenever 'metadata_assembly_compatible' is.
#}
{% macro fasta_check_report(validation_results, file_name) -%}

{% set results_for_fasta = validation_results['fasta_check'][file_name] %}

{% if results_for_fasta.get('connection_error') %}
Warning: The following results may be incomplete due to problems with external services.
Please try again later for complete results.
Error message: {{ results_for_fasta.get('connection_error') }}
{% endif %}

{% if results_for_fasta.get('all_insdc') %}
{{ "\u2714" }} All sequences are INSDC accessioned.
{% else %}
{{ "\u274C" }} Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: {{ results_for_fasta.get('report_path', '') }}
{% set sequence_info_list = results_for_fasta.get('sequences', [])|rejectattr("insdc")|list %}
{% for sequence_info in sequence_info_list[:10] %}
Sequence name: {{ sequence_info.get('sequence_name') }}
Refget md5: {{ sequence_info.get('sequence_md5') }}
---
{% endfor %}
{% endif %}

{% if 'possible_assemblies' in results_for_fasta %}
{% if 'metadata_assembly_compatible' in results_for_fasta %}
{% set analysis_text = results_for_fasta.get('associated_analyses')|join(", ") %}
{% if results_for_fasta.get('metadata_assembly_compatible') %}
{% set icon = "\u2714" %}
{% set text = analysis_text + ": Assembly accession in metadata is compatible" %}
{% else %}
{% set icon = "\u274C" %}
{% set text = analysis_text + ": Assembly accession in metadata is not compatible" %}
{% endif %}
{% else %}
{% set icon = "\u274C" %}
{% set text = "No assembly accession found in metadata" %}
{% endif %}
{{ icon }} {{ text }}
{% if 'metadata_assembly_compatible' not in results_for_fasta or not results_for_fasta['metadata_assembly_compatible'] %}
Full report: {{ results_for_fasta.get('report_path', '') }}
Assembly accession found in metadata: {{ results_for_fasta.get('assembly_in_metadata', 'Not found') }}
Assembly accession(s) compatible with FASTA: {{ results_for_fasta.get('possible_assemblies')|join(", ") }}
{% endif %}
{% endif %}

{%- endmacro %}
58 changes: 58 additions & 0 deletions eva_sub_cli/jinja_templates/text/file_validation.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@

{#
  file_validation_report(validation_results, file_name)

  Walks every (check_type, per-file results) pair in validation_results, picks the entry for
  file_name (an empty mapping when the file has none) and renders it with assembly_check or
  vcf_check. Check types other than "assembly_check" and "vcf_check" produce no output here.
#}
{% macro file_validation_report(validation_results, file_name) -%}
{% for check_type, check_per_file in validation_results.items() %}
{% set result = check_per_file.get(file_name, {}) %}
{% if check_type == "assembly_check" %}
{{ assembly_check(result) }}
{% elif check_type == "vcf_check" %}
{{ vcf_check(result) }}
{% endif %}
{% endfor %}
{%- endmacro %}

{#
  vcf_check(vcf_check_result)

  Renders the VCF format check for one file: a status line with the critical and non-critical
  error counts (both default to 0), prefixed by a cross when either count is above zero and by a
  check mark otherwise. When 'critical_list' or 'error_list' is non-empty, the report path and
  at most 10 entries of each list follow.
#}
{% macro vcf_check(vcf_check_result) %}
{% set critical_count = vcf_check_result.get("critical_count", 0) %}
{% set error_count = vcf_check_result.get("error_count", 0) %}
{% if critical_count > 0 or error_count > 0 %}
{% set icon = "\u274C" %}
{% else %}
{% set icon = "\u2714" %}
{% endif %}
{{ icon }} VCF check: {{ critical_count }} critical errors, {{ error_count }} non-critical errors

{% set critical_list = vcf_check_result.get("critical_list") %}
{% set error_list = vcf_check_result.get("error_list") %}
{% if critical_list or error_list%}
First 10 errors per category are below. Full report: {{ vcf_check_result.get('report_path', '') }}
{% for error in critical_list[:10] %}
Critical error: {{ error }}
{% endfor %}
{% for error in error_list[:10] %}
Non-critical error: {{ error }}
{% endfor %}
{% endif %}
{%- endmacro %}

{#
  assembly_check(assembly_check_result)

  Renders the reference-allele check for one file as "match/total (percentage%)". The percentage
  is match / total * 100 rounded to 2 decimals, or 0 when total is falsy (avoids dividing by
  zero). A cross is shown when 'nb_mismatch' is above zero or total is 0, a check mark otherwise.
  When 'mismatch_list' or 'error_list' is non-empty, the report path follows, then at most 10
  entries of 'error_list' (as parsing errors) and at most 10 of 'mismatch_list'.
#}
{% macro assembly_check(assembly_check_result) %}
{% set nb_match = assembly_check_result.get("match", 0) %}
{% set nb_total = assembly_check_result.get("total", 0) %}
{% set match_percentage = nb_match / nb_total * 100 if nb_total else 0 %}
{% if assembly_check_result.get("nb_mismatch", 0) > 0 or nb_total == 0 %}
{% set icon = "\u274C" %}
{% else %}
{% set icon = "\u2714" %}
{% endif %}
{{ icon }} Assembly check: {{ nb_match }}/{{ nb_total }} ({{ match_percentage|round(2) }}%)

{% set mismatch_list = assembly_check_result.get("mismatch_list") %}
{% set error_list = assembly_check_result.get("error_list") %}
{% if mismatch_list or error_list %}
First 10 errors per category are below. Full report: {{ assembly_check_result.get('report_path', '') }}
{% for error in error_list[:10] %}
Parsing error: {{ error }}
{% endfor %}
{% for error in mismatch_list[:10] %}
Mismatch error: {{ error }}
{% endfor %}
{% endif %}
{%- endmacro %}
33 changes: 33 additions & 0 deletions eva_sub_cli/jinja_templates/text/metadata_validation.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

{#
  metadata_validation_report(validation_results)

  Renders the metadata check taken from validation_results['metadata_check'] (empty mapping when
  absent). The status line carries a cross when 'spreadsheet_errors' or 'json_errors' is
  non-empty and a check mark otherwise. Every spreadsheet error is then listed with its sheet,
  row, column and description, and every JSON error with its property and description; each
  group is preceded by its own report path. Unlike the per-file checks, the lists are not
  truncated.
#}
{% macro metadata_validation_report(validation_results) -%}

{% set results = validation_results.get('metadata_check', {}) %}
{% set spreadsheet_errors = results.get('spreadsheet_errors', []) %}
{% set json_errors = results.get('json_errors', []) %}
{% set has_errors = spreadsheet_errors or json_errors %}
{% if has_errors %}
{% set icon = "\u274C" %}
{% else %}
{% set icon = "\u2714" %}
{% endif %}
{{ icon }} Metadata validation check

{% if spreadsheet_errors %}
Full report: {{ results.get('spreadsheet_report_path', '') }}
{% for error in spreadsheet_errors %}
Sheet: {{ error.get('sheet') }} | Row: {{ error.get('row') }} | Column: {{ error.get('column') }}
Error: {{ error.get('description') }}
---
{% endfor %}
{% endif %}

{% if json_errors %}
Full report: {{ results.get('json_report_path', '') }}
{% for error in json_errors %}
JSON Property: {{ error.get('property') }}
Error: {{ error.get('description') }}
---
{% endfor %}
{% endif %}

{%- endmacro %}
21 changes: 21 additions & 0 deletions eva_sub_cli/jinja_templates/text/project_details.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{#
  project_details_report(project_title, validation_date, submission_dir, vcf_fasta_analysis_mapping)

  Renders the project summary: the project title ("Not Found" when project_title is falsy), the
  validation date and the submission directory. When vcf_fasta_analysis_mapping is non-empty,
  a "Files mapping" section lists the vcf_file, fasta_file and analysis of every mapping entry.
#}
{% macro project_details_report(project_title, validation_date, submission_dir, vcf_fasta_analysis_mapping) -%}

{% if project_title %}
Project Title: {{ project_title }}
{% else %}
Project Title: Not Found
{% endif %}
Validation Date: {{ validation_date }}
Submission Directory: {{ submission_dir }}

{% if vcf_fasta_analysis_mapping %}
Files mapping:
{% for mapping in vcf_fasta_analysis_mapping %}
---
VCF File: {{ mapping.vcf_file }}
Fasta File: {{ mapping.fasta_file }}
Analysis: {{ mapping.analysis }}
{% endfor %}
{% endif %}

{%- endmacro %}
59 changes: 59 additions & 0 deletions eva_sub_cli/jinja_templates/text/report.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{#
  Entry template of the plain-text validation report.

  Context variables used: cli_version, validation_results, project_title, validation_date,
  submission_dir, vcf_fasta_analysis_mapping, vcf_files and fasta_files.
  Sections are rendered in a fixed order (shallow-validation notice, project summary, metadata,
  VCF, sample names, reference genome) by macros imported from the sibling templates.
  In the per-file loops, entries named "pass" are skipped.
-#}
{% from 'project_details.txt' import project_details_report %}
{% from 'file_validation.txt' import file_validation_report %}
{% from 'sample_name_check.txt' import sample_name_check_report %}
{% from 'fasta_check.txt' import fasta_check_report %}
{% from 'metadata_validation.txt' import metadata_validation_report %}
{% from 'shallow_validation.txt' import optional_shallow_validation_report %}

VALIDATION REPORT
eva-sub-cli v{{cli_version}}

{{ optional_shallow_validation_report(validation_results) }}

-

PROJECT SUMMARY
General details about the project

{{ project_details_report(project_title, validation_date, submission_dir, vcf_fasta_analysis_mapping) }}

-

METADATA VALIDATION RESULTS
Ensures that required fields are present and values are formatted correctly.
For requirements, please refer to the EVA website (https://www.ebi.ac.uk/eva/?Submit-Data).

{{ metadata_validation_report(validation_results) }}

-

VCF VALIDATION RESULTS
Checks whether each file is compliant with the VCF specification (http://samtools.github.io/hts-specs/VCFv4.4.pdf).
Also checks whether the variants' reference alleles match against the reference assembly.

{% for file_name in vcf_files %}
{% if file_name != "pass"%}
{{ file_name }}
{{ file_validation_report(validation_results, file_name) }}
{% endif %}
{% endfor %}

-

SAMPLE NAME CONCORDANCE CHECK
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.

{{ sample_name_check_report(validation_results)}}

-

REFERENCE GENOME INSDC CHECK
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC.
Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.

{% for file_name in fasta_files %}
{% if file_name != "pass"%}
{{ file_name }}
{{ fasta_check_report(validation_results, file_name) }}
{% endif %}
{% endfor %}
24 changes: 24 additions & 0 deletions eva_sub_cli/jinja_templates/text/sample_name_check.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

{#
  sample_name_check_report(validation_results)

  Renders the sample-name concordance check taken from validation_results['sample_check']
  (empty mapping when absent). For every analysis in 'results_per_analysis' a status line is
  printed: a cross when the analysis has a truthy 'difference', a check mark otherwise. For
  analyses with a difference, the report path and the first 10 sample names of each direction
  ('more_metadata_submitted_files', 'more_submitted_files_metadata') follow.

  The 'difference' flag is looked up once per analysis, and both sample lists fall back to an
  empty list so a missing or None entry renders as an empty value.
#}
{% macro sample_name_check_report(validation_results) -%}
{% set results = validation_results.get('sample_check', {}) %}

{% for analysis, results_for_analysis in results.get('results_per_analysis', {}).items() %}
{% set has_difference = results_for_analysis.get('difference') %}
{% if has_difference %}
{% set icon = "\u274C" %}
{% set text = "Sample names in metadata do not match with those in VCF files" %}
{% else %}
{% set icon = "\u2714" %}
{% set text = "Sample names in metadata match with those in VCF files" %}
{% endif %}
{{ icon }} {{ analysis }}: {{ text }}

{% if has_difference %}
First 10 errors per category are below. Full report: {{ results.get('report_path', '') }}
Samples described in the metadata but not in the VCF files: {{ (results_for_analysis.get('more_metadata_submitted_files') or [])[:10]|join(", ") }}
Samples in the VCF files but not described in the metadata: {{ (results_for_analysis.get('more_submitted_files_metadata') or [])[:10]|join(", ") }}
{% endif %}
{% endfor %}
{%- endmacro %}
16 changes: 16 additions & 0 deletions eva_sub_cli/jinja_templates/text/shallow_validation.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{#
  optional_shallow_validation_report(validation_results)

  Renders a notice only when validation_results['shallow_validation']['required'] is truthy;
  otherwise the macro produces no text. The notice is followed by one entry per VCF file in
  'metrics' showing the values stored under 'trim_down_vcf_record' and 'number_sequence_found'.

  'metrics' falls back to an empty mapping so that a missing or None value renders the notice
  without rows instead of failing on iteration.
#}
{% macro optional_shallow_validation_report(validation_results) -%}

{% set results = validation_results.get('shallow_validation', {}) %}
{% if results.get('required') %}

{{ "\u274C" }} You requested to run the shallow validation, please run full validation before submitting the data
{% for vcf_file, vcf_metrics in (results.get('metrics') or {}).items() %}
---
VCF File: {{ vcf_file }}
Variant lines validated in VCF: {{ vcf_metrics.get('trim_down_vcf_record') }}
Entries used in Fasta: {{ vcf_metrics.get('number_sequence_found') }}
{% endfor %}

{% endif %}

{%- endmacro %}
17 changes: 13 additions & 4 deletions eva_sub_cli/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ def get_logo_data():
return logo_data


def generate_html_report(validation_results, validation_date, submission_dir, vcf_fasta_analysis_mapping,
project_title=None):
def generate_report(validation_results, validation_date, submission_dir, vcf_fasta_analysis_mapping, project_title,
subdir, template_file):
vcf_files = sorted(set([file_name
for check in validation_results if check in ["vcf_check", "assembly_check"]
for file_name in validation_results[check]
]))
fasta_files = sorted([file_name for file_name in validation_results['fasta_check']])
template = Environment(
loader=FileSystemLoader(os.path.join(current_dir, 'jinja_templates'))
).get_template('html_report.html')
loader=FileSystemLoader(os.path.join(current_dir, 'jinja_templates', subdir))
).get_template(template_file)
rendered_template = template.render(
cli_version=eva_sub_cli.__version__,
logo_data=get_logo_data(),
Expand All @@ -38,3 +38,12 @@ def generate_html_report(validation_results, validation_date, submission_dir, vc
)
return re.sub('\s+\n', '\n', rendered_template)


def generate_html_report(validation_results, validation_date, submission_dir, vcf_fasta_analysis_mapping,
                         project_title=None):
    """Render the validation results as the HTML report.

    Delegates to ``generate_report`` with the ``report.html`` template found in the ``html``
    sub-directory of ``jinja_templates``.

    :param validation_results: mapping of check name to the results of that check
    :param validation_date: date of the validation run, passed through to the template
    :param submission_dir: submission directory, passed through to the template
    :param vcf_fasta_analysis_mapping: VCF/FASTA/analysis associations, passed through to the template
    :param project_title: title of the project; optional so callers that do not know it can omit it
    :return: the rendered report, as returned by ``generate_report``
    """
    return generate_report(
        validation_results, validation_date, submission_dir, vcf_fasta_analysis_mapping, project_title,
        subdir='html', template_file='report.html'
    )


def generate_text_report(validation_results, validation_date, submission_dir, vcf_fasta_analysis_mapping,
                         project_title=None):
    """Render the validation results as the plain-text report.

    Delegates to ``generate_report`` with the ``report.txt`` template found in the ``text``
    sub-directory of ``jinja_templates``.

    :param validation_results: mapping of check name to the results of that check
    :param validation_date: date of the validation run, passed through to the template
    :param submission_dir: submission directory, passed through to the template
    :param vcf_fasta_analysis_mapping: VCF/FASTA/analysis associations, passed through to the template
    :param project_title: title of the project; optional, the text template prints "Not Found"
        when it is falsy
    :return: the rendered report, as returned by ``generate_report``
    """
    return generate_report(
        validation_results, validation_date, submission_dir, vcf_fasta_analysis_mapping, project_title,
        subdir='text', template_file='report.txt'
    )
20 changes: 14 additions & 6 deletions eva_sub_cli/validators/validator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python
import csv
import datetime
import json
import logging
import os
from functools import lru_cache, cached_property
Expand All @@ -13,7 +12,7 @@
from eva_sub_cli import ETC_DIR, SUB_CLI_CONFIG_FILE, __version__
from eva_sub_cli.file_utils import backup_file_or_directory, resolve_single_file_path
from eva_sub_cli.metadata import EvaMetadataJson
from eva_sub_cli.report import generate_html_report
from eva_sub_cli.report import generate_html_report, generate_text_report
from ebi_eva_common_pyutils.logger import logging_config, AppLogger

from eva_sub_cli.validators.validation_results_parsers import parse_assembly_check_log, parse_assembly_check_report, \
Expand Down Expand Up @@ -501,9 +500,18 @@ def create_reports(self):
report_html = generate_html_report(self.results, self.validation_date, self.submission_dir,
self.get_vcf_fasta_analysis_mapping(),
self.project_title)
file_path = os.path.join(self.output_dir, 'report.html')
with open(file_path, "w") as f:
html_path = os.path.join(self.output_dir, 'report.html')
with open(html_path, "w") as f:
f.write(report_html)

report_text = generate_text_report(self.results, self.validation_date, self.submission_dir,
self.get_vcf_fasta_analysis_mapping(),
self.project_title)
text_path = os.path.join(self.output_dir, 'report.txt')
with open(text_path, "w") as f:
f.write(report_text)

self.info(f'Validation result: {"SUCCESS" if self.verify_ready_for_submission_to_eva() else "FAILURE"}')
self.info(f'View the full report in your browser: {file_path}')
return file_path
self.info(f'View the full report in your browser: {html_path}')
self.info(f'Or view a text version: {text_path}')
return html_path, text_path
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ <h2>Metadata validation results</h2>
<section>
<h2>VCF validation results</h2>
<div class="description">
Checks whether each file is compliant with the <a href="http://samtools.github.io/hts-specs/VCFv4.3.pdf" target=”_blank”>VCF specification</a>.
Checks whether each file is compliant with the <a href="http://samtools.github.io/hts-specs/VCFv4.4.pdf" target="_blank">VCF specification</a>.
Also checks whether the variants' reference alleles match against the reference assembly.
</div>
<h3>input_fail.vcf</h3>
Expand Down
Loading