From 7aea9973c693b28f4e89aecd8e8821e53f87802d Mon Sep 17 00:00:00 2001
From: james hadfield
Date: Wed, 16 Aug 2023 21:46:36 +1200
Subject: [PATCH] Extend annotations schema

See for the context for these additions.
---
Review notes (this section is ignored by `git am`):
- The CDS key pattern is `^(?!nuc$)...` rather than `^(?!nuc)...`: an
  unanchored lookahead rejects every key that merely starts with "nuc"
  (e.g. "nucleocapsid"), so such CDSs would never be validated as CDSs.
- tests/test_validate.py now ends with a trailing newline.

 augur/data/schema-annotations.json | 62 ++++++++++++++++++++++---
 tests/test_validate.py             | 80 +++++++++++++++++++++++++++++-
 2 files changed, 135 insertions(+), 7 deletions(-)

diff --git a/augur/data/schema-annotations.json b/augur/data/schema-annotations.json
index 7e09b9b0e..c1647d925 100644
--- a/augur/data/schema-annotations.json
+++ b/augur/data/schema-annotations.json
@@ -6,21 +6,52 @@
         "nuc": {
             "type": "object",
             "allOf": [{ "$ref": "#/$defs/startend" }],
+            "properties": {
+                "start": {
+                    "enum": [1],
+                    "$comment": "nuc must begin at 1"
+                },
+                "strand": {
+                    "type": "string",
+                    "enum":["+"],
+                    "description": "Strand is optional for nuc, as it should be +ve for all genomes (-ve strand genomes are reverse complemented)",
+                    "$comment": "Auspice will not proceed if the JSON has strand='-'"
+                }
+            },
             "additionalProperties": true,
-            "$comment": "All other properties are unused by Auspice. Strand is always considered to be positive."
+            "$comment": "All other properties are unused by Auspice."
         }
     },
     "required": ["nuc"],
     "patternProperties": {
-        "^[a-zA-Z0-9*_-]+$": {
+        "^(?!nuc$)[a-zA-Z0-9*_-]+$": {
+            "$comment": "Each object here defines a single CDS. Only the exact key 'nuc' is excluded; keys that merely start with 'nuc' are CDSs.",
             "type": "object",
-            "allOf": [{ "$ref": "#/$defs/startend" }],
+            "oneOf": [{ "$ref": "#/$defs/startend" }, { "$ref": "#/$defs/segments" }],
             "additionalProperties": true,
+            "required": ["strand"],
             "properties": {
+                "gene": {
+                    "type": "string",
+                    "description": "The name of the gene the CDS is from. Optional.",
+                    "$comment": "Shown in on-hover infobox & influences default CDS colors"
+                },
                 "strand": {
-                    "description": "Is the gene on the positive ('+') or negative ('-') strand.",
-                    "$comment": "Auspice assumes positive strand unless strand is '-'",
-                    "type": "string"
+                    "description": "Strand of the CDS",
+                    "type": "string",
+                    "enum": ["-", "+"]
+                },
+                "color": {
+                    "type": "string",
+                    "description": "A CSS color or a color hex code. Optional."
+                },
+                "display_name": {
+                    "type": "string",
+                    "$comment": "Shown in the on-hover info box"
+                },
+                "description": {
+                    "type": "string",
+                    "$comment": "Shown in the on-hover info box"
                 }
             }
         }
@@ -41,6 +72,25 @@
                     "description": "End position (one-based, following GFF format). This value _must_ be greater than the start."
                 }
             }
+        },
+        "segments": {
+            "type": "object",
+            "required": ["segments"],
+            "properties": {
+                "segments": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "allOf": [{ "$ref": "#/$defs/startend" }],
+                        "properties": {
+                            "name": {
+                                "type": "string",
+                                "$comment": "Displayed for the individual CDS segment"
+                            }
+                        }
+                    }
+                }
+            }
         }
     }
 }
diff --git a/tests/test_validate.py b/tests/test_validate.py
index 427e3947d..ef1efe161 100644
--- a/tests/test_validate.py
+++ b/tests/test_validate.py
@@ -4,7 +4,10 @@
 from augur.validate import (
     validate_collection_config_fields,
     validate_collection_display_defaults,
-    validate_measurements_config
+    validate_measurements_config,
+    load_json_schema,
+    validate_json,
+    ValidateError
 )
 
 
@@ -88,3 +91,78 @@ def test_validate_measurements_config_invalid_default_collection(self, example_m
         }
         assert not validate_measurements_config(measurements)
         assert capsys.readouterr().err == "ERROR: The default collection key 'invalid_collection' does not match any of the collections' keys.\n"
+
+
+@pytest.fixture
+def genome_annotation_schema():
+    return load_json_schema("schema-annotations.json")
+
+class TestValidateGenomeAnnotations():
+    def test_negative_strand_nuc(self, capsys, genome_annotation_schema):
+        d = {"nuc": {"start": 1, "end": 200, "strand": "-"}}
+        with pytest.raises(ValidateError):
+            validate_json(d, genome_annotation_schema, "")
+        capsys.readouterr() # suppress validation error printing
+
+    def test_nuc_not_starting_at_one(self, capsys, genome_annotation_schema):
+        d = {"nuc": {"start": 100, "end": 200, "strand": "+"}}
+        with pytest.raises(ValidateError):
+            validate_json(d, genome_annotation_schema, "")
+        capsys.readouterr() # suppress validation error printing
+
+    def test_missing_nuc(self, capsys, genome_annotation_schema):
+        d = {"cds": {"start": 100, "end": 200, "strand": "+"}}
+        with pytest.raises(ValidateError):
+            validate_json(d, genome_annotation_schema, "")
+        capsys.readouterr() # suppress validation error printing
+
+    def test_missing_properties(self, capsys, genome_annotation_schema):
+        d = {"nuc": {"start": 1, "end": 100}, "cds": {"start": 20, "strand": "+"}}
+        with pytest.raises(ValidateError):
+            validate_json(d, genome_annotation_schema, "")
+        capsys.readouterr() # suppress validation error printing
+
+    def test_not_stranded_cds(self, capsys, genome_annotation_schema):
+        # Strand . is for features that are not stranded (as per GFF spec), and thus they're not CDSs
+        d = {"nuc": {"start": 1, "end": 100}, "cds": {"start": 18, "end": 20, "strand": "."}}
+        with pytest.raises(ValidateError):
+            validate_json(d, genome_annotation_schema, "")
+        capsys.readouterr() # suppress validation error printing
+
+    def test_negative_coordinates(self, capsys, genome_annotation_schema):
+        d = {"nuc": {"start": 1, "end": 100}, "cds": {"start": -2, "end": 10, "strand": "+"}}
+        with pytest.raises(ValidateError):
+            validate_json(d, genome_annotation_schema, "")
+        capsys.readouterr() # suppress validation error printing
+
+    def test_valid_genome(self, capsys, genome_annotation_schema):
+        d = {"nuc": {"start": 1, "end": 100}, "cds": {"start": 20, "end": 28, "strand": "+"}}
+        validate_json(d, genome_annotation_schema, "")
+        capsys.readouterr() # suppress validation error printing
+
+    def test_valid_segmented_genome(self, capsys, genome_annotation_schema):
+        d = {"nuc": {"start": 1, "end": 100},
+             "cds": {"segments": [{"start": 20, "end": 28}], "strand": "+"}}
+        validate_json(d, genome_annotation_schema, "")
+        capsys.readouterr() # suppress validation error printing
+
+    def test_invalid_segmented_genome(self, capsys, genome_annotation_schema):
+        d = {"nuc": {"start": 1, "end": 100},
+             "cds": {"segments": [{"start": 20, "end": 28}, {"start": 27}], "strand": "+"}}
+        with pytest.raises(ValidateError):
+            validate_json(d, genome_annotation_schema, "")
+        capsys.readouterr() # suppress validation error printing
+
+    def test_string_coordinates(self, capsys, genome_annotation_schema):
+        d = {"nuc": {"start": 1, "end": 100},
+             "cds": {"segments": [{"start": 20, "end": 28}, {"start": "27", "end": "29"}], "strand": "+"}}
+        with pytest.raises(ValidateError):
+            validate_json(d, genome_annotation_schema, "")
+        capsys.readouterr() # suppress validation error printing
+
+    def test_cds_name_starting_with_nuc(self, capsys, genome_annotation_schema):
+        # Only the exact key "nuc" is reserved, so this key is validated like any other CDS (cf. test_missing_properties)
+        d = {"nuc": {"start": 1, "end": 100}, "nucleocapsid": {"start": 20, "strand": "+"}}
+        with pytest.raises(ValidateError):
+            validate_json(d, genome_annotation_schema, "")
+        capsys.readouterr() # suppress validation error printing