Skip to content

Commit

Permalink
Extend annotations schema
Browse files Browse the repository at this point in the history
See <nextstrain/auspice#1684> for the context
for these additions.
  • Loading branch information
jameshadfield committed Aug 17, 2023
1 parent 904b7a2 commit 7aea997
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 7 deletions.
62 changes: 56 additions & 6 deletions augur/data/schema-annotations.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,52 @@
"nuc": {
"type": "object",
"allOf": [{ "$ref": "#/$defs/startend" }],
"properties": {
"start": {
"enum": [1],
"$comment": "nuc must begin at 1"
},
"strand": {
"type": "string",
"enum":["+"],
"description": "Strand is optional for nuc, as it should be +ve for all genomes (-ve strand genomes are reverse complemented)",
"$comment": "Auspice will not proceed if the JSON has strand='-'"
}
},
"additionalProperties": true,
"$comment": "All other properties are unused by Auspice. Strand is always considered to be positive."
"$comment": "All other properties are unused by Auspice."
}
},
"required": ["nuc"],
"patternProperties": {
"^[a-zA-Z0-9*_-]+$": {
"^(?!nuc)[a-zA-Z0-9*_-]+$": {
"$comment": "Each object here defines a single CDS",
"type": "object",
"allOf": [{ "$ref": "#/$defs/startend" }],
"oneOf": [{ "$ref": "#/$defs/startend" }, { "$ref": "#/$defs/segments" }],
"additionalProperties": true,
"required": ["strand"],
"properties": {
"gene": {
"type": "string",
"description": "The name of the gene the CDS is from. Optional.",
"$comment": "Shown in the on-hover info box & influences default CDS colors"
},
"strand": {
"description": "Is the gene on the positive ('+') or negative ('-') strand.",
"$comment": "Auspice assumes positive strand unless strand is '-'",
"type": "string"
"description": "Strand of the CDS",
"type": "string",
"enum": ["-", "+"]
},
"color": {
"type": "string",
"description": "A CSS color or a color hex code. Optional."
},
"display_name": {
"type": "string",
"$comment": "Shown in the on-hover info box"
},
"description": {
"type": "string",
"$comment": "Shown in the on-hover info box"
}
}
}
Expand All @@ -41,6 +72,25 @@
"description": "End position (one-based, following GFF format). This value _must_ be greater than the start."
}
}
},
"segments": {
"type": "object",
"required": ["segments"],
"properties": {
"segments": {
"type": "array",
"items": {
"type": "object",
"allOf": [{ "$ref": "#/$defs/startend" }],
"properties": {
"name": {
"type": "string",
"$comment": "Displayed for the individual CDS segment"
}
}
}
}
}
}
}
}
73 changes: 72 additions & 1 deletion tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
from augur.validate import (
validate_collection_config_fields,
validate_collection_display_defaults,
validate_measurements_config
validate_measurements_config,
load_json_schema,
validate_json,
ValidateError
)


Expand Down Expand Up @@ -88,3 +91,71 @@ def test_validate_measurements_config_invalid_default_collection(self, example_m
}
assert not validate_measurements_config(measurements)
assert capsys.readouterr().err == "ERROR: The default collection key 'invalid_collection' does not match any of the collections' keys.\n"


@pytest.fixture
def genome_annotation_schema():
    """Return the schema object loaded from ``schema-annotations.json``.

    Tests request this fixture by name and pass the result to
    ``validate_json`` as the schema argument.
    """
    schema = load_json_schema("schema-annotations.json")
    return schema

class TestValidateGenomeAnnotations:
    """Validate small genome-annotation dicts against the annotations schema.

    Each test builds an annotations mapping, runs it through
    ``validate_json`` with the ``genome_annotation_schema`` fixture, and
    either expects ``ValidateError`` or expects validation to pass.
    ``capsys.readouterr()`` is called afterwards to drain captured output.
    """

    def test_negative_strand_nuc(self, capsys, genome_annotation_schema):
        """A ``nuc`` entry on the '-' strand is expected to be rejected."""
        annotations = {
            "nuc": {"start": 1, "end": 200, "strand": "-"},
        }
        with pytest.raises(ValidateError):
            validate_json(annotations, genome_annotation_schema, "<test-json>")
        capsys.readouterr()  # drain the printed validation errors

    def test_nuc_not_starting_at_one(self, capsys, genome_annotation_schema):
        """A ``nuc`` entry whose start is not 1 is expected to be rejected."""
        annotations = {
            "nuc": {"start": 100, "end": 200, "strand": "+"},
        }
        with pytest.raises(ValidateError):
            validate_json(annotations, genome_annotation_schema, "<test-json>")
        capsys.readouterr()  # drain the printed validation errors

    def test_missing_nuc(self, capsys, genome_annotation_schema):
        """Annotations without a ``nuc`` key are expected to be rejected."""
        annotations = {
            "cds": {"start": 100, "end": 200, "strand": "+"},
        }
        with pytest.raises(ValidateError):
            validate_json(annotations, genome_annotation_schema, "<test-json>")
        capsys.readouterr()  # drain the printed validation errors

    def test_missing_properties(self, capsys, genome_annotation_schema):
        """A CDS given without an ``end`` is expected to be rejected."""
        annotations = {
            "nuc": {"start": 1, "end": 100},
            "cds": {"start": 20, "strand": "+"},
        }
        with pytest.raises(ValidateError):
            validate_json(annotations, genome_annotation_schema, "<test-json>")
        capsys.readouterr()  # drain the printed validation errors

    def test_not_stranded_cds(self, capsys, genome_annotation_schema):
        """A CDS with strand '.' is expected to be rejected."""
        # Strand . is for features that are not stranded (as per GFF spec),
        # and thus they're not CDSs
        annotations = {
            "nuc": {"start": 1, "end": 100},
            "cds": {"start": 18, "end": 20, "strand": "."},
        }
        with pytest.raises(ValidateError):
            validate_json(annotations, genome_annotation_schema, "<test-json>")
        capsys.readouterr()  # drain the printed validation errors

    def test_negative_coordinates(self, capsys, genome_annotation_schema):
        """A CDS with a negative ``start`` is expected to be rejected."""
        annotations = {
            "nuc": {"start": 1, "end": 100},
            "cds": {"start": -2, "end": 10, "strand": "+"},
        }
        with pytest.raises(ValidateError):
            validate_json(annotations, genome_annotation_schema, "<test-json>")
        capsys.readouterr()  # drain the printed validation errors

    def test_valid_genome(self, capsys, genome_annotation_schema):
        """A ``nuc`` plus a start/end CDS is expected to validate cleanly."""
        annotations = {
            "nuc": {"start": 1, "end": 100},
            "cds": {"start": 20, "end": 28, "strand": "+"},
        }
        validate_json(annotations, genome_annotation_schema, "<test-json>")
        capsys.readouterr()  # drain anything captured during validation

    def test_valid_segmented_genome(self, capsys, genome_annotation_schema):
        """A CDS described via ``segments`` is expected to validate cleanly."""
        annotations = {
            "nuc": {"start": 1, "end": 100},
            "cds": {
                "segments": [{"start": 20, "end": 28}],
                "strand": "+",
            },
        }
        validate_json(annotations, genome_annotation_schema, "<test-json>")
        capsys.readouterr()  # drain anything captured during validation

    def test_invalid_segmented_genome(self, capsys, genome_annotation_schema):
        """A segment given without an ``end`` is expected to be rejected."""
        annotations = {
            "nuc": {"start": 1, "end": 100},
            "cds": {
                "segments": [{"start": 20, "end": 28}, {"start": 27}],
                "strand": "+",
            },
        }
        with pytest.raises(ValidateError):
            validate_json(annotations, genome_annotation_schema, "<test-json>")
        capsys.readouterr()  # drain the printed validation errors

    def test_string_coordinates(self, capsys, genome_annotation_schema):
        """Segment coordinates given as strings are expected to be rejected."""
        annotations = {
            "nuc": {"start": 1, "end": 100},
            "cds": {
                "segments": [
                    {"start": 20, "end": 28},
                    {"start": "27", "end": "29"},
                ],
                "strand": "+",
            },
        }
        with pytest.raises(ValidateError):
            validate_json(annotations, genome_annotation_schema, "<test-json>")
        capsys.readouterr()  # drain the printed validation errors

0 comments on commit 7aea997

Please sign in to comment.