Skip to content

Commit

Permalink
refactor: simplify ncbi models
Browse files Browse the repository at this point in the history
* Use `Annotated` for typing Pydantic fields.
* Remove unnecessary validators.
* Remove unused error module.
* Simplify, clean up, and document tests.
* Use `pattern` instead of direct use of `re` module.
* Let `NCBISource` take either `db_xref` or `taxid` using a `model_validator`.
* Comment some models where missing.
  • Loading branch information
igboyes authored Apr 16, 2024
1 parent 51b5d10 commit fb50df9
Show file tree
Hide file tree
Showing 4 changed files with 220 additions and 303 deletions.
231 changes: 81 additions & 150 deletions tests/__snapshots__/test_ncbi_model.ambr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# serializer version: 1
# name: TestParseGenbank.test_parse_genbank_record[AB017504]
# name: TestParseGenbank.test_ok[AB017504]
dict({
'accession': 'AB017504',
'accession_version': 'AB017504.1',
Expand All @@ -23,7 +23,7 @@
'topology': <NCBITopology.LINEAR: 'linear'>,
})
# ---
# name: TestParseGenbank.test_parse_genbank_record[MH200607]
# name: TestParseGenbank.test_ok[MH200607]
dict({
'accession': 'MH200607',
'accession_version': 'MH200607.1',
Expand All @@ -47,7 +47,7 @@
'topology': <NCBITopology.LINEAR: 'linear'>,
})
# ---
# name: TestParseGenbank.test_parse_genbank_record[MT240513]
# name: TestParseGenbank.test_ok[MT240513]
dict({
'accession': 'MT240513',
'accession_version': 'MT240513.1',
Expand All @@ -71,7 +71,7 @@
'topology': <NCBITopology.LINEAR: 'linear'>,
})
# ---
# name: TestParseGenbank.test_parse_genbank_record[NC_015504]
# name: TestParseGenbank.test_ok[NC_015504]
dict({
'accession': 'NC_015504',
'accession_version': 'NC_015504.1',
Expand All @@ -95,7 +95,7 @@
'topology': <NCBITopology.CIRCULAR: 'circular'>,
})
# ---
# name: TestParseGenbank.test_parse_genbank_record[NC_036587]
# name: TestParseGenbank.test_ok[NC_036587]
dict({
'accession': 'NC_036587',
'accession_version': 'NC_036587.1',
Expand All @@ -119,7 +119,7 @@
'topology': <NCBITopology.LINEAR: 'linear'>,
})
# ---
# name: TestParseGenbank.test_parse_genbank_source[AB017504]
# name: TestParseGenbank.test_source
dict({
'clone': '',
'host': '',
Expand All @@ -131,142 +131,7 @@
'taxid': 1169032,
})
# ---
# name: TestParseGenbank.test_parse_genbank_source[MH200607]
dict({
'clone': '',
'host': 'Eutrema japonicum',
'isolate': 'WMoV-6.3',
'mol_type': <NCBISourceMolType.GENOMIC_RNA: 'genomic RNA'>,
'organism': 'Wasabi mottle virus',
'segment': '',
'strain': '',
'taxid': 1169032,
})
# ---
# name: TestParseGenbank.test_parse_genbank_source[MT240513]
dict({
'clone': '',
'host': 'Vasconcellea cundinamarcensis',
'isolate': 'AC2-6',
'mol_type': <NCBISourceMolType.GENOMIC_RNA: 'genomic RNA'>,
'organism': 'Babaco mosaic virus',
'segment': '',
'strain': '',
'taxid': 2060511,
})
# ---
# name: TestParseGenbank.test_parse_genbank_source[NC_015504]
dict({
'clone': '',
'host': 'Musa sp. cv. Kibuzi',
'isolate': '',
'mol_type': <NCBISourceMolType.GENOMIC_DNA: 'genomic DNA'>,
'organism': 'Banana streak UL virus',
'segment': '',
'strain': '',
'taxid': 1016856,
})
# ---
# name: TestParseGenbank.test_parse_genbank_source[NC_036587]
dict({
'clone': '',
'host': 'Vasconcellea x heilbornii',
'isolate': 'Tandapi',
'mol_type': <NCBISourceMolType.GENOMIC_RNA: 'genomic RNA'>,
'organism': 'Babaco mosaic virus',
'segment': '',
'strain': '',
'taxid': 2060511,
})
# ---
# name: TestParseGenbank.test_parse_genbank_source_taxid[AB017504]
1169032
# ---
# name: TestParseGenbank.test_parse_genbank_source_taxid[MH200607]
1169032
# ---
# name: TestParseGenbank.test_parse_genbank_source_taxid[MT240513]
2060511
# ---
# name: TestParseGenbank.test_parse_genbank_source_taxid[NC_015504]
1016856
# ---
# name: TestParseGenbank.test_parse_genbank_source_taxid[NC_036587]
2060511
# ---
# name: TestTaxonomyParse.test_parse_taxonomy_record_rank_addendum[1016856-isolate]
dict({
'id': 1016856,
'lineage': list([
dict({
'id': 10239,
'name': 'Viruses',
'rank': 'superkingdom',
}),
dict({
'id': 2559587,
'name': 'Riboviria',
'rank': 'clade',
}),
dict({
'id': 2732397,
'name': 'Pararnavirae',
'rank': 'kingdom',
}),
dict({
'id': 2732409,
'name': 'Artverviricota',
'rank': 'phylum',
}),
dict({
'id': 2732514,
'name': 'Revtraviricetes',
'rank': 'class',
}),
dict({
'id': 2169561,
'name': 'Ortervirales',
'rank': 'order',
}),
dict({
'id': 186534,
'name': 'Caulimoviridae',
'rank': 'family',
}),
dict({
'id': 10652,
'name': 'Badnavirus',
'rank': 'genus',
}),
dict({
'id': 3047392,
'name': 'Badnavirus etavirgamusae',
'rank': 'species',
}),
]),
'name': 'Banana streak UL virus',
'other_names': dict({
'acronym': list([
]),
'equivalent_name': list([
'Banana streak Uganda L virus',
]),
'genbank_acronym': list([
]),
'includes': list([
]),
'synonym': list([
]),
}),
'rank': <NCBIRank.ISOLATE: 'isolate'>,
'species': dict({
'id': 3047392,
'name': 'Badnavirus etavirgamusae',
'rank': 'species',
}),
})
# ---
# name: TestTaxonomyParse.test_parse_taxonomy_record_single[1077859]
# name: TestParseTaxonomy.test_ok[1077859]
dict({
'id': 1077859,
'lineage': list([
Expand Down Expand Up @@ -332,7 +197,7 @@
}),
})
# ---
# name: TestTaxonomyParse.test_parse_taxonomy_record_single[1198450]
# name: TestParseTaxonomy.test_ok[1198450]
dict({
'id': 1198450,
'lineage': list([
Expand Down Expand Up @@ -398,7 +263,7 @@
}),
})
# ---
# name: TestTaxonomyParse.test_parse_taxonomy_record_single[270478]
# name: TestParseTaxonomy.test_ok[270478]
dict({
'id': 270478,
'lineage': list([
Expand Down Expand Up @@ -464,7 +329,7 @@
}),
})
# ---
# name: TestTaxonomyParse.test_parse_taxonomy_record_single[438782]
# name: TestParseTaxonomy.test_ok[438782]
dict({
'id': 438782,
'lineage': list([
Expand Down Expand Up @@ -530,9 +395,75 @@
}),
})
# ---
# name: test_create_lineage_item_alias[lineage_data0]
NCBILineage(id=2732397, name='Pararnavirae', rank='kingdom')
# ---
# name: test_create_lineage_item_alias[lineage_data1]
NCBILineage(id=2732409, name='Artverviricota', rank='phylum')
# name: TestParseTaxonomy.test_with_rank
dict({
'id': 1016856,
'lineage': list([
dict({
'id': 10239,
'name': 'Viruses',
'rank': 'superkingdom',
}),
dict({
'id': 2559587,
'name': 'Riboviria',
'rank': 'clade',
}),
dict({
'id': 2732397,
'name': 'Pararnavirae',
'rank': 'kingdom',
}),
dict({
'id': 2732409,
'name': 'Artverviricota',
'rank': 'phylum',
}),
dict({
'id': 2732514,
'name': 'Revtraviricetes',
'rank': 'class',
}),
dict({
'id': 2169561,
'name': 'Ortervirales',
'rank': 'order',
}),
dict({
'id': 186534,
'name': 'Caulimoviridae',
'rank': 'family',
}),
dict({
'id': 10652,
'name': 'Badnavirus',
'rank': 'genus',
}),
dict({
'id': 3047392,
'name': 'Badnavirus etavirgamusae',
'rank': 'species',
}),
]),
'name': 'Banana streak UL virus',
'other_names': dict({
'acronym': list([
]),
'equivalent_name': list([
'Banana streak Uganda L virus',
]),
'genbank_acronym': list([
]),
'includes': list([
]),
'synonym': list([
]),
}),
'rank': <NCBIRank.ISOLATE: 'isolate'>,
'species': dict({
'id': 3047392,
'name': 'Badnavirus etavirgamusae',
'rank': 'species',
}),
})
# ---
Loading

0 comments on commit fb50df9

Please sign in to comment.