Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions v03_pipeline/lib/misc/family_loading_failures_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def test_build_sex_check_lookup(self):
{'s': 'ROS_007_19Y05919_D1', 'predicted_sex': 'F'},
{'s': 'ROS_007_19Y05939_D1', 'predicted_sex': 'M'},
{'s': 'ROS_007_19Y05987_D1', 'predicted_sex': 'U'},
{'s': 'ROS_007_19Y05989_D1', 'predicted_sex': 'X0'},
],
hl.tstruct(
s=hl.tstr,
Expand All @@ -78,6 +79,7 @@ def test_build_sex_check_lookup(self):
'ROS_007_19Y05919_D1': Sex.FEMALE,
'ROS_007_19Y05939_D1': Sex.MALE,
'ROS_007_19Y05987_D1': Sex.UNKNOWN,
'ROS_007_19Y05989_D1': Sex.X0,
},
)

Expand Down Expand Up @@ -184,6 +186,7 @@ def test_get_families_failed_sex_check(self):
{'s': 'ROS_007_19Y05919_D1', 'predicted_sex': 'F'},
{'s': 'ROS_007_19Y05939_D1', 'predicted_sex': 'M'},
{'s': 'ROS_007_19Y05987_D1', 'predicted_sex': 'U'}, # Pedigree Sex F
{'s': 'ROS_007_19Y05989_D1', 'predicted_sex': 'XXX'},
],
hl.tstruct(
s=hl.tstr,
Expand Down
1 change: 1 addition & 0 deletions v03_pipeline/lib/misc/io_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def test_import_imputed_sex(self) -> None:
hl.Struct(s='abc_1', predicted_sex='M'),
hl.Struct(s='abc_2', predicted_sex='F'),
hl.Struct(s='abc_3', predicted_sex='U'),
hl.Struct(s='abc_4', predicted_sex='XYY'),
],
)

Expand Down
12 changes: 10 additions & 2 deletions v03_pipeline/lib/misc/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,17 @@ def validate_imputed_sex_ploidy(
& (sex_check_ht[mt.s].predicted_sex == Sex.MALE.value)
)
| (
# At least one call is haploid but the sex is Female
# At least one call is haploid but the sex is Female, X0, XXY, XYY, or XXX
hl.agg.any(~mt.GT.is_diploid())
& (sex_check_ht[mt.s].predicted_sex == Sex.FEMALE.value)
& hl.literal(
{
Sex.FEMALE.value,
Sex.X0.value,
Sex.XYY.value,
Sex.XXY.value,
Sex.XXX.value,
},
).contains(sex_check_ht[mt.s].predicted_sex)
)
),
)
Expand Down
88 changes: 81 additions & 7 deletions v03_pipeline/lib/misc/validation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,18 @@ def test_validate_allele_type(self) -> None:

@patch('v03_pipeline.lib.misc.validation.Env')
def test_validate_imputed_sex_ploidy(self, mock_env: Mock) -> None:
female_sample = 'HG00731_1'
male_sample_1 = 'HG00732_1'
male_sample_2 = 'HG00732_1'
x0_sample = 'NA20899_1'
xxy_sample = 'NA20889_1'
xyy_sample = 'NA20891_1'
xxx_sample = 'NA20892_1'

mock_env.CHECK_SEX_AND_RELATEDNESS = True
sex_check_ht = hl.read_table(TEST_SEX_CHECK_1)

# All calls on X chromosome are valid
mt = hl.MatrixTable.from_parts(
rows={
'locus': [
Expand All @@ -125,42 +135,106 @@ def test_validate_imputed_sex_ploidy(self, mock_env: Mock) -> None:
),
],
},
cols={'s': ['HG00731_1', 'HG00732_1']},
cols={
's': [
female_sample,
male_sample_1,
x0_sample,
xxy_sample,
xyy_sample,
xxx_sample,
],
},
entries={
'GT': [
[
hl.Call(alleles=[0, 0], phased=False),
hl.Call(alleles=[0], phased=False),
hl.Call(alleles=[0, 0], phased=False), # X0
hl.Call(alleles=[0, 0], phased=False), # XXY
hl.Call(alleles=[0, 0], phased=False), # XYY
hl.Call(alleles=[0, 0], phased=False), # XXX
],
],
},
).key_rows_by('locus')
validate_imputed_sex_ploidy(mt, sex_check_ht)

# All calls on Y chromosome are valid
mt = hl.MatrixTable.from_parts(
rows={
'locus': [
hl.Locus(
contig='chrX',
contig='chrY',
position=1,
reference_genome='GRCh38',
),
],
},
# Male, Female, Male
cols={'s': ['HG00731_1', 'HG00732_1', 'NA19678_1']},
cols={
's': [
female_sample,
male_sample_1,
x0_sample,
xxy_sample,
xyy_sample,
xxx_sample,
],
},
entries={
'GT': [
[
hl.Call(alleles=[0], phased=False),
hl.Call(alleles=[0], phased=False),
hl.missing(hl.tcall),
hl.Call(alleles=[0], phased=False),
hl.missing(hl.tcall), # X0
hl.Call(alleles=[0, 0], phased=False), # XXY
hl.Call(alleles=[0, 0], phased=False), # XYY
hl.missing(hl.tcall), # XXX
],
],
},
).key_rows_by('locus')
validate_imputed_sex_ploidy(mt, sex_check_ht)

# Invalid X chromosome case
mt = hl.MatrixTable.from_parts(
rows={
'locus': [
hl.Locus(
contig='chrX',
position=1,
reference_genome='GRCh38',
),
],
},
cols={
's': [
female_sample,
male_sample_1,
male_sample_2,
x0_sample,
xxy_sample,
xyy_sample,
xxx_sample,
],
},
entries={
'GT': [
[
hl.Call(alleles=[0], phased=False), # invalid Female call
hl.Call(alleles=[0], phased=False), # valid Male call
hl.missing(hl.tcall), # invalid Male call
hl.Call(alleles=[0], phased=False), # invalid X0 call
hl.Call(alleles=[0], phased=False), # invalid XXY call
hl.missing(hl.tcall), # valid XYY call
hl.Call(alleles=[0, 0], phased=False), # valid XXX call
],
],
},
).key_rows_by('locus')
self.assertRaisesRegex(
SeqrValidationError,
'66.67% of samples have misaligned ploidy',
'57.14% of samples have misaligned ploidy',
validate_imputed_sex_ploidy,
mt,
sex_check_ht,
Expand Down
6 changes: 5 additions & 1 deletion v03_pipeline/lib/model/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,18 @@ class Sex(str, Enum):
FEMALE = 'F'
MALE = 'M'
UNKNOWN = 'U'
XXX = 'XXX'
X0 = 'X0'
XXY = 'XXY'
XYY = 'XYY'

@property
def imputed_sex_value(self):
return {
Sex.MALE: 'Male',
Sex.FEMALE: 'Female',
Sex.UNKNOWN: 'Unknown',
}[self]
}.get(self, self.name)


class PipelineVersion(str, Enum):
Expand Down
3 changes: 2 additions & 1 deletion v03_pipeline/var/test/pedigrees/test_pedigree_6.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ R0116_sex_check_project2 family_1 family_1 ROS_006_18Y03227_D1 U
R0116_sex_check_project2 family_1 family_1 ROS_006_18Y03228_D1 ROS_006_18Y03226_D1 ROS_006_18Y03227_D1 F
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05919_D1 F
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05939_D1 F
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05987_D1 ROS_007_19Y05919_D1 ROS_007_19Y05939_D1 F
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05987_D1 ROS_007_19Y05919_D1 ROS_007_19Y05939_D1 F
R0116_sex_check_project2 family_2 family_2 ROS_007_19Y05989_D1 ROS_007_19Y05919_D1 ROS_007_19Y05939_D1 XXX
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to confirm that the pedigree file will also be delivered with sex aneuploidies matching those in the imputed sex file's predicted_sex.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

confirmed! This is how seqr would generate a pedigree file with an aneuploidy

1 change: 1 addition & 0 deletions v03_pipeline/var/test/sex_check/test_imputed_sex.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ entity:sample_id collaborator_participant_id collaborator_sample_id contaminatio
SM-DM66X abc_1 abc_1 0E+00 gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/6f30a41f-1d91-44d1-915c-5c10c6d87fcd/WAL_LIS6100_LIS6101.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/3e204a66-f044-4bdc-ade4-1671a0269214/WAL_LIS6100_LIS6101.cram.crai gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/a6ed4850-6a69-412e-a071-bf8cce04fca0/WAL_LIS6100_LIS6101.cram.md5sum gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/c51bbfd6-42f0-40ca-aa0c-b5eece935516/WAL_LIS6100_LIS6101.cram 8a07ce00-16a1-40f4-8666-c4cfaad1bbe1 07.021.604.3.7.8 cc9d9ed9-785a-407d-910e-d9bd46936fa6 2024-04-17T14:58:10 98.450000000 gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/9e745b1d-2c00-44ce-bbfb-31c44369f4fe/WAL_LIS6100_LIS6101.mapping_metrics.csv DNA:DNA Genomic 35.730000000 Whole Blood:Whole Blood PT-24FB4 Pass PDO-32851 96.140000000 97.850000000 Male P-WG-0139 2017-03-15 04:00:00 Male RP-3071 gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/360ec721-0af8-4085-a677-38c018069559/WAL_LIS6100_LIS6101.vcf.gz.tbi gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/8da8cda2-497f-4a8b-a642-af4a4ad28aac/WAL_LIS6100_LIS6101.vcf.gz.md5sum gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/0a2d93fb-8837-4b6f-ac68-a6b9701f9a08/WAL_LIS6100_LIS6101.vcf.gz 134324623400.000000000 gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/f7b62337-1339-4c2e-8280-281c48604e07/WAL_LIS6100_LIS6101.vc_metrics.csv
SM-DM69X abc_2 abc_2 0E+00 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c4c07edf-7735-4aa7-9283-7cb2607b60a2/GLE-5774-3-3.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/dcd4c271-0249-47f1-8e91-81f74735c5a1/GLE-5774-3-3.cram.crai gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/ec41ec06-673f-4fe2-a063-23dc5fe1dcce/GLE-5774-3-3.cram.md5sum gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/aad0e270-2ad5-4f39-b968-9b4beafeb5cc/GLE-5774-3-3.cram a4b04a39-9234-4028-a155-442c4acf12a0 07.021.604.3.7.8 ce74d94c-c33d-49d7-85c9-5f3cbd08aff7 2024-04-17T15:02:46 99.800000000 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c3a9e6f2-4c68-410b-823d-46ca406e5061/GLE-5774-3-3.mapping_metrics.csv DNA:DNA Genomic 35.300000000 Whole Blood:Whole Blood PT-24OHM Pass PDO-32755 96.320000000 97.340000000 Female P-WG-0139 2017-04-12 04:00:00 Female RP-3061 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c71cd2a1-c789-4715-9ebc-dbfc40d9f2e2/GLE-5774-3-3.vcf.gz.tbi gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/957a99cb-c9a9-4fc5-a0ec-53f9e461469e/GLE-5774-3-3.vcf.gz.md5sum gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/df520949-5f2b-4976-9d46-80d1cc299813/GLE-5774-3-3.vcf.gz 133253714921.000000000 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/2e98e51b-9394-4e64-977f-e9010a4e16dc/GLE-5774-3-3.vc_metrics.csv
SM-DPB5G abc_3 abc_3 0E+00 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/432f8354-77e0-4381-9bb5-dfdc0633b5b2/PIE_OGI1433_002628_1.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/3dc623fa-2a45-4b3d-a0f8-fcdec09f9418/PIE_OGI1433_002628_1.cram.crai gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/895966ef-c705-4c18-952d-03863243a184/PIE_OGI1433_002628_1.cram.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/96ca6d5f-fb23-4102-bb5e-c7bbfd194e1c/PIE_OGI1433_002628_1.cram ffb50687-165e-425a-a545-c3797d3a28d4 07.021.604.3.7.8 55729ba9-3ce4-47b3-9c3b-1148737ae40f 2024-04-17T15:07:57 99.670000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/30f8e208-5d2d-4ce8-b835-695b5ed673f4/PIE_OGI1433_002628_1.mapping_metrics.csv DNA:DNA Genomic 41.910000000 Whole Blood:Whole Blood PT-25BR5 Pass PDO-32756 92.920000000 97.990000000 Unknown P-WG-0139 2017-05-19 04:00:00 Unknown RP-3062 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/1641d1b2-1035-4cc3-9c8b-0c8cb430f56b/PIE_OGI1433_002628_1.vcf.gz.tbi gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/f5ba2708-899e-42e8-b287-fdf72c2e404d/PIE_OGI1433_002628_1.vcf.gz.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/e925ee5d-a75e-471f-adfd-2756c8690069/PIE_OGI1433_002628_1.vcf.gz 156149580126.000000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/df076bc5-9db8-44f0-a3fe-f693370634cc/PIE_OGI1433_002628_1.vc_metrics.csv
SM-DPB5G abc_4 abc_4 0E+00 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/432f8354-77e0-4381-9bb5-dfdc0633b5b2/PIE_OGI1433_002628_1.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/3dc623fa-2a45-4b3d-a0f8-fcdec09f9418/PIE_OGI1433_002628_1.cram.crai gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/895966ef-c705-4c18-952d-03863243a184/PIE_OGI1433_002628_1.cram.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/96ca6d5f-fb23-4102-bb5e-c7bbfd194e1c/PIE_OGI1433_002628_1.cram ffb50687-165e-425a-a545-c3797d3a28d4 07.021.604.3.7.8 55729ba9-3ce4-47b3-9c3b-1148737ae40f 2024-04-17T15:07:57 99.670000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/30f8e208-5d2d-4ce8-b835-695b5ed673f4/PIE_OGI1433_002628_1.mapping_metrics.csv DNA:DNA Genomic 41.910000000 Whole Blood:Whole Blood PT-25BR5 Pass PDO-32756 92.920000000 97.990000000 XYY P-WG-0139 2017-05-19 04:00:00 XYY RP-3062 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/1641d1b2-1035-4cc3-9c8b-0c8cb430f56b/PIE_OGI1433_002628_1.vcf.gz.tbi gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/f5ba2708-899e-42e8-b287-fdf72c2e404d/PIE_OGI1433_002628_1.vcf.gz.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/e925ee5d-a75e-471f-adfd-2756c8690069/PIE_OGI1433_002628_1.vcf.gz 156149580126.000000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/df076bc5-9db8-44f0-a3fe-f693370634cc/PIE_OGI1433_002628_1.vc_metrics.csv
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
Written with version 0.2.128-eead8100a1c1
Created at 2024/05/02 16:11:09
Written with version 0.2.132-678e1f52b999
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added 4 new samples with the 4 new sexes

Created at 2024/10/29 17:04:32
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading