From dc6442c803520ba8d8632de763ec4c60eec42abc Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Thu, 26 Sep 2024 21:41:23 -0700 Subject: [PATCH] Preserve unknown auth_seq_num in poly_seq_scheme If the poly_seq_scheme table maps a given seq_id to an auth_seq_num of ? then preserve that mapping on output, so we don't lose any information on not-modeled residues in that table. --- ihm/dumper.py | 4 ++++ src/ihm_format.i | 20 ++++++++------------ test/test_dumper.py | 34 ++++++++++++++++++++++++++++++++++ test/test_reader.py | 20 ++++++++++++++++++++ 4 files changed, 66 insertions(+), 12 deletions(-) diff --git a/ihm/dumper.py b/ihm/dumper.py index f3c6713..e42547a 100644 --- a/ihm/dumper.py +++ b/ihm/dumper.py @@ -744,6 +744,10 @@ def dump(self, system, writer): # See, e.g., https://files.rcsb.org/view/8QB4.cif auth_comp_id = ihm.unknown auth_seq_num = ihm.unknown + elif auth_seq_num is ihm.unknown: + # If we don't know the seq num, we can't know + # the component ID either + auth_comp_id = ihm.unknown lp.write(asym_id=asym._id, pdb_strand_id=asym.strand_id, entity_id=entity._id, seq_id=num, diff --git a/src/ihm_format.i b/src/ihm_format.i index 3469643..cb1a11b 100644 --- a/src/ihm_format.i +++ b/src/ihm_format.i @@ -437,7 +437,7 @@ static void handle_poly_seq_scheme_data(struct ihm_reader *reader, void *data, struct ihm_error **err) { int i, seq_id, pdb_seq_num, auth_seq_num; - char *seq_id_endptr, *pdb_seq_num_endptr; + char *seq_id_endptr, *pdb_seq_num_endptr, *auth_seq_num_endptr; struct category_handler_data *hd = data; struct ihm_keyword **keys; @@ -451,9 +451,11 @@ static void handle_poly_seq_scheme_data(struct ihm_reader *reader, return; } - for (i = 0, keys = hd->keywords; i < 3; ++i, ++keys) { - /* Do nothing if any of asym_id, seq_id, or pdb_seq_num are missing */ + for (i = 0, keys = hd->keywords; i < 4; ++i, ++keys) { + /* Call Python handler if any of asym_id, seq_id, pdb_seq_num, + or auth_seq_num are missing */ if (!(*keys)->in_file || (*keys)->omitted || (*keys)->unknown) { + handle_category_data(reader, data, err); return; } } @@ -464,15 +466,9 @@ static void handle_poly_seq_scheme_data(struct ihm_reader *reader, nothing needs to be done */ seq_id = strtol(hd->keywords[1]->data, &seq_id_endptr, 10); pdb_seq_num = strtol(hd->keywords[2]->data, &pdb_seq_num_endptr, 10); - if (!hd->keywords[3]->in_file || hd->keywords[3]->omitted - || hd->keywords[3]->unknown) { - /* If auth_seq_num is missing, assume identical to pdb_seq_num */ - auth_seq_num = pdb_seq_num; - } else { - auth_seq_num = strtol(hd->keywords[3]->data, &pdb_seq_num_endptr, 10); - } - if (!*seq_id_endptr && !*pdb_seq_num_endptr && seq_id == pdb_seq_num - && seq_id == auth_seq_num + auth_seq_num = strtol(hd->keywords[3]->data, &auth_seq_num_endptr, 10); + if (!*seq_id_endptr && !*pdb_seq_num_endptr && !*auth_seq_num_endptr + && seq_id == pdb_seq_num && seq_id == auth_seq_num && (!hd->keywords[4]->in_file || hd->keywords[4]->omitted || hd->keywords[4]->unknown)) { return; diff --git a/test/test_dumper.py b/test/test_dumper.py index 47c1431..e28f3c1 100644 --- a/test/test_dumper.py +++ b/test/test_dumper.py @@ -980,6 +980,40 @@ def test_poly_seq_scheme_dumper(self): D 4 1 DA 1 1 DA DA X A D 4 2 DC 1 1 DC DC X B # +""") + + def test_poly_seq_scheme_unknown_auth_seq(self): + """Test PolySeqSchemeDumper with explicit unknown auth_seq_num""" + system = ihm.System() + e1 = ihm.Entity('ACGT') + system.entities.append(e1) + a1 = ihm.AsymUnit(e1, 'foo', + orig_auth_seq_id_map={1: 3, 2: 4, + 3: ihm.unknown, 4: 6}) + system.asym_units.append(a1) + ihm.dumper._EntityDumper().finalize(system) + ihm.dumper._StructAsymDumper().finalize(system) + dumper = ihm.dumper._PolySeqSchemeDumper() + out = _get_dumper_output(dumper, system) + # If auth_seq_num is ?, so should pdb_mon_id and auth_mon_id; + # see, e.g. PDB ID 8qb4 + self.assertEqual(out, """# +loop_ +_pdbx_poly_seq_scheme.asym_id +_pdbx_poly_seq_scheme.entity_id +_pdbx_poly_seq_scheme.seq_id +_pdbx_poly_seq_scheme.mon_id +_pdbx_poly_seq_scheme.pdb_seq_num +_pdbx_poly_seq_scheme.auth_seq_num +_pdbx_poly_seq_scheme.pdb_mon_id +_pdbx_poly_seq_scheme.auth_mon_id +_pdbx_poly_seq_scheme.pdb_strand_id +_pdbx_poly_seq_scheme.pdb_ins_code +A 1 1 ALA 1 3 ALA ALA A . +A 1 2 CYS 2 4 CYS CYS A . +A 1 3 GLY 3 ? ? ? A . +A 1 4 THR 4 6 THR THR A . +# """) def test_poly_seq_scheme_dumper_not_modeled(self): diff --git a/test/test_reader.py b/test/test_reader.py index 8c97c5c..9abf137 100644 --- a/test/test_reader.py +++ b/test/test_reader.py @@ -2832,6 +2832,26 @@ def test_poly_seq_scheme_handler_incon_off(self): self.assertIsNone(asym.residue(1).ins_code) self.assertIsNone(asym.orig_auth_seq_id_map) + def test_poly_seq_scheme_handler_unknown_auth_seq(self): + """Test PolySeqSchemeHandler with explicit unknown auth_seq_num""" + fh = StringIO(ASYM_ENTITY + """ +loop_ +_pdbx_poly_seq_scheme.asym_id +_pdbx_poly_seq_scheme.entity_id +_pdbx_poly_seq_scheme.seq_id +_pdbx_poly_seq_scheme.pdb_seq_num +_pdbx_poly_seq_scheme.auth_seq_num +_pdbx_poly_seq_scheme.pdb_strand_id +A 1 1 1 1 A +A 1 2 2 2 A +A 1 3 3 ? A +A 1 4 4 4 A +""") + s, = ihm.reader.read(fh) + asym, = s.asym_units + self.assertEqual(asym.auth_seq_id_map, 0) + self.assertEqual(asym.orig_auth_seq_id_map, {3: ihm.unknown}) + def test_poly_seq_scheme_handler_str_seq_id(self): """Test PolySeqSchemeHandler with a non-integer pdb_seq_num""" fh = StringIO(ASYM_ENTITY + """