Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Handling of multiple bridgehead atom pairs #585

Merged
merged 2 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Fix: Canonical direction determination edge case
The code was failing to distinguish between forward/reverse directions
(and stereoassignments) to decide the canonical direction for this
reaction:
```
 C1CC2[CH]C1O2 => [CH]1CC2CC1O2
 ^  *  ^  *        ^    * ^*
 [* marks the first pair of bridgehead stereo atoms]
 [^ marks the first pair of bridgehead stereo atoms]
```

This now appears to be fixed.
  • Loading branch information
avcopan committed Dec 2, 2024
commit 81a4d4a923314f17f3c7f117a0a58a8e4499cfbf
1 change: 1 addition & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ ignored-modules=pyx2z, rdkit
disable=
too-few-public-methods,
too-many-arguments,
too-many-positional-arguments,
too-many-lines,
too-many-locals,
too-many-statements,
Expand Down
73 changes: 48 additions & 25 deletions automol/graph/base/_08canon.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from typing import Any, Callable, Dict, List, Optional, Tuple

import numpy

from phydat import ptab

from ...util import dict_
Expand Down Expand Up @@ -55,6 +54,7 @@
parity_evaluator_flip_from_graph,
parity_evaluator_read_from_graph,
stereocenter_candidates,
stereocenter_candidates_grouped,
unassigned_stereocenter_keys_from_candidates,
)

Expand Down Expand Up @@ -159,15 +159,19 @@ def canonical_amchi_graph_with_numbers(
is_can_enant = None
if len(atm_ste_keys) == 1:
rgra = invert_atom_stereo_parities(gra)
is_can_enant = is_canonical_enantiomer(gra, pri_dct, rgra, pri_dct)
is_can_enant = is_canonical_enantiomer(
gra, pri_dct, rgra, pri_dct, cand_dct=cand_dct
)
gra = rgra if is_can_enant is False else gra
elif len(atm_ste_keys) > 1:
par_eval_ = parity_evaluator_flip_from_graph
loc_gra = set_stereo_parities(gra, par_eval_(gra, ste_keys, pri_dct, cand_dct))
rloc_gra = invert_atom_stereo_parities(loc_gra)
rgra, _, rpri_dct, _ = calculate_stereo(rloc_gra, par_eval_=par_eval_)

is_can_enant = is_canonical_enantiomer(gra, pri_dct, rgra, rpri_dct)
is_can_enant = is_canonical_enantiomer(
gra, pri_dct, rgra, rpri_dct, cand_dct=cand_dct
)

gra = rgra if is_can_enant is False else gra
pri_dct = rpri_dct if is_can_enant is False else pri_dct
Expand Down Expand Up @@ -216,29 +220,36 @@ def smiles_graph(
return kgr


def stereo_assignment_representation(gra, pri_dct: dict):
def stereo_assignment_representation(gra, pri_dct: dict, cand_dct: dict | None = None):
"""Generate a representation of a stereo assignment, for checking for
symmetric equivalence or for determining a canonical enantiomer
symmetric equivalence or for determining a canonical enantiomer.

:param gra: molecular graph
:type gra: automol graph data structure
:param pri_dct: A dictionary mapping atom keys to priorities
:type pri_dct: dict
:param cand_dct: A dictionary mapping stereocenter candidates to their neighbor keys
:returns: A canonical representation of the assignment
"""
cand_dct = stereocenter_candidates(gra) if cand_dct is None else cand_dct

atm_keys = sorted(atom_stereo_keys(gra), key=pri_dct.get)
bnd_keys = sorted(bond_stereo_keys(gra), key=lambda x: sorted(map(pri_dct.get, x)))

rep = tuple(
dict_.values_by_key(atom_stereo_parities(gra), atm_keys)
+ dict_.values_by_key(bond_stereo_parities(gra), bnd_keys)
)
atm_nkeys_dct, bnd_nkeys_dct = stereocenter_candidates_grouped(cand_dct, pri_dct)

atm_pars = dict_.values_by_key(atom_stereo_parities(gra), atm_keys)
bnd_pars = dict_.values_by_key(bond_stereo_parities(gra), bnd_keys)
atm_pris = tuple(map(pri_dct.get, atm_keys))
bnd_pris = tuple(map(pri_dct.get, bnd_keys))
atm_npris = tuple(tuple(map(pri_dct.get, atm_nkeys_dct[k])) for k in atm_keys)
bnd_npris = tuple(tuple(map(pri_dct.get, bnd_nkeys_dct[k])) for k in bnd_keys)

rep = (atm_pars, bnd_pars, atm_pris, bnd_pris, atm_npris, bnd_npris)
return rep


def ts_direction_representation(tsg, pri_dct: dict):
def ts_direction_representation(tsg, pri_dct: dict, cand_dct: dict | None = None):
"""Generate a representation of the reaction direction to determine the
canonical direction of a TS graph
canonical direction of a TS graph.

The reaction representation consists of two pieces:

Expand All @@ -247,9 +258,8 @@ def ts_direction_representation(tsg, pri_dct: dict):
2. Canonical representations of the stereo assignments.

:param tsg: A TS graph
:type tsg: automol graph data structure
:param pri_dct: A dictionary mapping atom keys to priorities
:type pri_dct: dict
:param cand_dct: A dictionary mapping stereocenter candidates to their neighbor keys
:returns: A canonical representation of the TS reaction
"""
frm_keys = ts_forming_bond_keys(tsg)
Expand All @@ -262,13 +272,15 @@ def ts_direction_representation(tsg, pri_dct: dict):
sorted(sorted(map(pri_dct.get, k)) for k in brk_keys),
)
# Rep value 3: Stereo assignment
ste_rep = stereo_assignment_representation(tsg, pri_dct)
ste_rep = stereo_assignment_representation(tsg, pri_dct, cand_dct=cand_dct)
rep = (rxn_rep1, rxn_rep2, ste_rep)
return rep


def is_canonical_enantiomer(ugra, upri_dct, rgra, rpri_dct):
"""Is this enantiomer the canonical one?
def is_canonical_enantiomer(
ugra, upri_dct, rgra, rpri_dct, cand_dct: dict | None = None
):
"""Determine whether this enantiomer is the canonical one.

:param ugra: An unreflected molecular graph
:type ugra: automol graph data structure
Expand All @@ -281,13 +293,22 @@ def is_canonical_enantiomer(ugra, upri_dct, rgra, rpri_dct):
:returns: `True` if it is, `False` if it isn't, and `None` if it isn't an
enantiomer
"""
urep = stereo_assignment_representation(ugra, upri_dct)
rrep = stereo_assignment_representation(rgra, rpri_dct)
cand_dct = stereocenter_candidates(ugra) if cand_dct is None else cand_dct

urep = stereo_assignment_representation(ugra, upri_dct, cand_dct=cand_dct)
rrep = stereo_assignment_representation(rgra, rpri_dct, cand_dct=cand_dct)
return True if (urep < rrep) else False if (urep > rrep) else None


def is_canonical_direction(ftsg, fpri_dct, rtsg, rpri_dct):
"""Is this TS direction the canonical one?
def is_canonical_direction(
ftsg,
fpri_dct,
rtsg,
rpri_dct,
fcand_dct: dict | None = None,
rcand_dct: dict | None = None,
):
"""Determine whether this TS direction is the canonical one.

:param ftsg: A TS graph in the forward direction
:type ftsg: automol graph data structure
Expand All @@ -301,8 +322,8 @@ def is_canonical_direction(ftsg, fpri_dct, rtsg, rpri_dct):
:type rpri_dct: dict
:returns: A canonical representation of the TS reaction
"""
frep = ts_direction_representation(ftsg, fpri_dct)
rrep = ts_direction_representation(rtsg, rpri_dct)
frep = ts_direction_representation(ftsg, fpri_dct, cand_dct=fcand_dct)
rrep = ts_direction_representation(rtsg, rpri_dct, cand_dct=rcand_dct)
return True if (frep < rrep) else False if (frep > rrep) else None


Expand Down Expand Up @@ -395,7 +416,9 @@ def _calculate_ts_stereo(
)

# 3. Determine which direction is canonical
is_can_dir = is_canonical_direction(can_tsg, pri_dct, rcan_tsg, rpri_dct)
is_can_dir = is_canonical_direction(
can_tsg, pri_dct, rcan_tsg, rpri_dct, fcand_dct=cand_dct
)
if is_can_dir is False:
tsg = ts_reverse(rtsg)
can_tsg = ts_reverse(rcan_tsg)
Expand Down
44 changes: 28 additions & 16 deletions automol/graph/base/_11stereo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import more_itertools as mit
import numpy

from phydat import phycon

from ... import util
Expand Down Expand Up @@ -91,29 +90,36 @@ def expand_stereo(
:returns: a series of molecular graphs for the stereoisomers
"""
cand_dct = stereocenter_candidates(gra)
rcand_dct = stereocenter_candidates(ts_reverse(gra))

# 1. Run the core stereo expansion algorithm
gps = _expand_stereo_core(gra, cand_dct, rcts_gra=rcts_gra, prds_gra=prds_gra)
gps = _expand_stereo_core(
gra, cand_dct, rcand_dct=rcand_dct, rcts_gra=rcts_gra, prds_gra=prds_gra
)

# 2. If requested, filter out strained stereoisomers
if not strained:
gps = _remove_strained_stereoisomers_from_expansion(gps, cand_dct)
gps = _remove_strained_stereoisomers_from_expansion(gps, cand_dct=cand_dct)

# 3. If requested, filter out non-canonical enantiomers
if not enant:
gps = _remove_noncanonical_enantiomers_from_expansion(gps)
gps = _remove_noncanonical_enantiomers_from_expansion(gps, cand_dct=cand_dct)

# 4. If requested, filter out symmetry equivalents
if not symeq:
gps = _remove_symmetry_equivalents_from_expansion(gps)
gps = _remove_symmetry_equivalents_from_expansion(gps, cand_dct=cand_dct)

sgras = [sgra for sgra, _ in gps]
sgras = tuple(sorted(sgras, key=frozen))
return sgras


def _expand_stereo_core(
gra, cand_dct, rcts_gra: object | None = None, prds_gra: object | None = None
gra,
cand_dct,
rcand_dct: dict | None = None,
rcts_gra: object | None = None,
prds_gra: object | None = None,
):
"""Obtain all possible stereoisomers of a graph, ignoring its assignments.

Expand Down Expand Up @@ -155,7 +161,9 @@ def _expand_stereo_core(
gprs.append((gra, pri_dct, rpri_dct))

if ts_:
gps = _select_ts_canonical_direction_priorities(gprs)
gps = _select_ts_canonical_direction_priorities(
gprs, fcand_dct=cand_dct, rcand_dct=rcand_dct
)
else:
gps = [(g, p) for g, p, _ in gprs]

Expand All @@ -176,12 +184,14 @@ def _expand_stereo_core(
return gps


def _select_ts_canonical_direction_priorities(gprs):
def _select_ts_canonical_direction_priorities(gprs, fcand_dct: dict, rcand_dct: dict):
"""Select select priorities for the canonical directions of each TS"""
gps = []
for ts_gra, pri_dct, rpri_dct in gprs:
ts_rgra = ts_reverse(ts_gra)
is_can_dir = is_canonical_direction(ts_gra, pri_dct, ts_rgra, rpri_dct)
is_can_dir = is_canonical_direction(
ts_gra, pri_dct, ts_rgra, rpri_dct, fcand_dct=fcand_dct, rcand_dct=rcand_dct
)
gps.append((ts_gra, pri_dct if is_can_dir else rpri_dct))
return gps

Expand Down Expand Up @@ -224,8 +234,8 @@ def _remove_strained_stereoisomers_from_expansion(gps, cand_dct):
return gps


def _remove_noncanonical_enantiomers_from_expansion(gps):
"""Remove non-canonical enantiomers from an expansion"""
def _remove_noncanonical_enantiomers_from_expansion(gps, cand_dct: dict):
"""Remove non-canonical enantiomers from an expansion."""
gps = list(gps)

# a. Augment the list of graphs and priorities with local stereo graphs
Expand All @@ -236,7 +246,9 @@ def _remove_noncanonical_enantiomers_from_expansion(gps):
ugra, upri_dct, uloc_gra = ugpl
rgra, rpri_dct, rloc_gra = rgpl
if rloc_gra == invert_atom_stereo_parities(uloc_gra):
is_can = is_canonical_enantiomer(ugra, upri_dct, rgra, rpri_dct)
is_can = is_canonical_enantiomer(
ugra, upri_dct, rgra, rpri_dct, cand_dct=cand_dct
)

if is_can is True:
gps.remove((rgra, rpri_dct))
Expand All @@ -246,13 +258,13 @@ def _remove_noncanonical_enantiomers_from_expansion(gps):
return gps


def _remove_symmetry_equivalents_from_expansion(gps):
"""Remove symmetry-equivalent stereoisomers from an expansion"""
def _remove_symmetry_equivalents_from_expansion(gps, cand_dct: dict):
"""Remove symmetry-equivalent stereoisomers from an expansion."""
gps0 = gps
gps = []
seen_reps = []
for gra, pri_dct in gps0:
rep = stereo_assignment_representation(gra, pri_dct)
rep = stereo_assignment_representation(gra, pri_dct, cand_dct=cand_dct)
if rep not in seen_reps:
gps.append((gra, pri_dct))
seen_reps.append(rep)
Expand Down Expand Up @@ -450,7 +462,7 @@ def stereo_corrected_geometry(

# 2. If there is a single, wrong atom stereocenter, simply reflect the geometry
if len(atm_keys) == 1:
atm_key, = atm_keys
(atm_key,) = atm_keys
curr_par = geometry_atom_parity(gra, geo, atm_key)
if curr_par != par_dct[atm_key]:
geo = geom_base.reflect_coordinates(geo)
Expand Down
8 changes: 5 additions & 3 deletions automol/tests/test_graph_ts.py
Original file line number Diff line number Diff line change
Expand Up @@ -873,9 +873,9 @@
C5H7O_B_TSG = (
{
0: ("C", 0, False),
1: ("C", 0, True),
1: ("C", 0, False),
2: ("C", 0, None),
3: ("C", 0, True),
3: ("C", 0, False),
4: ("C", 0, False),
5: ("O", 0, None),
6: ("H", 0, None),
Expand Down Expand Up @@ -1532,4 +1532,6 @@ def test__zmatrix():
# test__zmatrix()
# test__ts__expand_reaction_stereo("C5H6O", C5H6O_TSG, [1, 1])
# test__ts__expand_reaction_stereo("C5H7O3", C5H7O3_TSG, [1, 1, 1, 1])
test__ts__reagents_graph("C5H7O_B", C5H7O_B_TSG, {3: True, 4: False}, {3: False, 4: True})
test__ts__reagents_graph(
"C5H7O_B", C5H7O_B_TSG, {3: True, 4: False}, {3: False, 4: True}
)