Skip to content

Commit 84ac1a7

Browse files
committed
NO_JIRA Updated and moved some conformer analysis scripts from confidential.
1 parent 9dfc79d commit 84ac1a7

File tree

4 files changed

+437
-0
lines changed

4 files changed

+437
-0
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Conformer Filter Density
2+
3+
Filter conformers using a variety of metrics described below.
4+
5+
A CSV file is produced with details of each unusual torsion found in the conformers that fail the filter. Files containing the
6+
conformers that pass and fail the limit are also written (defaults: successes.mol and failures.mol).
7+
8+
CCDC Python API Licence required, minimum version: 3.0.15
9+
10+
The script can be run with any multi-molecule file, e.g. SDF.
11+
12+
## Instructions on Running
13+
14+
positional arguments: input molecule file
15+
16+
Input file (single- or multi-molecule file)
17+
18+
options:
19+
* -h, --help; Show this help message and exit
20+
* -m {absolute,relative}, --mode; Limit mode: absolute (fixed threshold) or relative (threshold based on
21+
molecule with fewest unusual torsions). WARNING: Relative mode may behave unexpectedly with conformers from
22+
multiple input molecules (default: absolute)
23+
* -l, --limit; Maximum number of unusual torsions for a passing molecule (default: 0)
24+
* -d, --local-density; Local density threshold for classifying a torsion as unusual (default: 10.0)
25+
* --incl-organometallics; Include organometallic compounds in the search (default: organic compounds only)
26+
* --generalisation; Turn on generalisation for searches
27+
* --successfn; Output file for molecules that pass the filter (default: successes.mol)
28+
* --failurefn; Output file for molecules that fail the filter (default: failures.mol)
29+
* -u, --unusual-torsions; Output CSV file for unusual torsion details (default: unusual_torsions.csv)
30+
31+
32+
Originally created by Paul Sanschagrin
33+
Updated by Chris Ringrose
34+
35+
For feedback or to report any issues please contact support@ccdc.cam.ac.uk
36+
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
#!/usr/bin/env python3
2+
#
3+
# This script can be used for any purpose without limitation subject to the
4+
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
5+
#
6+
# This permission notice and the following statement of attribution must be
7+
# included in all copies or substantial portions of this script.
8+
#
9+
# 2026-02-03: created by the Cambridge Crystallographic Data Centre
10+
11+
import argparse
12+
import csv
13+
14+
from ccdc import conformer, io
15+
16+
17+
def parse_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When None (the default) argparse falls
            back to sys.argv[1:], so existing zero-argument callers are
            unaffected.

    Returns:
        argparse.Namespace with the attributes inmolfn, mode, torsion_limit,
        local_density_threshold, incl_organometallics, generalisation,
        successfn, failurefn and unusualtorsionsfn.
    """
    parser = argparse.ArgumentParser(description=__doc__)

    parser.add_argument('inmolfn',
                        metavar='<input molecule file>',
                        help='Input file (single- or multi-molecule file)')

    parser.add_argument('-m', '--mode',
                        choices=['absolute', 'relative'],
                        default='absolute',
                        help='Limit mode: absolute (fixed threshold) or relative '
                             '(threshold based on molecule with fewest unusual torsions). '
                             'WARNING: Relative mode may behave unexpectedly with conformers '
                             'from multiple input molecules (default: %(default)s)')

    # No range check is applied here; only the int conversion is enforced.
    parser.add_argument('-l', '--limit',
                        dest='torsion_limit',
                        type=int,
                        default=0,
                        metavar='<limit>',
                        help='Maximum number of unusual torsions for a passing molecule '
                             '(default: %(default)s)')

    parser.add_argument('-d', '--local-density',
                        dest='local_density_threshold',
                        type=float,
                        default=10.0,
                        metavar='<threshold>',
                        help='Local density threshold for classifying a torsion as unusual '
                             '(default: %(default)s)')

    parser.add_argument('--incl-organometallics',
                        dest='incl_organometallics',
                        action='store_true',
                        help='Include organometallic compounds in the search '
                             '(default: organic compounds only)')

    parser.add_argument('--generalisation',
                        action='store_true',
                        help='Turn on generalisation for searches')

    parser.add_argument('--successfn',
                        default='successes.mol',
                        metavar='<file>',
                        help='Output file for molecules that pass the filter '
                             '(default: %(default)s)')

    parser.add_argument('--failurefn',
                        default='failures.mol',
                        metavar='<file>',
                        help='Output file for molecules that fail the filter '
                             '(default: %(default)s)')

    parser.add_argument('-u', '--unusual-torsions',
                        dest='unusualtorsionsfn',
                        default='unusual_torsions.csv',
                        metavar='<file>',
                        help='Output CSV file for unusual torsion details '
                             '(default: %(default)s)')

    return parser.parse_args(argv)
79+
80+
81+
def create_mogul_engine(local_density_threshold, incl_organometallics, generalisation):
    """Build a geometry analyser that only analyses torsions.

    Args:
        local_density_threshold: Value assigned to the torsion
            local-density threshold setting.
        incl_organometallics: When true the organometallic filter is set to
            'all'; otherwise it is set to 'organics_only'.
        generalisation: Value assigned to the generalisation setting.

    Returns:
        The configured ccdc.conformer.GeometryAnalyser instance.
    """
    analyser = conformer.GeometryAnalyser()
    settings = analyser.settings

    # Bond, angle and ring analysis are switched off, leaving torsions only.
    for geometry_type in ('bond', 'angle', 'ring'):
        getattr(settings, geometry_type).analyse = False

    settings.torsion.local_density_threshold = local_density_threshold
    settings.generalisation = generalisation

    if incl_organometallics:
        settings.organometallic_filter = 'all'
    else:
        settings.organometallic_filter = 'organics_only'

    return analyser
103+
104+
105+
def analysis(torsion_limit, input_filename, mode, engine, success_file, failure_file, unusual_torsion_file):
    """Analyse molecules for unusual torsions and split them into pass/fail files.

    Every molecule in the input is standardised and analysed before anything
    is written, because 'relative' mode needs the smallest unusual-torsion
    count across the whole input to derive its threshold.

    Args:
        torsion_limit: Maximum number of unusual torsions allowed. In
            'relative' mode it is added to the smallest count found.
        input_filename: Path to input molecule file.
        mode: 'absolute' uses torsion_limit as the threshold; any other
            value is treated as 'relative'.
        engine: Configured GeometryAnalyser instance.
        success_file: Path to output file for passing molecules.
        failure_file: Path to output file for failing molecules.
        unusual_torsion_file: Path to CSV file for unusual torsion details.
            Rows are written only for molecules that fail; MoleculeIndex is
            the zero-based position of the molecule in the input.
    """
    # Keep (molecule, unusual torsions) pairs rather than attaching ad-hoc
    # attributes to the molecule objects returned by the reader.
    analysed = []
    with io.MoleculeReader(input_filename) as mol_reader:
        for molecule in mol_reader:
            molecule.standardise_aromatic_bonds()
            molecule.standardise_delocalised_bonds()

            result = engine.analyse_molecule(molecule)

            # A torsion only counts when it is flagged unusual and has enough hits.
            unusual_torsions = [
                torsion for torsion in result.analysed_torsions
                if torsion.unusual and torsion.enough_hits
            ]
            analysed.append((molecule, unusual_torsions))

    # The threshold is the same for every molecule, so compute it once.
    if mode == 'absolute':
        threshold = torsion_limit
    else:
        fewest_unusual = min((len(torsions) for _, torsions in analysed), default=0)
        threshold = fewest_unusual + torsion_limit

    with io.MoleculeWriter(success_file) as passed_writer, \
            io.MoleculeWriter(failure_file) as failed_writer, \
            open(unusual_torsion_file, 'w', newline='') as csv_file:

        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['MoleculeIndex', 'Value', 'Zscore', 'LocalDensity', 'NumHits', 'Atoms'])

        for idx, (molecule, unusual_torsions) in enumerate(analysed):
            if len(unusual_torsions) <= threshold:
                passed_writer.write(molecule)
                continue

            failed_writer.write(molecule)
            csv_writer.writerows(
                [
                    idx,
                    torsion.value,
                    torsion.z_score,
                    torsion.local_density,
                    torsion.nhits,
                    ' '.join(torsion.atom_labels),
                ]
                for torsion in unusual_torsions
            )
162+
163+
164+
def run():
    """Parse the command line, configure the analyser and run the filter.

    Raises:
        ValueError: If the requested torsion limit is negative.
    """
    options = parse_args()

    # Negative limits are rejected here, before any analysis starts.
    if options.torsion_limit < 0:
        raise ValueError('Torsion limit must be >= 0')

    analyser = create_mogul_engine(
        local_density_threshold=options.local_density_threshold,
        incl_organometallics=options.incl_organometallics,
        generalisation=options.generalisation,
    )

    analysis(
        torsion_limit=options.torsion_limit,
        input_filename=options.inmolfn,
        mode=options.mode,
        engine=analyser,
        success_file=options.successfn,
        failure_file=options.failurefn,
        unusual_torsion_file=options.unusualtorsionsfn,
    )
186+
187+
188+
if __name__ == '__main__':
    # Run the filter only when executed as a script, not when imported.
    run()

scripts/filter_poses/ReadMe.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Filter Poses
2+
3+
This is a short script to filter molecular poses in a multi-molecule file based on the torsion probabilities.
4+
Information will be printed to screen, or can be saved to a csv file.
5+
6+
CCDC Python API Licence required, minimum version: 3.0.15
7+
8+
The script can be run with any multi-molecule file, e.g. SDF.
9+
10+
## Instructions on Running
11+
12+
positional arguments:
13+
* input file; Input file (multi-molecule file).
14+
15+
options:
16+
* -h, --help; Show this help message and exit.
17+
* -csv, --write-csv; Write a csv file for all the analysed conformers.
18+
19+
Originally written by Jason Cole
20+
Updated by Chris Ringrose
21+
22+
For feedback or to report any issues please contact support@ccdc.cam.ac.uk
23+

0 commit comments

Comments
 (0)