Skip to content

Commit 71d0374

Browse files
authored
rgroups (#946)
Co-authored-by: Roman Porozhnetov <roman_porozhnetov@epam.com>
1 parent dcc2e58 commit 71d0374

File tree

5 files changed

+162
-24
lines changed

5 files changed

+162
-24
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*** CDXML to mol ***
2+
simple_rgrp64.cdx
3+
{"root":{"nodes":[{"$ref":"mol0"},{"$ref":"rg1"}]},"mol0":{"type":"molecule","atoms":[{"label":"C","location":[0.4969991147518158,-0.639666736125946,0.0]},{"type":"rg-label","$refs":["rg-1"],"location":[0.49533283710479739,-0.1623331755399704,0.0]},{"label":"C","location":[0.011334228329360485,-0.6336659789085388,0.0]},{"label":"C","location":[0.009999592788517475,-0.1623331755399704,0.0]}],"bonds":[{"type":1,"atoms":[0,1]},{"type":1,"atoms":[0,2]},{"type":1,"atoms":[1,3]},{"type":1,"atoms":[2,3]}]},"rg1":{"rlogic":{"number":1},"type":"rgroup","atoms":[{"label":"C","location":[27.818666458129884,-13.968999862670899,0.0]},{"label":"C","location":[28.23433494567871,-14.208999633789063,0.0]},{"label":"C","location":[28.649999618530275,-13.968999862670899,0.0]},{"label":"C","location":[29.06566619873047,-14.208999633789063,0.0]},{"label":"Cl","location":[29.481332778930665,-13.968999862670899,0.0]}],"bonds":[{"type":1,"atoms":[0,1]},{"type":1,"atoms":[1,2]},{"type":1,"atoms":[2,3]},{"type":1,"atoms":[3,4]}]}}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import os
2+
import sys
3+
4+
sys.path.append(
5+
os.path.normpath(
6+
os.path.join(os.path.abspath(__file__), "..", "..", "..", "common")
7+
)
8+
)
9+
from env_indigo import * # noqa
10+
11+
# Regression script: walks the CDX test folder in sorted order and prints the
# JSON representation of every file it can load.
indigo = Indigo()
indigo.setOption("molfile-saving-skip-date", True)

print("*** CDXML to mol ***")

root = joinPathPy("molecules/cdx", __file__)
for filename in sorted(os.listdir(root)):
    print(filename)
    path = os.path.join(root, filename)
    try:
        # First attempt: load as a regular molecule.
        mol = indigo.loadMoleculeFromFile(path)
        print(mol.json())
    except IndigoException as e:
        # Report the failure, then retry the same file as a query molecule.
        # A failure of the retry is intentionally not caught here.
        print(getIndigoExceptionText(e))
        print("*** Try as Query ***")
        mol = indigo.loadQueryMoleculeFromFile(path)
        print(mol.json())
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
VmpDRDAxMDAEAwIBAAAAAAAAAAAAAACAAAAAAAMAFQAAAENoZW1EcmF3IEpTIDIu
2+
MC4wLjkEAhAAvVl5ASOP3QJw/bgBHkWEAwEJCAAAAAAAAAAAAAIJCAAAANIEAADb
3+
CQ0IAQABCAcBAAE6BAEAATsEAQAARQQBAAE8BAEAAEoEAQAADAYBAAEPBgEAAQ0G
4+
AQAAQgQBAABDBAEAAEQEAQAADggCAJoCCggIABgAYADIAAMACwgIABgAAADIAAMA
5+
CQgEAACAAgAICAQAmZkBAAcIBACZmQAABggEAAAAAgAFCAQAZmYOAAQIAgC0AAMI
6+
BAAAAHgAIwgBAAUMCAEAACgIAQABKQgBAAEqCAEAATIIAQAAKwgBACgsCAEACi0I
7+
AQABLggBAAACCBAAAAAkAAAAJAAAACQAAAAkAAEDAgAAAAIDAgABAAADMgAIAP//
8+
/////wAAAAAAAP//AAAAAP////8AAAAA//8AAAAA/////wAAAAD/////AAD//wAB
9+
DwAAAAEAGADp/QUAQXJpYWwACHgAAAMAAAEgASAAAAAAC2YIoP+E/4gL4wkYA2cF
10+
JwP8AAIAAAEgASAAAAAAC2YIoAABAAAAZAAAAAEAAQEBAAAAAScPAAEAAQAAAAAA
11+
AAAAAAAAAAACABkBkAAAAAAAQAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAtAsC
12+
AAAAtQsUAAAAQ2hlbWljYWwgRm9ybXVsYTogtgsOAAAARXhhY3QgTWFzczogtwsU
13+
AAAATW9sZWN1bGFyIFdlaWdodDoguAsHAAAAbS96OiC5CxYAAABFbGVtZW50YWwg
14+
QW5hbHlzaXM6ILoLEQAAAEJvaWxpbmcgUG9pbnQ6ILsLEQAAAE1lbHRpbmcgUG9p
15+
bnQ6ILwLEQAAAENyaXRpY2FsIFRlbXA6IL0LEQAAAENyaXRpY2FsIFByZXM6IL4L
16+
EAAAAENyaXRpY2FsIFZvbDogvwsQAAAAR2liYnMgRW5lcmd5OiDACwkAAABMb2cg
17+
UDogwQsGAAAATVI6IMILDwAAAEhlbnJ5J3MgTGF3OiDDCxAAAABIZWF0IG9mIEZv
18+
cm06IMQLCAAAAHRQU0E6IMkLAgAAAMoLAgAAAAsMAgABAAoMAQAACQwBAAAMDAUA
19+
AAAoIykBgBwAAAAEAhAAAAAAAAAAAACFqzYDAACSBhIIBAAAAJIGEwgEAIWrNgMW
20+
CAQAAAAkABgIBAAAACQAFAgEAAAAAAAZCAAAEAgCAAEADwgCAAEAEQgBAAEDgAEA
21+
AAAEAhAAvVl5ASOP3QL+14wBtTP0AgoAAgABAASAEAAAAAACCAA9iowBUXjsAgoA
22+
AgACADcEAQABAAAEgBEAAAAAAggAUTh+AYVr7AIKAAIAAwAABAIACQBIBAAANwQB
23+
AAE0BAQAAgAAADEECAAVAAAAFwAAAAaAAAAAAAACCAA0sYEBJc/oAgQCEAC9WXkB
24+
Jc/oAjRxgwG1M/QCIwgBAAACBwIAAAAFBwEAAQAHDgABAAAAGABgAMgAAABSMQkH
25+
DgABAAAAGABgAMgAAABSMQAAAAAEgBMAAAAAAggAKFyMAWbm3QIKAAIABAA3BAEA
26+
AQAABIAUAAAAAAIIAFE4fgEo3N0CCgACAAUANwQBAAEAAAWAFQAAAAoAAgAGAAQG
27+
BAAQAAAABQYEABEAAAAKBgEAAQAABYAWAAAACgACAAcABAYEABAAAAAFBgQAEwAA
28+
AAoGAQABAAAFgBcAAAAKAAIACAAEBgQAEQAAAAUGBAAUAAAACgYBAAEAAAWAGAAA
29+
AAoAAgAJAAQGBAATAAAABQYEABQAAAAKBgEAAQAAAAAKgAIAAAAEAhAAhat9Abhe
30+
JwNw/bgBHkWEAwALEAAeRX4BUfgnAz2KlAFm5joDAQsQAD2KlAFR+CcD12O4AYWr
31+
gwMCCwIAAQADgAQAAAAEAhAAVzOeAR/FMQOZma4Bw0V6AwoAAgALAASABQAAAAAC
32+
CADrEaMBXI9CAwoAAgAMADcEAQABAAAEgAYAAAAAAggAHkWqAa4HTwMKAAIADQA3
33+
BAEAAQAABIAHAAAAAAIIAOsRowEAgFsDCgACAA4ANwQBAAEAAASACAAAAAACCAAe
34+
RaoBUfhnAwoAAgAPADcEAQABAAAEgAkAAAAAAggA6xGjAaNwdAMKAAIAEAACBAIA
35+
EQArBAIAAABIBAAANwQBAAEGgAAAAAAAAggAzoqmAUPUcAMEAhAAVzOeAUPUcAMB
36+
vqYBw0V6AyMIAQAAAgcCAAAABQcBAAEABw4AAQAAABgAYADIAAAAQ2wJBw4AAQAA
37+
ABgAYADIAAAAQ2wAAAAABIAOAAAAAAIIAABAqgG4HjYDCgACABEAAAQCAAwASwQB
38+
AAE3BAEAAQAABYAKAAAACgACABIABAYEAAUAAAAFBgQABgAAAAoGAQABAAAFgAsA
39+
AAAKAAIAEwAEBgQABgAAAAUGBAAHAAAACgYBAAEAAAWADAAAAAoAAgAUAAQGBAAH
40+
AAAABQYEAAgAAAAKBgEAAQAABYANAAAACgACABUABAYEAAgAAAAFBgQACQAAAAoG
41+
AQABAAAFgA8AAAAKAAIAFgAEBgQADgAAAAUGBAAFAAAACgYBAAEAAAAABoAdAAAA
42+
AAIIAJnZigEeRS0DBAIQAIWrfQG4XicDcP24AR5FhAMKAAIACgAHBwIAAAACBwIA
43+
AAAABw4AAQAAABgAYADIAAAAUjEJBw4AAQAAABgAYADIAAAAUjEAAAAAAAAAAAAA

core/indigo-core/molecule/molecule_cdxml_loader.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ namespace indigo
132132
AutoInt enhanced_stereo_group;
133133
AutoInt index;
134134
AutoInt geometry;
135+
AutoInt alt_group_id;
136+
AutoInt rg_index;
137+
135138
bool is_not_list;
136139
std::vector<AutoInt> element_list;
137140
std::unordered_map<int, int> bond_id_to_connection_idx;
@@ -893,6 +896,7 @@ namespace indigo
893896

894897
void _parseGraphic(CDXElement elem);
895898
void _parseArrow(CDXElement elem);
899+
void _parseAltGroup(CDXElement elem);
896900

897901
void _addAtomsAndBonds(BaseMolecule& mol, const std::vector<int>& atoms, const std::vector<CdxmlBond>& bonds);
898902
void _addBracket(BaseMolecule& mol, const CdxmlBracket& bracket);

core/indigo-core/molecule/src/molecule_cdxml_loader.cpp

Lines changed: 84 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -152,14 +152,22 @@ void MoleculeCdxmlLoader::_parseCollections(BaseMolecule& mol)
152152
int node_idx = _id_to_node_index.at(node.id);
153153
switch (node.type)
154154
{
155+
case kCDXNodeType_NamedAlternativeGroup:
155156
case kCDXNodeType_Element:
156157
case kCDXNodeType_ElementList:
157158
atoms.push_back(node_idx);
158159
break;
159160
case kCDXNodeType_ExternalConnectionPoint: {
160-
auto& fn = nodes[_fragment_nodes.back()];
161-
if (fn.connections.size() == 0)
162-
fn.ext_connections.push_back(node.id);
161+
if (_fragment_nodes.size())
162+
{
163+
auto& fn = nodes[_fragment_nodes.back()];
164+
if (fn.connections.size() == 0)
165+
fn.ext_connections.push_back(node.id);
166+
}
167+
else
168+
{
169+
// TODO: handle a free external connection point (_fragment_nodes is empty) - possibly an attachment point?
170+
}
163171
}
164172
break;
165173
case kCDXNodeType_Nickname:
@@ -366,9 +374,11 @@ void MoleculeCdxmlLoader::_parseCDXMLElements(CDXElement elem, bool no_siblings,
366374

367375
auto arrow_lambda = [this](CDXElement elem) { this->_parseArrow(elem); };
368376

377+
auto altgroup_lambda = [this](CDXElement elem) { this->_parseAltGroup(elem); };
378+
369379
std::unordered_map<std::string, std::function<void(CDXElement elem)>> cdxml_dispatcher = {
370-
{"n", node_lambda}, {"b", bond_lambda}, {"fragment", fragment_lambda}, {"group", group_lambda}, {"bracketedgroup", bracketed_lambda},
371-
{"t", text_lambda}, {"graphic", graphic_lambda}, {"arrow", arrow_lambda}};
380+
{"n", node_lambda}, {"b", bond_lambda}, {"fragment", fragment_lambda}, {"group", group_lambda}, {"bracketedgroup", bracketed_lambda},
381+
{"t", text_lambda}, {"graphic", graphic_lambda}, {"arrow", arrow_lambda}, {"altgroup", altgroup_lambda}};
372382

373383
for (elem; elem.hasContent(); elem = elem.nextSiblingElement())
374384
{
@@ -416,6 +426,10 @@ void MoleculeCdxmlLoader::_addAtomsAndBonds(BaseMolecule& mol, const std::vector
416426
if (_pmol)
417427
{
418428
atom_idx = _pmol->addAtom(atom.element);
429+
430+
if (atom.type == kCDXNodeType_NamedAlternativeGroup)
431+
mol.allowRGroupOnRSite(atom_idx, atom.rg_index);
432+
419433
_id_to_atom_idx.emplace(atom.id, atom_idx);
420434
mol.setAtomXyz(atom_idx, atom.pos);
421435
_pmol->setAtomCharge_Silent(atom_idx, atom.charge);
@@ -752,7 +766,11 @@ void MoleculeCdxmlLoader::_parseNode(CdxmlNode& node, CDXElement elem)
752766

753767
auto stereo_lambda = [&node](const std::string& data) { node.stereo = KCIPStereochemistryCharToIndex.at(data.front()); };
754768

755-
auto node_type_lambda = [&node](const std::string& data) { node.type = KNodeTypeNameToInt.at(data); };
769+
auto node_type_lambda = [&node](const std::string& data) {
770+
node.type = KNodeTypeNameToInt.at(data);
771+
if (node.type == kCDXNodeType_NamedAlternativeGroup)
772+
node.element = ELEM_RSITE;
773+
};
756774

757775
auto element_list_lambda = [&node](const std::string& data) {
758776
std::vector<std::string> elements = split(data, ' ');
@@ -770,25 +788,36 @@ void MoleculeCdxmlLoader::_parseNode(CdxmlNode& node, CDXElement elem)
770788

771789
auto enhanced_stereo_group_lambda = [&node](const std::string& data) { node.enhanced_stereo_group = data; };
772790

773-
std::unordered_map<std::string, std::function<void(const std::string&)>> node_dispatcher = {
774-
{"id", id_lambda},
775-
{"p", pos_lambda},
776-
{"xyz", pos_lambda},
777-
{"NumHydrogens", hydrogens_lambda},
778-
{"Charge", charge_lambda},
779-
{"Isotope", isotope_lambda},
780-
{"Radical", radical_lambda},
781-
{"AS", stereo_lambda},
782-
{"NodeType", node_type_lambda},
783-
{"Element", element_lambda},
784-
{"GenericNickname", label_lambda},
785-
{"ElementList", element_list_lambda},
786-
{"BondOrdering", bond_ordering_lambda},
787-
{"Geometry", geometry_lambda},
788-
{"EnhancedStereoType", enhanced_stereo_type_lambda},
789-
{"EnhancedStereoGroupNum", enhanced_stereo_group_lambda},
790-
};
791+
auto alt_group_id_lambda = [&node](const std::string& data) { node.alt_group_id = data; };
792+
793+
std::unordered_map<std::string, std::function<void(const std::string&)>> node_dispatcher = {{"id", id_lambda},
794+
{"p", pos_lambda},
795+
{"xyz", pos_lambda},
796+
{"NumHydrogens", hydrogens_lambda},
797+
{"Charge", charge_lambda},
798+
{"Isotope", isotope_lambda},
799+
{"Radical", radical_lambda},
800+
{"AS", stereo_lambda},
801+
{"NodeType", node_type_lambda},
802+
{"Element", element_lambda},
803+
{"GenericNickname", label_lambda},
804+
{"ElementList", element_list_lambda},
805+
{"BondOrdering", bond_ordering_lambda},
806+
{"Geometry", geometry_lambda},
807+
{"EnhancedStereoType", enhanced_stereo_type_lambda},
808+
{"EnhancedStereoGroupNum", enhanced_stereo_group_lambda},
809+
{"AltGroupID", alt_group_id_lambda}};
791810
applyDispatcher(elem.firstProperty(), node_dispatcher);
811+
for (auto child_elem = elem.firstChildElement(); child_elem.hasContent(); child_elem = child_elem.nextSiblingElement())
812+
{
813+
if (child_elem.name() == "t")
814+
{
815+
std::string label;
816+
_parseLabel(child_elem, label);
817+
if (label.find("R") == 0)
818+
node.rg_index = label.substr(1);
819+
}
820+
}
792821
}
793822

794823
void MoleculeCdxmlLoader::_addNode(CdxmlNode& node)
@@ -879,6 +908,37 @@ void MoleculeCdxmlLoader::parseBBox(const std::string& data, Rect2f& bbox)
879908
throw Error("Not enought coordinates for text bounding box");
880909
}
881910

911+
// Parses an "altgroup" element and attaches its content to an R-group.
//
// Direct children named "fragment" are collected as candidate fragments.
// Direct children named "t" are read through _parseLabel; when the label text
// starts with "R", the rest of the text is stored as an R-group number.
// If at least one fragment and one such label were found, the FIRST fragment
// is loaded by a separate loader into an object obtained from neu() on the
// current molecule, and that object is added to the fragments of the R-group
// selected by the FIRST label. Any further fragments or labels are ignored.
void MoleculeCdxmlLoader::_parseAltGroup(CDXElement elem)
{
    std::vector<CDXElement> fragment_elems;
    std::vector<AutoInt> rgroup_numbers;

    auto child = elem.firstChildElement();
    while (child.hasContent())
    {
        auto child_name = child.name();
        if (child_name == "fragment")
        {
            fragment_elems.push_back(child);
        }
        else if (child_name == "t")
        {
            std::string text;
            _parseLabel(child, text);
            // Only labels of the form "R<suffix>" contribute; the suffix is converted to AutoInt.
            if (text.find("R") == 0)
                rgroup_numbers.push_back(text.substr(1));
        }
        child = child.nextSiblingElement();
    }

    // Nothing to register unless both a fragment and an R label are present.
    if (fragment_elems.empty() || rgroup_numbers.empty())
        return;

    // The target is whichever of the two molecule pointers is set.
    BaseMolecule& target = _pmol ? *(BaseMolecule*)_pmol : *(BaseMolecule*)_pqmol;

    // A dedicated loader instance is used for the fragment, sharing this loader's
    // scanner, binary flag and stereochemistry options.
    MoleculeCdxmlLoader fragment_loader(_scanner, _is_binary);
    std::unique_ptr<BaseMolecule> loaded(target.neu());
    fragment_loader.stereochemistry_options = stereochemistry_options;
    fragment_loader.loadMoleculeFromFragment(*loaded, fragment_elems.front());

    // The R-group is looked up only after the fragment has loaded successfully.
    RGroup& rgroup = target.rgroups.getRGroup(rgroup_numbers.front());
    rgroup.fragments.add(loaded.release());
}
941+
882942
void MoleculeCdxmlLoader::_parseGraphic(CDXElement elem)
883943
{
884944
AutoInt superseded_id = 0;

0 commit comments

Comments
 (0)