Extract molecular SMILES embeddings from language models pre-trained with various objectives and architectures.
pip install smiles-featurizers==1.0.8
Our released models are listed below. You can load these models with the smiles-featurizers
package or with HuggingFace's Transformers.
Model | Type |
---|---|
UdS-LSV/smole-bert | Bert |
UdS-LSV/smole-bert-mtr | Bert |
UdS-LSV/smole-bart | Bart |
UdS-LSV/muv2x-simcse-smole-bert | Simcse |
UdS-LSV/siamese-smole-bert-muv-1x | SentenceTransformer |
from smiles_featurizers import BertFeaturizer
import torch
## set device
use_gpu = True if torch.cuda.is_available() else False
featurizer = BertFeaturizer("UdS-LSV/smole-bert", use_gpu=use_gpu)
embeddings = featurizer.embed(["CCC(C)(C)Br"])
from smiles_featurizers import BartFeaturizer
featurizer = BartFeaturizer("UdS-LSV/smole-bart")
embeddings = featurizer.embed(["CCC(C)(C)Br"], embedder="encoder")
from smiles_featurizers import BartFeaturizer
featurizer = BartFeaturizer("UdS-LSV/smole-bart")
embeddings = featurizer.embed(["CCC(C)(C)Br"], embedder="decoder")
from smiles_featurizers import SimcseFeaturizer
import torch
## set device
device = "cuda" if torch.cuda.is_available() else "cpu"
featurizer = SimcseFeaturizer("UdS-LSV/muv2x-simcse-smole-bert", device=device)
embeddings = featurizer.embed(["CCC(C)(C)Br"])
from smiles_featurizers import SentenceTransformersFeaturizer
import torch
## set device
device = "cuda" if torch.cuda.is_available() else "cpu"
featurizer = SentenceTransformersFeaturizer("UdS-LSV/siamese-smole-bert-muv-1x", device=device)
embeddings = featurizer.embed(["CCC(C)(C)Br"])