zmzhang · zmzhang · Mar 2, 2023 · Mar 2, 2023 · Mar 2, 2023 · Mar 2, 2023
diff --git a/README.md b/README.md
@@ -81,6 +81,10 @@ The following files are in the [others](others) folder:
 - [UMAPDataset.py](others/UMAPDataset.py). for generating graph datasets.
 - [theoretical calculation.ipynb](others/theoretical%20calculation.ipynb). investigate of the relationship between SigmaCCS and theoretical calculation
 - [Filtering.ipynb](others/Filtering.ipynb). Filtering of target unknown molecules based on the CCS and *mz* of the molecules
+- [90 and 180 degrees rotations.ipynb](others/90%20and%20180%20degrees%20rotations.ipynb). Performance on the test set with 90 and 180 degrees rotations
+- [Completely random rotations.ipynb](others/Completely%20random%20rotations.ipynb). Performance on the test set with completely random rotations
+- [Visualization of the 3D conformers using different seeds of ETKDG.ipynb](others/Visualization%20of%20the%203D%20conformers%20using%20different%20seeds%20of%20ETKDG.ipynb)
+- [RotationMatrix.py](others/RotationMatrix.py). defines the rotation matrices used to rotate the molecules
 - *[CFM-ID4](others/CFM-ID4)*. the code for generating MS/MS spectra with CFM-ID 4.0.
 - *[GNN-RT](others/GNN-RT)*:
     - [README.md](others/GNN-RT/README.md)
@@ -100,7 +104,12 @@ The following files are in the [others](others) folder:
         - LJ_data.csv (Get the LJ interaction parameters of different elements according to LJ_data.csv)
         - *Coordinate data* (Store the 3D coordinate data of all molecules in data.csv)
     - *Filtering data* 
-        - data.csv 
+        - data.csv
+- *[MultidimensionalFiltering](others/MultidimensionalFiltering)*:
+    - *[Example.ipynb](others/MultidimensionalFiltering/Example.ipynb)*. Uses the lipid (PubChem CID: 114944) as an example to demonstrate multidimensional filtering assisted by SigmaCCS
+    - *[LipidBlastPredCCS.xlsx](others/MultidimensionalFiltering/LipidBlastPredCCS.xlsx)*. the predicted CCS values of the lipids in LipidBlast
+    - *[MListWithPredictedRTs_329.xlsx](others/MultidimensionalFiltering/MListWithPredictedRTs_329.xlsx)*. retention times of the molecules in the MList of the lipid (PubChem CID: 114944). Please refer to the GNN-RT GitHub repository for details of the RT prediction
+    - *[Mouse_lung_adduct_negative.xlsx](others/MultidimensionalFiltering/Mouse_lung_adduct_negative.xlsx)*. the mouse lung dataset with 761 lipids in negative ion mode after the removal of unpredictable adducts and empty SMILES strings
 
 ### Package required: 
 - [UMAP](https://github.com/lmcinnes/umap) 0.5.1
@@ -116,3 +125,4 @@ The following files are in the [`slurm`](slurm) folder
 - zmzhang@csu.edu.cn
 - youjiazhang126@163.com
 - renfengguo05@foxmail.com
+- 212311021@csu.edu.cn
diff --git a/others/90 and 180 degrees rotations.ipynb b/others/90 and 180 degrees rotations.ipynb
@@ -0,0 +1,251 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "cdedc3a0",
+   "metadata": {},
+   "source": [
+    "# 90 and 180 degrees rotations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "64dcf84e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from RotationMatrix import *\n",
+    "import sys\n",
+    "sys.path.append(\"..\")\n",
+    "from sigma.GraphData import *\n",
+    "from sigma.model import *\n",
+    "import pandas as pd\n",
+    "import random\n",
+    "from pandas import Series,DataFrame\n",
+    "import numpy as np\n",
+    "import math\n",
+    "import copy\n",
+    "from tqdm import tqdm\n",
+    "import statistics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c1e1129f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ifile = \"../data/TestData.csv\"              # test-set CSV handed to read_data\n",
+    "ParameterPath = \"../parameter/parameter.pkl\"  # pickle file that is unpickled into `param`\n",
+    "mfileh5 = \"../model/model_new.h5\"            # model file handed to load_Model_from_file"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "abffdfa3",
+   "metadata": {},
+   "source": [
+    "## Performance on the test set with initial position"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "d62bc948",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The generation of 3d conformers: 100%|█████████████████▉| 558/559 [02:19<00:00,  3.99it/s]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "..\\sigma\\GraphData.py:187: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
+      "  features = np.asarray(features)\n",
+      "..\\sigma\\GraphData.py:188: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
+      "  edge_features = np.asarray(edge_features)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:07<00:00, 75.44it/s]\n",
+      "The number of molecules in the test set : 559 \n",
+      "\n",
+      "R2 Score : 0.9937075292150549 \n",
+      "\n",
+      "Median Relative Error : 1.2091052412873495 %\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Baseline: score the model on the test set with the conformers exactly as generated (no rotation).\n",
+    "smiles, adduct, ccs = read_data(ifile)\n",
+    "param = parameter.Parameter()\n",
+    "with open(ParameterPath, 'rb') as param_file:\n",
+    "    param = pickle.load(param_file)\n",
+    "smiles, adduct, ccs, Coordinate = Generating_coordinates(smiles, adduct, ccs, param.All_Atoms)\n",
+    "\n",
+    "# Min-max scale a deep copy so that `Coordinate` itself keeps the unscaled values.\n",
+    "CoordinateStd = copy.deepcopy(Coordinate)\n",
+    "coor_span = param.Max_Coor - param.Min_Coor\n",
+    "for mol_idx, mol_xyz in enumerate(CoordinateStd):\n",
+    "    CoordinateStd[mol_idx] = (np.array(mol_xyz) - param.Min_Coor) / coor_span\n",
+    "\n",
+    "adj, features, edge_features = convertToGraph(smiles, CoordinateStd, param.All_Atoms)\n",
+    "DataSet = MyDataset(features, adj, edge_features, ccs)\n",
+    "ECC_Model = load_Model_from_file(mfileh5)\n",
+    "\n",
+    "re = predict(ECC_Model, param.adduct_SET, DataSet, adduct)\n",
+    "print(\"The number of molecules in the test set :\", len(re), '\\n')\n",
+    "Sigma, CCS = re, ccs\n",
+    "SigmaPer = R2_MRE(CCS, Sigma)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5fe0a744",
+   "metadata": {},
+   "source": [
+    "## Performance on the test set with 90 degree rotation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "852caa4a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "..\\sigma\\GraphData.py:187: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
+      "  features = np.asarray(features)\n",
+      "..\\sigma\\GraphData.py:188: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
+      "  edge_features = np.asarray(edge_features)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:07<00:00, 75.13it/s]\n",
+      "R2 Score : 0.9936971125028244 \n",
+      "\n",
+      "Median Relative Error : 1.1653698978192344 %\n"
+     ]
+    }
+   ],
+   "source": [
+    "# NOTE(review): 0.25 * math.pi rad is 45 degrees per axis, yet this section is titled 90 degrees\n",
+    "# (assuming Rx/Ry/Rz take radians) -- TODO confirm which of the two is intended.\n",
+    "thetax = thetay = thetaz = 0.25 * math.pi\n",
+    "CoordinateRotate = copy.deepcopy(Coordinate)\n",
+    "for mol_xyz in CoordinateRotate:\n",
+    "    for atom_idx, atom_xyz in enumerate(mol_xyz):\n",
+    "        # Right-multiply each atom's coordinates by Rx, then Ry, then Rz.\n",
+    "        mol_xyz[atom_idx] = atom_xyz @ Rx(thetax) @ Ry(thetay) @ Rz(thetaz)\n",
+    "# Scale the rotated copy with the Min_Coor/Max_Coor bounds stored in `param`.\n",
+    "for mol_idx, mol_xyz in enumerate(CoordinateRotate):\n",
+    "    CoordinateRotate[mol_idx] = (np.array(mol_xyz) - param.Min_Coor) / (param.Max_Coor - param.Min_Coor)\n",
+    "\n",
+    "# Rebuild the graph inputs from the rotated coordinates and predict again.\n",
+    "adj, features, edge_features = convertToGraph(smiles, CoordinateRotate, param.All_Atoms)\n",
+    "DataSet = MyDataset(features, adj, edge_features, ccs)\n",
+    "ECC_Model = load_Model_from_file(mfileh5)\n",
+    "\n",
+    "re = predict(ECC_Model, param.adduct_SET, DataSet, adduct)\n",
+    "Sigma, CCS = re, ccs\n",
+    "SigmaPer = R2_MRE(CCS, Sigma)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a30e39c",
+   "metadata": {},
+   "source": [
+    "## Performance on the test set with 180 degree rotation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "ef24f01e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "..\\sigma\\GraphData.py:187: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
+      "  features = np.asarray(features)\n",
+      "..\\sigma\\GraphData.py:188: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
+      "  edge_features = np.asarray(edge_features)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:07<00:00, 75.23it/s]\n",
+      "R2 Score : 0.9935965379786625 \n",
+      "\n",
+      "Median Relative Error : 1.2743489126141125 %\n"
+     ]
+    }
+   ],
+   "source": [
+    "# NOTE(review): 0.5 * math.pi rad is 90 degrees per axis, yet this section is titled 180 degrees\n",
+    "# (assuming Rx/Ry/Rz take radians) -- TODO confirm which of the two is intended.\n",
+    "thetax = thetay = thetaz = 0.5 * math.pi\n",
+    "CoordinateRotate = copy.deepcopy(Coordinate)\n",
+    "for mol_xyz in CoordinateRotate:\n",
+    "    for atom_idx, atom_xyz in enumerate(mol_xyz):\n",
+    "        # Right-multiply each atom's coordinates by Rx, then Ry, then Rz.\n",
+    "        mol_xyz[atom_idx] = atom_xyz @ Rx(thetax) @ Ry(thetay) @ Rz(thetaz)\n",
+    "# Scale the rotated copy with the Min_Coor/Max_Coor bounds stored in `param`.\n",
+    "for mol_idx, mol_xyz in enumerate(CoordinateRotate):\n",
+    "    CoordinateRotate[mol_idx] = (np.array(mol_xyz) - param.Min_Coor) / (param.Max_Coor - param.Min_Coor)\n",
+    "\n",
+    "# Rebuild the graph inputs from the rotated coordinates and predict again.\n",
+    "adj, features, edge_features = convertToGraph(smiles, CoordinateRotate, param.All_Atoms)\n",
+    "DataSet = MyDataset(features, adj, edge_features, ccs)\n",
+    "ECC_Model = load_Model_from_file(mfileh5)\n",
+    "\n",
+    "re = predict(ECC_Model, param.adduct_SET, DataSet, adduct)\n",
+    "Sigma, CCS = re, ccs\n",
+    "SigmaPer = R2_MRE(CCS, Sigma)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}