Merge pull request #611 from ISISNeutronMuon/chi/mdanalysis-converter

Add MDAnalysis converter
ISISNeutronMuon · Nov 19, 2024 · 07fa7e4 · 07fa7e4
2 parents 6272779 + dfcf3d0
commit 07fa7e4
Show file tree

Hide file tree

Showing 16 changed files with 841 additions and 37 deletions.
diff --git a/MDANSE/Src/MDANSE/Framework/AtomMapping/atom_mapping.py b/MDANSE/Src/MDANSE/Framework/AtomMapping/atom_mapping.py
@@ -61,11 +61,18 @@ def guess_element(atm_label: str, mass: Union[float, int, None] = None) -> str:
     AttributeError
         Error if unable to match to an element.
     """
-    if mass is not None and mass == 0.0:
+    if (mass is not None and mass == 0.0) or atm_label.upper() in [
+        "DUMMY",
+        "DU",
+        "D",
+        "M",
+    ]:
         return "Du"
 
+    regex = "([A-Za-z][A-Za-z]?)"
+
     guesses = []
-    guess_0 = re.findall("([A-Za-z][a-z]?)", atm_label)
+    guess_0 = re.findall(regex, atm_label)
     if len(guess_0) != 0:
         guess = guess_0[0].capitalize()
         guesses.append(guess)
@@ -80,27 +87,41 @@ def guess_element(atm_label: str, mass: Union[float, int, None] = None) -> str:
                 return guess
             num = ATOMS_DATABASE.get_atom_property(guess, "proton")
             atms = ATOMS_DATABASE.match_numeric_property("proton", num)
+
+            # if there is only one isotope for this element then we want
+            # to return the general element label e.g. Na not Na23
+            if len(atms) <= 2:
+                atms = [re.findall(regex, atms[0])[0]]
+
             for atm in atms:
                 atm_mass = ATOMS_DATABASE.get_atom_property(atm, "atomic_weight")
                 diff = abs(mass - atm_mass)
-                if diff < best_diff:
+                if diff < 1 and diff < best_diff:
                     best_match = atm
                     best_diff = diff
+
     if best_match is not None:
         return best_match
 
-    # try to match based on mass if available and guesses failed
+    # try to match based on mass only, if available and previous
+    # guesses failed
     best_diff = np.inf
     if mass is not None:
         for atm, properties in ATOMS_DATABASE._data.items():
             atm_mass = properties.get("atomic_weight", None)
             if atm_mass is None:
                 continue
             diff = abs(mass - atm_mass)
-            if diff < best_diff:
+            if diff < 1 and diff < best_diff:
                 best_match = atm
                 best_diff = diff
-        return best_match
+
+        num = ATOMS_DATABASE.get_atom_property(best_match, "proton")
+        atms = ATOMS_DATABASE.match_numeric_property("proton", num)
+        if len(atms) <= 2:
+            return re.findall(regex, atms[0])[0]
+        else:
+            return best_match
 
     raise AttributeError(f"Unable to guess: {atm_label}")
 

diff --git a/MDANSE/Src/MDANSE/Framework/Configurators/CoordinateFileConfigurator.py b/MDANSE/Src/MDANSE/Framework/Configurators/CoordinateFileConfigurator.py
@@ -0,0 +1,136 @@
+#    This file is part of MDANSE.
+#
+#    MDANSE is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+import ast
+import os
+from typing import Union
+
+import MDAnalysis as mda
+
+from MDANSE import PLATFORM
+from MDANSE.Framework.Configurators.IConfigurator import IConfigurator
+
+
+class CoordinateFileConfigurator(IConfigurator):
+
+    _default = ("", "AUTO")
+
+    def __init__(self, name, wildcard="All files (*)", **kwargs):
+        IConfigurator.__init__(self, name, **kwargs)
+        self._wildcard = wildcard
+
+    def configure(self, setting: tuple[Union[str, list], str]):
+        """
+        Parameters
+        ----------
+        setting : tuple[Union[str, list], str]
+            A tuple of a list of file names or a string of the list
+            which can be converted to a list using literal_eval and a
+            string of the coordinate file format.
+        """
+        values, format = setting
+
+        self["values"] = self._default
+        self._original_input = values
+
+        if type(values) is str:
+            if values:
+                try:
+                    values = ast.literal_eval(values)
+                except (SyntaxError, ValueError) as e:
+                    self.error_status = f"Unable to evaluate string: {e}"
+                    return
+                if type(values) is not list:
+                    self.error_status = (
+                        f"Input values should be able to be evaluated as a list"
+                    )
+                    return
+            else:
+                values = []
+
+        if type(values) is list:
+            if not all([type(value) is str for value in values]):
+                self.error_status = f"Input values should be a list of str"
+                return
+        else:
+            self.error_status = f"Input values should be able to be evaluated as a list"
+            return
+
+        values = [PLATFORM.get_path(value) for value in values]
+
+        none_exist = []
+        for value in values:
+            if not os.path.isfile(value):
+                none_exist.append(value)
+
+        if none_exist:
+            self.error_status = f"The files {', '.join(none_exist)} do not exist."
+            return
+
+        self["values"] = values
+        self["filenames"] = values
+
+        if format == "AUTO" or not self["filenames"]:
+            self["format"] = None
+        else:
+            if format in mda._READERS.keys():
+                self["format"] = format
+            else:
+                self.error_status = f"MDAnalysis coordinate file format not recognised."
+                return
+
+        topology_configurator = self._configurable[self._dependencies["input_file"]]
+        if topology_configurator._valid:
+            try:
+                if len(self["filenames"]) <= 1 or self["format"] is None:
+                    traj = mda.Universe(
+                        topology_configurator["filename"],
+                        *self["filenames"],
+                        format=self["format"],
+                        topology_format=topology_configurator["format"],
+                    ).trajectory
+                else:
+                    coord_files = [(i, self["format"]) for i in self["filenames"]]
+                    traj = mda.Universe(
+                        topology_configurator["filename"],
+                        coord_files,
+                        topology_format=topology_configurator["format"],
+                    ).trajectory
+            except Exception as e:
+                self.error_status = f"Unable to create MDAnalysis universe: {e}"
+                return
+        else:
+            if self["values"]:
+                self.error_status = "Requires valid topology file."
+                return
+
+        self.error_status = "OK"
+
+    @property
+    def wildcard(self):
+        return self._wildcard
+
+    def get_information(self) -> str:
+        """
+        Returns
+        -------
+        str
+           Input file names.
+        """
+        try:
+            filenames = self["value"]
+        except KeyError:
+            filenames = ["None"]
+        return f"Input files: {', '.join(filenames)}.\n"
diff --git a/MDANSE/Src/MDANSE/Framework/Configurators/MDAnalysisTimeStepConfigurator.py b/MDANSE/Src/MDANSE/Framework/Configurators/MDAnalysisTimeStepConfigurator.py
@@ -0,0 +1,66 @@
+#    This file is part of MDANSE.
+#
+#    MDANSE is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+import MDAnalysis as mda
+
+from MDANSE.Framework.Configurators.FloatConfigurator import FloatConfigurator
+
+
+class MDAnalysisTimeStepConfigurator(FloatConfigurator):
+
+    _default = 0.0
+
+    def configure(self, value):
+        # if the value is not valid then we use the MDAnalysis
+        # default values which maybe the time step in the inputted
+        # files or 1 ps
+        try:
+            value = float(value)
+        except (TypeError, ValueError):
+            pass
+
+        if value is None or value == "" or value == 0.0:
+            file_configurator = self._configurable[self._dependencies["topology_file"]]
+            files_configurator = self._configurable[
+                self._dependencies["coordinate_files"]
+            ]
+            if file_configurator._valid and files_configurator._valid:
+                try:
+                    coord_format = files_configurator["format"]
+                    coord_files = files_configurator["filenames"]
+                    if len(coord_files) <= 1 or coord_format is None:
+                        value = mda.Universe(
+                            file_configurator["filename"],
+                            *coord_files,
+                            format=coord_format,
+                            topology_format=file_configurator["format"],
+                        ).trajectory.ts.dt
+                    else:
+                        coord_files = [(i, coord_format) for i in coord_files]
+                        value = mda.Universe(
+                            file_configurator["filename"],
+                            coord_files,
+                            topology_format=file_configurator["format"],
+                        ).trajectory.ts.dt
+                except Exception as e:
+                    self.error_status = (
+                        f"Unable to determine a time step from MDAnalysis: {e}"
+                    )
+                    return
+            else:
+                self.error_status = f"Unable to determine a time step from MDAnalysis"
+                return
+
+        super().configure(value)
diff --git a/MDANSE/Src/MDANSE/Framework/Configurators/TopologyFileConfigurator.py b/MDANSE/Src/MDANSE/Framework/Configurators/TopologyFileConfigurator.py
@@ -0,0 +1,73 @@
+#    This file is part of MDANSE.
+#
+#    MDANSE is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+import MDAnalysis as mda
+
+from MDANSE.Framework.AtomMapping import AtomLabel
+from .FileWithAtomDataConfigurator import FileWithAtomDataConfigurator
+
+
+class TopologyFileConfigurator(FileWithAtomDataConfigurator):
+
+    _default = ("", "AUTO")
+
+    def configure(self, setting: str) -> None:
+        """
+        Parameters
+        ----------
+        setting : tuple
+            A tuple containing the topology filepath and format.
+        """
+        filepath, format = setting
+        if format == "AUTO":
+            self["format"] = None
+        else:
+            if format in mda._PARSERS.keys():
+                self["format"] = format
+            else:
+                self.error_status = f"MDAnalysis topology file format not recognised."
+                return
+        super().configure(filepath)
+
+    def parse(self) -> None:
+        # TODO currently MDAnalysis guesses the atom types and masses.
+        #  There is a PR https://github.com/MDAnalysis/mdanalysis/pull/3753
+        #  which will give us more control over what is guessed. We may
+        #  want to change the MDAnalysis guessing options in the future
+        #  so that it works better with the MDANSE atom mapping.
+        self.atoms = mda.Universe(
+            self["filename"], topology_format=self["format"]
+        ).atoms
+
+    def get_atom_labels(self) -> list[AtomLabel]:
+        """
+        Returns
+        -------
+        list[AtomLabel]
+            An ordered list of atom labels.
+        """
+        labels = []
+        for at in self.atoms:
+            kwargs = {}
+            for arg in ["element", "name", "type", "resname", "mass"]:
+                if hasattr(at, arg):
+                    kwargs[arg] = getattr(at, arg)
+            # the first out of the list above will be the main label
+            (k, main_label) = next(iter(kwargs.items()))
+            kwargs.pop(k)
+            label = AtomLabel(main_label, **kwargs)
+            if label not in labels:
+                labels.append(label)
+        return labels