Skip to content

Commit

Permalink
Merge pull request #611 from ISISNeutronMuon/chi/mdanalysis-converter
Browse files Browse the repository at this point in the history
Add MDAnalysis converter
  • Loading branch information
MBartkowiakSTFC authored Nov 19, 2024
2 parents 6272779 + dfcf3d0 commit 07fa7e4
Show file tree
Hide file tree
Showing 16 changed files with 841 additions and 37 deletions.
33 changes: 27 additions & 6 deletions MDANSE/Src/MDANSE/Framework/AtomMapping/atom_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,18 @@ def guess_element(atm_label: str, mass: Union[float, int, None] = None) -> str:
AttributeError
Error if unable to match to an element.
"""
if mass is not None and mass == 0.0:
if (mass is not None and mass == 0.0) or atm_label.upper() in [
"DUMMY",
"DU",
"D",
"M",
]:
return "Du"

regex = "([A-Za-z][A-Za-z]?)"

guesses = []
guess_0 = re.findall("([A-Za-z][a-z]?)", atm_label)
guess_0 = re.findall(regex, atm_label)
if len(guess_0) != 0:
guess = guess_0[0].capitalize()
guesses.append(guess)
Expand All @@ -80,27 +87,41 @@ def guess_element(atm_label: str, mass: Union[float, int, None] = None) -> str:
return guess
num = ATOMS_DATABASE.get_atom_property(guess, "proton")
atms = ATOMS_DATABASE.match_numeric_property("proton", num)

# if there is only one isotope for this element then we want
# to return the general element label e.g. Na not Na23
if len(atms) <= 2:
atms = [re.findall(regex, atms[0])[0]]

for atm in atms:
atm_mass = ATOMS_DATABASE.get_atom_property(atm, "atomic_weight")
diff = abs(mass - atm_mass)
if diff < best_diff:
if diff < 1 and diff < best_diff:
best_match = atm
best_diff = diff

if best_match is not None:
return best_match

# try to match based on mass if available and guesses failed
# try to match based on mass only, if available and previous
# guesses failed
best_diff = np.inf
if mass is not None:
for atm, properties in ATOMS_DATABASE._data.items():
atm_mass = properties.get("atomic_weight", None)
if atm_mass is None:
continue
diff = abs(mass - atm_mass)
if diff < best_diff:
if diff < 1 and diff < best_diff:
best_match = atm
best_diff = diff
return best_match

num = ATOMS_DATABASE.get_atom_property(best_match, "proton")
atms = ATOMS_DATABASE.match_numeric_property("proton", num)
if len(atms) <= 2:
return re.findall(regex, atms[0])[0]
else:
return best_match

raise AttributeError(f"Unable to guess: {atm_label}")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# This file is part of MDANSE.
#
# MDANSE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import ast
import os
from typing import Union

import MDAnalysis as mda

from MDANSE import PLATFORM
from MDANSE.Framework.Configurators.IConfigurator import IConfigurator


class CoordinateFileConfigurator(IConfigurator):

_default = ("", "AUTO")

def __init__(self, name, wildcard="All files (*)", **kwargs):
IConfigurator.__init__(self, name, **kwargs)
self._wildcard = wildcard

def configure(self, setting: tuple[Union[str, list], str]):
"""
Parameters
----------
setting : tuple[Union[str, list], str]
A tuple of a list of file names or a string of the list
which can be converted to a list using literal_eval and a
string of the coordinate file format.
"""
values, format = setting

self["values"] = self._default
self._original_input = values

if type(values) is str:
if values:
try:
values = ast.literal_eval(values)
except (SyntaxError, ValueError) as e:
self.error_status = f"Unable to evaluate string: {e}"
return
if type(values) is not list:
self.error_status = (
f"Input values should be able to be evaluated as a list"
)
return
else:
values = []

if type(values) is list:
if not all([type(value) is str for value in values]):
self.error_status = f"Input values should be a list of str"
return
else:
self.error_status = f"Input values should be able to be evaluated as a list"
return

values = [PLATFORM.get_path(value) for value in values]

none_exist = []
for value in values:
if not os.path.isfile(value):
none_exist.append(value)

if none_exist:
self.error_status = f"The files {', '.join(none_exist)} do not exist."
return

self["values"] = values
self["filenames"] = values

if format == "AUTO" or not self["filenames"]:
self["format"] = None
else:
if format in mda._READERS.keys():
self["format"] = format
else:
self.error_status = f"MDAnalysis coordinate file format not recognised."
return

topology_configurator = self._configurable[self._dependencies["input_file"]]
if topology_configurator._valid:
try:
if len(self["filenames"]) <= 1 or self["format"] is None:
traj = mda.Universe(
topology_configurator["filename"],
*self["filenames"],
format=self["format"],
topology_format=topology_configurator["format"],
).trajectory
else:
coord_files = [(i, self["format"]) for i in self["filenames"]]
traj = mda.Universe(
topology_configurator["filename"],
coord_files,
topology_format=topology_configurator["format"],
).trajectory
except Exception as e:
self.error_status = f"Unable to create MDAnalysis universe: {e}"
return
else:
if self["values"]:
self.error_status = "Requires valid topology file."
return

self.error_status = "OK"

@property
def wildcard(self):
return self._wildcard

def get_information(self) -> str:
"""
Returns
-------
str
Input file names.
"""
try:
filenames = self["value"]
except KeyError:
filenames = ["None"]
return f"Input files: {', '.join(filenames)}.\n"
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# This file is part of MDANSE.
#
# MDANSE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import MDAnalysis as mda

from MDANSE.Framework.Configurators.FloatConfigurator import FloatConfigurator


class MDAnalysisTimeStepConfigurator(FloatConfigurator):

_default = 0.0

def configure(self, value):
# if the value is not valid then we use the MDAnalysis
# default values which maybe the time step in the inputted
# files or 1 ps
try:
value = float(value)
except (TypeError, ValueError):
pass

if value is None or value == "" or value == 0.0:
file_configurator = self._configurable[self._dependencies["topology_file"]]
files_configurator = self._configurable[
self._dependencies["coordinate_files"]
]
if file_configurator._valid and files_configurator._valid:
try:
coord_format = files_configurator["format"]
coord_files = files_configurator["filenames"]
if len(coord_files) <= 1 or coord_format is None:
value = mda.Universe(
file_configurator["filename"],
*coord_files,
format=coord_format,
topology_format=file_configurator["format"],
).trajectory.ts.dt
else:
coord_files = [(i, coord_format) for i in coord_files]
value = mda.Universe(
file_configurator["filename"],
coord_files,
topology_format=file_configurator["format"],
).trajectory.ts.dt
except Exception as e:
self.error_status = (
f"Unable to determine a time step from MDAnalysis: {e}"
)
return
else:
self.error_status = f"Unable to determine a time step from MDAnalysis"
return

super().configure(value)
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# This file is part of MDANSE.
#
# MDANSE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import MDAnalysis as mda

from MDANSE.Framework.AtomMapping import AtomLabel
from .FileWithAtomDataConfigurator import FileWithAtomDataConfigurator


class TopologyFileConfigurator(FileWithAtomDataConfigurator):

_default = ("", "AUTO")

def configure(self, setting: str) -> None:
"""
Parameters
----------
setting : tuple
A tuple containing the topology filepath and format.
"""
filepath, format = setting
if format == "AUTO":
self["format"] = None
else:
if format in mda._PARSERS.keys():
self["format"] = format
else:
self.error_status = f"MDAnalysis topology file format not recognised."
return
super().configure(filepath)

def parse(self) -> None:
# TODO currently MDAnalysis guesses the atom types and masses.
# There is a PR https://github.com/MDAnalysis/mdanalysis/pull/3753
# which will give us more control over what is guessed. We may
# want to change the MDAnalysis guessing options in the future
# so that it works better with the MDANSE atom mapping.
self.atoms = mda.Universe(
self["filename"], topology_format=self["format"]
).atoms

def get_atom_labels(self) -> list[AtomLabel]:
"""
Returns
-------
list[AtomLabel]
An ordered list of atom labels.
"""
labels = []
for at in self.atoms:
kwargs = {}
for arg in ["element", "name", "type", "resname", "mass"]:
if hasattr(at, arg):
kwargs[arg] = getattr(at, arg)
# the first out of the list above will be the main label
(k, main_label) = next(iter(kwargs.items()))
kwargs.pop(k)
label = AtomLabel(main_label, **kwargs)
if label not in labels:
labels.append(label)
return labels
Loading

0 comments on commit 07fa7e4

Please sign in to comment.