Skip to content
Merged
23 changes: 15 additions & 8 deletions larixite/cif_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
from io import StringIO
from typing import Union
import numpy as np
from pymatgen.core import __version__ as pymatgen_version, Structure, Site
from pymatgen.core import __version__ as pymatgen_version, Structure, Site, Molecule
from pymatgen.io.cif import CifParser
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer

from xraydb import atomic_symbol, atomic_number, xray_edge

from .utils import strict_ascii, fcompact, isotime
from .amcsd_utils import PMG_CIF_OPTS, CifParser, Molecule, SpacegroupAnalyzer
from .utils import strict_ascii, fcompact, isotime, get_logger
from .amcsd_utils import PMG_CIF_OPTS
from .amcsd import get_cif

from .version import __version__ as x_version
Expand All @@ -18,6 +20,11 @@

import logging

# Folder holding the template files shipped next to this module.
TEMPLATE_FOLDER = Path(Path(__file__).parent, 'templates')

# Module-level logger for larixite.cif_cluster.
logger = get_logger("larixite.cif_cluster")

# Unless this logger is set to DEBUG, install an "ignore" filter for
# UserWarning so such warnings are not shown during normal use.
# NOTE(review): this filter is installed at import time and is not limited
# to this module -- confirm that is intended.
if logger.level != logging.DEBUG:
    import warnings
    warnings.filterwarnings("ignore", category=UserWarning)


def read_cif_structure(ciftext: str) -> Structure:
"""read CIF text, return CIF Structure
Expand Down Expand Up @@ -50,13 +57,13 @@ def read_cif_structure(ciftext: str) -> Structure:

def site_label(site: Site) -> str:
"""
return a string label for a pymatgen Site object,
return a string label for a pymatgen Site object,
using the species string and fractional coordinates

Parameters
----------
site : pymatgen Site object

Returns
-------
str
Expand Down Expand Up @@ -88,14 +95,14 @@ def __init__(self, ciftext=None, filename=None, absorber=None,
def set_absorber(self, absorber=None):
"""
set the absorbing atom element

Parameters
----------
absorber : None, int, or str
if None, no change will be made.
if int, the atomic number of the absorbing element
if str, the atomic symbol of the absorbing element

Notes
-----
The absorber atom is assumed to be in the CIF structure.
Expand Down
52 changes: 49 additions & 3 deletions larixite/fdmnes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,13 @@
spectroscopy (XAS, XES, RIXS) from the atomic structures

"""

from dataclasses import dataclass
from typing import Union
from pathlib import Path
from pymatgen.core import __version__ as pymatgen_version, Element, Molecule
from larixite.struct import get_structure
from larixite.struct import get_structure, get_structure_from_text
from larixite.struct.xas import XasStructure
from larixite.utils import get_logger, strict_ascii, isotime
from larixite.utils import get_logger, strict_ascii, isotime, read_textfile
from larixite.version import __version__ as larixite_version

logger = get_logger("larixite.fdmnes")
Expand Down Expand Up @@ -334,3 +333,50 @@ def write_input(
fp.write("1\njob_inp.txt")
logger.info(f"written `{fnout}`")
return outdir



def struct2fdmnes(inp: Union[str, Path],
                  absorber: Union[str, int, Element],
                  frame: int = 0,
                  format: str = 'cif',
                  filename: Union[None, str] = None) -> dict:
    """Convert CIF/XYZ into a dictionary of {name: text} for FDMNES input files.

    Parameters
    ----------
    inp : str or Path
        text of the structure file, name of the file, or Path to the file
    absorber : str, int, or Element
        Atomic symbol or number of the absorbing element
    frame : int, optional
        Index of the structure for multi-frame structures in the file [0]
    format : str
        format of text : 'cif' or 'xyz' ['cif']
    filename : str
        full path to filename. When None, the absolute path of `inp` is
        used if `inp` refers to a file, otherwise 'unknown.{format}'

    Returns
    -------
    dict
        Two entries: 'fdmfile.txt', whose text names the FDMNES input
        file, and that input file name mapped to the generated input text.

    """
    # A Path object is always a file reference.  A short string is treated
    # as a file name only if such a file exists; anything else is taken to
    # be the text of the structure itself.
    is_file = isinstance(inp, Path)
    if not is_file and len(inp) < 512:
        try:
            is_file = Path(inp).is_file()
        except (OSError, ValueError):
            # structure text is not a valid path (e.g. name too long)
            is_file = False

    if is_file:
        if filename is None:
            filename = Path(inp).absolute().as_posix()
        inp = read_textfile(inp)
    if filename is None:
        filename = f'unknown.{format}'

    if isinstance(absorber, str):
        absorber = Element(absorber)
    elif isinstance(absorber, int):
        absorber = Element.from_Z(absorber)

    structs = get_structure_from_text(inp, absorber, frame=frame,
                                      format=format, filename=filename)
    fout_name = f"{filename.replace('.', '_')}_{absorber.symbol}.inp"
    fdm = FdmnesXasInput(structs, absorber=absorber)
    return {'fdmfile.txt': f'1\n{fout_name}\n',
            fout_name: fdm.get_input()}
100 changes: 89 additions & 11 deletions larixite/struct/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
Wrapper on top of pymatgen to handle atomic structures for XAS calculations
============================================================================
"""

import tempfile
import os
from io import StringIO
import numpy as np

from pathlib import Path
from typing import Union
from copy import deepcopy
Expand All @@ -16,7 +19,7 @@
from larixite.struct.xas import XasStructure
from larixite.struct.xas_cif import XasStructureCif
from larixite.struct.xas_xyz import XasStructureXyz
from larixite.utils import get_logger
from larixite.utils import get_logger, read_textfile
from larixite.amcsd_utils import PMG_CIF_OPTS

logger = get_logger("larixite.struct")
Expand Down Expand Up @@ -83,7 +86,7 @@ def get_structure(
try:
struct = structs.parse_structures(primitive=False)[frame]
except Exception:
raise ValueError(f"could not get structure {frame} from text of CIF")
raise ValueError(f"could not get structure {frame} from text of CIF {filepath}")
molecule = Molecule.from_dict(struct.as_dict())
logger.debug("structure created from a CIF file")
structout = XasStructureCif(
Expand All @@ -93,10 +96,10 @@ def get_structure(
structure=struct,
molecule=molecule,
struct_type="crystal",
absorber=Element(absorber),
absorber=Element(absorber)
)
#: XYZ
if filepath.suffix == ".xyz":
elif filepath.suffix == '.xyz':
xyz = XYZ.from_file(filepath)
molecules = xyz.all_molecules
molecule = molecules[frame]
Expand All @@ -109,19 +112,94 @@ def get_structure(
structure=structure,
molecule=molecule,
struct_type="molecule",
absorber=Element(absorber),
absorber=Element(absorber)
)
#: some checks on the structure
else:
raise ValueError(f"unknown structure format '{format}'")

if not structout.struct.is_ordered:
logger.warning(
f"[{structout.name}] contains partially occupied sites that are not fully supported yet"
)
if structout is not None:
return structout
return structout


def get_structure_from_text(text: str,
                            absorber: Union[str, int, Element],
                            frame: int = 0,
                            format: str = 'cif',
                            filename: str = 'unknown.cif') -> XasStructure:
    """Get an XasStructure from the text of a structural file, according to its format

    Parameters
    ----------
    text : str
        text of file
    absorber : str, int, or Element
        Atomic symbol or number of the absorbing element
    frame : int, optional
        Index of the structure for multi-frame structures in the CIF/XYZ file [0]
    format : str
        format of text : 'cif' or 'xyz' ['cif']
    filename : str
        full path to filename ['unknown.cif']; used for the name, label
        and filepath of the returned structure

    Returns
    -------
    XasStructure
        The XAS structure group for the specified text and absorber.

    Raises
    ------
    ValueError
        if the CIF text cannot be parsed, if the requested CIF frame
        cannot be extracted, or if `format` is not 'cif' or 'xyz'.
    """
    if isinstance(absorber, str):
        absorber = Element(absorber)
    elif isinstance(absorber, int):
        absorber = Element.from_Z(absorber)

    filepath = Path(filename).absolute()
    #: CIF
    if format == "cif":
        try:
            structs = CifParser(StringIO(text), **PMG_CIF_OPTS)
        except Exception as exc:
            raise ValueError("could not parse CIF text") from exc
        try:
            struct = structs.parse_structures(primitive=False)[frame]
        except Exception as exc:
            raise ValueError(
                f"could not get structure {frame} from text of CIF {filename}"
            ) from exc
        molecule = Molecule.from_dict(struct.as_dict())
        logger.debug("structure created from a CIF file")
        structout = XasStructureCif(name=filepath.name, label=filepath.stem,
                                    filepath=filepath,
                                    structure=struct,
                                    molecule=molecule,
                                    struct_type="crystal",
                                    absorber=absorber)

    #: XYZ
    elif format == 'xyz':
        # XYZ is read from a file, so write the text into a private
        # temporary directory: concurrent callers cannot clash on the file
        # name and the file is removed even if parsing raises.
        with tempfile.TemporaryDirectory() as tmpdir:
            xyz_tfile = Path(tmpdir, 'tmp_0.xyz')
            with open(xyz_tfile, 'w') as fh:
                fh.write(text)
            xyz = XYZ.from_file(xyz_tfile)
            molecules = xyz.all_molecules
        molecule = molecules[frame]
        structure = mol2struct(molecule)
        logger.debug("structure created from a XYZ file")
        structout = XasStructureXyz(name=filepath.name, label=filepath.stem,
                                    filepath=filepath,
                                    structure=structure,
                                    molecule=molecule,
                                    struct_type="molecule",
                                    absorber=absorber)
    else:
        #: UNSUPPORTED
        raise ValueError(f"unknown structure format '{format}'")

    if not structout.struct.is_ordered:
        logger.warning(
            f"[{structout.name}] contains partially occupied sites that are not fully supported yet"
        )
    return structout

def get_structs_from_dir(
structsdir: Union[str, Path],
Expand Down
60 changes: 60 additions & 0 deletions larixite/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@

import os
import logging
import io
from typing import Union
from gzip import GzipFile
from pathlib import Path
from packaging import version as pkg_version
from charset_normalizer import from_bytes

from pyshortcuts import get_homedir, bytes2str, isotime

HAS_IPYTHON = False
Expand Down Expand Up @@ -134,3 +139,58 @@ def show_loggers(clear_handlers=False):
for handler in logger.handlers:
print(f"+-> Handler: {handler}")
print(f"+-> Level: {handler.level} ({logging.getLevelName(handler.level)})")




def bytes2str(bytedata):
    """Decode bytes to str using charset_normalizer.from_bytes.

    A str argument is returned unchanged.  If no encoding can be detected,
    the data is decoded as UTF-8 with undecodable bytes replaced, instead
    of returning the literal string 'None'.
    """
    if isinstance(bytedata, str):
        return bytedata
    best_match = from_bytes(bytedata).best()
    if best_match is None:
        # detection failed: fall back to a lossy but well-defined decoding
        return bytes(bytedata).decode('utf-8', errors='replace')
    return str(best_match)

def get_path(val: Union[Path, str, bytes]):
    """Return the absolute Path for a path given as Path, str, or bytes.

    bytes are first decoded to str with bytes2str, a str is wrapped in a
    Path, and the result is made absolute.
    """
    decoded = bytes2str(val) if isinstance(val, bytes) else val
    as_path = Path(decoded) if isinstance(decoded, str) else decoded
    return as_path.absolute()

def is_gzip(filename):
    """Return True if the file starts with the gzip signature bytes.

    The first three bytes are compared with b'\\x1f\\x8b\\x08' (the gzip
    magic number followed by the deflate method byte).  Errors from
    opening the file are not caught here.
    """
    with open(get_path(filename), 'rb') as fh:
        return fh.read(3) == b'\x1f\x8b\x08'

def read_textfile(filename: Union[Path, io.IOBase, str, bytes], size=None) -> str:
    """read text from a file as string (decoding from bytes)

    Argument
    --------
    filename (Path, str, bytes, or open File): file or file-like object to read
    size (int or None): number of bytes to read

    Returns
    -------
    text of file as string.

    Notes
    ------
    1. the file encoding is detected with charset_normalizer.from_bytes
       which is then used to decode bytes read from file.
    2. line endings are normalized to be '\\n', so that
       splitting on '\\n' will give a list of lines.
    3. if filename is given, it can be a gzip-compressed file
    4. for an open file-like object, decoding is applied whenever the
       data read is bytes, so in-memory streams (io.StringIO, io.BytesIO)
       and any binary mode are handled.
    """
    if isinstance(filename, io.IOBase):
        text = filename.read(size)
        # Decide by the type of the data actually read: not every stream
        # has a `mode` attribute, and binary modes are not only 'rb'.
        if isinstance(text, (bytes, bytearray)):
            text = bytes2str(text)
    else:
        fopen = GzipFile if is_gzip(filename) else open
        with fopen(get_path(filename), 'rb') as fh:
            text = bytes2str(fh.read(size))
    return text.replace('\r\n', '\n').replace('\r', '\n')
Loading
Loading