xraypy · maurov · Jul 23, 2025 · May 18, 2025 · May 18, 2025 · May 18, 2025
diff --git a/larixite/cif_cluster.py b/larixite/cif_cluster.py
@@ -4,12 +4,14 @@
 from io import StringIO
 from typing import Union
 import numpy as np
-from pymatgen.core import __version__ as pymatgen_version, Structure, Site
+from pymatgen.core import __version__ as pymatgen_version, Structure, Site, Molecule
+from pymatgen.io.cif import CifParser
+from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
 
 from xraydb import atomic_symbol, atomic_number, xray_edge
 
-from .utils import strict_ascii, fcompact, isotime
-from .amcsd_utils import PMG_CIF_OPTS, CifParser, Molecule, SpacegroupAnalyzer
+from .utils import strict_ascii, fcompact, isotime, get_logger
+from .amcsd_utils import PMG_CIF_OPTS
 from .amcsd import get_cif
 
 from .version import __version__ as x_version
@@ -18,6 +20,11 @@
 
 TEMPLATE_FOLDER = Path(Path(__file__).parent, 'templates')
 
+logger = get_logger("larixite.cif_cluster")
+if logger.level != 10:
+    import warnings
+    warnings.filterwarnings("ignore", category=UserWarning)
+
 
 def read_cif_structure(ciftext: str) -> Structure:
     """read CIF text, return CIF Structure
@@ -50,13 +57,13 @@ def read_cif_structure(ciftext: str) -> Structure:
 
 def site_label(site: Site) -> str:
     """
-    return a string label for a pymatgen Site object, 
+    return a string label for a pymatgen Site object,
     using the species string and fractional coordinates
-    
+
     Parameters
     ----------
     site : pymatgen Site object
-    
+
     Returns
     -------
     str
@@ -88,14 +95,14 @@ def __init__(self, ciftext=None, filename=None, absorber=None,
     def set_absorber(self, absorber=None):
         """
         set the absorbing atom element
-        
+
         Parameters
         ----------
         absorber : None, int, or str
             if None, no change will be made.
             if int, the atomic number of the absorbing element
             if str, the atomic symbol of the absorbing element
-        
+
         Notes
         -----
         The absorber atom is assumed to be in the CIF structure.

diff --git a/larixite/fdmnes.py b/larixite/fdmnes.py
@@ -9,14 +9,13 @@
 spectroscopy (XAS, XES, RIXS) from the atomic structures
 
 """
-
 from dataclasses import dataclass
 from typing import Union
 from pathlib import Path
 from pymatgen.core import __version__ as pymatgen_version, Element, Molecule
-from larixite.struct import get_structure
+from larixite.struct import get_structure, get_structure_from_text
 from larixite.struct.xas import XasStructure
-from larixite.utils import get_logger, strict_ascii, isotime
+from larixite.utils import get_logger, strict_ascii, isotime, read_textfile
 from larixite.version import __version__ as larixite_version
 
 logger = get_logger("larixite.fdmnes")
@@ -334,3 +333,50 @@ def write_input(
             fp.write("1\njob_inp.txt")
         logger.info(f"written `{fnout}`")
         return outdir
+
+
+
+def struct2fdmnes(inp: Union[str, Path],
+                  absorber: Union[str, int, Element],
+                  frame: int = 0,
+                  format: str = 'cif',
+                  filename: Union[None, str] = None) -> dict:
+    """convert CIF/XYZ into a dictionary of {name: text} for FDMNES input files
+
+    Parameters
+    ----------
+    inp : str or Path
+        text of CIF/XYZ file, name of the file, or Path to the file
+    absorber : str, int, or Element
+        Atomic symbol or number of the absorbing element
+    frame : int, optional
+        Index of the structure for multi-frame structures in the file [0]
+    format : str
+        format of text : 'cif' or 'xyz' ['cif']
+    filename : str
+        full path to filename  ['unknown.{format}']
+
+    Returns
+    -------
+    dict
+        {'fdmfile.txt': FDMNES job file text, '<name>.inp': FDMNES input text}
+    """
+    # a Path has no len(); a short string naming an existing file is a file name
+    if isinstance(inp, Path) or (len(inp) < 512 and Path(inp).exists()):
+        if filename is None:
+            filename = Path(inp).absolute().as_posix()
+        inp = read_textfile(inp)
+    if filename is None:
+        filename = f'unknown.{format}'
+
+    if isinstance(absorber, str):
+        absorber = Element(absorber)
+    elif isinstance(absorber, int):
+        absorber = Element.from_Z(absorber)
+
+    structs = get_structure_from_text(inp, absorber, frame=frame, format=format,
+                                      filename=filename)
+    fout_name = f"{filename.replace('.', '_')}_{absorber.symbol}.inp"
+    fdm = FdmnesXasInput(structs, absorber=absorber)
+    return {'fdmfile.txt': f'1\n{fout_name}\n',
+            fout_name: fdm.get_input()}
diff --git a/larixite/struct/__init__.py b/larixite/struct/__init__.py
@@ -5,8 +5,11 @@
 Wrapper on top of pymatgen to handle atomic structures for XAS calculations
 ============================================================================
 """
-
+import tempfile
+import os
+from io import StringIO
 import numpy as np
+
 from pathlib import Path
 from typing import Union
 from copy import deepcopy
@@ -16,7 +19,7 @@
 from larixite.struct.xas import XasStructure
 from larixite.struct.xas_cif import XasStructureCif
 from larixite.struct.xas_xyz import XasStructureXyz
-from larixite.utils import get_logger
+from larixite.utils import get_logger, read_textfile
 from larixite.amcsd_utils import PMG_CIF_OPTS
 
 logger = get_logger("larixite.struct")
@@ -83,7 +86,7 @@ def get_structure(
         try:
             struct = structs.parse_structures(primitive=False)[frame]
         except Exception:
-            raise ValueError(f"could not get structure {frame} from text of CIF")
+            raise ValueError(f"could not get structure {frame} from text of CIF {filepath}")
         molecule = Molecule.from_dict(struct.as_dict())
         logger.debug("structure created from a CIF file")
         structout = XasStructureCif(
@@ -93,10 +96,10 @@ def get_structure(
             structure=struct,
             molecule=molecule,
             struct_type="crystal",
-            absorber=Element(absorber),
+            absorber=Element(absorber)
         )
     #: XYZ
-    if filepath.suffix == ".xyz":
+    elif filepath.suffix == '.xyz':
         xyz = XYZ.from_file(filepath)
         molecules = xyz.all_molecules
         molecule = molecules[frame]
@@ -109,19 +112,94 @@ def get_structure(
             structure=structure,
             molecule=molecule,
             struct_type="molecule",
-            absorber=Element(absorber),
+            absorber=Element(absorber)
         )
-    #: some checks on the structure
+    else:
+        raise ValueError(f"unknown structure format '{format}'")
+
     if not structout.struct.is_ordered:
         logger.warning(
             f"[{structout.name}] contains partially occupied sites that are not fully supported yet"
         )
-    if structout is not None:
-        return structout
+    return structout
+
+
+def get_structure_from_text(text: str,
+                            absorber: Union[str, int, Element],
+                            frame: int = 0,
+                            format: str = 'cif',
+                            filename: str = 'unknown.cif') -> XasStructure:
+    """Get an XasStructure from the text of a structural file, according to its format
+
+    Parameters
+    ----------
+    text : str
+        text of file
+    absorber : str, int, or Element
+        Atomic symbol or number of the absorbing element
+    frame : int, optional
+        Index of the structure for multi-frame structures in the CIF/XYZ file [0]
+    format : str
+        format of text : 'cif' or 'xyz' ['cif']
+    filename : str
+        full path to filename  ['unknown.cif']
+
+    Returns
+    -------
+    XasStructure
+        The XAS structure group for the specified file and absorber.
+    """
+    if isinstance(absorber, str):
+        absorber = Element(absorber)
+    elif isinstance(absorber, int):
+        absorber = Element.from_Z(absorber)
+
+    filepath = Path(filename).absolute()
+    #: CIF
+    if format == "cif":
+        try:
+            structs = CifParser(StringIO(text), **PMG_CIF_OPTS)
+        except Exception as err:
+            raise ValueError("could not parse CIF text") from err
+        try:
+            struct = structs.parse_structures(primitive=False)[frame]
+        except Exception as err:
+            raise ValueError(f"could not get structure {frame} from text of CIF {filename}") from err
+        molecule = Molecule.from_dict(struct.as_dict())
+        logger.debug("structure created from a CIF file")
+        structout = XasStructureCif(name=filepath.name,  label=filepath.stem,
+                                    filepath=filepath,
+                                    structure=struct,
+                                    molecule=molecule,
+                                    struct_type="crystal",
+                                    absorber=absorber)
+
+    #: XYZ
+    elif format == 'xyz':
+        tfd, xyz_tfile = tempfile.mkstemp(suffix='.xyz')  # unique name per call
+        try:
+            with os.fdopen(tfd, 'w') as fh:
+                fh.write(text)
+            xyz = XYZ.from_file(xyz_tfile)
+        finally:
+            os.unlink(xyz_tfile)  # always remove, even if parsing fails
+        molecule = xyz.all_molecules[frame]
+        structure = mol2struct(molecule)
+        logger.debug("structure created from a XYZ file")
+        structout = XasStructureXyz(name=filepath.name, label=filepath.stem,
+                                    filepath=filepath,
+                                    structure=structure,
+                                    molecule=molecule,
+                                    struct_type="molecule",
+                                    absorber=absorber)
     else:
-        #: UNSUPPORTED
-        raise ValueError(f"File type {filepath.suffix} not supported yet")
+        raise ValueError(f"unknown structure format '{format}'")
 
+    if not structout.struct.is_ordered:
+        logger.warning(
+            f"[{structout.name}] contains partially occupied sites that are not fully supported yet"
+        )
+    return structout
 
 def get_structs_from_dir(
     structsdir: Union[str, Path],

diff --git a/larixite/utils.py b/larixite/utils.py
@@ -5,8 +5,13 @@
 
 import os
 import logging
+import io
+from typing import Union
+from gzip import GzipFile
 from pathlib import Path
 from packaging import version as pkg_version
+from charset_normalizer import from_bytes
+
 from pyshortcuts import get_homedir, bytes2str, isotime
 
 HAS_IPYTHON = False
@@ -134,3 +139,58 @@ def show_loggers(clear_handlers=False):
         for handler in logger.handlers:
             print(f"+-> Handler: {handler}")
             print(f"+-> Level: {handler.level} ({logging.getLevelName(handler.level)})")
+
+
+
+
+def bytes2str(bytedata):
+    "decode bytes using charset_normalizer.from_bytes ('' if no encoding is detected)"
+    return str(from_bytes(bytedata).best() or '')  # best() is None when nothing matches
+
+def get_path(val: Union[Path, str, bytes]):
+    """build an absolute Path from a Path, str, or bytes value
+
+    bytes are decoded with bytes2str() first; a Path is used as given
+    """
+    decoded = bytes2str(val) if isinstance(val, bytes) else val
+    as_path = Path(decoded) if isinstance(decoded, str) else decoded
+    return as_path.absolute()
+
+def is_gzip(filename):
+    "is a file gzipped? (True if it starts with the gzip magic bytes 1f 8b 08)"
+    with open(get_path(filename), 'rb') as fh:
+        magic = fh.read(3)
+    return magic == b'\x1f\x8b\x08'
+
+def read_textfile(filename: Union[Path, io.IOBase, str, bytes], size=None) -> str:
+    """read text from a file as string (decoding from bytes)
+
+    Arguments
+    ---------
+    filename  (Path, str, bytes, or open File): file or file-like object to read
+    size  (int or None): number of bytes to read
+
+    Returns
+    -------
+    text of file as string.
+
+    Notes
+    ------
+    1. the file encoding is detected with charset_normalizer.from_bytes
+       which is then used to decode bytes read from file.
+    2. line endings are normalized to be '\n', so that
+       splitting on '\n' will give a list of lines.
+    3. if filename is given, it can be a gzip-compressed file
+    """
+    if isinstance(filename, io.IOBase):
+        text = filename.read(size)
+        # test the data itself rather than `filename.mode`: in-memory streams
+        # (io.BytesIO, io.StringIO) have no `mode`, and binary modes are not
+        # only 'rb' (e.g. 'r+b')
+        if isinstance(text, bytes):
+            text = bytes2str(text)
+    else:
+        fopen = GzipFile if is_gzip(filename) else open
+        with fopen(get_path(filename), 'rb') as fh:
+            text = bytes2str(fh.read(size))
+    return text.replace('\r\n', '\n').replace('\r', '\n')