From c8e489f9a79149ffbe3aebcb83b15975a162e673 Mon Sep 17 00:00:00 2001
From: Andreas Copan
Date: Fri, 6 Dec 2024 11:36:05 -0500
Subject: [PATCH] New: Functions for working with formulas

---
 automol/form/__init__.py |  4 ++++
 automol/form/_form.py    | 43 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/automol/form/__init__.py b/automol/form/__init__.py
index a5726c75..41b1aa4e 100644
--- a/automol/form/__init__.py
+++ b/automol/form/__init__.py
@@ -9,6 +9,7 @@
     atom_count,
     electron_count,
     element_count,
+    equal,
     from_string,
     heavy_atom_count,
     join,
@@ -19,6 +20,7 @@
     sorted_symbols_in_sequence,
     string,
     string2,
+    unique,
     without,
 )
 
@@ -33,6 +35,8 @@
     "join",
     "join_sequence",
     "sorted_symbols_in_sequence",
+    "unique",
+    "equal",
     "string",
     "string2",
     "from_string",
diff --git a/automol/form/_form.py b/automol/form/_form.py
index f5eca21c..cff30ee1 100644
--- a/automol/form/_form.py
+++ b/automol/form/_form.py
@@ -7,6 +7,7 @@
 import itertools
 from collections.abc import Sequence
 
+import more_itertools as mit
 import pyparsing as pp
 from pyparsing import pyparsing_common as ppc
 
@@ -71,17 +72,28 @@ def element_count(fml: Formula, symb: str) -> int:
     return fml[symb] if symb in fml else 0
 
 
-def without(fml: dict[str, int], symbs: tuple = ()) -> dict[str, int]:
+def without(fml: Formula, symbs: Sequence = ()) -> Formula:
     """Return a formula without hydrogen.
 
-    :param fml: Chemical formula, without hydrogen
+    :param fml: A chemical formula
     :symbs: Chemical symbols
     :return: Dictionary with new formula, without hydrogen
     """
     return {k: v for k, v in fml.items() if k not in symbs}
 
 
-def match(fml1: dict[str, int], fml2: dict[str, int]) -> bool:
+def normalized(fml: Formula) -> Formula:
+    """Return a formula without `None` or 0 values.
+
+    :param fml: A chemical formula
+    :return: The formula, without `None` or 0 values
+    """
+    fml = {ptab.to_symbol(k): int(v) for k, v in fml.items() if v}
+    assert all(v > 0 for v in fml.values()), f"Invalid formula: {fml}"
+    return fml
+
+
+def match(fml1: Formula, fml2: Formula) -> bool:
     """Check for a match between two formulas, allowing wildcard values.
 
     A stoichiometry of -1 indicates a wildcard value
@@ -143,15 +155,34 @@ def join_sequence(fmls: Formula) -> int:
     return functools.reduce(join, fmls)
 
 
-def sorted_symbols_in_sequence(fmls: list[dict]) -> list[dict]:
+def sorted_symbols_in_sequence(fmls: Sequence[Formula]) -> tuple[str, ...]:
     """Sort a sequence of formulas based on Hill-sorting.
 
     :param fmls: A sequence of formulas
-    :return: The same sequence, but sorted
+    :return: The sorted symbols in the sequence
     """
     return sorted_symbols(join_sequence(fmls).keys())
 
 
+def unique(fmls: Sequence[Formula]) -> list[Formula]:
+    """Get the unique formulas in a list.
+
+    :param fmls: A sequence of formulas
+    :return: The unique formulas in the sequence
+    """
+    return list(map(from_string, mit.unique_everseen(map(string, fmls))))
+
+
+def equal(fml1: Formula, fml2: Formula) -> bool:
+    """Determine whether two formulas are equal.
+
+    :param fml1: A formula
+    :param fml2: Another formula
+    :return: `True` if they are, `False` if they are not
+    """
+    return normalized(fml1) == normalized(fml2)
+
+
 # Str<->Dict Converters
 def string(fml: Formula, hyd: bool = True) -> str:
     """Convert formula dictionary to formula string in the Hill convention.
@@ -161,6 +192,8 @@ def string(fml: Formula, hyd: bool = True) -> str:
     :param hyd: include hydrogens?
     :return: True if formula includes hydrogen, False if no hydrogen
     """
+    fml = normalized(fml)
+
     fml_lst = [
         (symb, fml[symb]) for symb in sorted_symbols(fml.keys()) if symb != "H" or hyd
     ]