@@ -2864,6 +2864,7 @@ def to_hill_formula(molecule):
28642864 """
28652865
28662866 from openforcefield .topology import TopologyMolecule
2867+ import collections
28672868 # Check for a networkx graph first; otherwise assume we have a Molecule or TopologyMolecule instance and just try to
28682869 # extract the info. Note that we do not type check the TopologyMolecule due to cyclic dependencies.
28692870 if isinstance (molecule , nx .Graph ):
@@ -2881,32 +2882,29 @@ def to_hill_formula(molecule):
28812882 f'openforcefield.topology.topology.TopologyMolecule or networkx representaion '
28822883 f'of the molecule.' )
28832884
2884- # Now sort the elements, method taken from topology._networkx_to_hill_formula
2885- # Count the number of instances of each atomic number
2886- at_num_to_counts = dict ([(unq , atom_nums .count (unq )) for unq in atom_nums ])
2887-
2888- symbol_to_counts = {}
2889- # Check for C and H first, to make a correct hill formula (remember dicts in python 3.6+ are ordered)
2890- if 6 in at_num_to_counts :
2891- symbol_to_counts ['C' ] = at_num_to_counts [6 ]
2892- del at_num_to_counts [6 ]
2893-
2894- if 1 in at_num_to_counts :
2895- symbol_to_counts ['H' ] = at_num_to_counts [1 ]
2896- del at_num_to_counts [1 ]
2897-
2898- # Now count instances of all elements other than C and H, in order of ascending atomic number
2899- sorted_atom_nums = sorted (at_num_to_counts .keys ())
2900- for atom_num in sorted_atom_nums :
2901- symbol_to_counts [Element .getByAtomicNumber (atom_num ).symbol ] = at_num_to_counts [atom_num ]
2902-
2903- # Finally format the formula as string
2904- formula = ''
2905- for ele , count in symbol_to_counts .items ():
2906- if count == 1 :
2907- count = ''
2908- formula += f'{ ele } { count } '
2909- return formula
2885+ # make a correct hill formula representation following this guide
2886+ # https://en.wikipedia.org/wiki/Chemical_formula#Hill_system
2887+
2888+ # create the counter dictionary using chemical symbols
2889+ atom_symbol_counts = collections .Counter (Element .getByAtomicNumber (atom_num ).symbol for atom_num in atom_nums )
2890+
2891+ formula = []
2892+ # Check for C and H first, to make a correct hill formula
2893+ for el in ['C' , 'H' ]:
2894+ if el in atom_symbol_counts :
2895+ count = atom_symbol_counts .pop (el )
2896+ formula .append (el )
2897+ if count > 1 :
2898+ formula .append (str (count ))
2899+
2900+ # now get the rest of the elements in alphabetical ordering
2901+ for el in sorted (atom_symbol_counts .keys ()):
2902+ count = atom_symbol_counts .pop (el )
2903+ formula .append (el )
2904+ if count > 1 :
2905+ formula .append (str (count ))
2906+
2907+ return "" .join (formula )
29102908
29112909 def chemical_environment_matches (self ,
29122910 query ,
0 commit comments