Skip to content

Commit 1918eea

Browse files
Add CIF checking for incorrect/missing elements, support for isotopes (primarily Deuterium and Tritium), and correct handling of new VASP POSCAR formats
1 parent 8aa1cf8 commit 1918eea

File tree

12 files changed

+11308
-1938
lines changed

12 files changed

+11308
-1938
lines changed

dev_scripts/periodic_table.yaml

Lines changed: 11004 additions & 1901 deletions
Large diffs are not rendered by default.

dev_scripts/update_pt_data.py

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,17 @@
2121

2222

2323
def test_yaml():
24-
with open("periodic_table.yaml") as file:
25-
data = yaml.load(file)
26-
print(data)
24+
loadfn("periodic_table.yaml")
2725

2826

2927
def test_json():
30-
with open("periodic_table.json") as file:
28+
with open("../pymatgen/core/periodic_table.json") as file:
3129
data = json.load(file)
3230
print(data)
3331

3432

3533
def parse_oxi_state():
36-
with open("periodic_table.yaml") as file:
37-
data = yaml.load(file)
34+
data = loadfn("periodic_table.yaml")
3835
with open("oxidation_states.txt") as file:
3936
oxi_data = file.read()
4037
oxi_data = re.sub("[\n\r]", "", oxi_data)
@@ -72,8 +69,7 @@ def parse_oxi_state():
7269

7370

7471
def parse_ionic_radii():
75-
with open("periodic_table.yaml") as f:
76-
data = yaml.load(f)
72+
data = loadfn("periodic_table.yaml")
7773
with open("ionic_radii.csv") as f:
7874
radii_data = f.read()
7975
radii_data = radii_data.split("\r")
@@ -104,8 +100,7 @@ def parse_ionic_radii():
104100

105101

106102
def parse_radii():
107-
with open("periodic_table.yaml") as f:
108-
data = yaml.load(f)
103+
data = loadfn("periodic_table.yaml")
109104
with open("radii.csv") as f:
110105
radii_data = f.read()
111106
radii_data = radii_data.split("\r")
@@ -136,13 +131,12 @@ def parse_radii():
136131
print(el)
137132
with open("periodic_table2.yaml", "w") as f:
138133
yaml.dump(data, f)
139-
with open("periodic_table.json", "w") as f:
134+
with open("../pymatgen/core/periodic_table.json", "w") as f:
140135
json.dump(data, f)
141136

142137

143138
def update_ionic_radii():
144-
with open("periodic_table.yaml") as f:
145-
data = yaml.load(f)
139+
data = loadfn("periodic_table.yaml")
146140

147141
for d in data.values():
148142
if "Ionic_radii" in d:
@@ -156,13 +150,12 @@ def update_ionic_radii():
156150
del d["Ionic_radii_ls"]
157151
with open("periodic_table2.yaml", "w") as f:
158152
yaml.dump(data, f)
159-
with open("periodic_table.json", "w") as f:
153+
with open("../pymatgen/core/periodic_table.json", "w") as f:
160154
json.dump(data, f)
161155

162156

163157
def parse_shannon_radii():
164-
with open("periodic_table.yaml") as f:
165-
data = yaml.load(f)
158+
data = loadfn("periodic_table.yaml")
166159

167160
from openpyxl import load_workbook
168161

@@ -194,22 +187,20 @@ def parse_shannon_radii():
194187
if el in data:
195188
data[el]["Shannon radii"] = dict(radii[el])
196189

197-
with open("periodic_table.yaml", "w") as f:
198-
yaml.safe_dump(data, f)
199-
with open("periodic_table.json", "w") as f:
190+
dumpfn(data, "periodic_table.yaml")
191+
with open("../pymatgen/core/periodic_table.json", "w") as f:
200192
json.dump(data, f)
201193

202194

203195
def gen_periodic_table():
204-
with open("periodic_table.yaml") as f:
205-
data = yaml.load(f)
196+
data = loadfn("periodic_table.yaml")
206197

207-
with open("periodic_table.json", "w") as f:
198+
with open("../pymatgen/core/periodic_table.json", "w") as f:
208199
json.dump(data, f)
209200

210201

211202
def gen_iupac_ordering():
212-
periodic_table = loadfn("periodic_table.json")
203+
periodic_table = loadfn("../pymatgen/core/periodic_table.json")
213204
order = [
214205
([18], range(6, 0, -1)), # noble gasses
215206
([1], range(7, 1, -1)), # alkali metals
@@ -265,12 +256,24 @@ def add_electron_affinities():
265256
row.append(td.get_text().strip())
266257
data.append(row)
267258
data.pop(0)
268-
ea = {int(r[0]): float(re.sub(r"[\s\(\)]", "", r[3].strip("()[]"))) for r in data}
269-
assert set(ea).issuperset(range(1, 93)) # Ensure that we have data for up to U.
259+
260+
ea = {}
261+
max_Z = max(Element(element).Z for element in Element.__members__)
262+
for r in data:
263+
# don't want superheavy elements or less common isotopes
264+
if int(r[0]) > max_Z or r[2] in ea:
265+
continue
266+
tempstr = re.sub(r"[\s\(\)]", "", r[3].strip("()[]"))
267+
# the source table uses the Unicode minus sign (U+2212), which float() cannot parse
268+
bytesrep = tempstr.encode("unicode_escape").replace(b"\\u2212", b"-")
269+
ea[r[2]] = float(bytesrep.decode("unicode_escape"))
270+
271+
Z_set = {Element.from_name(element).Z for element in ea}
272+
assert Z_set.issuperset(range(1, 93)) # Ensure that we have data for up to U.
270273
print(ea)
271274
pt = loadfn("../pymatgen/core/periodic_table.json")
272275
for k, v in pt.items():
273-
v["Electron affinity"] = ea.get(Element(k).Z)
276+
v["Electron affinity"] = ea.get(Element(k).long_name)
274277
dumpfn(pt, "../pymatgen/core/periodic_table.json")
275278

276279

@@ -296,12 +299,12 @@ def add_ionization_energies():
296299
pt = loadfn("../pymatgen/core/periodic_table.json")
297300
for k, v in pt.items():
298301
del v["Ionization energy"]
299-
v["Ionization energies"] = data.get(Element(k).Z, [])
302+
v["Ionization energies"] = data.get(Element(k).long_name, [])
300303
dumpfn(pt, "../pymatgen/core/periodic_table.json")
301304

302305

303306
if __name__ == "__main__":
304-
# parse_shannon_radii()
307+
parse_shannon_radii()
305308
# add_ionization_energies()
306309
add_electron_affinities()
307310
# gen_periodic_table()

pymatgen/core/composition.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ def __init__(self, *args, strict: bool = False, **kwargs) -> None:
128128
raise ValueError("float('NaN') is not a valid Composition, did you mean str('NaN')?")
129129
else:
130130
elem_map = dict(*args, **kwargs) # type: ignore
131+
131132
elem_amt = {}
132133
self._n_atoms = 0
133134
for key, val in elem_map.items():

pymatgen/core/periodic_table.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

pymatgen/core/periodic_table.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def __init__(self, symbol: SpeciesLike) -> None:
5454
Z (int): Atomic number.
5555
symbol (str): Element symbol.
5656
long_name (str): Long name for element. E.g., "Hydrogen".
57+
A (int): Atomic mass number (number of protons plus number of neutrons).
5758
atomic_radius_calculated (float): Calculated atomic radius for the element. This is the empirical value.
5859
Data is obtained from http://wikipedia.org/wiki/Atomic_radii_of_the_elements_(data_page).
5960
van_der_waals_radius (float): Van der Waals radius for the element. This is the empirical value determined
@@ -101,12 +102,26 @@ def __init__(self, symbol: SpeciesLike) -> None:
101102
# Store key variables for quick access
102103
self.Z = data["Atomic no"]
103104

105+
self._is_named_isotope = data.get("Is named isotope", False)
106+
if self._is_named_isotope:
107+
for sym in _pt_data:
108+
if _pt_data[sym]["Atomic no"] == self.Z and not _pt_data[sym].get("Is named isotope", False):
109+
self.symbol = sym
110+
break
111+
104112
at_r = data.get("Atomic radius", "no data")
105113
if str(at_r).startswith("no data"):
106114
self._atomic_radius = None
107115
else:
108116
self._atomic_radius = Length(at_r, "ang")
109117
self._atomic_mass = Mass(data["Atomic mass"], "amu")
118+
119+
self._atomic_mass_number = None
120+
self.A = None
121+
if data.get("Atomic mass no"):
122+
self.A = data.get("Atomic mass no")
123+
self._atomic_mass_number = Mass(data["Atomic mass no"], "amu")
124+
110125
self.long_name = data["Name"]
111126
self._data = data
112127

@@ -140,6 +155,14 @@ def atomic_mass(self) -> FloatWithUnit:
140155
"""
141156
return self._atomic_mass
142157

158+
@property
159+
def atomic_mass_number(self) -> FloatWithUnit | None:
160+
"""
161+
Returns:
162+
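FloatWithUnit | None: The atomic mass number of the element (protons plus neutrons) in amu, or None if the element data has no "Atomic mass no" entry.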
163+
"""
164+
return self._atomic_mass_number
165+
143166
def __getattr__(self, item: str) -> Any:
144167
"""Key access to available element data.
145168
@@ -454,7 +477,7 @@ def ground_state_term_symbol(self):
454477
return J_sorted_terms[-1][0]
455478

456479
def __eq__(self, other: object) -> bool:
457-
return isinstance(self, Element) and isinstance(other, Element) and self.Z == other.Z
480+
return isinstance(self, Element) and isinstance(other, Element) and self.Z == other.Z and self.A == other.A
458481

459482
def __hash__(self) -> int:
460483
return self.Z
@@ -482,17 +505,19 @@ def __lt__(self, other):
482505
return self.symbol < other.symbol
483506

484507
@staticmethod
485-
def from_Z(Z: int) -> Element:
508+
def from_Z(Z: int, A: int | None = None) -> Element:
486509
"""Get an element from an atomic number.
487510
488511
Args:
489-
Z (int): Atomic number
512+
Z (int): Atomic number (number of protons)
513+
A (int or None): Atomic mass number (number of protons + neutrons)
490514
491515
Returns:
492516
Element with atomic number Z.
493517
"""
494518
for sym, data in _pt_data.items():
495-
if data["Atomic no"] == Z:
519+
amn = data.get("Atomic mass no") if A else None
520+
if data["Atomic no"] == Z and amn == A:
496521
return Element(sym)
497522
raise ValueError(f"Unexpected atomic number {Z=}")
498523

@@ -506,6 +531,9 @@ def from_name(name: str) -> Element:
506531
Returns:
507532
Element with the name 'name'
508533
"""
534+
# to accommodate the British English speaking world
535+
GBE_to_AmE = {"aluminium": "aluminum", "caesium": "cesium"}
536+
name = GBE_to_AmE.get(name.lower(), name)
509537
for sym, data in _pt_data.items():
510538
if data["Name"] == name.capitalize():
511539
return Element(sym)
@@ -765,6 +793,8 @@ class Element(ElementBase):
765793
# necessary to preserve backwards compatibility with a time when Element is
766794
# a regular object that is constructed with Element(symbol).
767795
H = "H"
796+
D = "D"
797+
T = "T"
768798
He = "He"
769799
Li = "Li"
770800
Be = "Be"
@@ -1407,6 +1437,8 @@ def get_el_sp(obj: int | SpeciesLike) -> Element | Species | DummySpecies:
14071437
that can be determined.
14081438
"""
14091439
if isinstance(obj, (Element, Species, DummySpecies)):
1440+
if hasattr(obj, "_is_named_isotope") and obj._is_named_isotope:
1441+
return Element(obj.name) if isinstance(obj, Element) else Species(obj.name)
14101442
return obj
14111443

14121444
try:

pymatgen/core/sites.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,6 @@ def __init__(
334334
species = Composition({get_el_sp(species): 1}) # type: ignore
335335
except TypeError:
336336
species = Composition(species)
337-
338337
total_occu = species.num_atoms
339338
if total_occu > 1 + Composition.amount_tolerance:
340339
raise ValueError("Species occupancies sum to more than 1!")

0 commit comments

Comments
 (0)