Skip to content

Commit a077130

Browse files
authored
Merge pull request #7 from aaaashanghai/main
Support Preprocess interface of ABACUS
2 parents 5bec78e + 846dcfe commit a077130

File tree

6 files changed

+221
-5
lines changed

6 files changed

+221
-5
lines changed

deeph/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
get_inference_config, get_preprocess_config
55
from .graph import Collater, collate_fn, get_graph, load_orbital_types
66
from .kernel import DeepHKernal
7-
from .preprocess import get_rc, OijLoad, GetEEiEij
7+
from .preprocess import get_rc, OijLoad, GetEEiEij, parse_ABACUS
88
from .rotate import get_rh, rotate_back, Rotate, dtype_dict
99

1010
__version__ = "0.1.0"

deeph/preprocess/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from .openmx_parse import OijLoad, GetEEiEij, openmx_parse_overlap
22
from .get_rc import get_rc
3+
from .abacus_get_data import parse_ABACUS

deeph/preprocess/abacus_get_data.py

+212
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
# Script for interface from ABACUS (http://abacus.ustc.edu.cn/) to DeepH-pack
2+
# Coded by ZC Tang @ Tsinghua Univ. e-mail: az_txycha@126.com
3+
# Modified by He Li @ Tsinghua Univ.
4+
# To use this script, please add 'out_hs2 1' in ABACUS INPUT File
5+
# Current version is capable of coping with f-orbitals, yet is unable to provide interface for spin-polarized or spin-orbit calculations
6+
# 20220717: Read structure from running_scf.log
7+
# Note: 20220717 version requires output subdirectory == "OUT.ABACUS"
8+
9+
import os
10+
import sys
11+
import json
12+
import re
13+
14+
import numpy as np
15+
from scipy.sparse import csr_matrix
16+
from scipy.linalg import block_diag
17+
import h5py
18+
19+
20+
Bohr2Ang = 0.529177249
21+
periodic_table = {'Ac': 89, 'Ag': 47, 'Al': 13, 'Am': 95, 'Ar': 18, 'As': 33, 'At': 85, 'Au': 79, 'B': 5, 'Ba': 56,
22+
'Be': 4, 'Bi': 83, 'Bk': 97, 'Br': 35, 'C': 6, 'Ca': 20, 'Cd': 48, 'Ce': 58, 'Cf': 98, 'Cl': 17,
23+
'Cm': 96, 'Co': 27, 'Cr': 24, 'Cs': 55, 'Cu': 29, 'Dy': 66, 'Er': 68, 'Es': 99, 'Eu': 63, 'F': 9,
24+
'Fe': 26, 'Fm': 100, 'Fr': 87, 'Ga': 31, 'Gd': 64, 'Ge': 32, 'H': 1, 'He': 2, 'Hf': 72, 'Hg': 80,
25+
'Ho': 67, 'I': 53, 'In': 49, 'Ir': 77, 'K': 19, 'Kr': 36, 'La': 57, 'Li': 3, 'Lr': 103, 'Lu': 71,
26+
'Md': 101, 'Mg': 12, 'Mn': 25, 'Mo': 42, 'N': 7, 'Na': 11, 'Nb': 41, 'Nd': 60, 'Ne': 10, 'Ni': 28,
27+
'No': 102, 'Np': 93, 'O': 8, 'Os': 76, 'P': 15, 'Pa': 91, 'Pb': 82, 'Pd': 46, 'Pm': 61, 'Po': 84,
28+
'Pr': 59, 'Pt': 78, 'Pu': 94, 'Ra': 88, 'Rb': 37, 'Re': 75, 'Rh': 45, 'Rn': 86, 'Ru': 44, 'S': 16,
29+
'Sb': 51, 'Sc': 21, 'Se': 34, 'Si': 14, 'Sm': 62, 'Sn': 50, 'Sr': 38, 'Ta': 73, 'Tb': 65, 'Tc': 43,
30+
'Te': 52, 'Th': 90, 'Ti': 22, 'Tl': 81, 'Tm': 69, 'U': 92, 'V': 23, 'W': 74, 'Xe': 54, 'Y': 39,
31+
'Yb': 70, 'Zn': 30, 'Zr': 40, 'Rf': 104, 'Db': 105, 'Sg': 106, 'Bh': 107, 'Hs': 108, 'Mt': 109,
32+
'Ds': 110, 'Rg': 111, 'Cn': 112, 'Nh': 113, 'Fl': 114, 'Mc': 115, 'Lv': 116, 'Ts': 117, 'Og': 118}
33+
34+
35+
class OrbAbacus2DeepH:
36+
def __init__(self):
37+
self.Us_abacus2deeph = {}
38+
self.Us_abacus2deeph[0] = np.eye(1)
39+
self.Us_abacus2deeph[1] = np.eye(3)[[1, 2, 0]]
40+
self.Us_abacus2deeph[2] = np.eye(5)[[0, 3, 4, 1, 2]]
41+
self.Us_abacus2deeph[3] = np.eye(7)[[0, 1, 2, 3, 4, 5, 6]]
42+
43+
minus_dict = {
44+
1: [0, 1],
45+
2: [3, 4],
46+
3: [1, 2, 5, 6],
47+
}
48+
for k, v in minus_dict.items():
49+
self.Us_abacus2deeph[k][v] *= -1
50+
51+
def get_U(self, l):
52+
if l > 3:
53+
raise NotImplementedError("Only support l = s, p, d, f")
54+
return self.Us_abacus2deeph[l]
55+
56+
def transform(self, mat, l_lefts, l_rights):
57+
block_lefts = block_diag(*[self.get_U(l_left) for l_left in l_lefts])
58+
block_rights = block_diag(*[self.get_U(l_right) for l_right in l_rights])
59+
return block_lefts @ mat @ block_rights.T
60+
61+
62+
def parse_ABACUS(input_path, output_path):
63+
input_path = os.path.abspath(input_path)
64+
output_path = os.path.abspath(output_path)
65+
os.makedirs(output_path, exist_ok=True)
66+
67+
spinful = False # Not implemented yet for spinful calculations
68+
69+
def find_target_line(f, target):
70+
line = f.readline()
71+
while line:
72+
if target in line:
73+
return line
74+
line = f.readline()
75+
return None
76+
77+
with open(os.path.join(input_path, "OUT.ABACUS", "running_scf.log"), 'r') as f:
78+
f.readline()
79+
line = f.readline()
80+
assert "WELCOME TO ABACUS" in line
81+
assert find_target_line(f, "READING UNITCELL INFORMATION") is not None
82+
num_atom_type = int(f.readline().split()[-1])
83+
84+
assert find_target_line(f, "lattice constant (Bohr)") is not None
85+
lattice_constant = float(f.readline().split()[-1]) # unit is Angstrom
86+
87+
site_norbits_dict = {}
88+
orbital_types_dict = {}
89+
for index_type in range(num_atom_type):
90+
tmp = find_target_line(f, "READING ATOM TYPE")
91+
assert tmp is not None
92+
assert tmp.split()[-1] == str(index_type + 1)
93+
if tmp is None:
94+
raise Exception(f"Cannot find ATOM {index_type} in running_scf.log")
95+
96+
line = f.readline()
97+
assert "atom label =" in line
98+
atom_type = periodic_table[line.split()[-1]]
99+
100+
current_site_norbits = 0
101+
current_orbital_types = []
102+
while True:
103+
line = f.readline()
104+
if "number of zeta" in line:
105+
tmp = line.split()
106+
L = int(tmp[0][2:-1])
107+
num_L = int(tmp[-1])
108+
current_site_norbits += (2 * L + 1) * num_L
109+
current_orbital_types.extend([L] * num_L)
110+
else:
111+
break
112+
site_norbits_dict[atom_type] = current_site_norbits
113+
orbital_types_dict[atom_type] = current_orbital_types
114+
115+
line = find_target_line(f, "TOTAL ATOM NUMBER")
116+
assert line is not None
117+
nsites = int(line.split()[-1])
118+
119+
assert find_target_line(f, "DIRECT COORDINATES") is not None
120+
assert "atom" in f.readline()
121+
frac_coords = np.zeros((nsites, 3))
122+
site_norbits = np.zeros(nsites, dtype=int)
123+
element = np.zeros(nsites, dtype=int)
124+
for index_site in range(nsites):
125+
line = f.readline()
126+
tmp = line.split()
127+
assert "taud_" in tmp[0]
128+
element_str = ''.join(re.findall(r'[A-Za-z]', tmp[0][5:]))
129+
element[index_site] = periodic_table[element_str]
130+
site_norbits[index_site] = site_norbits_dict[element[index_site]]
131+
frac_coords[index_site, :] = np.array(tmp[1:4])
132+
norbits = int(np.sum(site_norbits))
133+
site_norbits_cumsum = np.cumsum(site_norbits)
134+
135+
assert find_target_line(f, "Lattice vectors: (Cartesian coordinate: in unit of a_0)") is not None
136+
lattice = np.zeros((3, 3))
137+
for index_lat in range(3):
138+
lattice[index_lat, :] = np.array(f.readline().split())
139+
lattice = lattice * lattice_constant
140+
141+
line = find_target_line(f, "EFERMI")
142+
assert line is not None
143+
assert "eV" in line
144+
fermi_level = float(line.split()[2])
145+
assert find_target_line(f, "EFERMI") is None, "There is more than one EFERMI in the file"
146+
147+
np.savetxt(os.path.join(output_path, "lat.dat"), np.transpose(lattice))
148+
np.savetxt(os.path.join(output_path, "rlat.dat"), np.linalg.inv(lattice) * 2 * np.pi)
149+
cart_coords = frac_coords @ lattice
150+
np.savetxt(os.path.join(output_path, "site_positions.dat").format(output_path), np.transpose(cart_coords))
151+
np.savetxt(os.path.join(output_path, "element.dat"), element, fmt='%d')
152+
info = {'nsites' : nsites, 'isorthogonal': False, 'isspinful': spinful, 'norbits': norbits, 'fermi_level': fermi_level}
153+
with open('{}/info.json'.format(output_path), 'w') as info_f:
154+
json.dump(info, info_f)
155+
with open(os.path.join(output_path, "orbital_types.dat"), 'w') as f:
156+
for atomic_number in element:
157+
for index_l, l in enumerate(orbital_types_dict[atomic_number]):
158+
if index_l == 0:
159+
f.write(str(l))
160+
else:
161+
f.write(f" {l}")
162+
f.write('\n')
163+
164+
U_orbital = OrbAbacus2DeepH()
165+
def parse_matrix(matrix_path, factor):
166+
matrix_dict = dict()
167+
with open(matrix_path, 'r') as f:
168+
line = f.readline(); f.readline()
169+
norbits = int(line.split()[-1])
170+
for line in f:
171+
line1 = line.split()
172+
if len(line1) == 0:
173+
break
174+
num_element = int(line1[3])
175+
if num_element != 0:
176+
R_cur = np.array(line1[:3]).astype(int)
177+
line2 = f.readline().split()
178+
line3 = f.readline().split()
179+
line4 = f.readline().split()
180+
hamiltonian_cur = csr_matrix((np.array(line2).astype(float), np.array(line3).astype(int),
181+
np.array(line4).astype(int)), shape=(norbits, norbits)).toarray()
182+
for index_site_i in range(nsites):
183+
for index_site_j in range(nsites):
184+
key_str = f"[{R_cur[0]}, {R_cur[1]}, {R_cur[2]}, {index_site_i + 1}, {index_site_j + 1}]"
185+
mat = hamiltonian_cur[site_norbits_cumsum[index_site_i]
186+
- site_norbits[index_site_i]:site_norbits_cumsum[index_site_i],
187+
site_norbits_cumsum[index_site_j]
188+
- site_norbits[index_site_j]:site_norbits_cumsum[index_site_j]]
189+
if abs(mat).max() < 1e-8:
190+
continue
191+
mat = U_orbital.transform(mat, orbital_types_dict[element[index_site_i]],
192+
orbital_types_dict[element[index_site_j]])
193+
matrix_dict[key_str] = mat * factor
194+
return matrix_dict, norbits
195+
196+
hamiltonian_dict, tmp = parse_matrix(os.path.join(input_path, "OUT.ABACUS", "data-HR-sparse_SPIN0.csr"), 13.605698) # Ryd2eV
197+
assert tmp == norbits
198+
overlap_dict, tmp = parse_matrix(os.path.join(input_path, "OUT.ABACUS", "data-SR-sparse_SPIN0.csr"), 1)
199+
assert tmp == norbits
200+
201+
with h5py.File(os.path.join(output_path, "hamiltonians.h5"), 'w') as fid:
202+
for key_str, value in hamiltonian_dict.items():
203+
fid[key_str] = value
204+
with h5py.File(os.path.join(output_path, "overlaps.h5"), 'w') as fid:
205+
for key_str, value in overlap_dict.items():
206+
fid[key_str] = value
207+
208+
209+
if __name__ == '__main__':
210+
ABACUS_path = sys.argv[1]
211+
output_path = sys.argv[2]
212+
parse_ABACUS(ABACUS_path, output_path)

deeph/scripts/preprocess.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import argparse
55
from pathos.multiprocessing import ProcessingPool as Pool
66

7-
from deeph import get_preprocess_config, get_rc, get_rh
7+
from deeph import get_preprocess_config, get_rc, get_rh, parse_ABACUS
88

99

1010
def main():
@@ -34,7 +34,7 @@ def make_cmd(input_dir, output_dir, target, interface):
3434
f"--input_dir {input_dir} --output_dir {output_dir} --if_DM true"
3535
else:
3636
raise ValueError('Unknown target: {}'.format(target))
37-
elif interface == 'siesta':
37+
elif interface == 'siesta' or interface == 'abacus':
3838
cmd = ''
3939
elif interface == 'aims':
4040
cmd = f"{julia_interpreter} " \
@@ -49,6 +49,7 @@ def make_cmd(input_dir, output_dir, target, interface):
4949
abspath_list = []
5050
for root, dirs, files in os.walk('./'):
5151
if (interface == 'openmx' and 'openmx.scfout' in files) or (
52+
interface == 'abacus' and 'OUT.ABACUS' in dirs) or (
5253
interface == 'siesta' and 'hamiltonians.h5' in files) or (
5354
interface == 'aims' and 'NoTB.dat' in files):
5455
relpath_list.append(root)
@@ -71,6 +72,8 @@ def worker(index):
7172
)
7273
capture_output = sp.run(cmd, shell=True, capture_output=False, encoding="utf-8")
7374
assert capture_output.returncode == 0
75+
if interface == 'abacus':
76+
parse_ABACUS(abspath, os.path.abspath(relpath))
7477
get_rc(os.path.abspath(relpath), os.path.abspath(relpath), radius=config.getfloat('graph', 'radius'),
7578
r2_rand=config.getboolean('graph', 'r2_rand'),
7679
create_from_DFT=config.getboolean('graph', 'create_from_DFT'), neighbour_file='hamiltonians.h5')

deeph/utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ def get_preprocess_config(*args):
194194
for config_file in args:
195195
config.read(config_file)
196196
assert config['basic']['target'] in ['hamiltonian', 'density_matrix', 'phiVdphi']
197-
assert config['basic']['interface'] in ['openmx', 'siesta', 'aims']
197+
assert config['basic']['interface'] in ['openmx', 'abacus', 'aims', 'siesta']
198198
assert config['basic']['multiprocessing'] in ['False'], 'multiprocessing is not yet implemented'
199199

200200
return config

docs/source/dataset/dataset.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ One needs to perform the DFT calculation with ABACUS
3535
to get the Kohn-Sham Hamiltonian output file in the csr
3636
format. This output file should be placed in a separate
3737
folder for each structure in the dataset. In order to get
38-
this binary file, the input file of ABACUS should include
38+
this csr file, the input file of ABACUS should include
3939
keywords like this::
4040

4141
out_mat_hs2 1

0 commit comments

Comments
 (0)