Skip to content

Commit f09e237

Browse files
authored
Support QChem input format (#35)
1 parent 7d40997 commit f09e237

19 files changed

+813
-7
lines changed

doc/format-qchem.md

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
---
2+
title: Q-Chem molecule format
3+
---
4+
5+
## Specification
6+
7+
@Note: Reference can be found in the [Q-Chem manual](https://manual.q-chem.com/5.1/sect-molinput.html).
8+
9+
Format used by the Q-Chem program.
10+
Elements can be specified either by atomic numbers or element symbols while the geometry is provided in Ångström by default.
11+
12+
13+
## Example
14+
15+
Caffeine molecule in xyz format
16+
17+
```
18+
$molecule
19+
0 1
20+
C 1.07320000000000 0.04890000000000 -0.07570000000000
21+
N 2.51370000000000 0.01260000000000 -0.07580000000000
22+
C 3.35200000000000 1.09590000000000 -0.07530000000000
23+
N 4.61900000000000 0.73030000000000 -0.07550000000000
24+
C 4.57910000000000 -0.63140000000000 -0.07530000000000
25+
C 3.30130000000000 -1.10260000000000 -0.07520000000000
26+
C 2.98070000000000 -2.48690000000000 -0.07380000000000
27+
O 1.82530000000000 -2.90040000000000 -0.07580000000000
28+
N 4.11440000000000 -3.30430000000000 -0.06940000000000
29+
C 5.45170000000000 -2.85620000000000 -0.07240000000000
30+
O 6.38930000000000 -3.65970000000000 -0.07230000000000
31+
N 5.66240000000000 -1.47680000000000 -0.07490000000000
32+
C 7.00950000000000 -0.93650000000000 -0.07520000000000
33+
C 3.92060000000000 -4.74090000000000 -0.06160000000000
34+
H 0.73400000000000 1.08790000000000 -0.07500000000000
35+
H 0.71240000000000 -0.45700000000000 0.82340000000000
36+
H 0.71240000000000 -0.45580000000000 -0.97550000000000
37+
H 2.99300000000000 2.11760000000000 -0.07480000000000
38+
H 7.76530000000000 -1.72630000000000 -0.07590000000000
39+
H 7.14860000000000 -0.32180000000000 0.81970000000000
40+
H 7.14800000000000 -0.32080000000000 -0.96950000000000
41+
H 2.86500000000000 -5.02320000000000 -0.05830000000000
42+
H 4.40230000000000 -5.15920000000000 0.82840000000000
43+
H 4.40020000000000 -5.16930000000000 -0.94780000000000
44+
$end
45+
46+
47+
## Missing Features
48+
49+
Following features are missing
50+
51+
- reading of z-matrix input
52+
- possibility to change coordinate units to Bohr
53+
54+
@Note Feel free to contribute support for missing features
55+
or bring missing features to our attention by opening an issue.
56+
```

doc/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ This library supports reading and writing of the following formats:
1313
- [Gaussian external format](./format-ein.html)
1414
- [QCSchema JSON format](./format-qcschema.html)
1515
- [FHI-aims geometry.in](./format-aims.html)
16+
- [Q-Chem molecule format](./format-qchem.html)

man/mctc-convert.1.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Supported formats:
2828
- Gaussian's external program input (ein)
2929
- JSON input with `qcschema_molecule` or `qcschema_input` structure (json)
3030
- FHI-AIMS' input files (geometry.in)
31+
- Q-Chem molecule block inputs (qchem)
3132

3233

3334
== Options

src/mctc/io/filetype.f90

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ module mctc_io_filetype
5656
!> FHI-aims geometry.in format
5757
integer :: aims = 10
5858

59+
!> Q-Chem molecule format
60+
integer :: qchem = 11
61+
5962
end type enum_filetype
6063

6164
!> File type enumerator
@@ -99,6 +102,8 @@ elemental function get_filetype(file) result(ftype)
99102
ftype = filetype%gaussian
100103
case('json')
101104
ftype = filetype%qcschema
105+
case('qchem')
106+
ftype = filetype%qchem
102107
end select
103108
if (ftype /= filetype%unknown) return
104109
else

src/mctc/io/read.f90

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ module mctc_io_read
1919
use mctc_io_read_ctfile, only : read_molfile, read_sdf
2020
use mctc_io_read_gaussian, only : read_gaussian_external
2121
use mctc_io_read_genformat, only : read_genformat
22+
use mctc_io_read_qchem, only : read_qchem
2223
use mctc_io_read_qcschema, only : read_qcschema
2324
use mctc_io_read_pdb, only : read_pdb
2425
use mctc_io_read_turbomole, only : read_coord
@@ -169,6 +170,9 @@ subroutine get_structure_reader(reader, ftype)
169170
case(filetype%aims)
170171
reader => read_aims
171172

173+
case(filetype%qchem)
174+
reader => read_qchem
175+
172176
end select
173177

174178
end subroutine get_structure_reader

src/mctc/io/read/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ list(
2020
"${dir}/ctfile.f90"
2121
"${dir}/gaussian.f90"
2222
"${dir}/genformat.f90"
23+
"${dir}/qchem.f90"
2324
"${dir}/qcschema.F90"
2425
"${dir}/pdb.f90"
2526
"${dir}/turbomole.f90"

src/mctc/io/read/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ srcs += files(
1717
'ctfile.f90',
1818
'gaussian.f90',
1919
'genformat.f90',
20+
'qchem.f90',
2021
'qcschema.F90',
2122
'pdb.f90',
2223
'turbomole.f90',

src/mctc/io/read/qchem.f90

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
! This file is part of mctc-lib.
2+
!
3+
! Licensed under the Apache License, Version 2.0 (the "License");
4+
! you may not use this file except in compliance with the License.
5+
! You may obtain a copy of the License at
6+
!
7+
! http://www.apache.org/licenses/LICENSE-2.0
8+
!
9+
! Unless required by applicable law or agreed to in writing, software
10+
! distributed under the License is distributed on an "AS IS" BASIS,
11+
! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
! See the License for the specific language governing permissions and
13+
! limitations under the License.
14+
15+
module mctc_io_read_qchem
16+
use mctc_env_accuracy, only : wp
17+
use mctc_env_error, only : error_type
18+
use mctc_io_convert, only : aatoau
19+
use mctc_io_resize, only : resize
20+
use mctc_io_symbols, only : symbol_length, to_number, to_symbol
21+
use mctc_io_structure, only : structure_type, new
22+
use mctc_io_utils, only : next_line, token_type, next_token, io_error, filename, &
23+
read_next_token, read_token
24+
implicit none
25+
private
26+
27+
public :: read_qchem
28+
29+
integer, parameter :: initial_size = 64
30+
31+
contains
32+
33+
subroutine read_qchem(mol, unit, error)
34+
35+
!> Instance of the molecular structure data
36+
type(structure_type), intent(out) :: mol
37+
38+
!> File handle
39+
integer, intent(in) :: unit
40+
41+
!> Error handling
42+
type(error_type), allocatable, intent(out) :: error
43+
44+
integer :: stat, pos, lnum, izp, iat
45+
integer :: charge, multiplicity
46+
type(token_type) :: token
47+
character(len=:), allocatable :: line
48+
real(wp) :: x, y, z
49+
character(len=symbol_length), allocatable :: sym(:)
50+
real(wp), allocatable :: xyz(:, :), abc(:, :), lattice(:, :)
51+
logical :: is_frac, periodic(3)
52+
53+
iat = 0
54+
lnum = 0
55+
stat = 0
56+
57+
do while(stat == 0)
58+
call next_line(unit, line, pos, lnum, stat)
59+
if (stat /= 0) exit
60+
61+
call next_token(line, pos, token)
62+
if (token%first > len(line)) cycle
63+
if (to_lower(line(token%first:token%last)) == '$molecule') exit
64+
end do
65+
66+
if (stat /= 0) then
67+
call io_error(error, "No atoms found", &
68+
& line, token_type(0, 0), filename(unit), lnum+1, "expected molecule block")
69+
return
70+
end if
71+
72+
call next_line(unit, line, pos, lnum, stat)
73+
if (stat == 0) &
74+
call read_next_token(line, pos, token, charge, stat)
75+
if (stat == 0) &
76+
call read_next_token(line, pos, token, multiplicity, stat)
77+
if (stat /= 0) then
78+
call io_error(error, "Failed to read charge and multiplicity", &
79+
& line, token, filename(unit), lnum, "expected integer value")
80+
return
81+
end if
82+
83+
allocate(sym(initial_size), source=repeat(' ', symbol_length))
84+
allocate(xyz(3, initial_size), source=0.0_wp)
85+
86+
do while(stat == 0)
87+
call next_line(unit, line, pos, lnum, stat)
88+
if (stat /= 0) exit
89+
90+
call next_token(line, pos, token)
91+
if (to_lower(line(token%first:token%last)) == '$end') exit
92+
93+
if (iat >= size(sym)) call resize(sym)
94+
if (iat >= size(xyz, 2)) call resize(xyz)
95+
iat = iat + 1
96+
97+
token%last = min(token%last, token%first + symbol_length - 1)
98+
sym(iat) = line(token%first:token%last)
99+
if (to_number(sym(iat)) == 0) then
100+
call read_token(line, token, izp, stat)
101+
sym(iat) = to_symbol(izp)
102+
end if
103+
if (stat /= 0) then
104+
call io_error(error, "Cannot map symbol to atomic number", &
105+
& line, token, filename(unit), lnum, "unknown element")
106+
return
107+
end if
108+
109+
call read_next_token(line, pos, token, x, stat)
110+
if (stat == 0) &
111+
call read_next_token(line, pos, token, y, stat)
112+
if (stat == 0) &
113+
call read_next_token(line, pos, token, z, stat)
114+
if (stat /= 0) then
115+
call io_error(error, "Cannot read coordinates", &
116+
& line, token, filename(unit), lnum, "expected real value")
117+
return
118+
end if
119+
120+
xyz(:, iat) = [x, y, z] * aatoau
121+
end do
122+
123+
if (stat /= 0) then
124+
call io_error(error, "Failed to read molecule block", &
125+
& line, token_type(0, 0), filename(unit), lnum, "unexpected end of input")
126+
return
127+
end if
128+
129+
call new(mol, sym(:iat), xyz, charge=real(charge, wp), uhf=multiplicity-1)
130+
end subroutine read_qchem
131+
132+
!> Convert input string to lowercase
133+
elemental function to_lower(str) result(lcstr)
134+
135+
!> Input string
136+
character(len=*), intent(in) :: str
137+
138+
!> Lowercase version of string
139+
character(len=len(str)):: lcstr
140+
141+
integer :: ilen, iquote, i, iav, iqc
142+
integer, parameter :: offset = iachar('A') - iachar('a')
143+
144+
ilen = len(str)
145+
iquote = 0
146+
lcstr = str
147+
148+
do i = 1, ilen
149+
iav = iachar(str(i:i))
150+
if (iquote == 0 .and. (iav == 34 .or.iav == 39)) then
151+
iquote = 1
152+
iqc = iav
153+
cycle
154+
end if
155+
if (iquote == 1 .and. iav==iqc) then
156+
iquote=0
157+
cycle
158+
end if
159+
if (iquote == 1) cycle
160+
if (iav >= iachar('A') .and. iav <= iachar('Z')) then
161+
lcstr(i:i) = achar(iav - offset)
162+
else
163+
lcstr(i:i) = str(i:i)
164+
end if
165+
end do
166+
167+
end function to_lower
168+
169+
end module mctc_io_read_qchem

src/mctc/io/write.f90

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ module mctc_io_write
2020
use mctc_io_write_gaussian, only : write_gaussian_external
2121
use mctc_io_write_genformat, only : write_genformat
2222
use mctc_io_write_pdb, only : write_pdb
23+
use mctc_io_write_qchem, only : write_qchem
2324
use mctc_io_write_qcschema, only : write_qcschema
2425
use mctc_io_write_turbomole, only : write_coord
2526
use mctc_io_write_vasp, only : write_vasp
@@ -69,7 +70,7 @@ subroutine write_structure_to_file(self, file, error, format)
6970
end if
7071

7172
! Unknown file type is inacceptable in this situation,
72-
! try to figure something at least something out
73+
! try to figure at least something out
7374
if (ftype == filetype%unknown) then
7475
if (any(self%periodic)) then
7576
ftype = filetype%vasp
@@ -136,6 +137,9 @@ subroutine write_structure_to_unit(self, unit, ftype, error)
136137
case(filetype%aims)
137138
call write_aims(self, unit)
138139

140+
case(filetype%qchem)
141+
call write_qchem(self, unit)
142+
139143
end select
140144

141145
end subroutine write_structure_to_unit

src/mctc/io/write/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ list(
2121
"${dir}/gaussian.f90"
2222
"${dir}/genformat.f90"
2323
"${dir}/pdb.f90"
24+
"${dir}/qchem.f90"
2425
"${dir}/qcschema.f90"
2526
"${dir}/turbomole.f90"
2627
"${dir}/vasp.f90"

0 commit comments

Comments
 (0)