Skip to content

Commit e11583b

Browse files
committed
Merge branch '687-WORKSHOP-viirs-hdf5' into add_viirs_to_aws_download_script
2 parents 7f88e15 + 6b7a8d7 commit e11583b

File tree

2 files changed

+322
-0
lines changed

2 files changed

+322
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
enhancement:
2+
- description: |
3+
Created reader for VIIRS HDF5 SDR data, correcting for bowtie
4+
distortion for single and multiple files.
5+
title: 'Add VIIRS HDF5 SDR reader'
6+
files:
7+
added:
8+
- geoips/plugins/modules/readers/viirs_sdr_hdf5.py
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
"""VIIRS SDR Satpy reader.
2+
3+
This VIIRS reader is designed for reading the NPP/JPSS SDR HDF5 files.
4+
The input files are produced by CSPP Polar (CSPP RDR pipeline),
5+
and then read by satpy.
6+
7+
V1.1.0: NRL-Monterey, Aug. 2024
8+
9+
"""
10+
11+
# Python Standard Libraries
12+
import logging
13+
import os
14+
15+
# Installed Libraries
16+
import xarray as xr
17+
import numpy as np
18+
import h5py
19+
from pandas import date_range
20+
from pykdtree.kdtree import KDTree
21+
22+
# If this reader is not installed on the system, don't fail altogether, just skip this
# import. This reader will not work if the import fails, and the package will have to be
# installed to process data of this type.
LOG = logging.getLogger(__name__)

# GeoIPS reader-plugin registration attributes.
interface = "readers"
family = "standard"
name = "viirs_sdr_hdf5"

try:
    import satpy
except ImportError:
    LOG.info("Failed import satpy. If you need it, install it.")

# Dataset groups handled by this reader, mapped to the VIIRS band names in
# each group.  IMG/MOD bands are split into IR and visible ("-Vis") groups,
# presumably so visible-only channels can be trimmed independently of the IR
# channels when a channel subset is requested.
VARLIST = {
    "DNB": ["DNB"],
    "IMG": ["I04", "I05"],
    "IMG-Vis": ["I01", "I02", "I03"],
    "MOD": ["M07", "M08", "M10", "M11", "M12", "M13", "M14", "M15", "M16"],
    "MOD-Vis": ["M01", "M02", "M03", "M04", "M05", "M06", "M09"],
}
43+
44+
45+
def bowtie_correction(band, lat, lon):
    """Correct input data for the instrument bowtie effect.

    Correction derived from: Remote Sens. 2016, 8, 79.

    The granule is "unfolded" by sorting each column so latitude increases
    monotonically, then any bowtie-deleted (NaN) samples are filled with an
    inverse-distance-weighted average of their four nearest valid neighbors
    in (lat, lon) space.

    Parameters
    ----------
    band : numpy.ndarray
        2D data array to correct; NaN marks bowtie-deleted samples.
    lat : numpy.ndarray
        2D latitude array, same shape as ``band``.
    lon : numpy.ndarray
        2D longitude array, same shape as ``band``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(corrected_band, sorted_lat, sorted_lon)``.  The coordinate arrays
        are cast to float64 except on the all-NaN early return, which keeps
        the input dtype (matching the previous behavior).
    """
    # Unfold: re-order every column so latitude increases monotonically.
    unfold_idx = np.argsort(lat, axis=0)
    ord_lat = np.take_along_axis(lat, unfold_idx, axis=0)
    rad_fold = np.take_along_axis(band, unfold_idx, axis=0)
    sort_lon = np.take_along_axis(lon, unfold_idx, axis=0)

    if np.all(np.isnan(rad_fold)):
        LOG.debug("All nan band, no bowtie correction")
        return rad_fold, ord_lat, sort_lon

    # NOTE: a commented-out longitude interpolation step (only needed for a
    # manual read of overlapping granules, not the satpy path) used to live
    # here; see repository history if it is ever needed again.

    # Resample: fill every NaN sample from its nearest valid neighbors.
    point_mask = np.isnan(rad_fold)
    res_band = rad_fold.copy()
    if not point_mask.any():
        # Nothing to fill -- skip building and querying the KD-tree.
        return res_band, ord_lat.astype(np.float64), sort_lon.astype(np.float64)

    good_points = np.dstack((ord_lat[~point_mask], sort_lon[~point_mask]))[0]
    bad_points = np.dstack((ord_lat[point_mask], sort_lon[point_mask]))[0]
    good_rad = rad_fold[~point_mask]
    rad_idx = np.indices(rad_fold.shape)
    ridx, ridy = rad_idx[0][point_mask], rad_idx[1][point_mask]

    # NOTE(review): mutating the process environment is a global side effect,
    # and OMP thread counts are typically read when the library loads --
    # confirm this actually influences pykdtree before relying on it.
    os.environ["OMP_NUM_THREADS"] = "64"

    kd_tree = KDTree(good_points)
    dist, idx = kd_tree.query(bad_points, k=4)

    # Inverse-distance weighting, fully vectorized (replaces a per-point
    # Python loop).  Exact matches (dist == 0) are clamped to a tiny distance
    # so they dominate the average without dividing by zero.
    weights = 1.0 / np.where(dist == 0, 1e-6, dist)
    res_band[ridx, ridy] = np.average(good_rad[idx], axis=1, weights=weights)

    return res_band, ord_lat.astype(np.float64), sort_lon.astype(np.float64)
117+
118+
119+
def call(fnames, metadata_only=False, chans=None, area_def=None, self_register=False):
    """Read VIIRS SDR hdf5 data products.

    Parameters
    ----------
    fnames : list
        * List of strings, full paths to files
    metadata_only : bool, default=False
        * Return before actually reading data if True
    chans : list of str, default=None
        * List of desired channels (skip unneeded variables as needed).
        * Include all channels if None.
    area_def : pyresample.AreaDefinition, default=None
        * NOT YET IMPLEMENTED
        * Specify region to read
        * Read all data if None.
    self_register : str or bool, default=False
        * NOT YET IMPLEMENTED
        * register all data to the specified dataset id (as specified in the
          return dictionary keys).
        * Read multiple resolutions of data if False.

    Returns
    -------
    dict of xarray.Datasets
        * dictionary of xarray.Dataset objects with required Variables and
          Attributes.
        * Dictionary keys can be any descriptive dataset ids.
        * Conforms to geoips xarray standards, see more in geoips documentation.
    """
    tmp_scn = satpy.Scene(reader="viirs_sdr", filenames=fnames)
    scn_start, scn_end = tmp_scn.start_time, tmp_scn.end_time
    base_fnames = list(map(os.path.basename, fnames))

    full_xr = {}
    if metadata_only:
        # Load a single dataset just to pull sensor/platform attributes.
        tmp_scn.load([tmp_scn.available_dataset_names()[0]])
        tmp_attrs = tmp_scn[tmp_scn.available_dataset_names()[0]].attrs
        tmp_xr = xr.Dataset(
            attrs={
                "source_file_name": base_fnames[0],
                "start_datetime": scn_start,
                "end_datetime": scn_end,
                "source_name": tmp_attrs["sensor"],
                "platform_name": tmp_attrs["platform_name"],
                "data_provider": "NOAA",
                "sample_distance_km": 1,
                "interpolation_radius_of_influence": 1000,  # guess!
            }
        )
        return {"METADATA": tmp_xr}

    # Trim the band list based on the channels requested.  Work on a per-call
    # copy: the previous implementation mutated the module-level VARLIST,
    # which silently corrupted every subsequent call in the same process.
    varlist = {key: list(val) for key, val in VARLIST.items()}
    if chans:
        for key, val in VARLIST.items():
            km = [c[:3] in val for c in chans]
            if not any(km):
                varlist.pop(key)
            else:
                varlist[key] = [s for s in val for c in chans if s in c]

    tmp_coor = {}
    for var in varlist:
        tmp_dask = {}
        dataset_ids = [
            idx
            for idx in tmp_scn.available_dataset_ids()
            if idx["name"] in varlist[var]
        ]
        if len(dataset_ids) == 0:
            # None of this group's bands are present in the input files.
            continue

        for d in dataset_ids:
            tmp_scn.load([d])
            # Key is band name + first 3 letters of the calibration, e.g.
            # "...Rad"/"...Ref", with "Bri(ghtness_temperature)" -> "BT".
            full_key = tmp_scn[d].attrs["name"] + tmp_scn[d].attrs[
                "calibration"
            ].capitalize()[:3].replace("Bri", "BT")

            tmp_ma = tmp_scn[d].to_masked_array().data

            lat = tmp_scn[d].area.lats.to_masked_array().data
            lon = tmp_scn[d].area.lons.to_masked_array().data

            # bowtie correction
            band_data, band_lat, band_lon = bowtie_correction(tmp_ma, lat, lon)

            tmp_dask |= {full_key: (("dim_0", "dim_1"), band_data)}

        # Coordinates from the last band read; presumably all bands in a
        # group share the same geolocation -- TODO confirm.
        tmp_coor["latitude"] = (
            ("dim_0", "dim_1"),
            band_lat,
        )
        tmp_coor["longitude"] = (
            ("dim_0", "dim_1"),
            band_lon,
        )
        # sample time to the proper shape (N*48), while lat/lon are ()
        time_range = date_range(
            start=scn_start, end=scn_end, periods=tmp_coor["latitude"][1].shape[0]
        ).values
        interp_time = np.tile(time_range, (tmp_coor["latitude"][1].shape[1], 1)).T
        tmp_coor["time"] = (("dim_0", "dim_1"), interp_time)

        tmp_attrs = tmp_scn[varlist[var][0]].attrs

        cal_params = [
            "satellite_azimuth_angle",
            "satellite_zenith_angle",
            "solar_azimuth_angle",
            "solar_zenith_angle",
        ]

        if var == "DNB":
            cal_params = [
                "dnb_lunar_azimuth_angle",
                "dnb_lunar_zenith_angle",
                "dnb_satellite_azimuth_angle",
                "dnb_satellite_zenith_angle",
                "dnb_solar_azimuth_angle",
                "dnb_solar_zenith_angle",
            ]

        tmp_scn.load(cal_params)
        tmp_cal_params = {
            i.removeprefix("dnb_"): (("dim_0", "dim_1"), tmp_scn[i].to_masked_array())
            for i in cal_params
        }

        if var == "DNB":
            try:
                from lunarref.lib.liblunarref import lunarref

                # tmp_scn.load(["dnb_moon_illumination_fraction"])
                # this results in the wrong value..
                # np.arccos((tmp_scn["dnb_moon_illumination_fraction"].data/50)-1)

                dnb_geofile = [i for i in fnames if "GDNBO" in os.path.basename(i)][0]
                # Context manager so the geolocation file is always closed
                # (the previous code leaked the open h5py handle).
                with h5py.File(dnb_geofile) as h5_dnb:
                    phase_ang = h5_dnb[
                        "All_Data/VIIRS-DNB-GEO_All/MoonPhaseAngle"
                    ][...]

                lunarref_data = lunarref(
                    tmp_dask["DNBRad"][1],
                    tmp_cal_params["solar_zenith_angle"][1],
                    tmp_cal_params["lunar_zenith_angle"][1],
                    scn_start.strftime("%Y%m%d%H"),
                    scn_start.strftime("%M"),
                    phase_ang,
                )
                lunarref_data = np.ma.masked_less_equal(lunarref_data, -999, copy=False)
                tmp_dask |= {"DNBRef": (("dim_0", "dim_1"), lunarref_data)}
            except ImportError:
                LOG.info("Failed lunarref in viirs reader. If you need it, build it")

        obs_xr = xr.Dataset(data_vars=tmp_dask)
        coor_xr = xr.Dataset(data_vars=tmp_coor)
        cal_xr = xr.Dataset(data_vars=tmp_cal_params)

        try:
            tmp_xr = xr.merge([obs_xr, coor_xr, cal_xr])
        except ValueError:
            # Some bands' angle arrays come at a different resolution; drop
            # the calibration angles rather than failing the merge.
            tmp_xr = xr.merge([obs_xr, coor_xr])

        tmp_xr.attrs = {
            "source_file_name": base_fnames,
            "start_datetime": tmp_scn.start_time,
            "end_datetime": tmp_scn.end_time,
            "source_name": tmp_attrs["sensor"],
            "platform_name": tmp_attrs["platform_name"],
            "data_provider": "NOAA",
            "sample_distance_km": tmp_attrs["resolution"] / 1e3,
            "interpolation_radius_of_influence": 1000,
        }

        full_xr |= {var: tmp_xr}
    # NOTE(review): if no requested channel matched any group, tmp_xr is
    # never assigned and the next line raises NameError -- confirm whether an
    # explicit error for an empty read is the desired behavior here.
    full_xr["METADATA"] = xr.Dataset(attrs=tmp_xr.attrs)

    return full_xr

0 commit comments

Comments
 (0)