Skip to content

Commit

Permalink
BUFR2IODA python API converters for marine monthly in situ BUFR (#844)
Browse files Browse the repository at this point in the history
This PR adds bufr2ioda python API converters for marine monthly in situ
data based on the work done during the "End-to-End Code Sprint" by the
atmosphere group.

partially resolves #711

---------

Co-authored-by: Guillaume Vernieres <guillaume.vernieres@noaa.gov>
  • Loading branch information
ShastriPaturi and guillaumevernieres authored Jan 18, 2024
1 parent f63fe7e commit 7f74f17
Show file tree
Hide file tree
Showing 15 changed files with 2,146 additions and 32 deletions.
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_bathythermal_profiles.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "bathy",
"subsets" : "BATHY",
"source" : "NCEP data tank",
"data_type" : "bathy",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ Bathythermal profiles",
"data_provider" : "U.S. NOAA"
}
2 changes: 1 addition & 1 deletion parm/ioda/bufr2ioda/bufr2ioda_subpfl_argo_profiles.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hourly in situ ARGO profiles",
"data_description" : "6-hrly in situ ARGO profiles",
"data_provider" : "U.S. NOAA"
}
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_subpfl_glider_profiles.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "subpfl",
"subsets" : "SUBPFL",
"source" : "NCEP data tank",
"data_type" : "glider",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ GLIDER profiles",
"data_provider" : "U.S. NOAA"
}
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_tesac_mammals_profiles.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "tesac",
"subsets" : "TESAC",
"source" : "NCEP data tank",
"data_type" : "mammals",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ marine mammals from TESAC profiles",
"data_provider" : "U.S. NOAA"
}
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_tesac_profiles.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "tesac",
"subsets" : "TESAC",
"source" : "NCEP data tank",
"data_type" : "tesac",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ TESAC profiles",
"data_provider" : "U.S. NOAA"
}
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_trackob_surface.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "trkob",
"subsets" : "TRACKOB",
"source" : "NCEP data tank",
"data_type" : "trackob",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ TRACKOB surface",
"data_provider" : "U.S. NOAA"
}
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_xbtctd_profiles.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "xbtctd",
"subsets" : "XBTCTD",
"source" : "NCEP data tank",
"data_type" : "xbtctd",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ XBT/XCTD profiles",
"data_provider" : "U.S. NOAA"
}
272 changes: 272 additions & 0 deletions ush/ioda/bufr2ioda/bufr2ioda_altkob_surface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
#!/usr/bin/env python3
# (C) Copyright 2024 NOAA/NWS/NCEP/EMC
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.

import sys
import numpy as np
import numpy.ma as ma
import os
import argparse
import math
import calendar
import time
import copy
from datetime import datetime
import json
from pyiodaconv import bufr
from collections import namedtuple
from pyioda import ioda_obs_space as ioda_ospace
from wxflow import Logger


def Compute_sequenceNumber(lon):

lon_u, seqNum = np.unique(lon, return_inverse=True)
seqNum = seqNum.astype(np.int32)
logger.debug(f"Len of Sequence Number: {len(seqNum)}")

return seqNum


def bufr_to_ioda(config, logger):

subsets = config["subsets"]
logger.debug(f"Checking subsets = {subsets}")

# Get parameters from configuration
data_format = config["data_format"]
source = config["source"]
data_type = config["data_type"]
data_description = config["data_description"]
data_provider = config["data_provider"]
cycle_type = config["cycle_type"]
cycle_datetime = config["cycle_datetime"]
dump_dir = config["dump_directory"]
ioda_dir = config["ioda_directory"]
cycle = config["cycle_datetime"]

yyyymmdd = cycle[0:8]
hh = cycle[8:10]

# General Information
converter = 'BUFR to IODA Converter'
platform_description = 'Surface obs from ALTKOB: temperature and salinity'

bufrfile = f"{cycle_type}.t{hh}z.{data_format}.tm{hh}.bufr_d"
DATA_PATH = os.path.join(dump_dir, f"{cycle_type}.{yyyymmdd}", str(hh), f"atmos", bufrfile)
logger.debug(f"{bufrfile}, {DATA_PATH}")

# ===========================================
# Make the QuerySet for all the data we want
# ===========================================
start_time = time.time()

logger.debug(f"Making QuerySet ...")
q = bufr.QuerySet()

# MetaData
q.add('year', '*/YEAR')
q.add('month', '*/MNTH')
q.add('day', '*/DAYS')
q.add('hour', '*/HOUR')
q.add('minute', '*/MINU')
q.add('ryear', '*/RCYR')
q.add('rmonth', '*/RCMO')
q.add('rday', '*/RCDY')
q.add('rhour', '*/RCHR')
q.add('rminute', '*/RCMI')
q.add('latitude', '*/CLATH')
q.add('longitude', '*/CLONH')

# ObsValue
q.add('temp', '*/SST0')
q.add('saln', '*/SSS0')

end_time = time.time()
running_time = end_time - start_time
logger.debug(f"Running time for making QuerySet: {running_time} seconds")

# ===============================================================
# Open the BUFR file and execute the QuerySet
# Use the ResultSet returned to get numpy arrays of the data
# ==============================================================
start_time = time.time()

logger.debug(f"Executing QuerySet to get ResultSet ...")
with bufr.File(DATA_PATH) as f:
r = f.execute(q)

# MetaData
logger.debug(f" ... Executing QuerySet: get MetaData ...")
dateTime = r.get_datetime('year', 'month', 'day', 'hour', 'minute', group_by='depth')
dateTime = dateTime.astype(np.int64)
rcptdateTime = r.get_datetime('ryear', 'rmonth', 'rday', 'rhour', 'rminute', group_by='depth')
rcptdateTime = rcptdateTime.astype(np.int64)
lat = r.get('latitude', group_by='depth')
lon = r.get('longitude', group_by='depth')

# ObsValue
logger.debug(f" ... Executing QuerySet: get ObsValue ...")
temp = r.get('temp', group_by='depth')
temp -= 273.15
saln = r.get('saln', group_by='depth')

# Add mask based on min, max values
mask = ((temp > -10.0) & (temp <= 50.0)) & ((saln >= 0.0) & (saln <= 45.0))
lat = lat[mask]
lon = lon[mask]
dateTime = dateTime[mask]
rcptdateTime = rcptdateTime[mask]
temp = temp[mask]
saln = saln[mask]

# ObsError
logger.debug(f"Generating ObsError array with constant value (instrument error)...")
ObsError_temp = np.float32(np.ma.masked_array(np.full((len(mask)), 0.30)))
ObsError_saln = np.float32(np.ma.masked_array(np.full((len(mask)), 1.00)))

# PreQC
logger.debug(f"Generating PreQC array with 0...")
PreQC = (np.ma.masked_array(np.full((len(mask)), 0))).astype(np.int32)

logger.debug(f" ... Executing QuerySet: Done!")

logger.debug(f" ... Executing QuerySet: Check BUFR variable generic \
dimension and type ...")
# ==================================================
# Check values of BUFR variables, dimension and type
# ==================================================
logger.debug(f" temp min, max, length, dtype = {temp.min()}, {temp.max()}, {len(temp)}, {temp.dtype}")
logger.debug(f" saln min, max, length, dtype = {saln.min()}, {saln.max()}, {len(saln)}, {saln.dtype}")
logger.debug(f" lon min, max, length, dtype = {lon.min()}, {lon.max()}, {len(lon)}, {lon.dtype}")
logger.debug(f" lat min, max, length, dtype = {lat.min()}, {lat.max()}, {len(lat)}, {lat.dtype}")
logger.debug(f" depth min, max, length, dtype = {depth.min()}, {depth.max()}, {len(depth)}, {depth.dtype}")
logger.debug(f" PreQC min, max, length, dtype = {PreQC.min()}, {PreQC.max()}, {len(PreQC)}, {PreQC.dtype}")
logger.debug(f" ObsError_temp min, max, length, dtype = {ObsError_temp.min()}, {ObsError_temp.max()}, {len(ObsError_temp)}, {ObsError_temp.dtype}")
logger.debug(f" ObsError_saln min, max, length, dtype = {ObsError_saln.min()}, {ObsError_saln.max()}, {len(ObsError_saln)}, {ObsError_saln.dtype}")
logger.debug(f" stationID shape, dtype = {stationID.shape}, {stationID.astype(str).dtype}")
logger.debug(f" dateTime shape, dtype = {dateTime.shape}, {dateTime.dtype}")
logger.debug(f" rcptdateTime shape, dytpe = {rcptdateTime.shape}, {rcptdateTime.dtype}")

# =====================================
# Create IODA ObsSpace
# Write IODA output
# =====================================

# Create the dimensions
dims = {'Location': np.arange(0, lat.shape[0])}

iodafile = f"{cycle_type}.t{hh}z.{data_type}_profiles.{data_format}.nc"
OUTPUT_PATH = os.path.join(ioda_dir, iodafile)
logger.debug(f" ... ... Create OUTPUT file: {OUTPUT_PATH}")

path, fname = os.path.split(OUTPUT_PATH)
if path and not os.path.exists(path):
os.makedirs(path)

obsspace = ioda_ospace.ObsSpace(OUTPUT_PATH, mode='w', dim_dict=dims)

# Create Global attributes
logger.debug(f" ... ... Create global attributes")
obsspace.write_attr('Converter', converter)
obsspace.write_attr('source', source)
obsspace.write_attr('sourceFiles', bufrfile)
obsspace.write_attr('dataProviderOrigin', data_provider)
obsspace.write_attr('description', data_description)
obsspace.write_attr('datetimeRange', [str(dateTime.min()), str(dateTime.max())])
obsspace.write_attr('platformLongDescription', platform_description)

# Create IODA variables
logger.debug(f" ... ... Create variables: name, type, units, and attributes")

# Datetime
obsspace.create_var('MetaData/dateTime', dtype=dateTime.dtype, fillval=dateTime.fill_value) \
.write_attr('units', 'seconds since 1970-01-01T00:00:00Z') \
.write_attr('long_name', 'Datetime') \
.write_data(dateTime)

# rcptDatetime
obsspace.create_var('MetaData/rcptdateTime', dtype=dateTime.dtype, fillval=dateTime.fill_value) \
.write_attr('units', 'seconds since 1970-01-01T00:00:00Z') \
.write_attr('long_name', 'receipt Datetime') \
.write_data(rcptdateTime)

# Longitude
obsspace.create_var('MetaData/longitude', dtype=lon.dtype, fillval=lon.fill_value) \
.write_attr('units', 'degrees_east') \
.write_attr('valid_range', np.array([-180, 180], dtype=np.float32)) \
.write_attr('long_name', 'Longitude') \
.write_data(lon)

# Latitude
obsspace.create_var('MetaData/latitude', dtype=lat.dtype, fillval=lat.fill_value) \
.write_attr('units', 'degrees_north') \
.write_attr('valid_range', np.array([-90, 90], dtype=np.float32)) \
.write_attr('long_name', 'Latitude') \
.write_data(lat)

# Station Identification
obsspace.create_var('MetaData/stationID', dtype=stationID.dtype, fillval=stationID.fill_value) \
.write_attr('long_name', 'Station Identification') \
.write_data(stationID)

# PreQC
obsspace.create_var('PreQC/seaSurfaceTemperature', dtype=PreQC.dtype, fillval=PreQC.fill_value) \
.write_attr('long_name', 'PreQC') \
.write_data(PreQC)

obsspace.create_var('PreQC/seaSurfaceSalinity', dtype=PreQC.dtype, fillval=PreQC.fill_value) \
.write_attr('long_name', 'PreQC') \
.write_data(PreQC)

# ObsError
obsspace.create_var('ObsError/seaSurfaceTemperature', dtype=ObsError_temp.dtype, fillval=ObsError_temp.fill_value) \
.write_attr('units', 'degC') \
.write_attr('long_name', 'ObsError') \
.write_data(ObsError_temp)

obsspace.create_var('ObsError/seaSurfaceSalinity', dtype=ObsError_saln.dtype, fillval=ObsError_saln.fill_value) \
.write_attr('units', 'psu') \
.write_attr('long_name', 'ObsError') \
.write_data(ObsError_saln)

# ObsValue
obsspace.create_var('ObsValue/seaSurfaceTemperature', dtype=temp.dtype, fillval=temp.fill_value) \
.write_attr('units', 'degC') \
.write_attr('valid_range', np.array([-10.0, 50.0], dtype=np.float32)) \
.write_attr('long_name', 'Sea Surface Temperature') \
.write_data(temp)

obsspace.create_var('ObsValue/seaSurfaceSalinity', dtype=saln.dtype, fillval=saln.fill_value) \
.write_attr('units', 'psu') \
.write_attr('valid_range', np.array([0.0, 45.0], dtype=np.float32)) \
.write_attr('long_name', 'Sea Surface Salinity') \
.write_data(saln)

end_time = time.time()
running_time = end_time - start_time
logger.debug(f"Running time for splitting and output IODA: {running_time} \
seconds")

logger.debug(f"All Done!")


if __name__ == '__main__':

start_time = time.time()
config = "bufr2ioda_trackob_surface.json"

log_level = 'DEBUG' if args.verbose else 'INFO'
logger = Logger('bufr2ioda_trackob_surface.py', level=log_level,
colored_log=True)

with open(args.config, "r") as json_file:
config = json.load(json_file)

bufr_to_ioda(config, logger)

end_time = time.time()
running_time = end_time - start_time
logger.debug(f"Total running time: {running_time} seconds")
Loading

0 comments on commit 7f74f17

Please sign in to comment.