Redefine fine-res budget for new SHiELD (#383)

The new simulation had different physics options and therefore different diagnostic outputs. Relevant changes included:
- No GFDL microphysics output. Microphysics is now contained in the physics tendency and in the fv_sat_adj dynamical core phase changes.
- New formulation of the pressure velocity "omega", resolving this error: NOAA-GFDL/GFDL_atmos_cubed_sphere#40. The correct pressure velocity and the corresponding eddy fluxes are now named, for example, "vulcan_omega" and "eddy_flux_vulcan_omega_sphum".

Changes introduced by this commit:
- Adjust hardcoded budget terms to accommodate the changes above. The team suggested that I not overgeneralize this code since we won't have a new dataset anytime soon.
- Update the test schema to reflect the new training data.
- Add units and long_name information to the outputs.
- Simplify the variable renaming logic. Previously "_coarse" was automatically stripped from all the names of the diagnostic output, which made it tricky to infer which variables were available at a particular step of the pipeline. Now most of the renaming is concentrated into one function.
- Adjust `FineResolutionSources` to reflect this schema change.

[VCMML-310] Closes #347

[VCMML-310]: https://vulcan.atlassian.net/browse/VCMML-310
ai2cm · Jun 5, 2020 · 4cd497f · 4cd497f
1 parent bf65fcd
commit 4cd497f
Show file tree

Hide file tree

Showing 9 changed files with 141 additions and 93 deletions.
diff --git a/fv3net/regression/loaders/_transform.py b/fv3net/regression/loaders/_transform.py
@@ -105,7 +105,7 @@ def _derived_budget_ds(
         variable_prefixes: Mapping[str, str] = None,
         apparent_source_terms: Sequence[str] = (
             "physics",
-            "microphysics",
+            "saturation_adjustment",
             "convergence",
         ),
     ) -> xr.Dataset:

diff --git a/tests/loaders/test__transform.py b/tests/loaders/test__transform.py
@@ -95,41 +95,35 @@ def test_shuffled_dask():
     shuffled(dataset, "sample", np.random.RandomState(1))
 
 
-air_temperature = xr.DataArray([270.0], [(["x"], [1.0])], ["x"], attrs={"units": "K"})
-air_temperature_physics = xr.DataArray(
-    [0.1], [(["x"], [1.0])], ["x"], attrs={"units": "K/s"}
-)
-air_temperature_microphysics = xr.DataArray(
-    [0.2], [(["x"], [1.0])], ["x"], attrs={"units": "K/s"}
-)
-air_temperature_convergence = xr.DataArray(
-    [-0.1], [(["x"], [1.0])], ["x"], attrs={"units": "K/s"}
-)
-specific_humidity = xr.DataArray(
-    [1.0e-3], [(["x"], [1.0])], ["x"], attrs={"units": "kg/kg"}
-)
-specific_humidity_physics = xr.DataArray(
-    [1.0e-6], [(["x"], [1.0])], ["x"], attrs={"units": "kg/kg/s"}
-)
-specific_humidity_microphysics = xr.DataArray(
-    [2.0e-6], [(["x"], [1.0])], ["x"], attrs={"units": "kg/kg/s"}
-)
-specific_humidity_convergence = xr.DataArray(
-    [-1.0e-6], [(["x"], [1.0])], ["x"], attrs={"units": "kg/kg/s"}
-)
 budget_ds = xr.Dataset(
-    {
-        "air_temperature": air_temperature,
-        "air_temperature_physics": air_temperature_physics,
-        "air_temperature_microphysics": air_temperature_microphysics,
-        "air_temperature_convergence": air_temperature_convergence,
-        "specific_humidity": specific_humidity,
-        "specific_humidity_physics": specific_humidity_physics,
-        "specific_humidity_microphysics": specific_humidity_microphysics,
-        "specific_humidity_convergence": specific_humidity_convergence,
-    }
+    dict(
+        air_temperature=xr.DataArray(
+            [270.0], [(["x"], [1.0])], ["x"], attrs={"units": "K"}
+        ),
+        air_temperature_physics=xr.DataArray(
+            [0.1], [(["x"], [1.0])], ["x"], attrs={"units": "K/s"}
+        ),
+        air_temperature_saturation_adjustment=xr.DataArray(
+            [0.2], [(["x"], [1.0])], ["x"], attrs={"units": "K/s"}
+        ),
+        air_temperature_convergence=xr.DataArray(
+            [-0.1], [(["x"], [1.0])], ["x"], attrs={"units": "K/s"}
+        ),
+        specific_humidity=xr.DataArray(
+            [1.0e-3], [(["x"], [1.0])], ["x"], attrs={"units": "kg/kg"}
+        ),
+        specific_humidity_physics=xr.DataArray(
+            [1.0e-6], [(["x"], [1.0])], ["x"], attrs={"units": "kg/kg/s"}
+        ),
+        specific_humidity_saturation_adjustment=xr.DataArray(
+            [2.0e-6], [(["x"], [1.0])], ["x"], attrs={"units": "kg/kg/s"}
+        ),
+        specific_humidity_convergence=xr.DataArray(
+            [-1.0e-6], [(["x"], [1.0])], ["x"], attrs={"units": "kg/kg/s"}
+        ),
+    )
 )
-apparent_source_terms = ["physics", "microphysics", "convergence"]
+apparent_source_terms = ["physics", "saturation_adjustment", "convergence"]
 
 
 @pytest.mark.parametrize(
@@ -159,7 +153,7 @@ def test_shuffled_dask():
             budget_ds,
             "air_temperature",
             "dQ1",
-            ["physics", "microphysics"],
+            ["physics", "saturation_adjustment"],
             budget_ds.assign(
                 {
                     "dQ1": xr.DataArray(

diff --git a/workflows/fine_res_budget/budget/budgets.py b/workflows/fine_res_budget/budget/budgets.py
@@ -111,7 +111,7 @@ def compute_recoarsened_budget_field(
     field_fine: xr.DataArray,
     unresolved_flux: xr.DataArray,
     storage: xr.DataArray,
-    microphysics: xr.DataArray,
+    saturation_adjustment: xr.DataArray,
     physics: xr.DataArray,
     nudging: xr.DataArray = None,
     factor: int = 8,
@@ -122,7 +122,7 @@ def compute_recoarsened_budget_field(
     Returns:
 
         xr.Dataset with keys: storage, eddy, field, resolved, convergence,
-            microphysics, physics, nudging
+            saturation_adjustment, physics, nudging
     Note:
         Need to pass in coarsened omega and delp to save computational cost
 
@@ -131,22 +131,22 @@ def compute_recoarsened_budget_field(
 
     storage_name = "storage"
     unresolved_flux_name = "eddy"
-    field_name = "field"
+    field_place_holder_name = "field"
     resolved_flux_name = "resolved"
     convergence_name = "convergence"
 
     grid = Grid("grid_xt", "grid_yt", "pfull", "grid_x", "grid_y", "pfulli")
 
     # Make iterator of all the variables to average
     def variables_to_average():
-        yield microphysics.rename("microphysics")
+        yield saturation_adjustment.rename("saturation_adjustment")
         yield physics.rename("physics")
         if nudging is not None:
             yield nudging.rename("nudging")
         yield unresolved_flux.rename(unresolved_flux_name)
         yield (field_fine * omega_fine).rename(resolved_flux_name)
         yield storage.rename(storage_name)
-        yield field_fine.rename(field_name)
+        yield field_fine.rename(field_place_holder_name)
 
     def averaged_variables():
         for array in variables_to_average():
@@ -160,7 +160,7 @@ def averaged_variables():
         averaged[unresolved_flux_name],
         averaged[resolved_flux_name],
         omega_coarse,
-        averaged[field_name],
+        averaged[field_place_holder_name],
     )
 
     convergence = grid.vertical_convergence(eddy_flux, delp_coarse).rename(
@@ -170,7 +170,53 @@ def averaged_variables():
     return xr.merge([convergence, averaged])
 
 
-def rename_recoarsened_budget(budget: xr.Dataset, field_name: str):
+def add_budget_metadata(budget: xr.Dataset, units: str, field_name: str):
+    tendency_units = units + "/s"
+    budget.convergence.attrs.update(
+        {"long_name": f"eddy flux convergence of {field_name}", "units": tendency_units}
+    )
+
+    budget.saturation_adjustment.attrs.update(
+        {
+            "long_name": (
+                f"tendency of {field_name} due to dynamical core "
+                "saturation adjustment"
+            ),
+            "units": tendency_units,
+        }
+    )
+
+    budget.physics.attrs.update(
+        {
+            "long_name": f"tendency of {field_name} due to physics",
+            "description": "sum of microphysics and any other parameterized process",
+            "units": tendency_units,
+        }
+    )
+
+    if "nudging" in budget:
+        budget.nudging.attrs.update(
+            {
+                "long_name": f"tendency of {field_name} due to SHiELD nudging",
+                "units": tendency_units,
+            }
+        )
+
+    budget.storage.attrs.update(
+        {
+            "long_name": f"storage of {field_name}",
+            "description": (
+                f"partial time derivative of {field_name} for fixed x, y, "
+                "and output model level. Sum of all the budget tendencies."
+            ),
+            "units": tendency_units,
+        }
+    )
+
+    budget.field.attrs.update({"units": units})
+
+
+def rename_recoarsened_budget(budget: xr.Dataset, field_name: str) -> str:
     rename = {}
     rename["field"] = field_name
     for variable in budget:
@@ -219,10 +265,10 @@ def compute_recoarsened_budget(merged: xr.Dataset, dt=15 * 60, factor=8):
 
     middle = merged.sel(step="middle")
 
-    omega_fine = middle.omega
-    area = middle.area
+    omega_fine = middle.vulcan_omega_coarse
+    area = middle.area_coarse
     delp_fine = middle.delp
-    delp_coarse = grid.weighted_block_average(delp_fine, middle.area, factor=factor)
+    delp_coarse = grid.weighted_block_average(delp_fine, area, factor=factor)
     omega_coarse = grid.pressure_level_average(
         delp_fine, delp_coarse, area, omega_fine, factor=factor
     )
@@ -235,12 +281,12 @@ def compute_recoarsened_budget(merged: xr.Dataset, dt=15 * 60, factor=8):
         omega_coarse,
         middle["T"],
         storage=storage(merged["T"], dt),
-        unresolved_flux=middle["eddy_flux_omega_temp"],
-        microphysics=middle["t_dt_gfdlmp"],
-        nudging=middle["t_dt_nudge"],
-        physics=middle["t_dt_phys"],
+        unresolved_flux=middle["eddy_flux_vulcan_omega_temp"],
+        saturation_adjustment=middle["t_dt_fv_sat_adj_coarse"],
+        nudging=middle["t_dt_nudge_coarse"],
+        physics=middle["t_dt_phys_coarse"],
         factor=factor,
-    ).pipe(rename_recoarsened_budget, "air_temperature")
+    )
 
     q_budget_coarse = compute_recoarsened_budget_field(
         area,
@@ -250,10 +296,23 @@ def compute_recoarsened_budget(merged: xr.Dataset, dt=15 * 60, factor=8):
         omega_coarse,
         middle["sphum"],
         storage=storage(merged["sphum"], dt),
-        unresolved_flux=middle["eddy_flux_omega_sphum"],
-        microphysics=middle["qv_dt_gfdlmp"],
-        physics=middle["qv_dt_phys"],
+        unresolved_flux=middle["eddy_flux_vulcan_omega_sphum"],
+        saturation_adjustment=middle["qv_dt_fv_sat_adj_coarse"],
+        physics=middle["qv_dt_phys_coarse"],
         factor=factor,
-    ).pipe(rename_recoarsened_budget, "specific_humidity")
+    )
+
+    # metadata adjustments
+    add_budget_metadata(t_budget_coarse, "K", "air_temperature")
+    t_budget_coarse = rename_recoarsened_budget(t_budget_coarse, "air_temperature")
+
+    add_budget_metadata(q_budget_coarse, "kg/kg", "specific_humidity")
+    q_budget_coarse = rename_recoarsened_budget(q_budget_coarse, "specific_humidity")
+
+    omega_coarse = omega_coarse.assign_attrs(
+        {"long_name": "Lagrangian derivative of hydrostatic pressure", "units": "Pa/s"}
+    ).rename("omega")
+
+    delp_coarse = delp_coarse.rename("delp")
 
     return xr.merge([t_budget_coarse, q_budget_coarse, omega_coarse, delp_coarse])
diff --git a/workflows/fine_res_budget/budget/data.py b/workflows/fine_res_budget/budget/data.py
@@ -21,15 +21,6 @@
 GRID_VARIABLES = ["grid_x", "grid_y", "grid_xt", "grid_yt", "pfull", "tile"]
 
 
-def remove_coarse_name(ds):
-    name_dict = {}
-    for variable in ds:
-        suffix = "_coarse"
-        if variable.endswith(suffix):
-            name_dict[variable] = variable[: -len(suffix)]
-    return ds.rename(name_dict)
-
-
 def rename_dims(ds):
     name_dict = {}
     for variable in ds.dims:
@@ -41,7 +32,12 @@ def rename_dims(ds):
 
 def rename_latlon(ds):
     return ds.rename(
-        {"grid_lat": "latb", "grid_lon": "lonb", "grid_lont": "lon", "grid_latt": "lat"}
+        {
+            "grid_lat_coarse": "latb",
+            "grid_lon_coarse": "lonb",
+            "grid_lont_coarse": "lon",
+            "grid_latt_coarse": "lat",
+        }
     )
 
 
@@ -66,12 +62,7 @@ def standardize_restart_metadata(restarts):
 
 def standardize_diagnostic_metadata(ds):
     times = np.vectorize(round_time)(ds.time)
-    return (
-        ds.assign(time=times)
-        .pipe(remove_coarse_name)
-        .pipe(rename_dims)
-        .pipe(rename_latlon)
-    )
+    return ds.assign(time=times).pipe(rename_dims).pipe(rename_latlon)
 
 
 def shift(restarts, dt=datetime.timedelta(seconds=30, minutes=7)):

diff --git a/workflows/fine_res_budget/budget/pipeline.py b/workflows/fine_res_budget/budget/pipeline.py
@@ -26,19 +26,20 @@
 
 
 PHYSICS_VARIABLES = [
-    "omega",
-    "t_dt_gfdlmp",
-    "t_dt_nudge",
-    "t_dt_phys",
-    "qv_dt_gfdlmp",
-    "qv_dt_phys",
-    "eddy_flux_omega_sphum",
-    "eddy_flux_omega_temp",
-    "omega",
+    # from ShiELD diagnostics
+    "t_dt_fv_sat_adj_coarse",
+    "t_dt_nudge_coarse",
+    "t_dt_phys_coarse",
+    "qv_dt_fv_sat_adj_coarse",
+    "qv_dt_phys_coarse",
+    "eddy_flux_vulcan_omega_sphum",
+    "eddy_flux_vulcan_omega_temp",
+    "vulcan_omega_coarse",
+    "area_coarse",
+    # from restarts
     "delp",
     "sphum",
     "T",
-    "area",
 ]
 
 Dims = Sequence[Hashable]
@@ -99,7 +100,7 @@ def load(ds: xr.Dataset) -> xr.Dataset:
 
 def yield_time_physics_time_slices(merged: xr.Dataset) -> Iterable[Mapping[str, slice]]:
     # grab a physics variable
-    omega = merged["omega"]
+    omega = merged["vulcan_omega_coarse"]
     chunks = omega.chunks[omega.get_axis_num("time")]
     time_slices = chunks_1d_to_slices(chunks)
 

diff --git a/workflows/fine_res_budget/tests/diag.json b/workflows/fine_res_budget/tests/diag.json
diff --git a/workflows/fine_res_budget/tests/regenerate_schema.py b/workflows/fine_res_budget/tests/regenerate_schema.py
@@ -8,8 +8,8 @@
 logging.basicConfig(level=logging.INFO)
 
 
-diagurl = "gs://vcm-ml-data/2020-03-16-5-day-X-SHiELD-simulation-C384-diagnostics/atmos_15min_coarse_ave.zarr"  # noqa
-restart_url = "gs://vcm-ml-data/2020-03-16-5-day-X-SHiELD-simulation-C384-restart-files.zarr"  # noqa
+diagurl = "gs://vcm-ml-raw/2020-05-27-40-day-X-SHiELD-simulation-C384-diagnostics/atmos_15min_coarse_ave.zarr"  # noqa
+restart_url = "gs://vcm-ml-experiments/2020-06-02-fine-res/2020-05-27-40-day-X-SHiELD-simulation-C384-restart-files.zarr"  # noqa
 
 lo_res_coords = ("time", "tile", "grid_xt", "grid_yt", "pfull")
 

diff --git a/workflows/fine_res_budget/tests/restart.json b/workflows/fine_res_budget/tests/restart.json