Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add standardise_mom6_filenames.sh script (#32) #35

Merged
merged 8 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions payu_config/archive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

# Run each archive-stage step from the archive_scripts directory that sits
# next to this script. The command substitutions are quoted so that an
# install path containing whitespace is not split into several words.
source "$(dirname "$0")/archive_scripts/archive_cice_restarts.sh"
source "$(dirname "$0")/archive_scripts/concat_ice_daily.sh"
source "$(dirname "$0")/archive_scripts/standardise_mom6_filenames.sh"
python3 "$(dirname "$0")/archive_scripts/build_intake_ds.py"
53 changes: 53 additions & 0 deletions payu_config/archive_scripts/standardise_mom6_filenames.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/bash
# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0.
#
# Standardise file naming for MOM6 output files from access-om3.
# This was written assuming it would be used as a payu "userscript" at the "archive" stage, but alternatively the path to a single output directory (e.g. archive/output000) can be provided with the -d option.
# For more details, see https://github.com/COSIMA/om3-scripts/issues/32

Help()
{
    # Display help: print the usage summary for this script to stdout.
    # printf is used rather than 'echo -e' so that escape handling does not
    # depend on the shell's echo implementation.
    printf 'Standardise file naming for MOM6 output files.\n\n'
    printf 'Syntax: standardise_mom6_filenames.sh [-h] [-d DIRECTORY]\n'
    printf 'options:\n'
    printf 'h Print this help message.\n'
    printf "d Process files in the specified 'DIRECTORY'.\n"
    printf "  If omitted, every directory matching 'archive/output*[0-9]' is processed.\n"
}

# Parse command-line options.
# The leading ':' in the optstring puts getopts in silent mode: an unknown
# option is reported as '?', and an option that is missing its argument is
# reported as ':' (with the option letter in OPTARG). Both are handled
# explicitly so that a bare '-d' is not silently ignored.
while getopts ":hd:" option; do
    case $option in
        h) # display help
            Help
            exit;;
        d) # Enter a directory
            out_dir=$OPTARG
            if [ ! -d "$out_dir" ]; then
                echo "$out_dir Does not exist"
                # non-zero status so callers can detect the failure
                exit 1
            fi;;
        :) # Option given without its required argument
            echo "Error: Option -$OPTARG requires an argument"
            exit 1;;
        \?) # Invalid option
            echo "Error: Invalid option"
            exit 1;;
    esac
done

# if no directory was specified, collect all directories from 'archive'
if [ -z "$out_dir" ]; then
    # Use a glob instead of parsing 'ls' output. If nothing matches, the
    # pattern is left as a literal string, which the -d test below skips.
    out_dirs=(archive/output*[0-9])
else
    out_dirs=("$out_dir")
fi

# process each output directory
for dir in "${out_dirs[@]}"; do
    [ -d "$dir" ] || continue
    # process each mom6 file
    for current_file in "$dir"/access-om3.mom6.*.nc; do
        # the -f test also skips the literal pattern left by an unmatched glob
        if [ -f "$current_file" ]; then
            # Apply the substitution to the file name only, so that directory
            # components of the path can never be rewritten.
            file_name=${current_file##*/}
            # Drop the underscore directly in front of a four-digit group that
            # is followed by '.'; names that do not match are left unchanged.
            new_filename=$dir/$(echo "$file_name" | sed -E 's/_([0-9]{4})\./\1./')
            # Skip files whose name is unchanged: 'mv' would otherwise fail
            # because source and destination are the same file.
            if [ "$new_filename" != "$current_file" ]; then
                # rename the file without overwriting existing files
                mv -n -- "$current_file" "$new_filename"
            fi
        fi
    done
done
147 changes: 147 additions & 0 deletions test/test_payu_conf/test_mom6_filenames.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import pytest
import xarray as xr
import numpy as np
import pandas as pd

from os import makedirs, chdir
from subprocess import run
from pathlib import Path

# Directory two levels above the folder that contains this test module.
scripts_base = Path(__file__).parents[2]

# Path (as a string) of the shell script exercised by the tests below.
run_str = (
    str(scripts_base)
    + "/payu_config/archive_scripts/standardise_mom6_filenames.sh"
)

# Common file-name stem used for every placeholder MOM6 file the tests create.
DIAG_BASE = "access-om3.mom6.h.test"


def assert_file_exists(p):
    """Raise ``AssertionError`` unless *p* resolves to an existing regular file."""
    resolved = Path(p).resolve()
    if resolved.is_file():
        return
    raise AssertionError(f"File does not exist: {p!s}")


def assert_f_not_exists(p):
    """Raise ``AssertionError`` if *p* resolves to an existing regular file."""
    resolved = Path(p).resolve()
    if not resolved.is_file():
        return
    raise AssertionError(f"File exists and should not: {p!s}")


def monthly_files(dir_name, nmonths, tmp_path):
    """
    Create ``nmonths`` empty placeholder files, one per month from January 2010.

    The files are written to ``<tmp_path>/<dir_name>/`` and are named
    ``<DIAG_BASE>_YYYY_MM.nc``.

    Parameters
    ----------
    dir_name : str
        Directory to create below ``tmp_path``, e.g. ``"archive/output000"``.
        It must not already exist, because ``makedirs`` is called without
        ``exist_ok``.
    nmonths : int
        Number of consecutive monthly files to create.
    tmp_path : path-like
        Base directory for the test.

    Returns
    -------
    list of str
        Paths of the files created, in chronological order.
    """
    times = pd.date_range("2010-01-01", freq="ME", periods=nmonths)

    out_dir = str(tmp_path) + "/" + dir_name + "/"
    paths = [f"{out_dir}{DIAG_BASE}_{t:%Y}_{t:%m}.nc" for t in times]

    makedirs(out_dir)

    for p in paths:
        # Only the file name matters to the script under test, so an empty
        # file is enough.
        Path(p).touch()
        assert_file_exists(p)

    return paths


@pytest.mark.parametrize(
    "hist_dir, use_dir, nmonths",
    [
        # When no directory is passed, the script only searches
        # 'archive/output*[0-9]' below the working directory, so the
        # default-path cases must use a directory name matching that pattern.
        ("archive/output000", False, 12),
        ("archive/output999", False, 1),
        ("archive/output9999", False, 1),
        ("archive/output574", True, 12),
    ],
)  # run this test with several folder names and lengths, providing the directory as an argument in some cases
def test_true_case(hist_dir, use_dir, nmonths, tmp_path):
    """
    Run the renaming script and check that every file was renamed.

    ``nmonths`` placeholder files are created in ``tmp_path/hist_dir``. The
    script is then run from ``tmp_path``, either with no arguments
    (``use_dir`` false) or with ``-d <output directory>`` (``use_dir`` true).
    Afterwards the renamed files must exist, a file for the month after the
    last one must not exist, and none of the original names may remain.
    """
    monthly_paths = monthly_files(hist_dir, nmonths, tmp_path)
    chdir(tmp_path)
    output_dir = Path(monthly_paths[0]).parents[0]

    if use_dir:  # provide path
        run([run_str, "-d", output_dir])
    else:  # default path
        run([run_str])

    # One more month than was created, so that the final entry is a name that
    # must not exist after the script has run.
    expected_months = pd.date_range("2010-01-01", freq="ME", periods=nmonths + 1)

    # valid output filenames
    # NOTE(review): these names assume the script turns '_YYYY_MM' into
    # '_YYYY-MM'; confirm this against the sed pattern used in
    # standardise_mom6_filenames.sh.
    expected_paths = [
        f"{output_dir}/{DIAG_BASE}_{t:%Y}-{t:%m}.nc" for t in expected_months
    ]

    for p in expected_paths[:nmonths]:
        assert_file_exists(p)

    # Slice rather than index: indexing yields a single string, and iterating
    # over that would check its individual characters instead of the path.
    for p in expected_paths[nmonths:]:
        assert_f_not_exists(p)

    for p in monthly_paths:
        assert_f_not_exists(p)


# @pytest.mark.parametrize("hist_dir, ndays", [("Default", 1), ("Default", 30)])
# def test_incomplete_month(hist_dir, ndays, hist_base, tmp_path):
# """
# Run the script to convert the daily data into monthly files, with less than 28 days data, and check no things happen.
# """

# daily_paths = daily_files(hist_dir, hist_base, ndays, tmp_path)

# chdir(tmp_path)
# output_dir = Path(daily_paths[0]).parents[0]

# run([run_str])
# expected_months = pd.date_range("2010-01-01", freq="ME", periods=1)

# monthly_paths = [
# f"{output_dir}/{hist_base}.{str(t)[0:7]}.nc" for t in expected_months
# ]

# for p in daily_paths:
# assert_file_exists(p)

# for p in monthly_paths:
# assert_f_not_exists(p)


# @pytest.mark.parametrize("hist_dir, ndays", [("Default", 31), ("Default", 27)])
# def test_no_override(hist_dir, ndays, hist_base, tmp_path):
# """
# Run the script to convert the daily data into monthly files, but the output filename already exists, and check nothing happens.
# """

# daily_paths = daily_files(hist_dir, hist_base, ndays, tmp_path)

# chdir(tmp_path)
# output_dir = Path(daily_paths[0]).parents[0]

# expected_months = pd.date_range("2010-01-01", freq="ME", periods=1)

# monthly_paths = [
# f"{output_dir}/{hist_base}.{str(t)[0:7]}.nc" for t in expected_months
# ]
# for p in monthly_paths:
# Path(p).touch()

# run([run_str])

# for p in daily_paths:
# assert_file_exists(p)

# for p in monthly_paths:
# assert_file_exists(p)
Loading