Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add standardise_mom6_filenames.sh script (#32) #35

Merged
merged 8 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions payu_config/archive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

# Each helper below is located relative to this script's own directory.
# The shell helpers are sourced, so they execute inside this shell process.
source $(dirname "$0")/archive_scripts/archive_cice_restarts.sh
source $(dirname "$0")/archive_scripts/concat_ice_daily.sh
# Remove the underscore before the four-digit year in MOM6 output filenames.
source $(dirname "$0")/archive_scripts/standardise_mom6_filenames.sh
# The Python helper runs as a separate process.
python3 $(dirname "$0")/archive_scripts/build_intake_ds.py
53 changes: 53 additions & 0 deletions payu_config/archive_scripts/standardise_mom6_filenames.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/bash
# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0
#
# Standardise file naming for MOM6 output files in access-om3 by removing the underscore before the four-digit year, i.e., replacing '_YYYY' with 'YYYY'
# This was written assuming it would be used as a payu "userscript" at the "archive" stage, but alternatively a path to an "archive" directory can be provided.
# For more details, see https://github.com/COSIMA/om3-scripts/issues/32

# Print usage information for this script to stdout.
Help()
{
    # Display help
    echo -e "Standardise file naming for MOM6 output files.\n"
    # Use the real script name (not a template placeholder) and show that
    # -h and -d are independent options.
    echo "Syntax: standardise_mom6_filenames.sh [-h] [-d DIRECTORY]"
    echo "options:"
    echo "h Print this help message."
    echo "d Process files in the specified 'DIRECTORY'."
    echo "If -d is not given, every 'archive/output*' directory below the current directory is processed."
}

# Parse command-line options.
# The leading ':' in the optstring selects getopts' silent error mode:
# an unknown option is reported as '?' and an option that is missing its
# required argument is reported as ':' (with the option letter in OPTARG).
# NOTE: when this file is sourced, 'exit' also terminates the sourcing script.
while getopts ":hd:" option; do
    case $option in
        h) # display help
            Help
            exit;;
        d) # Enter a directory
            out_dir=$OPTARG
            # quote the path so directories containing spaces are handled
            if [ ! -d "$out_dir" ]; then
                echo "$out_dir" Does not exist
                # report the failure to the caller instead of exiting with 0
                exit 1
            fi;;
        :) # -d was given without a directory
            echo "Error: Option -$OPTARG requires an argument"
            exit 1;;
        \?) # Invalid option
            echo "Error: Invalid option"
            exit 1;;
    esac
done

# Build the list of directories to process as an array so that paths
# containing whitespace survive intact.
# If no directory was specified, collect all output directories from 'archive'.
out_dirs=()
if [ -z "$out_dir" ]; then
    for candidate in archive/output*[0-9]; do
        # an unmatched glob is left as the literal pattern, so check it exists
        if [ -d "$candidate" ]; then
            out_dirs+=("$candidate")
        fi
    done
else
    out_dirs=("$out_dir")
fi

# process each output directory
for dir in "${out_dirs[@]}"; do
    # process each mom6 file
    for current_file in "$dir"/access-om3.mom6.*.nc; do
        # skip the literal pattern left behind when nothing matches
        [ -f "$current_file" ] || continue

        # Only rewrite the file name itself: applying the substitution to the
        # whole path would alter a directory component containing '_YYYY'.
        file_name=${current_file##*/}
        new_name=$(printf '%s\n' "$file_name" | sed -E 's/_([0-9]{4})/\1/')

        # nothing to do if the name is already in the standard form
        if [ "$new_name" = "$file_name" ]; then
            continue
        fi

        # rename the file without overwriting existing files
        mv -n -- "$current_file" "$dir/$new_name"
    done
done
159 changes: 159 additions & 0 deletions test/test_payu_conf/test_mom6_filenames.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import pytest
import pandas as pd

from os import makedirs, chdir
from subprocess import run
from pathlib import Path

# Directory two levels above the folder holding this test file; the path of the
# script under test is built relative to it.
scripts_base = Path(__file__).parents[2]
# Path of the shell script that the tests execute.
run_str = f"{scripts_base}/payu_config/archive_scripts/standardise_mom6_filenames.sh"

# Common filename prefix of the empty MOM6-style files created by the tests.
DIAG_BASE = "access-om3.mom6.h.test"


def assert_file_exists(p):
    """Raise ``AssertionError`` unless ``p`` resolves to an existing regular file."""
    target = Path(p).resolve()
    if target.is_file():
        return
    raise AssertionError("File does not exist: %s" % str(p))


def assert_f_not_exists(p):
    """Raise ``AssertionError`` if ``p`` resolves to an existing regular file."""
    target = Path(p).resolve()
    if not target.is_file():
        return
    raise AssertionError("File exists and should not: %s" % str(p))


def yearly_files(dir_name, n, tmp_path):
    """
    Create ``n`` empty yearly data files and return their paths.

    The files are written to ``<tmp_path>/<dir_name>/`` (which is created here)
    and are named ``<DIAG_BASE>._YYYY.nc``, one per year-end timestamp starting
    from 2010. Each file is verified to exist before the list of path strings
    is returned.
    """
    stamps = pd.date_range("2010-01-01", freq="YE", periods=n)

    out_dir = str(tmp_path) + "/" + dir_name + "/"
    paths = []
    for stamp in stamps:
        # the first four characters of the timestamp string are the year
        year = str(stamp)[0:4]
        paths.append(f"{out_dir}{DIAG_BASE}._{year}.nc")

    makedirs(out_dir)

    # opening in write mode is enough to create an empty file
    for file_path in paths:
        open(file_path, "w").close()

    for file_path in paths:
        assert_file_exists(file_path)

    return paths


@pytest.mark.parametrize(
    "hist_dir, use_dir, n",
    [
        ("archive/output000", False, 12),
        ("archive/output999", False, 1),
        ("archive/output9999", False, 1),
        ("archive/output574", True, 12),
    ],
)  # run this test with several folder names and lengths, sometimes providing the directory as an argument
def test_true_case(hist_dir, use_dir, n, tmp_path):
    """
    Run the script on ``n`` freshly created ``._YYYY.nc`` files and check the renames.

    After the run, every file must exist under its standardised ``.YYYY.nc``
    name, no file for the year after the last one may have appeared, and none
    of the original ``._YYYY.nc`` names may remain.

    ``use_dir`` selects whether the output directory is passed with ``-d`` or
    the script is left to discover it from the current working directory.
    """

    yearly_paths = yearly_files(hist_dir, n, tmp_path)
    # the script looks for 'archive/output*' relative to the working directory
    chdir(tmp_path)
    output_dir = Path(yearly_paths[0]).parents[0]

    if not use_dir:  # default path
        run([run_str])
    else:  # provide path
        run(
            [
                run_str,
                "-d",
                output_dir,
            ],
        )

    # one more year than was created, to check that nothing extra appears
    expected_years = pd.date_range("2010-01-01", freq="YE", periods=n + 1)

    # valid output filenames
    expected_paths = [
        f"{output_dir}/{DIAG_BASE}.{str(t)[0:4]}.nc" for t in expected_years
    ]

    for p in expected_paths[0:n]:
        assert_file_exists(p)

    # Slice rather than index: iterating over expected_paths[n] would walk the
    # characters of a single path string and never test the real file name.
    for p in expected_paths[n:]:
        assert_f_not_exists(p)

    for p in yearly_paths:
        assert_f_not_exists(p)


@pytest.mark.parametrize(
    "hist_dir, use_dir, n",
    [
        ("archive/output000", False, 12),
    ],
)
def test_dont_override(hist_dir, use_dir, n, tmp_path):
    """
    Check that the script leaves files alone when the target name is taken.

    Creates the ``._YYYY.nc`` input files and also the ``.YYYY.nc`` files they
    would be renamed to, runs the script, and confirms that every file of both
    sets still exists afterwards.
    """

    yearly_paths = yearly_files(hist_dir, n, tmp_path)
    chdir(tmp_path)
    output_dir = Path(yearly_paths[0]).parents[0]

    # pre-create every name the script would otherwise rename to
    expected_paths = []
    for stamp in pd.date_range("2010-01-01", freq="YE", periods=n):
        expected_paths.append(f"{output_dir}/{DIAG_BASE}.{str(stamp)[0:4]}.nc")

    for target in expected_paths:
        open(target, "w").close()

    # pass the directory explicitly only when requested
    command = [run_str, "-d", output_dir] if use_dir else [run_str]
    run(command)

    for p in [*expected_paths, *yearly_paths]:
        assert_file_exists(p)


# @pytest.mark.parametrize("hist_dir, ndays", [("Default", 31), ("Default", 27)])
# def test_no_override(hist_dir, ndays, hist_base, tmp_path):
# """
# Run the script to convert the daily data into monthly files, but the output filename already exists, and check nothing happens.
# """

# daily_paths = daily_files(hist_dir, hist_base, ndays, tmp_path)

# chdir(tmp_path)
# output_dir = Path(daily_paths[0]).parents[0]

# expected_months = pd.date_range("2010-01-01", freq="ME", periods=1)

# monthly_paths = [
# f"{output_dir}/{hist_base}.{str(t)[0:7]}.nc" for t in expected_months
# ]
# for p in monthly_paths:
# Path(p).touch()

# run([run_str])

# for p in daily_paths:
# assert_file_exists(p)

# for p in monthly_paths:
# assert_file_exists(p)
Loading