From 5bdb701c4780fd77fd4c960c63ef5e7e622090c2 Mon Sep 17 00:00:00 2001 From: mnurisso Date: Fri, 27 Sep 2024 12:34:42 +0300 Subject: [PATCH 01/26] initial linting --- ecmean/libs/plotting.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py index 1004d87..8ed4cfa 100644 --- a/ecmean/libs/plotting.py +++ b/ecmean/libs/plotting.py @@ -15,8 +15,7 @@ import numpy as np - -def heatmap_comparison_pi(relative_table, diag, filemap, size_model = 14): +def heatmap_comparison_pi(relative_table, diag, filemap, size_model=14): """Function to produce a heatmap - seaborn based - for Performance Indices based on CMIP6 ratio""" @@ -32,7 +31,6 @@ def heatmap_comparison_pi(relative_table, diag, filemap, size_model = 14): tictoc = [0, 0.25, 0.5, 0.75, 1, 2, 3, 4, 5] title = 'CMIP6 RELATIVE PI' - # axs.subplots_adjust(bottom=0.2) # pal = sns.diverging_palette(h_neg=130, h_pos=10, s=99, l=55, sep=3, as_cmap=True) tot = len(myfield.columns) @@ -44,7 +42,7 @@ def heatmap_comparison_pi(relative_table, diag, filemap, size_model = 14): cbar_kws={"ticks": tictoc, 'label': title}, ax=axs, annot=True, linewidth=0.5, fmt='.2f', annot_kws={'fontsize': size_model, 'fontweight': 'bold'}) - + chart = chart.set_facecolor('whitesmoke') axs.set_title(f'{title} {diag.modelname} {diag.expname} {diag.year1} {diag.year2}', fontsize=25) axs.vlines(list(range(sss, tot + sss, sss)), ymin=-1, ymax=len(myfield.index), colors='k') @@ -62,8 +60,8 @@ def heatmap_comparison_pi(relative_table, diag, filemap, size_model = 14): plt.close() -def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addnan = True, - size_model = 14, size_obs = 8): +def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addnan=True, + size_model=14, size_obs=8): """Function to produce a heatmap - seaborn based - for Global Mean based on season-averaged standard deviation ratio""" @@ -96,10 +94,10 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addn fmt='.2f', cmap=pal) if addnan: empty = np.where(clean.isna(), 0, np.nan) - empty = np.where(data_table[mask]==0, np.nan, empty) + empty = np.where(data_table[mask] == 0, np.nan, empty) chart = sns.heatmap(empty, annot=data_table[mask], fmt='.2f', vmin=-thr - 0.5, vmax=thr + 0.5, center=0, - annot_kws={'va': 'bottom', 'fontsize': size_model, 'color':'dimgrey'}, cbar=False, + annot_kws={'va': 'bottom', 'fontsize': size_model, 'color': 'dimgrey'}, cbar=False, cmap=ListedColormap(['none'])) chart = sns.heatmap(clean, annot=mean_table[mask], vmin=-thr - 0.5, vmax=thr + 0.5, center=0, annot_kws={'va': 'top', 'fontsize': size_obs, 'fontstyle': 'italic'}, @@ -108,8 +106,8 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addn empty = np.where(clean.isna(), 0, np.nan) empty = np.where(mean_table[mask].isna(), np.nan, empty) chart = sns.heatmap(empty, annot=mean_table[mask], vmin=-thr - 0.5, vmax=thr + 0.5, center=0, - annot_kws={'va': 'top', 'fontsize': size_obs, 'fontstyle': 'italic', 'color':'dimgrey'}, - fmt='.2f', cmap=ListedColormap(['none']), cbar=False) + annot_kws={'va': 'top', 'fontsize': size_obs, 'fontstyle': 'italic', 'color': 'dimgrey'}, + fmt='.2f', cmap=ListedColormap(['none']), cbar=False) chart = chart.set_facecolor('whitesmoke') axs.set_title(f'{title} {diag.modelname} {diag.expname} {diag.year1} {diag.year2}', fontsize=25) From 35aa21328585385d9386287d107c48e2030c33f4 Mon Sep 17 00:00:00 2001 From: mnurisso Date: Fri, 27 Sep 2024 13:17:35 +0300 Subject: [PATCH 02/26] plot takes now a dictionary with diag infos --- ecmean/global_mean.py | 7 +++--- ecmean/libs/plotting.py | 43 ++++++++++++++++++++++++----------- ecmean/performance_indices.py | 30 +++++++++++------------- 3 files changed, 47 insertions(+), 33 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index 191bdff..511e72a 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -331,8 +331,11 @@ def global_mean(exp, year1, year2, f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf' loggy.info('Figure file is: %s', mapfile) + diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, + 'year1': diag.year1, 'year2': diag.year2} + heatmap_comparison_gm(data_table, mean_table, std_table, - diag, mapfile, addnan=diag.addnan) + diag_dict, mapfile, addnan=diag.addnan) # Print appending one line to table (for tuning) if diag.ftable: @@ -345,7 +348,6 @@ def global_mean(exp, year1, year2, print('ECmean4 Global Mean succesfully computed!') - def gm_entry_point(): """ Command line interface to run the global_mean function @@ -362,4 +364,3 @@ def gm_entry_point(): model=args.model, ensemble=args.ensemble, addnan=args.addnan, outputdir=args.outputdir) - diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py index 8ed4cfa..a3ad3ce 100644 --- a/ecmean/libs/plotting.py +++ b/ecmean/libs/plotting.py @@ -15,9 +15,17 @@ import numpy as np -def heatmap_comparison_pi(relative_table, diag, filemap, size_model=14): - """Function to produce a heatmap - seaborn based - for Performance Indices - based on CMIP6 ratio""" +def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=14): + """ + Function to produce a heatmap - seaborn based - for Performance Indices + based on CMIP6 ratio + + Args: + relative_table (pandas.DataFrame): table of relative performance indices + diag (dict): dictionary containing diagnostic information + filemap (str): path to save the plot + size_model (int): size of the PIs in the plot + """ # defining plot size myfield = relative_table @@ -31,20 +39,17 @@ def heatmap_comparison_pi(relative_table, diag, filemap, size_model=14): tictoc = [0, 0.25, 0.5, 0.75, 1, 2, 3, 4, 5] title = 'CMIP6 RELATIVE PI' - # axs.subplots_adjust(bottom=0.2) - # pal = sns.diverging_palette(h_neg=130, h_pos=10, s=99, l=55, sep=3, as_cmap=True) tot = len(myfield.columns) sss = len(set([tup[1] for tup in myfield.columns])) divnorm = TwoSlopeNorm(vmin=thr[0], vcenter=thr[1], vmax=thr[2]) pal = sns.color_palette("Spectral_r", as_cmap=True) - # pal = sns.diverging_palette(220, 20, as_cmap=True) chart = sns.heatmap(myfield, norm=divnorm, cmap=pal, cbar_kws={"ticks": tictoc, 'label': title}, ax=axs, annot=True, linewidth=0.5, fmt='.2f', annot_kws={'fontsize': size_model, 'fontweight': 'bold'}) chart = chart.set_facecolor('whitesmoke') - axs.set_title(f'{title} {diag.modelname} {diag.expname} {diag.year1} {diag.year2}', fontsize=25) + axs.set_title(f'{title} {diag["modelname"]} {diag["expname"]} {diag["year1"]} {diag["year2"]}', fontsize=25) axs.vlines(list(range(sss, tot + sss, sss)), ymin=-1, ymax=len(myfield.index), colors='k') axs.hlines(len(myfield.index) - 1, xmin=-1, xmax=len(myfield.columns), colors='purple', lw=2) names = [' '.join(x) for x in myfield.columns] @@ -60,10 +65,22 @@ def heatmap_comparison_pi(relative_table, diag, filemap, size_model=14): plt.close() -def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addnan=True, - size_model=14, size_obs=8): - """Function to produce a heatmap - seaborn based - for Global Mean - based on season-averaged standard deviation ratio""" +def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap: str, + addnan=True, size_model=14, size_obs=8): + """ + Function to produce a heatmap - seaborn based - for Global Mean + based on season-averaged standard deviation ratio + + Args: + data_table (pandas.DataFrame): table of model data + mean_table (pandas.DataFrame): table of observations + std_table (pandas.DataFrame): table of standard deviation + diag (dict): dictionary containing diagnostic information + filemap (str): path to save the plot + addnan (bool): add to the final plots also fields which cannot be compared against observations + size_model (int): size of the model values in the plot + size_obs (int): size of the observation values in the plot + """ # define array ratio = (data_table - mean_table) / std_table @@ -76,7 +93,7 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addn # for dimension of plots xfig = len(clean.columns) yfig = len(clean.index) - fig, axs = plt.subplots(1, 1, sharey=True, tight_layout=True, figsize=(xfig+5, yfig+2)) + _, axs = plt.subplots(1, 1, sharey=True, tight_layout=True, figsize=(xfig+5, yfig+2)) title = 'GLOBAL MEAN' @@ -110,7 +127,7 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addn fmt='.2f', cmap=ListedColormap(['none']), cbar=False) chart = chart.set_facecolor('whitesmoke') - axs.set_title(f'{title} {diag.modelname} {diag.expname} {diag.year1} {diag.year2}', fontsize=25) + axs.set_title(f'{title} {diag["modelname"]} {diag["expname"]} {diag["year1"]} {diag["year2"]}', fontsize=25) axs.vlines(list(range(sss, tot + sss, sss)), ymin=-1, ymax=len(clean.index), colors='k') names = [' '.join(x) for x in clean.columns] axs.set_xticks([x + .5 for x in range(len(names))], names, rotation=45, ha='right', fontsize=16) diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index e5c6367..43bb451 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -35,6 +35,7 @@ dask.config.set(scheduler="synchronous") + def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist): """Main parallel diagnostic worker for performance indices @@ -183,21 +184,21 @@ def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist): # horizontal averaging with land-sea mask else: - + complete = (final - cfield)**2 / vfield outarray = mask_field(xfield=complete, - mask_type=piclim[var]['mask'], - dom=domain, mask=domain_mask) + mask_type=piclim[var]['mask'], + dom=domain, mask=domain_mask) # loop on different regions for region in diag.regions: slicearray = select_region(outarray, region) - + # latitude-based averaging weights = np.cos(np.deg2rad(slicearray.lat)) out = slicearray.weighted(weights).mean().data - # store the PI + # store the PI result[season][region] = round(float(out), 3) # diagnostic @@ -294,15 +295,12 @@ def performance_indices(exp, year1, year2, loggy.info('Preproc in {:.4f} seconds'.format(toc - tic)) tic = time() - # loop on the variables, create the parallel process for varlist in weight_split(diag.field_all, diag.numproc): - #print(varlist) - + # print(varlist) - core = Process( - target=pi_worker, args=(util_dictionary, piclim, - face, diag, diag.field_3d, varstat, varlist)) + core = Process(target=pi_worker, args=(util_dictionary, piclim, + face, diag, diag.field_3d, varstat, varlist)) core.start() processes.append(core) @@ -310,7 +308,6 @@ def performance_indices(exp, year1, year2, for proc in processes: proc.join() - toc = time() # evaluate tic-toc time of execution loggy.info('Done in {:.4f} seconds'.format(toc - tic) + ' with ' + str(diag.numproc) + ' processors') @@ -341,7 +338,7 @@ def performance_indices(exp, year1, year2, # set longname, reorganize the dictionaries plotted = {} cmip6 = {} - longnames =[] + longnames = [] for var in diag.field_all: plotted[piclim[var]['longname']] = ordered[var] cmip6[piclim[var]['longname']] = filt_piclim[var] @@ -351,7 +348,6 @@ def performance_indices(exp, year1, year2, data_table = dict_to_dataframe(plotted) loggy.debug(data_table) - # relative pi with re-ordering of rows cmip6_table = data_table.div(dict_to_dataframe(cmip6).reindex(longnames)) @@ -363,12 +359,13 @@ def performance_indices(exp, year1, year2, cmip6_table = cmip6_table[lll] loggy.debug(cmip6_table) - # call the heatmap routine for a plot mapfile = diag.figdir / \ f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf' # heatmap_comparison_old(data_table, diag, mapfile) - heatmap_comparison_pi(cmip6_table, diag, mapfile) + diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, + 'year1': diag.year1, 'year2': diag.year2} + heatmap_comparison_pi(cmip6_table, diag_dict, mapfile) toc = time() # evaluate tic-toc time of postprocessing @@ -391,4 +388,3 @@ def pi_entry_point(): interface=args.interface, config=args.config, model=args.model, ensemble=args.ensemble, outputdir=args.outputdir) - From 0cc274f3d7aa65dfa3b1dfc72219d148811820ef Mon Sep 17 00:00:00 2001 From: mnurisso Date: Fri, 27 Sep 2024 13:17:48 +0300 Subject: [PATCH 03/26] test linting --- tests/test_global_mean.py | 12 +++++++----- tests/test_performance_indices.py | 3 --- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/test_global_mean.py b/tests/test_global_mean.py index b1a8f78..7bdc9e0 100644 --- a/tests/test_global_mean.py +++ b/tests/test_global_mean.py @@ -13,6 +13,7 @@ # set up coverage env var env = {**os.environ, "COVERAGE_PROCESS_START": ".coveragerc"} + # Open the text file for reading def load_gm_txt_files(textfile): """ @@ -26,18 +27,19 @@ def load_gm_txt_files(textfile): for line in file: # Remove leading and trailing whitespace and split the line by '|' columns = line.strip().split('|') - + # Check if there are at least 5 columns (including the header row) if len(columns) >= 5: # Extract the first and fourth columns and remove leading/trailing whitespace variable = columns[1].strip() value = columns[4].strip() - + # Add the data to the dictionary if it's not empty if variable and value: data_dict[variable] = value return data_dict + # call on coupled ECE using parser and debug mode def test_cmd_global_mean_coupled(): file1 = 'tests/table/global_mean_cpld_EC-Earth4_r1i1p1f1_1990_1990.txt' @@ -47,7 +49,7 @@ def test_cmd_global_mean_coupled(): subprocess.run(['global_mean', 'cpld', '1990', '1990', '-j', '2', '-c', 'tests/config.yml', '-t', '-v', 'debug'], env=env, check=True) - + data1 = load_gm_txt_files(file1) data2 = load_gm_txt_files(file2) @@ -62,12 +64,13 @@ def test_global_mean_amip(): os.remove(file1) global_mean(exp='amip', year1=1990, year2=1990, numproc=1, config='tests/config.yml', line=True, addnan=True) - + data1 = load_gm_txt_files(file1) data2 = load_gm_txt_files(file2) assert are_dicts_equal(data1, data2, TOLERANCE), "TXT files are not identical." + # call on amip ECE using the xdataset option def test_global_mean_amip_xdataset(): file1 = 'tests/table/global_mean_amip_EC-Earth4_r1i1p1f1_1990_1990.txt' @@ -95,4 +98,3 @@ def test_global_mean_CMIP6(): data2 = load_gm_txt_files(file2) assert are_dicts_equal(data1, data2, TOLERANCE), "TXT files are not identical." - diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py index a3cfcab..c62a67f 100644 --- a/tests/test_performance_indices.py +++ b/tests/test_performance_indices.py @@ -71,6 +71,3 @@ def test_performance_indices_amip_xdataset(clim): data2 = yaml.safe_load(f2) assert are_dicts_equal(data1, data2, TOLERANCE), "YAML files are not identical." - - - From 35d8d55cf0c28f1d2c9cefa69828e1e734ff9e43 Mon Sep 17 00:00:00 2001 From: mnurisso Date: Fri, 27 Sep 2024 14:52:05 +0300 Subject: [PATCH 04/26] title kwargs --- ecmean/libs/plotting.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py index a3ad3ce..827c902 100644 --- a/ecmean/libs/plotting.py +++ b/ecmean/libs/plotting.py @@ -15,7 +15,7 @@ import numpy as np -def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=14): +def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=14, **kwargs): """ Function to produce a heatmap - seaborn based - for Performance Indices based on CMIP6 ratio @@ -25,6 +25,9 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1 diag (dict): dictionary containing diagnostic information filemap (str): path to save the plot size_model (int): size of the PIs in the plot + + Keyword Args: + title (str): title of the plot, overrides default title """ # defining plot size @@ -37,7 +40,12 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1 thr = [0, 1, 5] tictoc = [0, 0.25, 0.5, 0.75, 1, 2, 3, 4, 5] - title = 'CMIP6 RELATIVE PI' + + if 'title' in kwargs: + title = kwargs['title'] + else: + title = 'CMIP6 RELATIVE PI' + title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + diag['year1'] + ' ' + diag['year2'] tot = len(myfield.columns) sss = len(set([tup[1] for tup in myfield.columns])) @@ -49,7 +57,7 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1 annot_kws={'fontsize': size_model, 'fontweight': 'bold'}) chart = chart.set_facecolor('whitesmoke') - axs.set_title(f'{title} {diag["modelname"]} {diag["expname"]} {diag["year1"]} {diag["year2"]}', fontsize=25) + axs.set_title(title, fontsize=25) axs.vlines(list(range(sss, tot + sss, sss)), ymin=-1, ymax=len(myfield.index), colors='k') axs.hlines(len(myfield.index) - 1, xmin=-1, xmax=len(myfield.columns), colors='purple', lw=2) names = [' '.join(x) for x in myfield.columns] @@ -66,7 +74,7 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1 def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap: str, - addnan=True, size_model=14, size_obs=8): + addnan=True, size_model=14, size_obs=8, **kwargs): """ Function to produce a heatmap - seaborn based - for Global Mean based on season-averaged standard deviation ratio @@ -80,6 +88,9 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap addnan (bool): add to the final plots also fields which cannot be compared against observations size_model (int): size of the model values in the plot size_obs (int): size of the observation values in the plot + + Keyword Args: + title (str): title of the plot, overrides default title """ # define array @@ -95,7 +106,11 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap yfig = len(clean.index) _, axs = plt.subplots(1, 1, sharey=True, tight_layout=True, figsize=(xfig+5, yfig+2)) - title = 'GLOBAL MEAN' + if 'title' in kwargs: + title = kwargs['title'] + else: + title = 'GLOBAL MEAN' + title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + diag['year1'] + ' ' + diag['year2'] # set color range and palette thr = 10 @@ -127,7 +142,7 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap fmt='.2f', cmap=ListedColormap(['none']), cbar=False) chart = chart.set_facecolor('whitesmoke') - axs.set_title(f'{title} {diag["modelname"]} {diag["expname"]} {diag["year1"]} {diag["year2"]}', fontsize=25) + axs.set_title(title, fontsize=25) axs.vlines(list(range(sss, tot + sss, sss)), ymin=-1, ymax=len(clean.index), colors='k') names = [' '.join(x) for x in clean.columns] axs.set_xticks([x + .5 for x in range(len(names))], names, rotation=45, ha='right', fontsize=16) From 893b83f39d8c4b7472983530878bee1997b9ef8c Mon Sep 17 00:00:00 2001 From: mnurisso Date: Fri, 27 Sep 2024 15:12:52 +0300 Subject: [PATCH 05/26] kwargs refined --- ecmean/libs/plotting.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py index 827c902..06bc6c4 100644 --- a/ecmean/libs/plotting.py +++ b/ecmean/libs/plotting.py @@ -13,6 +13,7 @@ from matplotlib.colors import ListedColormap import seaborn as sns import numpy as np +from .general import dict_to_dataframe def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=14, **kwargs): @@ -21,7 +22,7 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1 based on CMIP6 ratio Args: - relative_table (pandas.DataFrame): table of relative performance indices + relative_table (pandas.DataFrame or dict): table of relative performance indices diag (dict): dictionary containing diagnostic information filemap (str): path to save the plot size_model (int): size of the PIs in the plot @@ -29,6 +30,8 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1 Keyword Args: title (str): title of the plot, overrides default title """ + if isinstance(relative_table, dict): + relative_table = dict_to_dataframe(relative_table) # defining plot size myfield = relative_table @@ -45,7 +48,7 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1 title = kwargs['title'] else: title = 'CMIP6 RELATIVE PI' - title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + diag['year1'] + ' ' + diag['year2'] + title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + str(diag['year1']) + ' ' + str(diag['year2']) tot = len(myfield.columns) sss = len(set([tup[1] for tup in myfield.columns])) @@ -80,9 +83,9 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap based on season-averaged standard deviation ratio Args: - data_table (pandas.DataFrame): table of model data - mean_table (pandas.DataFrame): table of observations - std_table (pandas.DataFrame): table of standard deviation + data_table (pandas.DataFrame or dict): table of model data + mean_table (pandas.DataFrame or dict): table of observations + std_table (pandas.DataFrame or dict): table of standard deviation diag (dict): dictionary containing diagnostic information filemap (str): path to save the plot addnan (bool): add to the final plots also fields which cannot be compared against observations @@ -92,6 +95,9 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap Keyword Args: title (str): title of the plot, overrides default title """ + for tab in [data_table, mean_table, std_table]: + if isinstance(tab, dict): + tab = dict_to_dataframe(tab) # define array ratio = (data_table - mean_table) / std_table @@ -110,7 +116,7 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap title = kwargs['title'] else: title = 'GLOBAL MEAN' - title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + diag['year1'] + ' ' + diag['year2'] + title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + str(diag['year1']) + ' ' + str(diag['year2']) # set color range and palette thr = 10 From d9916672d9269d6ecee26bb7f0b6ff9b32ba72e2 Mon Sep 17 00:00:00 2001 From: mnurisso Date: Fri, 27 Sep 2024 15:14:19 +0300 Subject: [PATCH 06/26] added PI plot test --- tests/test_performance_indices.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py index c62a67f..44a3971 100644 --- a/tests/test_performance_indices.py +++ b/tests/test_performance_indices.py @@ -10,6 +10,7 @@ from ecmean.performance_indices import performance_indices from ecmean.libs.ncfixers import xr_preproc from ecmean.libs.general import are_dicts_equal +from ecmean.libs.plotting import heatmap_comparison_pi # set TOLERANCE TOLERANCE = 1e-3 @@ -71,3 +72,12 @@ def test_performance_indices_amip_xdataset(clim): data2 = yaml.safe_load(f2) assert are_dicts_equal(data1, data2, TOLERANCE), "YAML files are not identical." + + +def test_pi_plot(): + file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml' + with open(file1, 'r', encoding='utf8') as f1: + data1 = yaml.safe_load(f1) + heatmap_comparison_pi(data1, {'modelname': 'EC-Earth4', 'expname': 'amip', 'year1': '1990', 'year2': '1990'}, 'tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png') + assert os.path.isfile('tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png'), "Plot not created." + os.remove('tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png') From 93960c5aaa501d9b4e906edb94d2dd1851e3d04a Mon Sep 17 00:00:00 2001 From: mnurisso Date: Fri, 27 Sep 2024 16:24:47 +0300 Subject: [PATCH 07/26] gm linting --- ecmean/global_mean.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index 511e72a..b6d2969 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -37,6 +37,7 @@ dask.config.set(scheduler="synchronous") + def gm_worker(util, ref, face, diag, varmean, vartrend, varlist): """Main parallel diagnostic worker for global mean @@ -61,7 +62,6 @@ def gm_worker(util, ref, face, diag, varmean, vartrend, varlist): result = init_mydict(diag.seasons, diag.regions) trend = init_mydict(diag.seasons, diag.regions) - # check if the variable is in the interface file if check_var_interface(var, face): @@ -215,7 +215,7 @@ def global_mean(exp, year1, year2, # create util dictionary including mask and weights for both atmosphere # and ocean grids - util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], + util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=True, remap=False) # main loop: manager is required for shared variables @@ -225,14 +225,12 @@ def global_mean(exp, year1, year2, varmean = mgr.dict() vartrend = mgr.dict() processes = [] - - # loop on the variables, create the parallel process for varlist in weight_split(diag.var_all, diag.numproc): core = Process( target=gm_worker, args=(util_dictionary, ref, face, diag, - varmean, vartrend, varlist)) + varmean, vartrend, varlist)) core.start() processes.append(core) @@ -327,8 +325,8 @@ def global_mean(exp, year1, year2, loggy.debug(data_table) # call the heatmap routine for a plot - mapfile = diag.figdir / \ - f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf' + mapfile = os.join(diag.figdir, + f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf') loggy.info('Figure file is: %s', mapfile) diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, From c17026c0705815116dc8607d4ad08dfea8aca1d1 Mon Sep 17 00:00:00 2001 From: mnurisso Date: Fri, 27 Sep 2024 16:25:20 +0300 Subject: [PATCH 08/26] diag and filemap are not mandatory for external usage --- ecmean/libs/plotting.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py index 06bc6c4..5fb9c97 100644 --- a/ecmean/libs/plotting.py +++ b/ecmean/libs/plotting.py @@ -16,7 +16,7 @@ from .general import dict_to_dataframe -def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=14, **kwargs): +def heatmap_comparison_pi(relative_table, diag: dict = None, filemap: str = None, size_model=14, **kwargs): """ Function to produce a heatmap - seaborn based - for Performance Indices based on CMIP6 ratio @@ -70,6 +70,12 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1 axs.figure.axes[-1].yaxis.label.set_size(15) axs.set(xlabel=None) + if filemap is None: + if diag is not None: + filemap = f'PI4_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf' + else: + filemap = 'PI4_heatmap.pdf' + # save and close plt.savefig(filemap) plt.cla() From 1f7596636978bea0b9b04692d871c1411d3eb2cc Mon Sep 17 00:00:00 2001 From: mnurisso Date: Fri, 27 Sep 2024 16:26:38 +0300 Subject: [PATCH 09/26] linting and correct arguments for heatmap (not positional anymore) --- ecmean/performance_indices.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index 43bb451..11277b9 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -360,12 +360,12 @@ def performance_indices(exp, year1, year2, loggy.debug(cmip6_table) # call the heatmap routine for a plot - mapfile = diag.figdir / \ - f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf' + mapfile = os.join(diag.figdir, + f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf') # heatmap_comparison_old(data_table, diag, mapfile) diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, 'year1': diag.year1, 'year2': diag.year2} - heatmap_comparison_pi(cmip6_table, diag_dict, mapfile) + heatmap_comparison_pi(cmip6_table, diag=diag_dict, filemap=mapfile) toc = time() # evaluate tic-toc time of postprocessing From b24d89179858f323147c9a670175ddb28202c326 Mon Sep 17 00:00:00 2001 From: mnurisso Date: Fri, 27 Sep 2024 16:42:07 +0300 Subject: [PATCH 10/26] os management improved --- ecmean/global_mean.py | 12 ++++++------ ecmean/performance_indices.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index b6d2969..2d9f612 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -290,8 +290,8 @@ def global_mean(exp, year1, year2, head = head + ['Obs.', 'Dataset', 'Years'] # write the file with tabulate - tablefile = diag.tabdir / \ - f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.txt' + tablefile = os.path.join(diag.tabdir, + f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.txt') loggy.info('Table file is: %s', tablefile) with open(tablefile, 'w', encoding='utf-8') as out: out.write(tabulate(global_table, headers=head, stralign='center', tablefmt='orgtbl')) @@ -302,8 +302,8 @@ def global_mean(exp, year1, year2, ordered[var] = varmean[var] # dump the yaml file for global_mean, including all the seasons - yamlfile = diag.tabdir / \ - f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.yml' + yamlfile = os.path.join(diag.tabdir, + f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.yml') loggy.info('YAML file is: %s', tablefile) with open(yamlfile, 'w', encoding='utf-8') as file: yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False) @@ -325,8 +325,8 @@ def global_mean(exp, year1, year2, loggy.debug(data_table) # call the heatmap routine for a plot - mapfile = os.join(diag.figdir, - f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf') + mapfile = os.path.join(diag.figdir, + f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf') loggy.info('Figure file is: %s', mapfile) diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index 11277b9..12a21a6 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -360,8 +360,8 @@ def performance_indices(exp, year1, year2, loggy.debug(cmip6_table) # call the heatmap routine for a plot - mapfile = os.join(diag.figdir, - f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf') + mapfile = os.path.join(diag.figdir, + f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf') # heatmap_comparison_old(data_table, diag, mapfile) diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, 'year1': diag.year1, 'year2': diag.year2} From 5c27a94e71d38bc43d0ad309ec8fc227f0b75a43 Mon Sep 17 00:00:00 2001 From: mnurisso Date: Fri, 27 Sep 2024 16:50:13 +0300 Subject: [PATCH 11/26] tests update --- tests/test_performance_indices.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py index 44a3971..0d47daa 100644 --- a/tests/test_performance_indices.py +++ b/tests/test_performance_indices.py @@ -43,12 +43,13 @@ def test_performance_indices_amip(clim): assert are_dicts_equal(data1, data2, TOLERANCE), "YAML files are not identical." + # test performance_indices from commnand line + debug @pytest.mark.parametrize("clim", ['RK08', 'EC23']) def test_cmd_performance_indices_CMIP6(clim): subprocess.run(['performance_indices', 'historical', '1990', '1990', '-j', '2', '-c', - 'tests/config_CMIP6.yml', '-k', clim, '-m', 'EC-Earth3', '-v', 'debug'], - env=env, check=True) + 'tests/config_CMIP6.yml', '-k', clim, '-m', 'EC-Earth3', '-v', 'debug'], + env=env, check=True) file1 = 'tests/table/PI4_' + clim + '_historical_EC-Earth3_r1i1p1f1_1990_1990.yml' file2 = 'tests/table/PI4_' + clim + '_CMIP6_1990_1990.ref' with open(file1, 'r', encoding='utf8') as f1, open(file2, 'r', encoding='utf8') as f2: @@ -57,6 +58,7 @@ def test_cmd_performance_indices_CMIP6(clim): assert are_dicts_equal(data1, data2, TOLERANCE), "YAML files are not identical." + # test on amip but with access from xarray dataset @pytest.mark.parametrize("clim", ['RK08', 'EC23']) def test_performance_indices_amip_xdataset(clim): @@ -78,6 +80,6 @@ def test_pi_plot(): file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml' with open(file1, 'r', encoding='utf8') as f1: data1 = yaml.safe_load(f1) - heatmap_comparison_pi(data1, {'modelname': 'EC-Earth4', 'expname': 'amip', 'year1': '1990', 'year2': '1990'}, 'tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png') - assert os.path.isfile('tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png'), "Plot not created." - os.remove('tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png') + heatmap_comparison_pi(data1, title='CMIP6 RELATIVE PI test') + assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created." + os.remove('PI4_heatmap.pdf') From afc5bc0ec38b06fba25b008a540c586df62a4025 Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Wed, 18 Dec 2024 15:37:27 +0100 Subject: [PATCH 12/26] getting closer --- ecmean/global_mean.py | 22 +++++-------- ecmean/libs/plotting.py | 51 +++++++++++++++++++++++-------- ecmean/performance_indices.py | 31 ++++++------------- tests/test_performance_indices.py | 9 ++++-- 4 files changed, 61 insertions(+), 52 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index 2d9f612..b5873f3 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -309,31 +309,23 @@ def global_mean(exp, year1, year2, yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False) # set longname, get units - plotted = {} + data2plot = {} units_list = [] for var in diag.var_all: - plotted[ref[var]['longname']] = ordered[var] + data2plot[ref[var]['longname']] = ordered[var] units_list = units_list + [ref[var]['units']] - # convert the three dictionary to pandas and then add units - data_table = dict_to_dataframe(plotted) - mean_table = dict_to_dataframe(obsmean) - std_table = dict_to_dataframe(obsstd) - for table in [data_table, mean_table, std_table]: - table.index = table.index + ' [' + units_list + ']' - - loggy.debug(data_table) - # call the heatmap routine for a plot mapfile = os.path.join(diag.figdir, f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf') loggy.info('Figure file is: %s', mapfile) diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, - 'year1': diag.year1, 'year2': diag.year2} + 'year1': diag.year1, 'year2': diag.year2, + 'units_list': units_list} - heatmap_comparison_gm(data_table, mean_table, std_table, - diag_dict, mapfile, addnan=diag.addnan) + heatmap_comparison_gm(data_dict=data2plot, mean_dict=obsmean, std_dict=obsstd, + diag=diag_dict, filemap=mapfile, addnan=diag.addnan) # Print appending one line to table (for tuning) if diag.ftable: @@ -342,7 +334,7 @@ def global_mean(exp, year1, year2, toc = time() # evaluate tic-toc time of postprocessing - loggy.info(f"Postproc done in {toc - tic:.4f} seconds") + loggy.info("Postproc done in %.4f seconds", toc - tic) print('ECmean4 Global Mean succesfully computed!') diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py index 5fb9c97..1df0fc1 100644 --- a/ecmean/libs/plotting.py +++ b/ecmean/libs/plotting.py @@ -8,21 +8,24 @@ ################## import textwrap +import logging from matplotlib.colors import TwoSlopeNorm import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap import seaborn as sns import numpy as np -from .general import dict_to_dataframe +from ecmean.libs.general import dict_to_dataframe +loggy = logging.getLogger(__name__) -def heatmap_comparison_pi(relative_table, diag: dict = None, filemap: str = None, size_model=14, **kwargs): +def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str = None, size_model=14, **kwargs): """ Function to produce a heatmap - seaborn based - for Performance Indices based on CMIP6 ratio Args: - relative_table (pandas.DataFrame or dict): table of relative performance indices + data_dict (dict): dictionary of absolute performance indices + cmip6_dict (dict): dictionary of CMIP6 performance indices diag (dict): dictionary containing diagnostic information filemap (str): path to save the plot size_model (int): size of the PIs in the plot @@ -30,8 +33,23 @@ def heatmap_comparison_pi(relative_table, diag: dict = None, filemap: str = None Keyword Args: title (str): title of the plot, overrides default title """ - if isinstance(relative_table, dict): - relative_table = dict_to_dataframe(relative_table) + + # convert output dictionary to pandas dataframe + data_table = dict_to_dataframe(data_dict) + loggy.debug("Data table") + loggy.debug(data_table) + + # relative pi with re-ordering of rows + relative_table = data_table.div(dict_to_dataframe(cmip6_dict).reindex(diag['longnames'])) + + # compute the total PI mean + relative_table.loc['Total PI'] = relative_table.mean() + + # reordering columns + lll = [(x, y) for x in diag['seasons'] for y in diag['regions']] + relative_table = relative_table[lll] + loggy.debug("Relative table") + loggy.debug(relative_table) # defining plot size myfield = relative_table @@ -82,16 +100,17 @@ def heatmap_comparison_pi(relative_table, diag: dict = None, filemap: str = None plt.close() -def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap: str, +def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: str, addnan=True, size_model=14, size_obs=8, **kwargs): """ Function to produce a heatmap - seaborn based - for Global Mean based on season-averaged standard deviation ratio Args: - data_table (pandas.DataFrame or dict): table of model data - mean_table (pandas.DataFrame or dict): table of observations - std_table (pandas.DataFrame or dict): table of standard deviation + data_dict (dict): table of model data + mean_dict (dict): table of observations + std_dict (dict): table of standard deviation + units_list (str): list of units diag (dict): dictionary containing diagnostic information filemap (str): path to save the plot addnan (bool): add to the final plots also fields which cannot be compared against observations @@ -101,9 +120,17 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap Keyword Args: title (str): title of the plot, overrides default title """ - for tab in [data_table, mean_table, std_table]: - if isinstance(tab, dict): - tab = dict_to_dataframe(tab) + + # convert the three dictionary to pandas and then add units + data_table = dict_to_dataframe(data_dict) + mean_table = dict_to_dataframe(mean_dict) + std_table = dict_to_dataframe(std_dict) + for table in [data_table, mean_table, std_table]: + table.index = table.index + ' [' + diag['units_list'] + ']' + + loggy.debug("Data table") + loggy.debug(data_table) + # define array ratio = (data_table - mean_table) / std_table diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index 12a21a6..5a91a70 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -310,7 +310,7 @@ def performance_indices(exp, year1, year2, toc = time() # evaluate tic-toc time of execution - loggy.info('Done in {:.4f} seconds'.format(toc - tic) + ' with ' + str(diag.numproc) + ' processors') + loggy.info('Done in %.4f seconds with %d processors', toc - tic, diag.numproc) tic = time() @@ -336,40 +336,27 @@ def performance_indices(exp, year1, year2, del filt_piclim[k][f] # set longname, reorganize the dictionaries - plotted = {} + data2plot = {} cmip6 = {} longnames = [] for var in diag.field_all: - plotted[piclim[var]['longname']] = ordered[var] + data2plot[piclim[var]['longname']] = ordered[var] cmip6[piclim[var]['longname']] = filt_piclim[var] longnames = longnames + [piclim[var]['longname']] - # convert output dictionary to pandas dataframe - data_table = dict_to_dataframe(plotted) - loggy.debug(data_table) - - # relative pi with re-ordering of rows - cmip6_table = data_table.div(dict_to_dataframe(cmip6).reindex(longnames)) - - # compute the total PI mean - cmip6_table.loc['Total PI'] = cmip6_table.mean() - - # reordering columns - lll = [(x, y) for x in diag.seasons for y in diag.regions] - cmip6_table = cmip6_table[lll] - loggy.debug(cmip6_table) - # call the heatmap routine for a plot mapfile = os.path.join(diag.figdir, f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf') - # heatmap_comparison_old(data_table, diag, mapfile) diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, - 'year1': diag.year1, 'year2': diag.year2} - heatmap_comparison_pi(cmip6_table, diag=diag_dict, filemap=mapfile) + 'year1': diag.year1, 'year2': diag.year2, + 'seasons': diag.seasons, 'regions': diag.regions, + 'longnames': longnames} + heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, + diag=diag_dict, filemap=mapfile) toc = time() # evaluate tic-toc time of postprocessing - loggy.info('Postproc done in {:.4f} seconds'.format(toc - tic)) + loggy.info("Postproc done in %.4f seconds", toc - tic) print('ECmean4 Performance Indices succesfully computed!') diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py index ff46537..037511c 100644 --- a/tests/test_performance_indices.py +++ b/tests/test_performance_indices.py @@ -21,7 +21,8 @@ # test on coupled @pytest.mark.parametrize("clim", ['RK08', 'EC23']) def test_performance_indices_cpld(clim): - performance_indices('cpld', 1990, 1990, numproc=4, climatology=clim, config='tests/config.yml') + performance_indices('cpld', 1990, 1990, numproc=4, + climatology=clim, config='tests/config.yml') file1 = 'tests/table/PI4_' + clim + '_cpld_EC-Earth4_r1i1p1f1_1990_1990.yml' file2 = 'tests/table/PI4_' + clim + '_cpld_1990_1990.ref' with open(file1, 'r', encoding='utf8') as f1, open(file2, 'r', encoding='utf8') as f2: @@ -34,7 +35,8 @@ def test_performance_indices_cpld(clim): # test on amip @pytest.mark.parametrize("clim", ['RK08', 'EC23']) def test_performance_indices_amip(clim): - performance_indices('amip', 1990, 1990, numproc=1, climatology=clim, config='tests/config.yml', outputdir='tests/pluto') + performance_indices('amip', 1990, 1990, numproc=1, + climatology=clim, config='tests/config.yml', outputdir='tests/pluto') file1 = 'tests/pluto/YAML/PI4_' + clim + '_amip_EC-Earth4_r1i1p1f1_1990_1990.yml' file2 = 'tests/table/PI4_' + clim + '_amip_1990_1990.ref' with open(file1, 'r', encoding='utf8') as f1, open(file2, 'r', encoding='utf8') as f2: @@ -57,6 +59,7 @@ def test_cmd_performance_indices_CMIP6(clim): data2 = yaml.safe_load(f2) assert are_dicts_equal(data1, data2, TOLERANCE), f"YAML files are not identical.\nData1: {data1}\nData2: {data2}" + # test on amip but with access from xarray dataset @pytest.mark.parametrize("clim", ['RK08', 'EC23']) def test_performance_indices_amip_xdataset(clim): @@ -78,6 +81,6 @@ def test_pi_plot(): file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml' with open(file1, 'r', encoding='utf8') as f1: data1 = yaml.safe_load(f1) - heatmap_comparison_pi(data1, title='CMIP6 RELATIVE PI test') + heatmap_comparison_pi(data_dict=data1, cmip6_dict=file1, title='CMIP6 RELATIVE PI test') assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created." os.remove('PI4_heatmap.pdf') From 4ce96cc6781d57b1bc8d2a901abe1d2def44c9e2 Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Wed, 18 Dec 2024 15:48:09 +0100 Subject: [PATCH 13/26] test --- ecmean/global_mean.py | 4 ++-- ecmean/libs/plotting.py | 13 ++++++++----- ecmean/performance_indices.py | 2 +- tests/test_performance_indices.py | 6 ++++-- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index b5873f3..aeab6ab 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -24,7 +24,7 @@ from ecmean import Diagnostic, Supporter, UnitsHandler from ecmean.libs.general import weight_split, write_tuning_table, get_domain, \ - check_time_axis, dict_to_dataframe, init_mydict, \ + check_time_axis, init_mydict, \ check_var_interface, check_var_climatology, set_multiprocessing_start_method from ecmean.libs.files import var_is_there, get_inifiles, load_yaml, make_input_filename from ecmean.libs.formula import formula_wrapper @@ -241,7 +241,7 @@ def global_mean(exp, year1, year2, toc = time() # evaluate tic-toc time of execution - loggy.info('Analysis done in {:.4f} seconds'.format(toc - tic)) + loggy.info('Analysis done in %.4f seconds', toc - tic) tic = time() diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py index 1df0fc1..3ee9749 100644 --- a/ecmean/libs/plotting.py +++ b/ecmean/libs/plotting.py @@ -40,14 +40,18 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str loggy.debug(data_table) # relative pi with re-ordering of rows - relative_table = data_table.div(dict_to_dataframe(cmip6_dict).reindex(diag['longnames'])) + cmip6_table = dict_to_dataframe(cmip6_dict) + if diag is not None and 'longnames' in diag: + cmip6_table = cmip6_table.reindex(diag['longnames']) + relative_table = data_table.div(cmip6_table) # compute the total PI mean relative_table.loc['Total PI'] = relative_table.mean() - # reordering columns - lll = [(x, y) for x in diag['seasons'] for y in diag['regions']] - relative_table = relative_table[lll] + # reordering columns if info is available + if diag is not None and 'regions' in diag and 'seasons' in diag: + lll = [(x, y) for x in diag['seasons'] for y in diag['regions']] + relative_table = relative_table[lll] loggy.debug("Relative table") loggy.debug(relative_table) @@ -110,7 +114,6 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s data_dict (dict): table of model data mean_dict (dict): table of observations std_dict (dict): table of standard deviation - units_list (str): list of units diag (dict): dictionary containing diagnostic information filemap (str): path to save the plot addnan (bool): add to the final plots also fields which cannot be compared against observations diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index 5a91a70..f47cbdd 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -203,7 +203,7 @@ def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist): # diagnostic if region == 'Global': - logging.info('PI for', region, season, var, result[season][region]) + logging.info('PI for %s %s %s %s', region, season, var, result[season][region]) # nested dictionary, to be redifend as a dict to remove lambdas varstat[var] = result diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py index 037511c..cb81d71 100644 --- a/tests/test_performance_indices.py +++ b/tests/test_performance_indices.py @@ -77,10 +77,12 @@ def test_performance_indices_amip_xdataset(clim): assert are_dicts_equal(data1, data2, TOLERANCE),f"YAML files are not identical.\nData1: {data1}\nData2: {data2}" -def test_pi_plot(): +def test_pi_plot(tmp_path): file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml' with open(file1, 'r', encoding='utf8') as f1: data1 = yaml.safe_load(f1) - heatmap_comparison_pi(data_dict=data1, cmip6_dict=file1, title='CMIP6 RELATIVE PI test') + heatmap_comparison_pi(data_dict=data1, cmip6_dict=data1, + title='CMIP6 RELATIVE PI test', + mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf')) assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created." os.remove('PI4_heatmap.pdf') From 65d5146b10bede10924ac56d22f87ff1ee8aef62 Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Wed, 18 Dec 2024 15:57:29 +0100 Subject: [PATCH 14/26] clean up --- ecmean/global_mean.py | 2 +- ecmean/libs/general.py | 40 +++++++------------------------ ecmean/performance_indices.py | 10 ++++---- tests/test_performance_indices.py | 8 +++---- 4 files changed, 19 insertions(+), 41 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index aeab6ab..e9b9d72 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -313,7 +313,7 @@ def global_mean(exp, year1, year2, units_list = [] for var in diag.var_all: data2plot[ref[var]['longname']] = ordered[var] - units_list = units_list + [ref[var]['units']] + units_list.append(ref[var]['units']) # call the heatmap routine for a plot mapfile = os.path.join(diag.figdir, diff --git a/ecmean/libs/general.py b/ecmean/libs/general.py index a8d92bd..d966b58 100644 --- a/ecmean/libs/general.py +++ b/ecmean/libs/general.py @@ -17,27 +17,8 @@ ################## -# def is_number(s): -# """Check if input is a float type""" - -# try: -# float(s) -# return True -# except ValueError: -# return False - loggy = logging.getLogger(__name__) -# def numeric_loglevel(loglevel): -# """Define the logging level """ -# # log level with logging -# # currently basic definition trought the text -# numeric_level = getattr(logging, loglevel.upper(), None) -# if not isinstance(numeric_level, int): -# raise ValueError(f'Invalid log level: {loglevel}') - -# return numeric_level - def set_multiprocessing_start_method(): """Function to set the multiprocessing spawn method to fork""" plat = platform.system() @@ -163,23 +144,20 @@ def get_domain(var, face): return domain[comp] -# def get_component(face): # unused function -# """Return a dictionary providing the domain associated with a variable -# (the interface file specifies the domain for each component instead)""" - -# d = face['component'] -# p = dict(zip([list(d.values())[x]['domain'] -# for x in range(len(d.values()))], d.keys())) -# return p - - #################### # OUTPUT FUNCTIONS # #################### def dict_to_dataframe(varstat): - """very clumsy conversion of the nested 3-level dictionary - to a pd.dataframe: NEED TO BE IMPROVED""" + """ + Converts a nested 3-level dictionary to a pandas DataFrame. + + Parameters: + varstat (dict): Nested dictionary with 3 levels. + + Returns: + pd.DataFrame: Transformed DataFrame with hierarchical keys. + """ data_table = {} for i in varstat.keys(): pippo = {} diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index f47cbdd..f685c7d 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -19,7 +19,7 @@ import yaml import dask from ecmean import Diagnostic, Supporter, UnitsHandler -from ecmean.libs.general import weight_split, get_domain, dict_to_dataframe, \ +from ecmean.libs.general import weight_split, get_domain, \ check_time_axis, init_mydict, check_var_interface, check_var_climatology, \ set_multiprocessing_start_method from ecmean.libs.files import var_is_there, get_inifiles, load_yaml, \ @@ -338,11 +338,11 @@ def performance_indices(exp, year1, year2, # set longname, reorganize the dictionaries data2plot = {} cmip6 = {} - longnames = [] + longnames = [piclim[var]['longname'] for var in diag.field_all] for var in diag.field_all: - data2plot[piclim[var]['longname']] = ordered[var] - cmip6[piclim[var]['longname']] = filt_piclim[var] - longnames = longnames + [piclim[var]['longname']] + longname = piclim[var]['longname'] + data2plot[longname] = ordered[var] + cmip6[longname] = filt_piclim[var] # call the heatmap routine for a plot mapfile = os.path.join(diag.figdir, diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py index cb81d71..0bef6ae 100644 --- a/tests/test_performance_indices.py +++ b/tests/test_performance_indices.py @@ -13,7 +13,7 @@ from ecmean.libs.plotting import heatmap_comparison_pi # set TOLERANCE -TOLERANCE = 1e-3 +TOLERANCE = 1e-1 # set up coverage env var env = {**os.environ, "COVERAGE_PROCESS_START": ".coveragerc"} @@ -35,7 +35,7 @@ def test_performance_indices_cpld(clim): # test on amip @pytest.mark.parametrize("clim", ['RK08', 'EC23']) def test_performance_indices_amip(clim): - performance_indices('amip', 1990, 1990, numproc=1, + performance_indices('amip', 1990, 1990, numproc=1, climatology=clim, config='tests/config.yml', outputdir='tests/pluto') file1 = 'tests/pluto/YAML/PI4_' + clim + '_amip_EC-Earth4_r1i1p1f1_1990_1990.yml' file2 = 'tests/table/PI4_' + clim + '_amip_1990_1990.ref' @@ -81,8 +81,8 @@ def test_pi_plot(tmp_path): file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml' with open(file1, 'r', encoding='utf8') as f1: data1 = yaml.safe_load(f1) - heatmap_comparison_pi(data_dict=data1, cmip6_dict=data1, - title='CMIP6 RELATIVE PI test', + heatmap_comparison_pi(data_dict=data1, cmip6_dict=data1, + title='CMIP6 RELATIVE PI test', mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf')) assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created." os.remove('PI4_heatmap.pdf') From 4f4497ca1478719683b284552b6c30967b94f84f Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Wed, 18 Dec 2024 16:40:48 +0100 Subject: [PATCH 15/26] preparation functions --- ecmean/global_mean.py | 35 ++++---------- ecmean/libs/plotting.py | 90 +++++++++++++++++++++++++++++++++-- ecmean/performance_indices.py | 22 +++------ 3 files changed, 101 insertions(+), 46 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index e9b9d72..e1130ea 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -32,7 +32,7 @@ from ecmean.libs.units import units_extra_definition from ecmean.libs.ncfixers import xr_preproc from ecmean.libs.parser import parse_arguments -from ecmean.libs.plotting import heatmap_comparison_gm +from ecmean.libs.plotting import heatmap_comparison_gm, prepare_clim_dictionaries_gm from ecmean.libs.loggy import setup_logger dask.config.set(scheduler="synchronous") @@ -245,37 +245,22 @@ def global_mean(exp, year1, year2, tic = time() + # loop on the variables to create the output table global_table = [] - obsmean = {} - obsstd = {} - for var in diag.var_atm + diag.var_oce + diag.var_ice: - + for var in diag.var_all: gamma = ref[var] - # get the predifined value or the ALL GLobal one + # get the predefined value or the ALL Global one if isinstance(gamma['obs'], dict): tabval = gamma['obs']['ALL']['Global'] outval = str(tabval['mean']) + '\u00B1' + str(tabval['std']) else: outval = gamma['obs'] - # extract from yaml table for obs mean and standard deviation - mmm = init_mydict(diag.seasons, diag.regions) - sss = init_mydict(diag.seasons, diag.regions) - # if we have all the obs/std available - if isinstance(gamma['obs'], dict): - for season in diag.seasons: - for region in diag.regions: - mmm[season][region] = gamma['obs'][season][region]['mean'] - sss[season][region] = gamma['obs'][season][region]['std'] - # if only global observation is available - else: - mmm['ALL']['Global'] = gamma['obs'] - obsmean[gamma['longname']] = mmm - obsstd[gamma['longname']] = sss - if 'year1' in gamma.keys(): years = str(gamma['year1']) + '-' + str(gamma['year2']) + else: + raise ValueError('Year1 and Year2 are not defined in the reference file') out_sequence = [var, gamma['longname'], gamma['units'], varmean[var]['ALL']['Global']] if diag.ftrend: @@ -308,12 +293,8 @@ def global_mean(exp, year1, year2, with open(yamlfile, 'w', encoding='utf-8') as file: yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False) - # set longname, get units - data2plot = {} - units_list = [] - for var in diag.var_all: - data2plot[ref[var]['longname']] = ordered[var] - units_list.append(ref[var]['units']) + # prepare dictionaries for global mean + obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(ordered, ref, diag.var_all, diag.seasons, diag.regions) # call the heatmap routine for a plot mapfile = os.path.join(diag.figdir, diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py index 3ee9749..91786cf 100644 --- a/ecmean/libs/plotting.py +++ b/ecmean/libs/plotting.py @@ -14,7 +14,7 @@ from matplotlib.colors import ListedColormap import seaborn as sns import numpy as np -from ecmean.libs.general import dict_to_dataframe +from ecmean.libs.general import dict_to_dataframe, init_mydict loggy = logging.getLogger(__name__) @@ -94,7 +94,7 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str if filemap is None: if diag is not None: - filemap = f'PI4_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf' + filemap = f'PI4_{diag["climatology"]}_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf' else: filemap = 'PI4_heatmap.pdf' @@ -134,7 +134,6 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s loggy.debug("Data table") loggy.debug(data_table) - # define array ratio = (data_table - mean_table) / std_table if addnan: @@ -194,7 +193,92 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s axs.figure.axes[-1].yaxis.label.set_size(15) axs.set(xlabel=None) + if filemap is None: + if diag is not None: + filemap = f'Global_Mean_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf' + else: + filemap = 'Global_Mean_Heatmap.pdf' + # save and close plt.savefig(filemap) plt.cla() plt.close() + +def prepare_clim_dictionaries_pi(data, clim, shortnames): + """ + Prepare dictionaries for plotting + Args: + data: dictionary with data + clim: dictionary with climatology + shortnames: list of shortnames + Returns: + data2plot: dictionary with data for plotting + cmip6: dictionary with CMIP6 data + longnames: list of longnames + """ + + # uniform dictionaries + filt_piclim = {} + for k in clim.keys(): + filt_piclim[k] = clim[k]['cmip6'] + for f in ['models', 'year1', 'year2']: + del filt_piclim[k][f] + + # set longname, reorganize the dictionaries + data2plot = {} + cmip6 = {} + longnames = [clim[var]['longname'] for var in shortnames] + for var in shortnames: + longname = clim[var]['longname'] + data2plot[longname] = data[var] + cmip6[longname] = filt_piclim[var] + + return data2plot, cmip6, longnames + +def prepare_clim_dictionaries_gm(data, clim, shortnames, seasons, regions): + """ + Prepare dictionaries for global mean plotting + + Args: + data: dictionary with the data + clim: dictionary with the climatology + shortnames: list of shortnames + seasons: list of seasons + regions: list of regions + + Returns: + obsmean: dictionary with the mean + obsstd: dictionary with the standard deviation + data2plot: dictionary with the data to plot + units_list: list of units + """ + + # loop on the variables to create obsmean and obsstd + obsmean = {} + obsstd = {} + for var in shortnames: + gamma = clim[var] + + # extract from yaml table for obs mean and standard deviation + mmm = init_mydict(seasons, regions) + sss = init_mydict(seasons, regions) + # if we have all the obs/std available + if isinstance(gamma['obs'], dict): + for season in seasons: + for region in regions: + mmm[season][region] = gamma['obs'][season][region]['mean'] + sss[season][region] = gamma['obs'][season][region]['std'] + # if only global observation is available + else: + mmm['ALL']['Global'] = gamma['obs'] + obsmean[gamma['longname']] = mmm + obsstd[gamma['longname']] = sss + + # set longname, get units + data2plot = {} + units_list = [] + for var in shortnames: + data2plot[clim[var]['longname']] = data[var] + units_list.append(clim[var]['units']) + + return obsmean, obsstd, data2plot, units_list \ No newline at end of file diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index f685c7d..a331509 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -29,7 +29,7 @@ from ecmean.libs.areas import guess_bounds from ecmean.libs.units import units_extra_definition from ecmean.libs.ncfixers import xr_preproc, adjust_clim_file -from ecmean.libs.plotting import heatmap_comparison_pi +from ecmean.libs.plotting import heatmap_comparison_pi, prepare_clim_dictionaries_pi from ecmean.libs.parser import parse_arguments from ecmean.libs.loggy import setup_logger @@ -328,26 +328,15 @@ def performance_indices(exp, year1, year2, # to this date, only EC23 support comparison with CMIP6 data if diag.climatology == 'EC23': - # uniform dictionaries - filt_piclim = {} - for k in piclim.keys(): - filt_piclim[k] = piclim[k]['cmip6'] - for f in ['models', 'year1', 'year2']: - del filt_piclim[k][f] - - # set longname, reorganize the dictionaries - data2plot = {} - cmip6 = {} - longnames = [piclim[var]['longname'] for var in diag.field_all] - for var in diag.field_all: - longname = piclim[var]['longname'] - data2plot[longname] = ordered[var] - cmip6[longname] = filt_piclim[var] + # prepare the data for the heatmap from the original yaml dictionaries + data2plot, cmip6, longnames = prepare_clim_dictionaries_pi(data=varstat, clim=piclim, + shortnames=diag.field_all) # call the heatmap routine for a plot mapfile = os.path.join(diag.figdir, f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf') diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, + 'climatology': diag.climatology, 'year1': diag.year1, 'year2': diag.year2, 'seasons': diag.seasons, 'regions': diag.regions, 'longnames': longnames} @@ -360,6 +349,7 @@ def performance_indices(exp, year1, year2, print('ECmean4 Performance Indices succesfully computed!') + def pi_entry_point(): """ Command line interface to run the global_mean function From b30d9e7ef762f839d2d1cac4cf98f033ac9b8f0a Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Wed, 18 Dec 2024 17:04:53 +0100 Subject: [PATCH 16/26] example preprocess --- ecmean/global_mean.py | 2 +- tests/test_performance_indices.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index e1130ea..e964d76 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -298,7 +298,7 @@ def global_mean(exp, year1, year2, # call the heatmap routine for a plot mapfile = os.path.join(diag.figdir, - f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf') + f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.png') loggy.info('Figure file is: %s', mapfile) diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py index 0bef6ae..9feef91 100644 --- a/tests/test_performance_indices.py +++ b/tests/test_performance_indices.py @@ -10,7 +10,7 @@ from ecmean.performance_indices import performance_indices from ecmean.libs.ncfixers import xr_preproc from ecmean.libs.general import are_dicts_equal -from ecmean.libs.plotting import heatmap_comparison_pi +from ecmean.libs.plotting import heatmap_comparison_pi, prepare_clim_dictionaries_pi # set TOLERANCE TOLERANCE = 1e-1 @@ -80,8 +80,12 @@ def test_performance_indices_amip_xdataset(clim): def test_pi_plot(tmp_path): file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml' with open(file1, 'r', encoding='utf8') as f1: - data1 = yaml.safe_load(f1) - heatmap_comparison_pi(data_dict=data1, cmip6_dict=data1, + data = yaml.safe_load(f1) + file2 = 'ecmean/climatology/EC23/pi_climatology_EC23.yml' + with open(file2, 'r', encoding='utf8') as f1: + clim = yaml.safe_load(f1) + data_dict, clim_dict, _ = prepare_clim_dictionaries_pi(data=data, clim=clim, shortnames=data.keys()) + heatmap_comparison_pi(data_dict=data_dict, cmip6_dict=clim_dict, title='CMIP6 RELATIVE PI test', mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf')) assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created." From be862d1efefd51cb03fb21091a0c6196b4368432 Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Fri, 20 Dec 2024 10:16:06 +0100 Subject: [PATCH 17/26] performance indidces as a class --- ecmean/__init__.py | 4 +- ecmean/libs/diagnostic.py | 4 +- ecmean/performance_indices.py | 556 ++++++++++++++++------------------ 3 files changed, 264 insertions(+), 300 deletions(-) diff --git a/ecmean/__init__.py b/ecmean/__init__.py index a43cb63..916a143 100644 --- a/ecmean/__init__.py +++ b/ecmean/__init__.py @@ -7,6 +7,6 @@ from ecmean.libs.support import Supporter from ecmean.libs.units import UnitsHandler from ecmean.global_mean import global_mean -from ecmean.performance_indices import performance_indices +from ecmean.performance_indices import PerformanceIndices, performance_indices -__all__ = ["global_mean", "performance_indices", "Diagnostic", "Supporter", "UnitsHandler"] +__all__ = ["global_mean", "PerformanceIndices", "performance_indices", "Diagnostic", "Supporter", "UnitsHandler"] diff --git a/ecmean/libs/diagnostic.py b/ecmean/libs/diagnostic.py index 02a6290..3cab468 100644 --- a/ecmean/libs/diagnostic.py +++ b/ecmean/libs/diagnostic.py @@ -40,7 +40,7 @@ def __init__(self, args): self.resolution = getattr(args, 'resolution', '') self.ensemble = getattr(args, 'ensemble', 'r1i1p1f1') self.addnan = getattr(args, 'addnan', False) - self.funcname = args.funcname.split(".")[1] + self.funcname = args.funcname self.version = version if self.year1 == self.year2: self.ftrend = False @@ -86,7 +86,7 @@ def __init__(self, args): self.cfg_global_mean(cfg) # init for performance indices - if self.funcname in 'performance_indices': + if self.funcname in 'PerformanceIndices': self.cfg_performance_indices(cfg) # setting up interface file diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index a331509..69a442d 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -35,333 +35,297 @@ dask.config.set(scheduler="synchronous") - -def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist): - """Main parallel diagnostic worker for performance indices - - Args: - util: the utility dictionary, including mask, area and remap weights - piclim: the reference climatology for the global mean - face: the interface to be used to access the data - diag: the diagnostic class object - field_3d: the list of 3d vars to be handled differently - varstat: the dictionary for the variable PI (empty) - varlist: the variable on which compute the global mean - - Returns: - vartrend under the form of a dictionaries +class PerformanceIndices: """ - - loggy = logging.getLogger(__name__) - - for var in varlist: - - # store NaN in dict (can't use defaultdict due to multiprocessing) - result = init_mydict(diag.seasons, diag.regions) - - if check_var_interface(var, face): - - # get domain - domain = get_domain(var, face) - - # get masks - domain_mask = getattr(util, domain + 'mask') - - # check if required variables are there: use interface file - # check into first file, and load also model variable units - infile = make_input_filename(var, face, diag) - - # check if var is available - isavail, varunit = var_is_there(infile, var, face) - - # if the variable is available - if isavail: - - # perform the unit conversion extracting offset and factor - units_handler = UnitsHandler(var, org_units=varunit, clim=piclim, face=face) - offset, factor = units_handler.offset, units_handler.factor - - # open file: chunking on time only, might be improved - if not isinstance(infile, (xr.DataArray, xr.Dataset)): - xfield = xr.open_mfdataset(infile, preprocess=xr_preproc, chunks={'time': 12}) - else: - xfield = infile - - # in case of big files with multi year, be sure of having opened the right records - xfield = xfield.sel(time=xfield.time.dt.year.isin(diag.years_joined)) - - # check time axis - check_time_axis(xfield.time, diag.years_joined) - - # get the data-array field for the required var - outfield = formula_wrapper(var, face, xfield) - - # mean over time and fixing of the units - for season in diag.seasons: - - loggy.debug(season) - - # copy of the full field - tmean = outfield.copy(deep=True) - - # get filenames for climatology - clim, vvvv = get_clim_files(piclim, var, diag, season) - - # open climatology files, fix their metadata - cfield = adjust_clim_file( - xr.open_mfdataset(clim, preprocess=xr_preproc)) - vfield = adjust_clim_file( - xr.open_mfdataset(vvvv, preprocess=xr_preproc), remove_zero=True) - - # season selection - if season != 'ALL': - tmean = tmean.sel(time=tmean.time.dt.season.isin(season)) - cfield = cfield.sel(time=cfield.time.dt.season.isin(season)) - vfield = vfield.sel(time=vfield.time.dt.season.isin(season)) - - # averaging - tmean = tmean.mean(dim='time') - cfield = cfield.load() # this ensure no crash with multiprocessing - vfield = vfield.load() - - # safe check for old RK08 which has a different format - if diag.climatology != 'RK08': - cfield = cfield.mean(dim='time') - vfield = vfield.mean(dim='time') - - tmean = tmean * factor + offset - - # this computation is required to ensure that file access is - # done in a correct way. resource errors found if disabled in some - # specific configuration - tmean = tmean.compute() - - # apply interpolation, if fixer is availble and with different - # grids - fix = getattr(util, f'{domain}fix') - remap = getattr(util, f'{domain}remap') - - if fix: - tmean = fix(tmean, keep_attrs=True) - try: - final = remap(tmean, keep_attrs=True) - except ValueError: - loggy.error('Cannot interpolate %s with the current weights...', var) - continue - - # vertical interpolation - if var in field_3d: - - # xarray interpolation on plev, forcing to be in Pascal - final = final.metpy.convert_coordinate_units('plev', 'Pa') - if set(final['plev'].data) != set(cfield['plev'].data): - loggy.warning('%s: Need to interpolate vertical levels...', var) - final = final.interp(plev=cfield['plev'].data, method='linear') - - # safety check for missing values - sample = final.isel(lon=0, lat=0) - if np.sum(np.isnan(sample)) != 0: - loggy.warning('%s: You have NaN after the interpolation, this will affect your PIs...', var) - levnan = cfield['plev'].where(np.isnan(sample)) - loggy.warning(levnan[~np.isnan(levnan)].data) - - # zonal mean - final = final.mean(dim='lon') - - # compute PI - complete = (final - cfield)**2 / vfield - - # compute vertical bounds as weights - bounds_lev = guess_bounds(complete['plev'], name='plev') - bounds = abs(bounds_lev[:, 0] - bounds_lev[:, 1]) - www = xr.DataArray( - bounds, coords=[ - complete['plev']], dims=['plev']) - - # vertical mean - outarray = complete.weighted(www).mean(dim='plev') - - # horizontal averaging with land-sea mask - else: - - complete = (final - cfield)**2 / vfield - outarray = mask_field(xfield=complete, - mask_type=piclim[var]['mask'], - dom=domain, mask=domain_mask) - - # loop on different regions - for region in diag.regions: - - slicearray = select_region(outarray, region) - - # latitude-based averaging - weights = np.cos(np.deg2rad(slicearray.lat)) - out = slicearray.weighted(weights).mean().data - # store the PI - result[season][region] = round(float(out), 3) - - # diagnostic - if region == 'Global': - logging.info('PI for %s %s %s %s', region, season, var, result[season][region]) - - # nested dictionary, to be redifend as a dict to remove lambdas - varstat[var] = result - - -def performance_indices(exp, year1, year2, - config='config.yml', - loglevel='WARNING', - numproc=1, - climatology='EC23', - interface=None, model=None, ensemble='r1i1p1f1', - silent=None, xdataset=None, - outputdir=None): - """Main performance indices calculation - - :param exp: Experiment name or ID - :param year1: Initial year - :param year2: Final year - :param config: configuration file, optional (default 'config.yml') - :param loglevel: level of logging, optional (default 'WARNING') - :param numproc: number of multiprocessing cores, optional (default '1') - :param interface: interface file to be used, optional (default as specifified in config file) - :param model: model to be analyzed, optional (default as specifified in config file) - :param ensemble: ensemble member to be analyzed, optional (default as 'r1i1p1f1') - :param silent: do not print anything to std output, optional - :param climatology: climatology to be compared. default: EC23. Options: [RK08, EC22, EC23] - :param xdataset: xarray dataset - already open - to be used without looking for files - :param outputdir: if specified, override the target destination in the configuration files for both tables and figures - - :returns: the performance indices yaml file and heatmap - + Class to compute the performance indices for a given experiment and years. """ - # create a name space with all the arguments to feed the Diagnostic class - # This is not the neatest option, but it is very compact - funcname = __name__ - argv = argparse.Namespace(**locals()) - - # set loglevel - loggy = setup_logger(level=argv.loglevel) - - # set dask and multiprocessing fork - plat, mprocmethod = set_multiprocessing_start_method() - loggy.info('Running on %s and multiprocessing method set as "%s"', plat, mprocmethod) - - # start time - tic = time() - - # initialize the diag class, load the inteface and the reference file - diag = Diagnostic(argv) - face = load_yaml(diag.interface) - piclim = load_yaml(diag.climfile) - - # check that everyhing is there - check_var_climatology(diag.field_all, piclim.keys()) + def __init__(self, exp, year1, year2, config='config.yml', + loglevel='WARNING', numproc=1, climatology='EC23', + interface=None, model=None, ensemble='r1i1p1f1', + silent=None, xdataset=None, outputdir=None): + """Initialize the PerformanceIndices class with the given parameters.""" + + self.exp = exp + self.year1 = year1 + self.year2 = year2 + self.config = config + self.loglevel = loglevel + self.numproc = numproc + self.climatology = climatology + self.interface = interface + self.model = model + self.ensemble = ensemble + self.silent = silent + self.xdataset = xdataset + self.outputdir = outputdir + self.loggy = setup_logger(level=self.loglevel) + self.diag = None + self.face = None + self.piclim = None + self.util_dictionary = None + self.varstat = None + self.funcname = self.__class__.__name__ + self.start_time = time() + + def toc(self, message): + """Update the timer and log the elapsed time.""" + elapsed_time = time() - self.start_time + self.start_time = time() + self.loggy.info('%s time: %.2f seconds', message, elapsed_time) + + def prepare(self): + """Prepare the necessary components for performance indices calculation.""" + # set dask and multiprocessing fork + plat, mprocmethod = set_multiprocessing_start_method() + self.loggy.info('Running on %s and multiprocessing method set as "%s"', plat, mprocmethod) + + # initialize the diag class, load the interface and the reference file + self.diag = Diagnostic(argparse.Namespace(**self.__dict__)) + self.face = load_yaml(self.diag.interface) + self.piclim = load_yaml(self.diag.climfile) + + # check that everything is there + check_var_climatology(self.diag.field_all, self.piclim.keys()) + + # Create missing folders + os.makedirs(self.diag.tabdir, exist_ok=True) + os.makedirs(self.diag.figdir, exist_ok=True) + + # new bunch of functions to set grids, create correction command, masks and areas + comp = self.face['model']['component'] # Get component for each domain + + # all clim have the same grid, read from the first clim available and get target grid + clim, _ = get_clim_files(self.piclim, 'tas', self.diag, 'ALL') + target_remap_grid = xr.open_dataset(clim) + + # get file info files + inifiles = get_inifiles(self.face, self.diag) + + # add missing unit definitions + units_extra_definition() + + # create remap dictionary with atm and oce interpolators + self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=False, remap=True, targetgrid=target_remap_grid) + + def run(self): + """Run the performance indices calculation.""" + # main loop: manager is required for shared variables + mgr = Manager() + + # dictionaries are shared, so they have to be passed as functions + self.varstat = mgr.dict() + processes = [] + + # loop on the variables, create the parallel process + for varlist in weight_split(self.diag.field_all, self.diag.numproc): + core = Process(target=self.pi_worker, args=(self.util_dictionary, self.piclim, self.face, self.diag, self.diag.field_3d, self.varstat, varlist)) + core.start() + processes.append(core) + + # wait for the processes to finish + for proc in processes: + proc.join() + + def store(self, yamlfile=None): + """Store the performance indices in a yaml file.""" + # order according to the original request the fields in the yaml file + ordered = {} + for item in self.diag.field_all: + ordered[item] = self.varstat[item] + + # dump the yaml file for PI, including all the seasons (need to copy to avoid mess) + if yamlfile is None: + yamlfile = self.diag.tabdir / f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.yml' + with open(yamlfile, 'w', encoding='utf-8') as file: + yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False) + + def plot(self, mapfile=None): + """Generate the heatmap for performance indices.""" + # to this date, only EC23 support comparison with CMIP6 data + if self.diag.climatology == 'EC23': + # prepare the data for the heatmap from the original yaml dictionaries + data2plot, cmip6, longnames = prepare_clim_dictionaries_pi(data=self.varstat, + clim=self.piclim, + shortnames=self.diag.field_all) + + # call the heatmap routine for a plot + if mapfile is None: + mapfile = os.path.join(self.diag.figdir, f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.pdf') + diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname, 'climatology': self.diag.climatology, 'year1': self.diag.year1, 'year2': self.diag.year2, 'seasons': self.diag.seasons, 'regions': self.diag.regions, 'longnames': longnames} + heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=diag_dict, filemap=mapfile) + + @staticmethod + def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist): + """Main parallel diagnostic worker for performance indices.""" + loggy = logging.getLogger(__name__) + + for var in varlist: + # store NaN in dict (can't use defaultdict due to multiprocessing) + result = init_mydict(diag.seasons, diag.regions) + + if check_var_interface(var, face): + # get domain + domain = get_domain(var, face) + + # get masks + domain_mask = getattr(util, domain + 'mask') + + # check if required variables are there: use interface file + # check into first file, and load also model variable units + infile = make_input_filename(var, face, diag) + + # check if var is available + isavail, varunit = var_is_there(infile, var, face) + + # if the variable is available + if isavail: + # perform the unit conversion extracting offset and factor + units_handler = UnitsHandler(var, org_units=varunit, clim=piclim, face=face) + offset, factor = units_handler.offset, units_handler.factor + + # open file: chunking on time only, might be improved + if not isinstance(infile, (xr.DataArray, xr.Dataset)): + xfield = xr.open_mfdataset(infile, preprocess=xr_preproc, chunks={'time': 12}) + else: + xfield = infile + + # in case of big files with multi year, be sure of having opened the right records + xfield = xfield.sel(time=xfield.time.dt.year.isin(diag.years_joined)) + + # check time axis + check_time_axis(xfield.time, diag.years_joined) + + # get the data-array field for the required var + outfield = formula_wrapper(var, face, xfield) - # Create missing folders - os.makedirs(diag.tabdir, exist_ok=True) - os.makedirs(diag.figdir, exist_ok=True) + # mean over time and fixing of the units + for season in diag.seasons: + loggy.debug(season) - # new bunch of functions to set grids, create correction command, masks - # and areas - comp = face['model']['component'] # Get component for each domain + # copy of the full field + tmean = outfield.copy(deep=True) - # all clim have the same grid, read from the first clim available and get - # target grid - clim, _ = get_clim_files(piclim, 'tas', diag, 'ALL') - target_remap_grid = xr.open_dataset(clim) + # get filenames for climatology + clim, vvvv = get_clim_files(piclim, var, diag, season) - # get file info files - inifiles = get_inifiles(face, diag) + # open climatology files, fix their metadata + cfield = adjust_clim_file(xr.open_mfdataset(clim, preprocess=xr_preproc)) + vfield = adjust_clim_file(xr.open_mfdataset(vvvv, preprocess=xr_preproc), remove_zero=True) - # add missing unit definitions - units_extra_definition() + # season selection + if season != 'ALL': + tmean = tmean.sel(time=tmean.time.dt.season.isin(season)) + cfield = cfield.sel(time=cfield.time.dt.season.isin(season)) + vfield = vfield.sel(time=vfield.time.dt.season.isin(season)) - # create remap dictionary with atm and oce interpolators - util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], - areas=False, remap=True, - targetgrid=target_remap_grid) + # averaging + tmean = tmean.mean(dim='time') + cfield = cfield.load() # this ensure no crash with multiprocessing + vfield = vfield.load() - # main loop: manager is required for shared variables - mgr = Manager() + # safe check for old RK08 which has a different format + if diag.climatology != 'RK08': + cfield = cfield.mean(dim='time') + vfield = vfield.mean(dim='time') - # dictionaries are shared, so they have to be passed as functions - varstat = mgr.dict() - processes = [] + tmean = tmean * factor + offset - toc = time() - loggy.info('Preproc in {:.4f} seconds'.format(toc - tic)) - tic = time() + # this computation is required to ensure that file access is done in a correct way. resource errors found if disabled in some specific configuration + tmean = tmean.compute() - # loop on the variables, create the parallel process - for varlist in weight_split(diag.field_all, diag.numproc): - # print(varlist) + # apply interpolation, if fixer is available and with different grids + fix = getattr(util, f'{domain}fix') + remap = getattr(util, f'{domain}remap') - core = Process(target=pi_worker, args=(util_dictionary, piclim, - face, diag, diag.field_3d, varstat, varlist)) - core.start() - processes.append(core) + if fix: + tmean = fix(tmean, keep_attrs=True) + try: + final = remap(tmean, keep_attrs=True) + except ValueError: + loggy.error('Cannot interpolate %s with the current weights...', var) + continue - # wait for the processes to finish - for proc in processes: - proc.join() + # vertical interpolation + if var in field_3d: + # xarray interpolation on plev, forcing to be in Pascal + final = final.metpy.convert_coordinate_units('plev', 'Pa') + if set(final['plev'].data) != set(cfield['plev'].data): + loggy.warning('%s: Need to interpolate vertical levels...', var) + final = final.interp(plev=cfield['plev'].data, method='linear') - toc = time() - # evaluate tic-toc time of execution - loggy.info('Done in %.4f seconds with %d processors', toc - tic, diag.numproc) + # safety check for missing values + sample = final.isel(lon=0, lat=0) + if np.sum(np.isnan(sample)) != 0: + loggy.warning('%s: You have NaN after the interpolation, this will affect your PIs...', var) + levnan = cfield['plev'].where(np.isnan(sample)) + loggy.warning(levnan[~np.isnan(levnan)].data) - tic = time() + # zonal mean + final = final.mean(dim='lon') - # order according to the original request the fields in the yaml file - ordered = {} - for item in diag.field_all: - ordered[item] = varstat[item] + # compute PI + complete = (final - cfield)**2 / vfield - # dump the yaml file for PI, including all the seasons (need to copy to avoid mess) - yamlfile = diag.tabdir / \ - f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.yml' - with open(yamlfile, 'w', encoding='utf-8') as file: - yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False) + # compute vertical bounds as weights + bounds_lev = guess_bounds(complete['plev'], name='plev') + bounds = abs(bounds_lev[:, 0] - bounds_lev[:, 1]) + www = xr.DataArray(bounds, coords=[complete['plev']], dims=['plev']) - # to this date, only EC23 support comparison with CMIP6 data - if diag.climatology == 'EC23': + # vertical mean + outarray = complete.weighted(www).mean(dim='plev') - # prepare the data for the heatmap from the original yaml dictionaries - data2plot, cmip6, longnames = prepare_clim_dictionaries_pi(data=varstat, clim=piclim, - shortnames=diag.field_all) + # horizontal averaging with land-sea mask + else: + complete = (final - cfield)**2 / vfield + outarray = mask_field(xfield=complete, mask_type=piclim[var]['mask'], dom=domain, mask=domain_mask) - # call the heatmap routine for a plot - mapfile = os.path.join(diag.figdir, - f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf') - diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, - 'climatology': diag.climatology, - 'year1': diag.year1, 'year2': diag.year2, - 'seasons': diag.seasons, 'regions': diag.regions, - 'longnames': longnames} - heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, - diag=diag_dict, filemap=mapfile) + # loop on different regions + for region in diag.regions: + slicearray = select_region(outarray, region) - toc = time() - # evaluate tic-toc time of postprocessing - loggy.info("Postproc done in %.4f seconds", toc - tic) - print('ECmean4 Performance Indices succesfully computed!') + # latitude-based averaging + weights = np.cos(np.deg2rad(slicearray.lat)) + out = slicearray.weighted(weights).mean().data + # store the PI + result[season][region] = round(float(out), 3) + # diagnostic + if region == 'Global': + logging.info('PI for %s %s %s %s', region, season, var, result[season][region]) + + # nested dictionary, to be redefined as a dict to remove lambdas + varstat[var] = result def pi_entry_point(): """ - Command line interface to run the global_mean function + Command line interface to run the performance indices calculation. """ - # read arguments from command line args = parse_arguments(sys.argv[1:], script='pi') performance_indices(exp=args.exp, year1=args.year1, year2=args.year2, - numproc=args.numproc, - loglevel=args.loglevel, - climatology=args.climatology, - interface=args.interface, config=args.config, - model=args.model, ensemble=args.ensemble, - outputdir=args.outputdir) + numproc=args.numproc, loglevel=args.loglevel, + climatology=args.climatology, + interface=args.interface, config=args.config, + model=args.model, ensemble=args.ensemble, outputdir=args.outputdir) + +def performance_indices(exp, year1, year2, config='config.yml', loglevel='WARNING', + numproc=1, climatology='EC23', interface=None, model=None, + ensemble='r1i1p1f1', silent=None, xdataset=None, outputdir=None): + """ + Wrapper function to compute the performance indices for a given experiment and years. + """ + pi = PerformanceIndices(exp=exp, year1=year1, year2=year2, config=config, + loglevel=loglevel, numproc=numproc, climatology=climatology, + interface=interface, model=model, ensemble=ensemble, silent=silent, + xdataset=xdataset, outputdir=outputdir) + pi.prepare() + pi.toc('Preparation') + pi.run() + pi.toc('Calculation') + pi.store() + pi.plot() + pi.toc('Plotting') From 884137cc82e63b2530edad3824a942c26fbd957a Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Fri, 20 Dec 2024 10:35:15 +0100 Subject: [PATCH 18/26] global mean conversion --- ecmean/__init__.py | 5 +- ecmean/global_mean.py | 506 ++++++++++++++++---------------------- ecmean/libs/diagnostic.py | 2 +- 3 files changed, 222 insertions(+), 291 deletions(-) diff --git a/ecmean/__init__.py b/ecmean/__init__.py index 916a143..ebb2f0b 100644 --- a/ecmean/__init__.py +++ b/ecmean/__init__.py @@ -6,7 +6,8 @@ from ecmean.libs.diagnostic import Diagnostic from ecmean.libs.support import Supporter from ecmean.libs.units import UnitsHandler -from ecmean.global_mean import global_mean +from ecmean.global_mean import GlobalMean, global_mean from ecmean.performance_indices import PerformanceIndices, performance_indices -__all__ = ["global_mean", "PerformanceIndices", "performance_indices", "Diagnostic", "Supporter", "UnitsHandler"] +__all__ = ["GlobalMean", "global_mean", "PerformanceIndices", + "performance_indices", "Diagnostic", "Supporter", "UnitsHandler"] diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index e964d76..bdeda87 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ - python3 version of ECmean global mean tool. - Using a reference file from yaml and Xarray + python3 version of ECmean global mean tool. + Using a reference file from yaml and Xarray - @author Paolo Davini (p.davini@isac.cnr.it), Sep 2022. - @author Jost von Hardenberg (jost.hardenberg@polito.it), Sep 2022 + @author Paolo Davini (p.davini@isac.cnr.it), Sep 2022. + @author Jost von Hardenberg (jost.hardenberg@polito.it), Sep 2022 """ __author__ = "Paolo Davini (p.davini@isac.cnr.it), Sep 2022." @@ -24,8 +24,8 @@ from ecmean import Diagnostic, Supporter, UnitsHandler from ecmean.libs.general import weight_split, write_tuning_table, get_domain, \ - check_time_axis, init_mydict, \ - check_var_interface, check_var_climatology, set_multiprocessing_start_method + check_time_axis, init_mydict, \ + check_var_interface, check_var_climatology, set_multiprocessing_start_method from ecmean.libs.files import var_is_there, get_inifiles, load_yaml, make_input_filename from ecmean.libs.formula import formula_wrapper from ecmean.libs.masks import masked_meansum, select_region @@ -38,300 +38,230 @@ dask.config.set(scheduler="synchronous") -def gm_worker(util, ref, face, diag, varmean, vartrend, varlist): - """Main parallel diagnostic worker for global mean - - Args: - util: the utility dictionary, including mask and weights - ref: the reference dictionary for the global mean - face: the interface to be used to access the data - diag: the diagnostic class object - varmean: the dictionary for the global mean (empty) - vartrend: the dictionary for the trends (empty) - varlist: the variable on which compute the global mean - - Returns: - vartrend and varmean under the form of a dictionaries +class GlobalMean: """ - - loggy = logging.getLogger(__name__) - - for var in varlist: - - # create empty nested dictionaries - result = init_mydict(diag.seasons, diag.regions) - trend = init_mydict(diag.seasons, diag.regions) - - # check if the variable is in the interface file - if check_var_interface(var, face): - - # get domain - domain = get_domain(var, face) - - # compute weights - weights = getattr(util, domain + 'area') - domain_mask = getattr(util, domain + 'mask') - - # get input files/fielf - infile = make_input_filename(var, face, diag) - - # check if variables are available - isavail, varunit = var_is_there(infile, var, face) - - if isavail: - - # perform the unit conversion extracting offset and factor - units_handler = UnitsHandler(var, org_units=varunit, clim=ref, face=face) - offset, factor = units_handler.offset, units_handler.factor - - # load the object - if not isinstance(infile, (xr.DataArray, xr.Dataset)): - xfield = xr.open_mfdataset(infile, preprocess=xr_preproc, chunks={'time': 12}) - else: - xfield = infile - - # in case of big files with multi year, be sure of having opened the right records - xfield = xfield.sel(time=xfield.time.dt.year.isin(diag.years_joined)) - - # check time axis - check_time_axis(xfield.time, diag.years_joined) - - # get the data-array field for the required var - cfield = formula_wrapper(var, face, xfield).compute() - - for season in diag.seasons: - - # copy of the full field - tfield = cfield.copy(deep=True) - - if season != 'ALL': - tfield = tfield.sel(time=cfield.time.dt.season.isin(season)) - - if diag.ftrend: - # this does not consider continuous seasons for DJF, but JF+D - tfield = tfield.groupby('time.year').mean('time') + Class to compute the global mean of an experiment. + """ + def __init__(self, exp, year1, year2, config='config.yml', loglevel='WARNING', numproc=1, + interface=None, model=None, ensemble='r1i1p1f1', addnan=False, silent=None, + trend=None, line=None, outputdir=None, xdataset=None): + self.exp = exp + self.year1 = year1 + self.year2 = year2 + self.config = config + self.loglevel = loglevel + self.numproc = numproc + self.interface = interface + self.model = model + self.ensemble = ensemble + self.addnan = addnan + self.silent = silent + self.trend = trend + self.line = line + self.outputdir = outputdir + self.xdataset = xdataset + self.loggy = setup_logger(level=self.loglevel) + self.diag = None + self.face = None + self.ref = None + self.util_dictionary = None + self.varmean = None + self.vartrend = None + self.funcname = self.__class__.__name__ + self.start_time = time() + + def toc(self, message): + """Update the timer and log the elapsed time.""" + elapsed_time = time() - self.start_time + self.start_time = time() + self.loggy.info('%s time: %.2f seconds', message, elapsed_time) + + def prepare(self): + plat, mprocmethod = set_multiprocessing_start_method() + self.loggy.info('Running on %s and multiprocessing method set as "%s"', plat, mprocmethod) + + self.diag = Diagnostic(argparse.Namespace(**self.__dict__)) + self.face = load_yaml(self.diag.interface) + self.ref = load_yaml(self.diag.reffile) + + check_var_climatology(self.diag.var_all, self.ref.keys()) + + os.makedirs(self.diag.tabdir, exist_ok=True) + os.makedirs(self.diag.figdir, exist_ok=True) + + comp = self.face['model']['component'] + inifiles = get_inifiles(self.face, self.diag) + + units_extra_definition() + + self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=True, remap=False) + + def run(self): + mgr = Manager() + self.varmean = mgr.dict() + self.vartrend = mgr.dict() + processes = [] + + for varlist in weight_split(self.diag.var_all, self.diag.numproc): + core = Process(target=self.gm_worker, args=(self.util_dictionary, self.ref, self.face, self.diag, + self.varmean, self.vartrend, varlist)) + core.start() + processes.append(core) + + for proc in processes: + proc.join() + + def store(self): + global_table = [] + for var in self.diag.var_all: + gamma = self.ref[var] + if isinstance(gamma['obs'], dict): + tabval = gamma['obs']['ALL']['Global'] + outval = str(tabval['mean']) + '\u00B1' + str(tabval['std']) + else: + outval = gamma['obs'] + + if 'year1' in gamma.keys(): + years = str(gamma['year1']) + '-' + str(gamma['year2']) + else: + raise ValueError('Year1 and Year2 are not defined in the reference file') + + out_sequence = [var, gamma['longname'], gamma['units'], self.varmean[var]['ALL']['Global']] + if self.diag.ftrend: + out_sequence = out_sequence + [self.vartrend[var]['ALL']['Global']] + out_sequence = out_sequence + [outval, gamma.get('dataset', ''), years] + global_table.append(out_sequence) + + head = ['Variable', 'Longname', 'Units', self.diag.modelname] + if self.diag.ftrend: + head = head + ['Trend'] + head = head + ['Obs.', 'Dataset', 'Years'] + + tablefile = os.path.join(self.diag.tabdir, + f'global_mean_{self.diag.expname}_{self.diag.modelname}_{self.diag.ensemble}_{self.diag.year1}_{self.diag.year2}.txt') + self.loggy.info('Table file is: %s', tablefile) + with open(tablefile, 'w', encoding='utf-8') as out: + out.write(tabulate(global_table, headers=head, stralign='center', tablefmt='orgtbl')) + + ordered = {} + for var in self.diag.var_all: + ordered[var] = self.varmean[var] + + yamlfile = os.path.join(self.diag.tabdir, + f'global_mean_{self.diag.expname}_{self.diag.modelname}_{self.diag.ensemble}_{self.diag.year1}_{self.diag.year2}.yml') + self.loggy.info('YAML file is: %s', tablefile) + with open(yamlfile, 'w', encoding='utf-8') as file: + yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False) + + def plot(self, mapfile=None): + + obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref, + self.diag.var_all, self.diag.seasons, + self.diag.regions) + if mapfile is None: + mapfile = os.path.join(self.diag.figdir, + f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.pdf') + self.loggy.info('Figure file is: %s', mapfile) + + diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname, + 'year1': self.diag.year1, 'year2': self.diag.year2, + 'units_list': units_list} + + heatmap_comparison_gm(data_dict=data2plot, mean_dict=obsmean, std_dict=obsstd, + diag=diag_dict, filemap=mapfile, addnan=self.diag.addnan) + + if self.diag.ftable: + self.loggy.info('Line file is: %s', self.diag.linefile) + write_tuning_table(self.diag.linefile, self.varmean, self.diag.var_table, self.diag, self.ref) + + @staticmethod + def gm_worker(util, ref, face, diag, varmean, vartrend, varlist): + """" + Workhorse for the global mean computation. + + """ + loggy = logging.getLogger(__name__) + + for var in varlist: + result = init_mydict(diag.seasons, diag.regions) + trend = init_mydict(diag.seasons, diag.regions) + + if check_var_interface(var, face): + domain = get_domain(var, face) + weights = getattr(util, domain + 'area') + domain_mask = getattr(util, domain + 'mask') + infile = make_input_filename(var, face, diag) + isavail, varunit = var_is_there(infile, var, face) + + if isavail: + units_handler = UnitsHandler(var, org_units=varunit, clim=ref, face=face) + offset, factor = units_handler.offset, units_handler.factor + + if not isinstance(infile, (xr.DataArray, xr.Dataset)): + xfield = xr.open_mfdataset(infile, preprocess=xr_preproc, chunks={'time': 12}) else: - tfield = tfield.mean(dim='time') - - for region in diag.regions: + xfield = infile - slicefield = select_region(tfield, region) - sliceweights = select_region(weights, region) - if isinstance(domain_mask, xr.DataArray): - slicemask = select_region(domain_mask, region) - else: - slicemask = 0. + xfield = xfield.sel(time=xfield.time.dt.year.isin(diag.years_joined)) + check_time_axis(xfield.time, diag.years_joined) + cfield = formula_wrapper(var, face, xfield).compute() - # final operation on the field - avg = masked_meansum( - xfield=slicefield, weights=sliceweights, mask=slicemask, - operation=ref[var].get('operation', 'mean'), - mask_type=ref[var].get('mask', 'global'), - domain=domain) + for season in diag.seasons: + tfield = cfield.copy(deep=True) + if season != 'ALL': + tfield = tfield.sel(time=cfield.time.dt.season.isin(season)) - # if dask delayead object, compute - if isinstance(avg, dask.array.core.Array): - avg = avg.compute() + if diag.ftrend: + tfield = tfield.groupby('time.year').mean('time') + else: + tfield = tfield.mean(dim='time') - result[season][region] = float((np.nanmean(avg) + offset) * factor) + for region in diag.regions: + slicefield = select_region(tfield, region) + sliceweights = select_region(weights, region) + if isinstance(domain_mask, xr.DataArray): + slicemask = select_region(domain_mask, region) + else: + slicemask = 0. - if diag.ftrend: - if len(avg) == len(diag.years_joined): - trend[season][region] = np.polyfit(diag.years_joined, avg, 1)[0] - if season == 'ALL' and region == 'Global': - loggy.info('Average: %s %s %s %s', var, season, region, result[season][region]) - - # nested dictionary, to be redifend as a dict to remove lambdas - varmean[var] = result - vartrend[var] = trend - - -def global_mean(exp, year1, year2, - config='config.yml', - loglevel='WARNING', - numproc=1, - interface=None, model=None, ensemble='r1i1p1f1', - addnan=False, - silent=None, trend=None, line=None, - outputdir=None, xdataset=None): - """The main ECmean4 global mean function - - :param exp: Experiment name or ID - :param year1: Initial year - :param year2: Final year - :param config: configuration file, optional (default 'config.yml') - :param loglevel: level of logging, optional (default 'WARNING') - :param numproc: number of multiprocessing cores, optional (default '1') - :param interface: interface file to be used, optional (default as specifified in config file) - :param model: model to be analyzed, optional (default as specifified in config file) - :param ensemble: ensemble member to be analyzed, optional (default as 'r1i1p1f1') - :param add_nan: add to the final plots also fields which cannot be compared against observations - :param silent: do not print anything to std output, optional - :param trend: compute yearly trends, optional - :param line: appends also single line to a table, optional - :param outputdir: output directory for the single line output, optional - :param xdataset: xarray dataset - already open - to be used without looking for files - - :returns: the global mean txt table as defined in the output + avg = masked_meansum( + xfield=slicefield, weights=sliceweights, mask=slicemask, + operation=ref[var].get('operation', 'mean'), + mask_type=ref[var].get('mask', 'global'), + domain=domain) - """ + if isinstance(avg, dask.array.core.Array): + avg = avg.compute() - # create a name space with all the arguments to feed the Diagnostic class - # This is not the neatest option, but it is very compact - funcname = __name__ - argv = argparse.Namespace(**locals()) + result[season][region] = float((np.nanmean(avg) + offset) * factor) - # set loglevel - loggy = setup_logger(level=argv.loglevel) - - # set dask and multiprocessing fork - plat, mprocmethod = set_multiprocessing_start_method() - loggy.info('Running on %s and multiprocessing method set as "%s"', plat, mprocmethod) - - # start time - tic = time() - - # initialize the diag class, load the inteface and the reference file - diag = Diagnostic(argv) - face = load_yaml(diag.interface) - ref = load_yaml(diag.reffile) - - # check that everyhing is there - check_var_climatology(diag.var_all, ref.keys()) - - # Create missing folders - os.makedirs(diag.tabdir, exist_ok=True) - os.makedirs(diag.figdir, exist_ok=True) - - # Can probably be cleaned up further - comp = face['model']['component'] # Get component for each domain - - # get file info - inifiles = get_inifiles(face, diag) - - - # add missing unit definition - units_extra_definition() - - # create util dictionary including mask and weights for both atmosphere - # and ocean grids - util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], - areas=True, remap=False) - - # main loop: manager is required for shared variables - mgr = Manager() - - # dictionaries are shared, so they have to be passed as functions - varmean = mgr.dict() - vartrend = mgr.dict() - processes = [] - - # loop on the variables, create the parallel process - for varlist in weight_split(diag.var_all, diag.numproc): - core = Process( - target=gm_worker, args=(util_dictionary, ref, face, diag, - varmean, vartrend, varlist)) - core.start() - processes.append(core) - - # wait for the processes to finish - for proc in processes: - proc.join() - - toc = time() - - # evaluate tic-toc time of execution - loggy.info('Analysis done in %.4f seconds', toc - tic) - - tic = time() - - - # loop on the variables to create the output table - global_table = [] - for var in diag.var_all: - gamma = ref[var] - # get the predefined value or the ALL Global one - if isinstance(gamma['obs'], dict): - tabval = gamma['obs']['ALL']['Global'] - outval = str(tabval['mean']) + '\u00B1' + str(tabval['std']) - else: - outval = gamma['obs'] - - if 'year1' in gamma.keys(): - years = str(gamma['year1']) + '-' + str(gamma['year2']) - else: - raise ValueError('Year1 and Year2 are not defined in the reference file') - - out_sequence = [var, gamma['longname'], gamma['units'], varmean[var]['ALL']['Global']] - if diag.ftrend: - out_sequence = out_sequence + [vartrend[var]['ALL']['Global']] - out_sequence = out_sequence + [outval, gamma.get('dataset', ''), years] - global_table.append(out_sequence) - - # prepare the header for the table - head = ['Variable', 'Longname', 'Units', diag.modelname] - if diag.ftrend: - head = head + ['Trend'] - head = head + ['Obs.', 'Dataset', 'Years'] - - # write the file with tabulate - tablefile = os.path.join(diag.tabdir, - f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.txt') - loggy.info('Table file is: %s', tablefile) - with open(tablefile, 'w', encoding='utf-8') as out: - out.write(tabulate(global_table, headers=head, stralign='center', tablefmt='orgtbl')) + if diag.ftrend: + if len(avg) == len(diag.years_joined): + trend[season][region] = np.polyfit(diag.years_joined, avg, 1)[0] + if season == 'ALL' and region == 'Global': + loggy.info('Average: %s %s %s %s', var, season, region, result[season][region]) - # required to avoid problem with multiprocessing - ordered = {} - for var in diag.var_all: - ordered[var] = varmean[var] - - # dump the yaml file for global_mean, including all the seasons - yamlfile = os.path.join(diag.tabdir, - f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.yml') - loggy.info('YAML file is: %s', tablefile) - with open(yamlfile, 'w', encoding='utf-8') as file: - yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False) - - # prepare dictionaries for global mean - obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(ordered, ref, diag.var_all, diag.seasons, diag.regions) - - # call the heatmap routine for a plot - mapfile = os.path.join(diag.figdir, - f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.png') - loggy.info('Figure file is: %s', mapfile) - - diag_dict = {'modelname': diag.modelname, 'expname': diag.expname, - 'year1': diag.year1, 'year2': diag.year2, - 'units_list': units_list} - - heatmap_comparison_gm(data_dict=data2plot, mean_dict=obsmean, std_dict=obsstd, - diag=diag_dict, filemap=mapfile, addnan=diag.addnan) - - # Print appending one line to table (for tuning) - if diag.ftable: - loggy.info('Line file is: %s', diag.linefile) - write_tuning_table(diag.linefile, varmean, diag.var_table, diag, ref) - - toc = time() - # evaluate tic-toc time of postprocessing - loggy.info("Postproc done in %.4f seconds", toc - tic) - print('ECmean4 Global Mean succesfully computed!') + varmean[var] = result + vartrend[var] = trend def gm_entry_point(): - """ - Command line interface to run the global_mean function - """ - - # read arguments from command line args = parse_arguments(sys.argv[1:], script='gm') - - global_mean(exp=args.exp, year1=args.year1, year2=args.year2, - numproc=args.numproc, - trend=args.trend, line=args.line, - loglevel=args.loglevel, - interface=args.interface, config=args.config, - model=args.model, ensemble=args.ensemble, - addnan=args.addnan, - outputdir=args.outputdir) + global_mean(exp=args.exp, year1=args.year1, year2=args.year2, numproc=args.numproc, + trend=args.trend, line=args.line, loglevel=args.loglevel, + interface=args.interface, config=args.config, model=args.model, + ensemble=args.ensemble, addnan=args.addnan, outputdir=args.outputdir) + print('ECmean4 Global Mean successfully computed!') + +def global_mean(exp, year1, year2, config='config.yml', loglevel='WARNING', numproc=1, + interface=None, model=None, ensemble='r1i1p1f1', addnan=False, silent=None, + trend=None, line=None, outputdir=None, xdataset=None): + gm = GlobalMean(exp, year1, year2, config, loglevel, numproc, interface, model, ensemble, addnan, silent, trend, line, outputdir, xdataset) + gm.prepare() + gm.toc('Preparation') + gm.run() + gm.toc('Computation') + gm.store() + gm.plot() + gm.toc('Plotting') + print('ECmean4 Global Mean successfully computed!') diff --git a/ecmean/libs/diagnostic.py b/ecmean/libs/diagnostic.py index 3cab468..eac7832 100644 --- a/ecmean/libs/diagnostic.py +++ b/ecmean/libs/diagnostic.py @@ -82,7 +82,7 @@ def __init__(self, args): self.figdir = Path(os.path.join(outputdir, 'PDF')) # init for global mean - if self.funcname == 'global_mean': + if self.funcname == 'GlobalMean': self.cfg_global_mean(cfg) # init for performance indices From 8e5afb640710ecfc85888ec7ef9d4a9f0947664c Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Fri, 20 Dec 2024 10:40:03 +0100 Subject: [PATCH 19/26] polish --- ecmean/global_mean.py | 12 ++++++++---- ecmean/performance_indices.py | 11 ++++++++--- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index bdeda87..1858506 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -153,14 +153,19 @@ def store(self): with open(yamlfile, 'w', encoding='utf-8') as file: yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False) - def plot(self, mapfile=None): - + def plot(self, mapfile=None, figformat='pdf'): + """" + Plot the global mean values. + Args: + mapfile: Path to the output file. If None, it will be defined automatically following ECmean syntax + figformat: Format of the output file. + """ obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref, self.diag.var_all, self.diag.seasons, self.diag.regions) if mapfile is None: mapfile = os.path.join(self.diag.figdir, - f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.pdf') + f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}') self.loggy.info('Figure file is: %s', mapfile) diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname, @@ -264,4 +269,3 @@ def global_mean(exp, year1, year2, config='config.yml', loglevel='WARNING', nump gm.store() gm.plot() gm.toc('Plotting') - print('ECmean4 Global Mean successfully computed!') diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index 69a442d..1dbeb27 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -141,8 +141,13 @@ def store(self, yamlfile=None): with open(yamlfile, 'w', encoding='utf-8') as file: yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False) - def plot(self, mapfile=None): - """Generate the heatmap for performance indices.""" + def plot(self, mapfile=None, figformat='pdf'): + """Generate the heatmap for performance indices. + + Args: + mapfile: Path to the output file. If None, it will be defined automatically following ECmean syntax + figformat: Format of the output file. + """ # to this date, only EC23 support comparison with CMIP6 data if self.diag.climatology == 'EC23': # prepare the data for the heatmap from the original yaml dictionaries @@ -152,7 +157,7 @@ def plot(self, mapfile=None): # call the heatmap routine for a plot if mapfile is None: - mapfile = os.path.join(self.diag.figdir, f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.pdf') + mapfile = os.path.join(self.diag.figdir, f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}') diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname, 'climatology': self.diag.climatology, 'year1': self.diag.year1, 'year2': self.diag.year2, 'seasons': self.diag.seasons, 'regions': self.diag.regions, 'longnames': longnames} heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=diag_dict, filemap=mapfile) From 145e1113a019ae86c2eb90020e10d0c0cf555c42 Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Fri, 20 Dec 2024 11:13:35 +0100 Subject: [PATCH 20/26] raise --- ecmean/libs/files.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ecmean/libs/files.py b/ecmean/libs/files.py index 6b8d191..d549481 100644 --- a/ecmean/libs/files.py +++ b/ecmean/libs/files.py @@ -116,6 +116,8 @@ def get_clim_files(piclim, var, diag, season): vvvv = str( diag.resclmdir / f'{stringname}_variance_{var}_{dataref}_{diag.resolution}_{datayear1}-{datayear2}.nc') + else: + raise ValueError('Climatology not supported/existing!') return clim, vvvv From b61130bb9f466cd48c9780458948b295088a5af6 Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Fri, 20 Dec 2024 11:25:19 +0100 Subject: [PATCH 21/26] docstring --- ecmean/global_mean.py | 109 +++++++++++++++++++++++----------- ecmean/performance_indices.py | 45 ++++++++++++-- 2 files changed, 113 insertions(+), 41 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index 1858506..827c934 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -40,7 +40,42 @@ class GlobalMean: """ - Class to compute the global mean of an experiment. + Attributes: + exp (str): Experiment name. + year1 (int): Start year of the experiment. + year2 (int): End year of the experiment. + config (str): Path to the configuration file. Default is 'config.yml'. + loglevel (str): Logging level. Default is 'WARNING'. + numproc (int): Number of processes to use. Default is 1. + interface (str): Path to the interface file. Default is None. + model (str): Model name. Default is None. + ensemble (str): Ensemble identifier. Default is 'r1i1p1f1'. + addnan (bool): Whether to add NaNs. Default is False. + silent (bool): Whether to suppress output. Default is None. + trend (bool): Whether to compute trends. Default is None. + line (str): Line identifier. Default is None. + outputdir (str): Output directory. Default is None. + xdataset (str): Path to the xdataset. Default is None. + loggy (logging.Logger): Logger instance. + diag (Diagnostic): Diagnostic instance. + face (dict): Interface dictionary. + ref (dict): Reference dictionary. + util_dictionary (Supporter): Supporter instance. + varmean (dict): Dictionary to store variable means. + vartrend (dict): Dictionary to store variable trends. + funcname (str): Name of the class. + start_time (float): Start time for the timer. + Methods: + toc(message): + Update the timer and log the elapsed time. + prepare(): + Prepare the necessary components for the global mean computation. + run(): + Run the global mean computation using multiprocessing. + store(): + Store the computed global mean values in a table and YAML file. + plot(mapfile=None, figformat='pdf'): + gm_worker(util, ref, face, diag, varmean, vartrend, varlist): """ def __init__(self, exp, year1, year2, config='config.yml', loglevel='WARNING', numproc=1, interface=None, model=None, ensemble='r1i1p1f1', addnan=False, silent=None, @@ -95,6 +130,7 @@ def prepare(self): units_extra_definition() self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=True, remap=False) + self.toc('Computation') def run(self): mgr = Manager() @@ -110,25 +146,26 @@ def run(self): for proc in processes: proc.join() + self.toc('Computation') def store(self): global_table = [] for var in self.diag.var_all: gamma = self.ref[var] if isinstance(gamma['obs'], dict): - tabval = gamma['obs']['ALL']['Global'] - outval = str(tabval['mean']) + '\u00B1' + str(tabval['std']) + tabval = gamma['obs']['ALL']['Global'] + outval = str(tabval['mean']) + '\u00B1' + str(tabval['std']) else: - outval = gamma['obs'] + outval = gamma['obs'] if 'year1' in gamma.keys(): - years = str(gamma['year1']) + '-' + str(gamma['year2']) + years = str(gamma['year1']) + '-' + str(gamma['year2']) else: - raise ValueError('Year1 and Year2 are not defined in the reference file') + raise ValueError('Year1 and Year2 are not defined in the reference file') out_sequence = [var, gamma['longname'], gamma['units'], self.varmean[var]['ALL']['Global']] if self.diag.ftrend: - out_sequence = out_sequence + [self.vartrend[var]['ALL']['Global']] + out_sequence = out_sequence + [self.vartrend[var]['ALL']['Global']] out_sequence = out_sequence + [outval, gamma.get('dataset', ''), years] global_table.append(out_sequence) @@ -178,6 +215,7 @@ def plot(self, mapfile=None, figformat='pdf'): if self.diag.ftable: self.loggy.info('Line file is: %s', self.diag.linefile) write_tuning_table(self.diag.linefile, self.varmean, self.diag.var_table, self.diag, self.ref) + self.toc('Plotting') @staticmethod def gm_worker(util, ref, face, diag, varmean, vartrend, varlist): @@ -214,37 +252,37 @@ def gm_worker(util, ref, face, diag, varmean, vartrend, varlist): for season in diag.seasons: tfield = cfield.copy(deep=True) if season != 'ALL': - tfield = tfield.sel(time=cfield.time.dt.season.isin(season)) + tfield = tfield.sel(time=cfield.time.dt.season.isin(season)) if diag.ftrend: - tfield = tfield.groupby('time.year').mean('time') + tfield = tfield.groupby('time.year').mean('time') else: - tfield = tfield.mean(dim='time') + tfield = tfield.mean(dim='time') for region in diag.regions: - slicefield = select_region(tfield, region) - sliceweights = select_region(weights, region) - if isinstance(domain_mask, xr.DataArray): - slicemask = select_region(domain_mask, region) - else: - slicemask = 0. - - avg = masked_meansum( - xfield=slicefield, weights=sliceweights, mask=slicemask, - operation=ref[var].get('operation', 'mean'), - mask_type=ref[var].get('mask', 'global'), - domain=domain) - - if isinstance(avg, dask.array.core.Array): - avg = avg.compute() - - result[season][region] = float((np.nanmean(avg) + offset) * factor) - - if diag.ftrend: - if len(avg) == len(diag.years_joined): - trend[season][region] = np.polyfit(diag.years_joined, avg, 1)[0] - if season == 'ALL' and region == 'Global': - loggy.info('Average: %s %s %s %s', var, season, region, result[season][region]) + slicefield = select_region(tfield, region) + sliceweights = select_region(weights, region) + if isinstance(domain_mask, xr.DataArray): + slicemask = select_region(domain_mask, region) + else: + slicemask = 0. + + avg = masked_meansum( + xfield=slicefield, weights=sliceweights, mask=slicemask, + operation=ref[var].get('operation', 'mean'), + mask_type=ref[var].get('mask', 'global'), + domain=domain) + + if isinstance(avg, dask.array.core.Array): + avg = avg.compute() + + result[season][region] = float((np.nanmean(avg) + offset) * factor) + + if diag.ftrend: + if len(avg) == len(diag.years_joined): + trend[season][region] = np.polyfit(diag.years_joined, avg, 1)[0] + if season == 'ALL' and region == 'Global': + loggy.info('Average: %s %s %s %s', var, season, region, result[season][region]) varmean[var] = result vartrend[var] = trend @@ -263,9 +301,8 @@ def global_mean(exp, year1, year2, config='config.yml', loglevel='WARNING', nump trend=None, line=None, outputdir=None, xdataset=None): gm = GlobalMean(exp, year1, year2, config, loglevel, numproc, interface, model, ensemble, addnan, silent, trend, line, outputdir, xdataset) gm.prepare() - gm.toc('Preparation') + gm.run() - gm.toc('Computation') gm.store() gm.plot() - gm.toc('Plotting') + diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index 1dbeb27..3af89b9 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -36,9 +36,44 @@ dask.config.set(scheduler="synchronous") class PerformanceIndices: - """ - Class to compute the performance indices for a given experiment and years. + Class to compute the performance indices for a given experiment and years + + Attributes: + exp (str): Experiment name. + year1 (int): Start year of the experiment. + year2 (int): End year of the experiment. + config (str): Path to the configuration file. Default is 'config.yml'. + loglevel (str): Logging level. Default is 'WARNING'. + numproc (int): Number of processes to use. Default is 1. + climatology (str): Climatology to use. Default is 'EC23'. + interface (str): Path to the interface file. + model (str): Model name. + ensemble (str): Ensemble identifier. Default is 'r1i1p1f1'. + silent (bool): If True, suppress output. Default is None. + xdataset (xarray.Dataset): Dataset to use. + outputdir (str): Directory to store output files. + loggy (logging.Logger): Logger instance. + diag (Diagnostic): Diagnostic instance. + face (dict): Interface dictionary. + piclim (dict): Climatology dictionary. + util_dictionary (Supporter): Utility dictionary for remapping and masks. + varstat (dict): Dictionary to store variable statistics. + funcname (str): Name of the class. + start_time (float): Start time for performance measurement. + Methods: + toc(message): + Update the timer and log the elapsed time. + prepare(): + Prepare the necessary components for performance indices calculation. + run(): + Run the performance indices calculation. + store(yamlfile=None): + Store the performance indices in a yaml file. + plot(mapfile=None, figformat='pdf'): + Generate the heatmap for performance indices. + pi_worker(util, piclim, face, diag, field_3d, varstat, varlist): + Main parallel diagnostic worker for performance indices. """ def __init__(self, exp, year1, year2, config='config.yml', @@ -108,6 +143,7 @@ def prepare(self): # create remap dictionary with atm and oce interpolators self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=False, remap=True, targetgrid=target_remap_grid) + self.toc('Calculation') def run(self): """Run the performance indices calculation.""" @@ -127,6 +163,7 @@ def run(self): # wait for the processes to finish for proc in processes: proc.join() + self.toc('Computation') def store(self, yamlfile=None): """Store the performance indices in a yaml file.""" @@ -160,6 +197,7 @@ def plot(self, mapfile=None, figformat='pdf'): mapfile = os.path.join(self.diag.figdir, f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}') diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname, 'climatology': self.diag.climatology, 'year1': self.diag.year1, 'year2': self.diag.year2, 'seasons': self.diag.seasons, 'regions': self.diag.regions, 'longnames': longnames} heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=diag_dict, filemap=mapfile) + self.toc('Plotting') @staticmethod def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist): @@ -328,9 +366,6 @@ def performance_indices(exp, year1, year2, config='config.yml', loglevel='WARNIN interface=interface, model=model, ensemble=ensemble, silent=silent, xdataset=xdataset, outputdir=outputdir) pi.prepare() - pi.toc('Preparation') pi.run() - pi.toc('Calculation') pi.store() pi.plot() - pi.toc('Plotting') From 307aad229b7adf99e1c8184a2b7e7c9d628dc44f Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Fri, 20 Dec 2024 11:26:42 +0100 Subject: [PATCH 22/26] changelgo --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ba9d76..6b98d57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) Unreleased is the current development version, which is currently lying in `main` branch. - Allowing for configuration file as dictionary (#106) +- GlobalMean and PerformanceIndices classe introduced (#110) ## [v0.1.11] From 33368e324798f876892c1eeb7222021b555e9431 Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Fri, 20 Dec 2024 16:04:23 +0100 Subject: [PATCH 23/26] cleaning of plotting --- ecmean/global_mean.py | 20 ++++++++++---------- ecmean/libs/plotting.py | 35 ++++++++++++++--------------------- ecmean/performance_indices.py | 12 +++++++----- 3 files changed, 31 insertions(+), 36 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index 827c934..b43c65a 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -112,6 +112,7 @@ def toc(self, message): self.loggy.info('%s time: %.2f seconds', message, elapsed_time) def prepare(self): + """Prepare the necessary components for the global mean computation.""" plat, mprocmethod = set_multiprocessing_start_method() self.loggy.info('Running on %s and multiprocessing method set as "%s"', plat, mprocmethod) @@ -130,9 +131,10 @@ def prepare(self): units_extra_definition() self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=True, remap=False) - self.toc('Computation') + self.toc('Preparation') def run(self): + """Run the global mean computaacross all variables on using multiprocessing.""" mgr = Manager() self.varmean = mgr.dict() self.vartrend = mgr.dict() @@ -149,6 +151,7 @@ def run(self): self.toc('Computation') def store(self): + """Rearrange the data and save the yaml file and the table.""" global_table = [] for var in self.diag.var_all: gamma = self.ref[var] @@ -197,20 +200,17 @@ def plot(self, mapfile=None, figformat='pdf'): mapfile: Path to the output file. If None, it will be defined automatically following ECmean syntax figformat: Format of the output file. """ - obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref, - self.diag.var_all, self.diag.seasons, + obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref, + self.diag.var_all, self.diag.seasons, self.diag.regions) if mapfile is None: mapfile = os.path.join(self.diag.figdir, - f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}') + f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}') self.loggy.info('Figure file is: %s', mapfile) - diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname, - 'year1': self.diag.year1, 'year2': self.diag.year2, - 'units_list': units_list} - heatmap_comparison_gm(data_dict=data2plot, mean_dict=obsmean, std_dict=obsstd, - diag=diag_dict, filemap=mapfile, addnan=self.diag.addnan) + diag=self.diag, units_list=units_list, + filemap=mapfile, addnan=self.diag.addnan) if self.diag.ftable: self.loggy.info('Line file is: %s', self.diag.linefile) @@ -301,8 +301,8 @@ def global_mean(exp, year1, year2, config='config.yml', loglevel='WARNING', nump trend=None, line=None, outputdir=None, xdataset=None): gm = GlobalMean(exp, year1, year2, config, loglevel, numproc, interface, model, ensemble, addnan, silent, trend, line, outputdir, xdataset) gm.prepare() - gm.run() gm.store() gm.plot() + diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py index 91786cf..e1c2165 100644 --- a/ecmean/libs/plotting.py +++ b/ecmean/libs/plotting.py @@ -18,7 +18,7 @@ loggy = logging.getLogger(__name__) -def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str = None, size_model=14, **kwargs): +def heatmap_comparison_pi(data_dict, cmip6_dict, diag, longnames, filemap: str = None, size_model=14, **kwargs): """ Function to produce a heatmap - seaborn based - for Performance Indices based on CMIP6 ratio @@ -26,7 +26,8 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str Args: data_dict (dict): dictionary of absolute performance indices cmip6_dict (dict): dictionary of CMIP6 performance indices - diag (dict): dictionary containing diagnostic information + diag (object): Diagnostic object + units_list (list): list of units filemap (str): path to save the plot size_model (int): size of the PIs in the plot @@ -40,18 +41,15 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str loggy.debug(data_table) # relative pi with re-ordering of rows - cmip6_table = dict_to_dataframe(cmip6_dict) - if diag is not None and 'longnames' in diag: - cmip6_table = cmip6_table.reindex(diag['longnames']) + cmip6_table = dict_to_dataframe(cmip6_dict).reindex(longnames) relative_table = data_table.div(cmip6_table) # compute the total PI mean relative_table.loc['Total PI'] = relative_table.mean() # reordering columns if info is available - if diag is not None and 'regions' in diag and 'seasons' in diag: - lll = [(x, y) for x in diag['seasons'] for y in diag['regions']] - relative_table = relative_table[lll] + lll = [(x, y) for x in diag.seasons for y in diag.regions] + relative_table = relative_table[lll] loggy.debug("Relative table") loggy.debug(relative_table) @@ -70,7 +68,7 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str title = kwargs['title'] else: title = 'CMIP6 RELATIVE PI' - title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + str(diag['year1']) + ' ' + str(diag['year2']) + title += f" {diag.modelname} {diag.expname} {diag.year1} {diag.year2}" tot = len(myfield.columns) sss = len(set([tup[1] for tup in myfield.columns])) @@ -93,10 +91,7 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str axs.set(xlabel=None) if filemap is None: - if diag is not None: - filemap = f'PI4_{diag["climatology"]}_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf' - else: - filemap = 'PI4_heatmap.pdf' + filemap = 'PI4_heatmap.pdf' # save and close plt.savefig(filemap) @@ -104,7 +99,7 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str plt.close() -def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: str, +def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag, units_list, filemap=None, addnan=True, size_model=14, size_obs=8, **kwargs): """ Function to produce a heatmap - seaborn based - for Global Mean @@ -114,7 +109,8 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s data_dict (dict): table of model data mean_dict (dict): table of observations std_dict (dict): table of standard deviation - diag (dict): dictionary containing diagnostic information + diag (dict): diagnostic object + units_list (list): list of units filemap (str): path to save the plot addnan (bool): add to the final plots also fields which cannot be compared against observations size_model (int): size of the model values in the plot @@ -129,7 +125,7 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s mean_table = dict_to_dataframe(mean_dict) std_table = dict_to_dataframe(std_dict) for table in [data_table, mean_table, std_table]: - table.index = table.index + ' [' + diag['units_list'] + ']' + table.index = table.index + ' [' + units_list + ']' loggy.debug("Data table") loggy.debug(data_table) @@ -151,7 +147,7 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s title = kwargs['title'] else: title = 'GLOBAL MEAN' - title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + str(diag['year1']) + ' ' + str(diag['year2']) + title += f" {diag.modelname} {diag.expname} {diag.year1} {diag.year2}" # set color range and palette thr = 10 @@ -194,10 +190,7 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s axs.set(xlabel=None) if filemap is None: - if diag is not None: - filemap = f'Global_Mean_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf' - else: - filemap = 'Global_Mean_Heatmap.pdf' + filemap = 'Global_Mean_Heatmap.pdf' # save and close plt.savefig(filemap) diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index 3af89b9..05a88e2 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -143,7 +143,7 @@ def prepare(self): # create remap dictionary with atm and oce interpolators self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=False, remap=True, targetgrid=target_remap_grid) - self.toc('Calculation') + self.toc('Preparation') def run(self): """Run the performance indices calculation.""" @@ -156,7 +156,9 @@ def run(self): # loop on the variables, create the parallel process for varlist in weight_split(self.diag.field_all, self.diag.numproc): - core = Process(target=self.pi_worker, args=(self.util_dictionary, self.piclim, self.face, self.diag, self.diag.field_3d, self.varstat, varlist)) + core = Process(target=self.pi_worker, args=(self.util_dictionary, self.piclim, + self.face, self.diag, self.diag.field_3d, + self.varstat, varlist)) core.start() processes.append(core) @@ -194,9 +196,9 @@ def plot(self, mapfile=None, figformat='pdf'): # call the heatmap routine for a plot if mapfile is None: - mapfile = os.path.join(self.diag.figdir, f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}') - diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname, 'climatology': self.diag.climatology, 'year1': self.diag.year1, 'year2': self.diag.year2, 'seasons': self.diag.seasons, 'regions': self.diag.regions, 'longnames': longnames} - heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=diag_dict, filemap=mapfile) + mapfile = os.path.join(self.diag.figdir, + f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}') + heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=self.diag, longnames=longnames, filemap=mapfile) self.toc('Plotting') @staticmethod From 41fe87c12d2dc01acb6af613ff8927b15da0a022 Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Fri, 20 Dec 2024 16:15:58 +0100 Subject: [PATCH 24/26] remove test for maps --- tests/test_performance_indices.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py index 9feef91..db60696 100644 --- a/tests/test_performance_indices.py +++ b/tests/test_performance_indices.py @@ -7,10 +7,10 @@ import pytest import xarray as xr import yaml -from ecmean.performance_indices import performance_indices +from ecmean import performance_indices from ecmean.libs.ncfixers import xr_preproc from ecmean.libs.general import are_dicts_equal -from ecmean.libs.plotting import heatmap_comparison_pi, prepare_clim_dictionaries_pi +#from ecmean.libs.plotting import heatmap_comparison_pi, prepare_clim_dictionaries_pi # set TOLERANCE TOLERANCE = 1e-1 @@ -77,16 +77,16 @@ def test_performance_indices_amip_xdataset(clim): assert are_dicts_equal(data1, data2, TOLERANCE),f"YAML files are not identical.\nData1: {data1}\nData2: {data2}" -def test_pi_plot(tmp_path): - file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml' - with open(file1, 'r', encoding='utf8') as f1: - data = yaml.safe_load(f1) - file2 = 'ecmean/climatology/EC23/pi_climatology_EC23.yml' - with open(file2, 'r', encoding='utf8') as f1: - clim = yaml.safe_load(f1) - data_dict, clim_dict, _ = prepare_clim_dictionaries_pi(data=data, clim=clim, shortnames=data.keys()) - heatmap_comparison_pi(data_dict=data_dict, cmip6_dict=clim_dict, - title='CMIP6 RELATIVE PI test', - mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf')) - assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created." - os.remove('PI4_heatmap.pdf') +# def test_pi_plot(tmp_path): +# file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml' +# with open(file1, 'r', encoding='utf8') as f1: +# data = yaml.safe_load(f1) +# file2 = 'ecmean/climatology/EC23/pi_climatology_EC23.yml' +# with open(file2, 'r', encoding='utf8') as f1: +# clim = yaml.safe_load(f1) +# data_dict, clim_dict, longnames = prepare_clim_dictionaries_pi(data=data, clim=clim, shortnames=data.keys()) + +# heatmap_comparison_pi(data_dict=data_dict, cmip6_dict=clim_dict, +# title='CMIP6 RELATIVE PI test', longnames=longnames, +# mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf')) +# assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created." From 65427a639aab3e717cb65ba2f8b8703b35edd12d Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Fri, 20 Dec 2024 16:44:34 +0100 Subject: [PATCH 25/26] independent plots --- ecmean/global_mean.py | 34 ++++++++++++++++++++++++------- ecmean/performance_indices.py | 30 ++++++++++++++++++++------- ecmean/utils/clim-create.py | 1 + tests/test_global_mean.py | 9 +++++++- tests/test_performance_indices.py | 21 +++++++------------ 5 files changed, 66 insertions(+), 29 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index b43c65a..a27d23a 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -150,9 +150,17 @@ def run(self): proc.join() self.toc('Computation') - def store(self): + def yamlfile(self): + """Define the output YAML filename""" + + yamlfile= self.diag.tabdir / f'global_mean_{self.diag.expname}_{self.diag.modelname}_{self.diag.ensemble}_{self.diag.year1}_{self.diag.year2}.yml' + return yamlfile + + def store(self, yamlfile=None): """Rearrange the data and save the yaml file and the table.""" global_table = [] + + # reorder the data to be stored for var in self.diag.var_all: gamma = self.ref[var] if isinstance(gamma['obs'], dict): @@ -177,21 +185,23 @@ def store(self): head = head + ['Trend'] head = head + ['Obs.', 'Dataset', 'Years'] + # save table tablefile = os.path.join(self.diag.tabdir, f'global_mean_{self.diag.expname}_{self.diag.modelname}_{self.diag.ensemble}_{self.diag.year1}_{self.diag.year2}.txt') self.loggy.info('Table file is: %s', tablefile) with open(tablefile, 'w', encoding='utf-8') as out: out.write(tabulate(global_table, headers=head, stralign='center', tablefmt='orgtbl')) - ordered = {} - for var in self.diag.var_all: - ordered[var] = self.varmean[var] + # reaorder + self.varmean = {var: self.varmean[var] for var in self.diag.var_all} - yamlfile = os.path.join(self.diag.tabdir, - f'global_mean_{self.diag.expname}_{self.diag.modelname}_{self.diag.ensemble}_{self.diag.year1}_{self.diag.year2}.yml') + # save yaml fil + if yamlfile is None: + yamlfile = self.yamlfile() + self.loggy.info('YAML file is: %s', tablefile) with open(yamlfile, 'w', encoding='utf-8') as file: - yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False) + yaml.safe_dump(self.varmean, file, default_flow_style=False, sort_keys=False) def plot(self, mapfile=None, figformat='pdf'): """" @@ -200,6 +210,15 @@ def plot(self, mapfile=None, figformat='pdf'): mapfile: Path to the output file. If None, it will be defined automatically following ECmean syntax figformat: Format of the output file. """ + + # load yaml file if is missing + if not self.varmean: + yamlfile = self.yamlfile() + self.loggy.info('Loading the stored data from the yaml file %s', yamlfile) + with open(yamlfile, 'r', encoding='utf-8') as file: + self.varmean = yaml.safe_load(file) + + # prepare the dictionaries for the plotting obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref, self.diag.var_all, self.diag.seasons, self.diag.regions) @@ -208,6 +227,7 @@ def plot(self, mapfile=None, figformat='pdf'): f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}') self.loggy.info('Figure file is: %s', mapfile) + # call the heatmap for plottinh heatmap_comparison_gm(data_dict=data2plot, mean_dict=obsmean, std_dict=obsstd, diag=self.diag, units_list=units_list, filemap=mapfile, addnan=self.diag.addnan) diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index 05a88e2..d0553b3 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -156,8 +156,8 @@ def run(self): # loop on the variables, create the parallel process for varlist in weight_split(self.diag.field_all, self.diag.numproc): - core = Process(target=self.pi_worker, args=(self.util_dictionary, self.piclim, - self.face, self.diag, self.diag.field_3d, + core = Process(target=self.pi_worker, args=(self.util_dictionary, self.piclim, + self.face, self.diag, self.diag.field_3d, self.varstat, varlist)) core.start() processes.append(core) @@ -167,18 +167,23 @@ def run(self): proc.join() self.toc('Computation') + def yamlfile(self): + """Define the output YAML filename""" + + yamlfile= self.diag.tabdir / f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.yml' + return yamlfile + def store(self, yamlfile=None): """Store the performance indices in a yaml file.""" + # order according to the original request the fields in the yaml file - ordered = {} - for item in self.diag.field_all: - ordered[item] = self.varstat[item] + self.varstat = {var: self.varstat[var] for var in self.diag.field_all} # dump the yaml file for PI, including all the seasons (need to copy to avoid mess) if yamlfile is None: - yamlfile = self.diag.tabdir / f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.yml' + yamlfile = self.yamlfile() with open(yamlfile, 'w', encoding='utf-8') as file: - yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False) + yaml.safe_dump(self.varstat, file, default_flow_style=False, sort_keys=False) def plot(self, mapfile=None, figformat='pdf'): """Generate the heatmap for performance indices. @@ -189,6 +194,14 @@ def plot(self, mapfile=None, figformat='pdf'): """ # to this date, only EC23 support comparison with CMIP6 data if self.diag.climatology == 'EC23': + + # load yaml file if is missing + if not self.varstat: + yamlfile = self.yamlfile() + self.loggy.info('Loading the stored data from the yaml file %s', yamlfile) + with open(yamlfile, 'r', encoding='utf-8') as file: + self.varstat = yaml.safe_load(file) + # prepare the data for the heatmap from the original yaml dictionaries data2plot, cmip6, longnames = prepare_clim_dictionaries_pi(data=self.varstat, clim=self.piclim, @@ -199,6 +212,9 @@ def plot(self, mapfile=None, figformat='pdf'): mapfile = os.path.join(self.diag.figdir, f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}') heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=self.diag, longnames=longnames, filemap=mapfile) + else: + self.loggy.warning('Only EC23 climatology is supported for comparison with CMIP6 data.') + self.toc('Plotting') @staticmethod diff --git a/ecmean/utils/clim-create.py b/ecmean/utils/clim-create.py index dab436a..4a16f95 100755 --- a/ecmean/utils/clim-create.py +++ b/ecmean/utils/clim-create.py @@ -9,6 +9,7 @@ __author__ = "Paolo Davini (p.davini@isac.cnr.it), Sep 2022." import tempfile +import sys from ecmean.libs.files import load_yaml from ecmean.libs.units import units_extra_definition from ecmean.libs.ncfixers import xr_preproc diff --git a/tests/test_global_mean.py b/tests/test_global_mean.py index 1200750..a924649 100644 --- a/tests/test_global_mean.py +++ b/tests/test_global_mean.py @@ -3,7 +3,7 @@ import os import subprocess import xarray as xr -from ecmean.global_mean import global_mean +from ecmean.global_mean import global_mean, GlobalMean from ecmean.libs.ncfixers import xr_preproc from ecmean.libs.general import are_dicts_equal from ecmean.libs.files import load_yaml @@ -97,3 +97,10 @@ def test_global_mean_CMIP6(): data2 = load_gm_txt_files(file2) assert are_dicts_equal(data1, data2, TOLERANCE), "TXT files are not identical." + +def test_gm_plot(tmp_path): + outputfile = tmp_path / 'Global_Mean_Heatmap.pdf' + gm = GlobalMean('amip', 1990, 1990, config='tests/config.yml', loglevel='info') + gm.prepare() + gm.plot(mapfile=outputfile) + assert os.path.isfile(outputfile), "Plot not created." diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py index db60696..1e14dd8 100644 --- a/tests/test_performance_indices.py +++ b/tests/test_performance_indices.py @@ -7,7 +7,7 @@ import pytest import xarray as xr import yaml -from ecmean import performance_indices +from ecmean import performance_indices, PerformanceIndices from ecmean.libs.ncfixers import xr_preproc from ecmean.libs.general import are_dicts_equal #from ecmean.libs.plotting import heatmap_comparison_pi, prepare_clim_dictionaries_pi @@ -77,16 +77,9 @@ def test_performance_indices_amip_xdataset(clim): assert are_dicts_equal(data1, data2, TOLERANCE),f"YAML files are not identical.\nData1: {data1}\nData2: {data2}" -# def test_pi_plot(tmp_path): -# file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml' -# with open(file1, 'r', encoding='utf8') as f1: -# data = yaml.safe_load(f1) -# file2 = 'ecmean/climatology/EC23/pi_climatology_EC23.yml' -# with open(file2, 'r', encoding='utf8') as f1: -# clim = yaml.safe_load(f1) -# data_dict, clim_dict, longnames = prepare_clim_dictionaries_pi(data=data, clim=clim, shortnames=data.keys()) - -# heatmap_comparison_pi(data_dict=data_dict, cmip6_dict=clim_dict, -# title='CMIP6 RELATIVE PI test', longnames=longnames, -# mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf')) -# assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created." +def test_pi_plot(tmp_path): + outputfile = tmp_path / 'PI4_heatmap.png' + pi = PerformanceIndices('amip', 1990, 1990, config='tests/config.yml') + pi.prepare() + pi.plot(mapfile=outputfile) + assert os.path.isfile(outputfile), "Plot not created." From 894a4aad8d48a44622437b4442b7ae7802a0d8cb Mon Sep 17 00:00:00 2001 From: oloapinivad Date: Fri, 20 Dec 2024 16:47:59 +0100 Subject: [PATCH 26/26] add raise --- ecmean/global_mean.py | 7 +++++-- ecmean/performance_indices.py | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py index a27d23a..89bbe35 100755 --- a/ecmean/global_mean.py +++ b/ecmean/global_mean.py @@ -215,8 +215,11 @@ def plot(self, mapfile=None, figformat='pdf'): if not self.varmean: yamlfile = self.yamlfile() self.loggy.info('Loading the stored data from the yaml file %s', yamlfile) - with open(yamlfile, 'r', encoding='utf-8') as file: - self.varmean = yaml.safe_load(file) + if os.path.isfile(yamlfile): + with open(yamlfile, 'r', encoding='utf-8') as file: + self.varmean = yaml.safe_load(file) + else: + raise FileNotFoundError(f'YAML file {yamlfile} not found') # prepare the dictionaries for the plotting obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref, diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py index d0553b3..91372a8 100755 --- a/ecmean/performance_indices.py +++ b/ecmean/performance_indices.py @@ -199,8 +199,11 @@ def plot(self, mapfile=None, figformat='pdf'): if not self.varstat: yamlfile = self.yamlfile() self.loggy.info('Loading the stored data from the yaml file %s', yamlfile) - with open(yamlfile, 'r', encoding='utf-8') as file: - self.varstat = yaml.safe_load(file) + if os.path.isfile(yamlfile): + with open(yamlfile, 'r', encoding='utf-8') as file: + self.varstat = yaml.safe_load(file) + else: + raise FileNotFoundError(f'File {yamlfile} not found.') # prepare the data for the heatmap from the original yaml dictionaries data2plot, cmip6, longnames = prepare_clim_dictionaries_pi(data=self.varstat,