From 5bdb701c4780fd77fd4c960c63ef5e7e622090c2 Mon Sep 17 00:00:00 2001
From: mnurisso <m.nurisso@isac.cnr.it>
Date: Fri, 27 Sep 2024 12:34:42 +0300
Subject: [PATCH 01/26] initial linting

---
 ecmean/libs/plotting.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py
index 1004d87..8ed4cfa 100644
--- a/ecmean/libs/plotting.py
+++ b/ecmean/libs/plotting.py
@@ -15,8 +15,7 @@
 import numpy as np
 
 
-
-def heatmap_comparison_pi(relative_table, diag, filemap, size_model = 14):
+def heatmap_comparison_pi(relative_table, diag, filemap, size_model=14):
     """Function to produce a heatmap - seaborn based - for Performance Indices
     based on CMIP6 ratio"""
 
@@ -32,7 +31,6 @@ def heatmap_comparison_pi(relative_table, diag, filemap, size_model = 14):
     tictoc = [0, 0.25, 0.5, 0.75, 1, 2, 3, 4, 5]
     title = 'CMIP6 RELATIVE PI'
 
-
     # axs.subplots_adjust(bottom=0.2)
     # pal = sns.diverging_palette(h_neg=130, h_pos=10, s=99, l=55, sep=3, as_cmap=True)
     tot = len(myfield.columns)
@@ -44,7 +42,7 @@ def heatmap_comparison_pi(relative_table, diag, filemap, size_model = 14):
                         cbar_kws={"ticks": tictoc, 'label': title},
                         ax=axs, annot=True, linewidth=0.5, fmt='.2f',
                         annot_kws={'fontsize': size_model, 'fontweight': 'bold'})
-    
+
     chart = chart.set_facecolor('whitesmoke')
     axs.set_title(f'{title} {diag.modelname} {diag.expname} {diag.year1} {diag.year2}', fontsize=25)
     axs.vlines(list(range(sss, tot + sss, sss)), ymin=-1, ymax=len(myfield.index), colors='k')
@@ -62,8 +60,8 @@ def heatmap_comparison_pi(relative_table, diag, filemap, size_model = 14):
     plt.close()
 
 
-def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addnan = True,
-                          size_model = 14, size_obs = 8):
+def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addnan=True,
+                          size_model=14, size_obs=8):
     """Function to produce a heatmap - seaborn based - for Global Mean
     based on season-averaged standard deviation ratio"""
 
@@ -96,10 +94,10 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addn
                         fmt='.2f', cmap=pal)
     if addnan:
         empty = np.where(clean.isna(), 0, np.nan)
-        empty = np.where(data_table[mask]==0, np.nan, empty)
+        empty = np.where(data_table[mask] == 0, np.nan, empty)
         chart = sns.heatmap(empty, annot=data_table[mask], fmt='.2f',
                             vmin=-thr - 0.5, vmax=thr + 0.5, center=0,
-                            annot_kws={'va': 'bottom', 'fontsize': size_model, 'color':'dimgrey'}, cbar=False,
+                            annot_kws={'va': 'bottom', 'fontsize': size_model, 'color': 'dimgrey'}, cbar=False,
                             cmap=ListedColormap(['none']))
     chart = sns.heatmap(clean, annot=mean_table[mask], vmin=-thr - 0.5, vmax=thr + 0.5, center=0,
                         annot_kws={'va': 'top', 'fontsize': size_obs, 'fontstyle': 'italic'},
@@ -108,8 +106,8 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addn
         empty = np.where(clean.isna(), 0, np.nan)
         empty = np.where(mean_table[mask].isna(), np.nan, empty)
         chart = sns.heatmap(empty, annot=mean_table[mask], vmin=-thr - 0.5, vmax=thr + 0.5, center=0,
-                        annot_kws={'va': 'top', 'fontsize': size_obs, 'fontstyle': 'italic', 'color':'dimgrey'},
-                        fmt='.2f', cmap=ListedColormap(['none']), cbar=False)
+                            annot_kws={'va': 'top', 'fontsize': size_obs, 'fontstyle': 'italic', 'color': 'dimgrey'},
+                            fmt='.2f', cmap=ListedColormap(['none']), cbar=False)
 
     chart = chart.set_facecolor('whitesmoke')
     axs.set_title(f'{title} {diag.modelname} {diag.expname} {diag.year1} {diag.year2}', fontsize=25)

From 35aa21328585385d9386287d107c48e2030c33f4 Mon Sep 17 00:00:00 2001
From: mnurisso <m.nurisso@isac.cnr.it>
Date: Fri, 27 Sep 2024 13:17:35 +0300
Subject: [PATCH 02/26] plot takes now a dictionary with diag infos

---
 ecmean/global_mean.py         |  7 +++---
 ecmean/libs/plotting.py       | 43 ++++++++++++++++++++++++-----------
 ecmean/performance_indices.py | 30 +++++++++++-------------
 3 files changed, 47 insertions(+), 33 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index 191bdff..511e72a 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -331,8 +331,11 @@ def global_mean(exp, year1, year2,
         f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf'
     loggy.info('Figure file is: %s', mapfile)
 
+    diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,
+                 'year1': diag.year1, 'year2': diag.year2}
+
     heatmap_comparison_gm(data_table, mean_table, std_table,
-                          diag, mapfile, addnan=diag.addnan)
+                          diag_dict, mapfile, addnan=diag.addnan)
 
     # Print appending one line to table (for tuning)
     if diag.ftable:
@@ -345,7 +348,6 @@ def global_mean(exp, year1, year2,
     print('ECmean4 Global Mean succesfully computed!')
 
 
-
 def gm_entry_point():
     """
     Command line interface to run the global_mean function
@@ -362,4 +364,3 @@ def gm_entry_point():
                 model=args.model, ensemble=args.ensemble,
                 addnan=args.addnan,
                 outputdir=args.outputdir)
-
diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py
index 8ed4cfa..a3ad3ce 100644
--- a/ecmean/libs/plotting.py
+++ b/ecmean/libs/plotting.py
@@ -15,9 +15,17 @@
 import numpy as np
 
 
-def heatmap_comparison_pi(relative_table, diag, filemap, size_model=14):
-    """Function to produce a heatmap - seaborn based - for Performance Indices
-    based on CMIP6 ratio"""
+def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=14):
+    """
+    Function to produce a heatmap - seaborn based - for Performance Indices
+    based on CMIP6 ratio
+
+    Args:
+        relative_table (pandas.DataFrame): table of relative performance indices
+        diag (dict): dictionary containing diagnostic information
+        filemap (str): path to save the plot
+        size_model (int): size of the PIs in the plot
+    """
 
     # defining plot size
     myfield = relative_table
@@ -31,20 +39,17 @@ def heatmap_comparison_pi(relative_table, diag, filemap, size_model=14):
     tictoc = [0, 0.25, 0.5, 0.75, 1, 2, 3, 4, 5]
     title = 'CMIP6 RELATIVE PI'
 
-    # axs.subplots_adjust(bottom=0.2)
-    # pal = sns.diverging_palette(h_neg=130, h_pos=10, s=99, l=55, sep=3, as_cmap=True)
     tot = len(myfield.columns)
     sss = len(set([tup[1] for tup in myfield.columns]))
     divnorm = TwoSlopeNorm(vmin=thr[0], vcenter=thr[1], vmax=thr[2])
     pal = sns.color_palette("Spectral_r", as_cmap=True)
-    # pal = sns.diverging_palette(220, 20, as_cmap=True)
     chart = sns.heatmap(myfield, norm=divnorm, cmap=pal,
                         cbar_kws={"ticks": tictoc, 'label': title},
                         ax=axs, annot=True, linewidth=0.5, fmt='.2f',
                         annot_kws={'fontsize': size_model, 'fontweight': 'bold'})
 
     chart = chart.set_facecolor('whitesmoke')
-    axs.set_title(f'{title} {diag.modelname} {diag.expname} {diag.year1} {diag.year2}', fontsize=25)
+    axs.set_title(f'{title} {diag["modelname"]} {diag["expname"]} {diag["year1"]} {diag["year2"]}', fontsize=25)
     axs.vlines(list(range(sss, tot + sss, sss)), ymin=-1, ymax=len(myfield.index), colors='k')
     axs.hlines(len(myfield.index) - 1, xmin=-1, xmax=len(myfield.columns), colors='purple', lw=2)
     names = [' '.join(x) for x in myfield.columns]
@@ -60,10 +65,22 @@ def heatmap_comparison_pi(relative_table, diag, filemap, size_model=14):
     plt.close()
 
 
-def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addnan=True,
-                          size_model=14, size_obs=8):
-    """Function to produce a heatmap - seaborn based - for Global Mean
-    based on season-averaged standard deviation ratio"""
+def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap: str,
+                          addnan=True, size_model=14, size_obs=8):
+    """
+    Function to produce a heatmap - seaborn based - for Global Mean
+    based on season-averaged standard deviation ratio
+
+    Args:
+        data_table (pandas.DataFrame): table of model data
+        mean_table (pandas.DataFrame): table of observations
+        std_table (pandas.DataFrame): table of standard deviation
+        diag (dict): dictionary containing diagnostic information
+        filemap (str): path to save the plot
+        addnan (bool): add to the final plots also fields which cannot be compared against observations
+        size_model (int): size of the model values in the plot
+        size_obs (int): size of the observation values in the plot
+    """
 
     # define array
     ratio = (data_table - mean_table) / std_table
@@ -76,7 +93,7 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addn
     # for dimension of plots
     xfig = len(clean.columns)
     yfig = len(clean.index)
-    fig, axs = plt.subplots(1, 1, sharey=True, tight_layout=True, figsize=(xfig+5, yfig+2))
+    _, axs = plt.subplots(1, 1, sharey=True, tight_layout=True, figsize=(xfig+5, yfig+2))
 
     title = 'GLOBAL MEAN'
 
@@ -110,7 +127,7 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag, filemap, addn
                             fmt='.2f', cmap=ListedColormap(['none']), cbar=False)
 
     chart = chart.set_facecolor('whitesmoke')
-    axs.set_title(f'{title} {diag.modelname} {diag.expname} {diag.year1} {diag.year2}', fontsize=25)
+    axs.set_title(f'{title} {diag["modelname"]} {diag["expname"]} {diag["year1"]} {diag["year2"]}', fontsize=25)
     axs.vlines(list(range(sss, tot + sss, sss)), ymin=-1, ymax=len(clean.index), colors='k')
     names = [' '.join(x) for x in clean.columns]
     axs.set_xticks([x + .5 for x in range(len(names))], names, rotation=45, ha='right', fontsize=16)
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index e5c6367..43bb451 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -35,6 +35,7 @@
 
 dask.config.set(scheduler="synchronous")
 
+
 def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist):
     """Main parallel diagnostic worker for performance indices
 
@@ -183,21 +184,21 @@ def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist):
 
                     # horizontal averaging with land-sea mask
                     else:
-                        
+
                         complete = (final - cfield)**2 / vfield
                         outarray = mask_field(xfield=complete,
-                                            mask_type=piclim[var]['mask'],
-                                            dom=domain, mask=domain_mask)
+                                              mask_type=piclim[var]['mask'],
+                                              dom=domain, mask=domain_mask)
 
                     # loop on different regions
                     for region in diag.regions:
 
                         slicearray = select_region(outarray, region)
- 
+
                         # latitude-based averaging
                         weights = np.cos(np.deg2rad(slicearray.lat))
                         out = slicearray.weighted(weights).mean().data
-                         # store the PI
+                        # store the PI
                         result[season][region] = round(float(out), 3)
 
                         # diagnostic
@@ -294,15 +295,12 @@ def performance_indices(exp, year1, year2,
     loggy.info('Preproc in {:.4f} seconds'.format(toc - tic))
     tic = time()
 
-  
     # loop on the variables, create the parallel process
     for varlist in weight_split(diag.field_all, diag.numproc):
-        #print(varlist)
-
+        # print(varlist)
 
-        core = Process(
-            target=pi_worker, args=(util_dictionary, piclim,
-                face, diag, diag.field_3d, varstat, varlist))
+        core = Process(target=pi_worker, args=(util_dictionary, piclim,
+                       face, diag, diag.field_3d, varstat, varlist))
         core.start()
         processes.append(core)
 
@@ -310,7 +308,6 @@ def performance_indices(exp, year1, year2,
     for proc in processes:
         proc.join()
 
-
     toc = time()
     # evaluate tic-toc time  of execution
     loggy.info('Done in {:.4f} seconds'.format(toc - tic) + ' with ' + str(diag.numproc) + ' processors')
@@ -341,7 +338,7 @@ def performance_indices(exp, year1, year2,
         # set longname, reorganize the dictionaries
         plotted = {}
         cmip6 = {}
-        longnames =[]
+        longnames = []
         for var in diag.field_all:
             plotted[piclim[var]['longname']] = ordered[var]
             cmip6[piclim[var]['longname']] = filt_piclim[var]
@@ -351,7 +348,6 @@ def performance_indices(exp, year1, year2,
         data_table = dict_to_dataframe(plotted)
         loggy.debug(data_table)
 
-
         # relative pi with re-ordering of rows
         cmip6_table = data_table.div(dict_to_dataframe(cmip6).reindex(longnames))
 
@@ -363,12 +359,13 @@ def performance_indices(exp, year1, year2,
         cmip6_table = cmip6_table[lll]
         loggy.debug(cmip6_table)
 
-
         # call the heatmap routine for a plot
         mapfile = diag.figdir / \
             f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf'
         # heatmap_comparison_old(data_table, diag, mapfile)
-        heatmap_comparison_pi(cmip6_table, diag, mapfile)
+        diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,
+                     'year1': diag.year1, 'year2': diag.year2}
+        heatmap_comparison_pi(cmip6_table, diag_dict, mapfile)
 
     toc = time()
     # evaluate tic-toc time of postprocessing
@@ -391,4 +388,3 @@ def pi_entry_point():
                         interface=args.interface, config=args.config,
                         model=args.model, ensemble=args.ensemble,
                         outputdir=args.outputdir)
-

From 0cc274f3d7aa65dfa3b1dfc72219d148811820ef Mon Sep 17 00:00:00 2001
From: mnurisso <m.nurisso@isac.cnr.it>
Date: Fri, 27 Sep 2024 13:17:48 +0300
Subject: [PATCH 03/26] test linting

---
 tests/test_global_mean.py         | 12 +++++++-----
 tests/test_performance_indices.py |  3 ---
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/tests/test_global_mean.py b/tests/test_global_mean.py
index b1a8f78..7bdc9e0 100644
--- a/tests/test_global_mean.py
+++ b/tests/test_global_mean.py
@@ -13,6 +13,7 @@
 # set up coverage env var
 env = {**os.environ, "COVERAGE_PROCESS_START": ".coveragerc"}
 
+
 # Open the text file for reading
 def load_gm_txt_files(textfile):
     """
@@ -26,18 +27,19 @@ def load_gm_txt_files(textfile):
         for line in file:
             # Remove leading and trailing whitespace and split the line by '|'
             columns = line.strip().split('|')
-            
+
             # Check if there are at least 5 columns (including the header row)
             if len(columns) >= 5:
                 # Extract the first and fourth columns and remove leading/trailing whitespace
                 variable = columns[1].strip()
                 value = columns[4].strip()
-                
+
                 # Add the data to the dictionary if it's not empty
                 if variable and value:
                     data_dict[variable] = value
     return data_dict
 
+
 # call on coupled ECE using parser and debug mode
 def test_cmd_global_mean_coupled():
     file1 = 'tests/table/global_mean_cpld_EC-Earth4_r1i1p1f1_1990_1990.txt'
@@ -47,7 +49,7 @@ def test_cmd_global_mean_coupled():
     subprocess.run(['global_mean', 'cpld', '1990', '1990', '-j', '2',
                     '-c', 'tests/config.yml', '-t', '-v', 'debug'],
                     env=env, check=True)
-    
+
     data1 = load_gm_txt_files(file1)
     data2 = load_gm_txt_files(file2)
 
@@ -62,12 +64,13 @@ def test_global_mean_amip():
         os.remove(file1)
     global_mean(exp='amip', year1=1990, year2=1990, numproc=1, config='tests/config.yml',
                 line=True, addnan=True)
-    
+
     data1 = load_gm_txt_files(file1)
     data2 = load_gm_txt_files(file2)
 
     assert are_dicts_equal(data1, data2, TOLERANCE), "TXT files are not identical."
 
+
 # call on amip ECE using the xdataset option
 def test_global_mean_amip_xdataset():
     file1 = 'tests/table/global_mean_amip_EC-Earth4_r1i1p1f1_1990_1990.txt'
@@ -95,4 +98,3 @@ def test_global_mean_CMIP6():
     data2 = load_gm_txt_files(file2)
 
     assert are_dicts_equal(data1, data2, TOLERANCE), "TXT files are not identical."
-
diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py
index a3cfcab..c62a67f 100644
--- a/tests/test_performance_indices.py
+++ b/tests/test_performance_indices.py
@@ -71,6 +71,3 @@ def test_performance_indices_amip_xdataset(clim):
         data2 = yaml.safe_load(f2)
 
     assert are_dicts_equal(data1, data2, TOLERANCE), "YAML files are not identical."
-
-
-

From 35d8d55cf0c28f1d2c9cefa69828e1e734ff9e43 Mon Sep 17 00:00:00 2001
From: mnurisso <m.nurisso@isac.cnr.it>
Date: Fri, 27 Sep 2024 14:52:05 +0300
Subject: [PATCH 04/26] title kwargs

---
 ecmean/libs/plotting.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py
index a3ad3ce..827c902 100644
--- a/ecmean/libs/plotting.py
+++ b/ecmean/libs/plotting.py
@@ -15,7 +15,7 @@
 import numpy as np
 
 
-def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=14):
+def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=14, **kwargs):
     """
     Function to produce a heatmap - seaborn based - for Performance Indices
     based on CMIP6 ratio
@@ -25,6 +25,9 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1
         diag (dict): dictionary containing diagnostic information
         filemap (str): path to save the plot
         size_model (int): size of the PIs in the plot
+
+    Keyword Args:
+        title (str): title of the plot, overrides default title
     """
 
     # defining plot size
@@ -37,7 +40,12 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1
 
     thr = [0, 1, 5]
     tictoc = [0, 0.25, 0.5, 0.75, 1, 2, 3, 4, 5]
-    title = 'CMIP6 RELATIVE PI'
+
+    if 'title' in kwargs:
+        title = kwargs['title']
+    else:
+        title = 'CMIP6 RELATIVE PI'
+        title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + diag['year1'] + ' ' + diag['year2']
 
     tot = len(myfield.columns)
     sss = len(set([tup[1] for tup in myfield.columns]))
@@ -49,7 +57,7 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1
                         annot_kws={'fontsize': size_model, 'fontweight': 'bold'})
 
     chart = chart.set_facecolor('whitesmoke')
-    axs.set_title(f'{title} {diag["modelname"]} {diag["expname"]} {diag["year1"]} {diag["year2"]}', fontsize=25)
+    axs.set_title(title, fontsize=25)
     axs.vlines(list(range(sss, tot + sss, sss)), ymin=-1, ymax=len(myfield.index), colors='k')
     axs.hlines(len(myfield.index) - 1, xmin=-1, xmax=len(myfield.columns), colors='purple', lw=2)
     names = [' '.join(x) for x in myfield.columns]
@@ -66,7 +74,7 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1
 
 
 def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap: str,
-                          addnan=True, size_model=14, size_obs=8):
+                          addnan=True, size_model=14, size_obs=8, **kwargs):
     """
     Function to produce a heatmap - seaborn based - for Global Mean
     based on season-averaged standard deviation ratio
@@ -80,6 +88,9 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap
         addnan (bool): add to the final plots also fields which cannot be compared against observations
         size_model (int): size of the model values in the plot
         size_obs (int): size of the observation values in the plot
+
+    Keyword Args:
+        title (str): title of the plot, overrides default title
     """
 
     # define array
@@ -95,7 +106,11 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap
     yfig = len(clean.index)
     _, axs = plt.subplots(1, 1, sharey=True, tight_layout=True, figsize=(xfig+5, yfig+2))
 
-    title = 'GLOBAL MEAN'
+    if 'title' in kwargs:
+        title = kwargs['title']
+    else:
+        title = 'GLOBAL MEAN'
+        title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + diag['year1'] + ' ' + diag['year2']
 
     # set color range and palette
     thr = 10
@@ -127,7 +142,7 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap
                             fmt='.2f', cmap=ListedColormap(['none']), cbar=False)
 
     chart = chart.set_facecolor('whitesmoke')
-    axs.set_title(f'{title} {diag["modelname"]} {diag["expname"]} {diag["year1"]} {diag["year2"]}', fontsize=25)
+    axs.set_title(title, fontsize=25)
     axs.vlines(list(range(sss, tot + sss, sss)), ymin=-1, ymax=len(clean.index), colors='k')
     names = [' '.join(x) for x in clean.columns]
     axs.set_xticks([x + .5 for x in range(len(names))], names, rotation=45, ha='right', fontsize=16)

From 893b83f39d8c4b7472983530878bee1997b9ef8c Mon Sep 17 00:00:00 2001
From: mnurisso <m.nurisso@isac.cnr.it>
Date: Fri, 27 Sep 2024 15:12:52 +0300
Subject: [PATCH 05/26] kwargs refined

---
 ecmean/libs/plotting.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py
index 827c902..06bc6c4 100644
--- a/ecmean/libs/plotting.py
+++ b/ecmean/libs/plotting.py
@@ -13,6 +13,7 @@
 from matplotlib.colors import ListedColormap
 import seaborn as sns
 import numpy as np
+from .general import dict_to_dataframe
 
 
 def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=14, **kwargs):
@@ -21,7 +22,7 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1
     based on CMIP6 ratio
 
     Args:
-        relative_table (pandas.DataFrame): table of relative performance indices
+        relative_table (pandas.DataFrame or dict): table of relative performance indices
         diag (dict): dictionary containing diagnostic information
         filemap (str): path to save the plot
         size_model (int): size of the PIs in the plot
@@ -29,6 +30,8 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1
     Keyword Args:
         title (str): title of the plot, overrides default title
     """
+    if isinstance(relative_table, dict):
+        relative_table = dict_to_dataframe(relative_table)
 
     # defining plot size
     myfield = relative_table
@@ -45,7 +48,7 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1
         title = kwargs['title']
     else:
         title = 'CMIP6 RELATIVE PI'
-        title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + diag['year1'] + ' ' + diag['year2']
+        title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + str(diag['year1']) + ' ' + str(diag['year2'])
 
     tot = len(myfield.columns)
     sss = len(set([tup[1] for tup in myfield.columns]))
@@ -80,9 +83,9 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap
     based on season-averaged standard deviation ratio
 
     Args:
-        data_table (pandas.DataFrame): table of model data
-        mean_table (pandas.DataFrame): table of observations
-        std_table (pandas.DataFrame): table of standard deviation
+        data_table (pandas.DataFrame or dict): table of model data
+        mean_table (pandas.DataFrame or dict): table of observations
+        std_table (pandas.DataFrame or dict): table of standard deviation
         diag (dict): dictionary containing diagnostic information
         filemap (str): path to save the plot
         addnan (bool): add to the final plots also fields which cannot be compared against observations
@@ -92,6 +95,9 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap
     Keyword Args:
         title (str): title of the plot, overrides default title
     """
+    for tab in [data_table, mean_table, std_table]:
+        if isinstance(tab, dict):
+            tab = dict_to_dataframe(tab)
 
     # define array
     ratio = (data_table - mean_table) / std_table
@@ -110,7 +116,7 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap
         title = kwargs['title']
     else:
         title = 'GLOBAL MEAN'
-        title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + diag['year1'] + ' ' + diag['year2']
+        title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + str(diag['year1']) + ' ' + str(diag['year2'])
 
     # set color range and palette
     thr = 10

From d9916672d9269d6ecee26bb7f0b6ff9b32ba72e2 Mon Sep 17 00:00:00 2001
From: mnurisso <m.nurisso@isac.cnr.it>
Date: Fri, 27 Sep 2024 15:14:19 +0300
Subject: [PATCH 06/26] added PI plot test

---
 tests/test_performance_indices.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py
index c62a67f..44a3971 100644
--- a/tests/test_performance_indices.py
+++ b/tests/test_performance_indices.py
@@ -10,6 +10,7 @@
 from ecmean.performance_indices import performance_indices
 from ecmean.libs.ncfixers import xr_preproc
 from ecmean.libs.general import are_dicts_equal
+from ecmean.libs.plotting import heatmap_comparison_pi
 
 # set TOLERANCE
 TOLERANCE = 1e-3
@@ -71,3 +72,12 @@ def test_performance_indices_amip_xdataset(clim):
         data2 = yaml.safe_load(f2)
 
     assert are_dicts_equal(data1, data2, TOLERANCE), "YAML files are not identical."
+
+
+def test_pi_plot():
+    file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml'
+    with open(file1, 'r', encoding='utf8') as f1:
+        data1 = yaml.safe_load(f1)
+    heatmap_comparison_pi(data1, {'modelname': 'EC-Earth4', 'expname': 'amip', 'year1': '1990', 'year2': '1990'}, 'tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png')
+    assert os.path.isfile('tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png'), "Plot not created."
+    os.remove('tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png')

From 93960c5aaa501d9b4e906edb94d2dd1851e3d04a Mon Sep 17 00:00:00 2001
From: mnurisso <m.nurisso@isac.cnr.it>
Date: Fri, 27 Sep 2024 16:24:47 +0300
Subject: [PATCH 07/26] gm linting

---
 ecmean/global_mean.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index 511e72a..b6d2969 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -37,6 +37,7 @@
 
 dask.config.set(scheduler="synchronous")
 
+
 def gm_worker(util, ref, face, diag, varmean, vartrend, varlist):
     """Main parallel diagnostic worker for global mean
 
@@ -61,7 +62,6 @@ def gm_worker(util, ref, face, diag, varmean, vartrend, varlist):
         result = init_mydict(diag.seasons, diag.regions)
         trend = init_mydict(diag.seasons, diag.regions)
 
-
         # check if the variable is in the interface file
         if check_var_interface(var, face):
 
@@ -215,7 +215,7 @@ def global_mean(exp, year1, year2,
 
     # create util dictionary including mask and weights for both atmosphere
     # and ocean grids
-    util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], 
+    util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'],
                                 areas=True, remap=False)
 
     # main loop: manager is required for shared variables
@@ -225,14 +225,12 @@ def global_mean(exp, year1, year2,
     varmean = mgr.dict()
     vartrend = mgr.dict()
     processes = []
-    
-
 
     # loop on the variables, create the parallel process
     for varlist in weight_split(diag.var_all, diag.numproc):
         core = Process(
             target=gm_worker, args=(util_dictionary, ref, face, diag,
-                                            varmean, vartrend, varlist))
+                                    varmean, vartrend, varlist))
         core.start()
         processes.append(core)
 
@@ -327,8 +325,8 @@ def global_mean(exp, year1, year2,
     loggy.debug(data_table)
 
     # call the heatmap routine for a plot
-    mapfile = diag.figdir / \
-        f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf'
+    mapfile = os.join(diag.figdir,
+                      f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf')
     loggy.info('Figure file is: %s', mapfile)
 
     diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,

From c17026c0705815116dc8607d4ad08dfea8aca1d1 Mon Sep 17 00:00:00 2001
From: mnurisso <m.nurisso@isac.cnr.it>
Date: Fri, 27 Sep 2024 16:25:20 +0300
Subject: [PATCH 08/26] diag and filemap are not mandatory for external usage

---
 ecmean/libs/plotting.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py
index 06bc6c4..5fb9c97 100644
--- a/ecmean/libs/plotting.py
+++ b/ecmean/libs/plotting.py
@@ -16,7 +16,7 @@
 from .general import dict_to_dataframe
 
 
-def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=14, **kwargs):
+def heatmap_comparison_pi(relative_table, diag: dict = None, filemap: str = None, size_model=14, **kwargs):
     """
     Function to produce a heatmap - seaborn based - for Performance Indices
     based on CMIP6 ratio
@@ -70,6 +70,12 @@ def heatmap_comparison_pi(relative_table, diag: dict, filemap: str, size_model=1
     axs.figure.axes[-1].yaxis.label.set_size(15)
     axs.set(xlabel=None)
 
+    if filemap is None:
+        if diag is not None:
+            filemap = f'PI4_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf'
+        else:
+            filemap = 'PI4_heatmap.pdf'
+
     # save and close
     plt.savefig(filemap)
     plt.cla()

From 1f7596636978bea0b9b04692d871c1411d3eb2cc Mon Sep 17 00:00:00 2001
From: mnurisso <m.nurisso@isac.cnr.it>
Date: Fri, 27 Sep 2024 16:26:38 +0300
Subject: [PATCH 09/26] linting and correct arguments for heatmap (not
 positional anymore)

---
 ecmean/performance_indices.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index 43bb451..11277b9 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -360,12 +360,12 @@ def performance_indices(exp, year1, year2,
         loggy.debug(cmip6_table)
 
         # call the heatmap routine for a plot
-        mapfile = diag.figdir / \
-            f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf'
+        mapfile = os.join(diag.figdir,
+                          f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf')
         # heatmap_comparison_old(data_table, diag, mapfile)
         diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,
                      'year1': diag.year1, 'year2': diag.year2}
-        heatmap_comparison_pi(cmip6_table, diag_dict, mapfile)
+        heatmap_comparison_pi(cmip6_table, diag=diag_dict, filemap=mapfile)
 
     toc = time()
     # evaluate tic-toc time of postprocessing

From b24d89179858f323147c9a670175ddb28202c326 Mon Sep 17 00:00:00 2001
From: mnurisso <m.nurisso@isac.cnr.it>
Date: Fri, 27 Sep 2024 16:42:07 +0300
Subject: [PATCH 10/26] os management improved

---
 ecmean/global_mean.py         | 12 ++++++------
 ecmean/performance_indices.py |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index b6d2969..2d9f612 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -290,8 +290,8 @@ def global_mean(exp, year1, year2,
     head = head + ['Obs.', 'Dataset', 'Years']
 
     # write the file with tabulate
-    tablefile = diag.tabdir / \
-        f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.txt'
+    tablefile = os.path.join(diag.tabdir,
+                             f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.txt')
     loggy.info('Table file is: %s', tablefile)
     with open(tablefile, 'w', encoding='utf-8') as out:
         out.write(tabulate(global_table, headers=head, stralign='center', tablefmt='orgtbl'))
@@ -302,8 +302,8 @@ def global_mean(exp, year1, year2,
         ordered[var] = varmean[var]
 
     # dump the yaml file for global_mean, including all the seasons
-    yamlfile = diag.tabdir / \
-        f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.yml'
+    yamlfile = os.path.join(diag.tabdir,
+                            f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.yml')
     loggy.info('YAML file is: %s', tablefile)
     with open(yamlfile, 'w', encoding='utf-8') as file:
         yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False)
@@ -325,8 +325,8 @@ def global_mean(exp, year1, year2,
     loggy.debug(data_table)
 
     # call the heatmap routine for a plot
-    mapfile = os.join(diag.figdir,
-                      f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf')
+    mapfile = os.path.join(diag.figdir,
+                           f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf')
     loggy.info('Figure file is: %s', mapfile)
 
     diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index 11277b9..12a21a6 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -360,8 +360,8 @@ def performance_indices(exp, year1, year2,
         loggy.debug(cmip6_table)
 
         # call the heatmap routine for a plot
-        mapfile = os.join(diag.figdir,
-                          f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf')
+        mapfile = os.path.join(diag.figdir,
+                               f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf')
         # heatmap_comparison_old(data_table, diag, mapfile)
         diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,
                      'year1': diag.year1, 'year2': diag.year2}

From 5c27a94e71d38bc43d0ad309ec8fc227f0b75a43 Mon Sep 17 00:00:00 2001
From: mnurisso <m.nurisso@isac.cnr.it>
Date: Fri, 27 Sep 2024 16:50:13 +0300
Subject: [PATCH 11/26] tests update

---
 tests/test_performance_indices.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py
index 44a3971..0d47daa 100644
--- a/tests/test_performance_indices.py
+++ b/tests/test_performance_indices.py
@@ -43,12 +43,13 @@ def test_performance_indices_amip(clim):
 
     assert are_dicts_equal(data1, data2, TOLERANCE), "YAML files are not identical."
 
+
 # test performance_indices from commnand line + debug
 @pytest.mark.parametrize("clim", ['RK08', 'EC23'])
 def test_cmd_performance_indices_CMIP6(clim):
     subprocess.run(['performance_indices', 'historical', '1990', '1990', '-j', '2', '-c',
-                   'tests/config_CMIP6.yml', '-k', clim, '-m', 'EC-Earth3', '-v', 'debug'],
-                    env=env, check=True)
+                    'tests/config_CMIP6.yml', '-k', clim, '-m', 'EC-Earth3', '-v', 'debug'],
+                   env=env, check=True)
     file1 = 'tests/table/PI4_' + clim + '_historical_EC-Earth3_r1i1p1f1_1990_1990.yml'
     file2 = 'tests/table/PI4_' + clim + '_CMIP6_1990_1990.ref'
     with open(file1, 'r', encoding='utf8') as f1, open(file2, 'r', encoding='utf8') as f2:
@@ -57,6 +58,7 @@ def test_cmd_performance_indices_CMIP6(clim):
 
     assert are_dicts_equal(data1, data2, TOLERANCE), "YAML files are not identical."
 
+
 # test on amip but with access from xarray dataset
 @pytest.mark.parametrize("clim", ['RK08', 'EC23'])
 def test_performance_indices_amip_xdataset(clim):
@@ -78,6 +80,6 @@ def test_pi_plot():
     file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml'
     with open(file1, 'r', encoding='utf8') as f1:
         data1 = yaml.safe_load(f1)
-    heatmap_comparison_pi(data1, {'modelname': 'EC-Earth4', 'expname': 'amip', 'year1': '1990', 'year2': '1990'}, 'tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png')
-    assert os.path.isfile('tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png'), "Plot not created."
-    os.remove('tests/pluto/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.png')
+    heatmap_comparison_pi(data1, title='CMIP6 RELATIVE PI test')
+    assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created."
+    os.remove('PI4_heatmap.pdf')

From afc5bc0ec38b06fba25b008a540c586df62a4025 Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Wed, 18 Dec 2024 15:37:27 +0100
Subject: [PATCH 12/26] getting closer

---
 ecmean/global_mean.py             | 22 +++++--------
 ecmean/libs/plotting.py           | 51 +++++++++++++++++++++++--------
 ecmean/performance_indices.py     | 31 ++++++-------------
 tests/test_performance_indices.py |  9 ++++--
 4 files changed, 61 insertions(+), 52 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index 2d9f612..b5873f3 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -309,31 +309,23 @@ def global_mean(exp, year1, year2,
         yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False)
 
     # set longname, get units
-    plotted = {}
+    data2plot = {}
     units_list = []
     for var in diag.var_all:
-        plotted[ref[var]['longname']] = ordered[var]
+        data2plot[ref[var]['longname']] = ordered[var]
         units_list = units_list + [ref[var]['units']]
 
-    # convert the three dictionary to pandas and then add units
-    data_table = dict_to_dataframe(plotted)
-    mean_table = dict_to_dataframe(obsmean)
-    std_table = dict_to_dataframe(obsstd)
-    for table in [data_table, mean_table, std_table]:
-        table.index = table.index + ' [' + units_list + ']'
-
-    loggy.debug(data_table)
-
     # call the heatmap routine for a plot
     mapfile = os.path.join(diag.figdir,
                            f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf')
     loggy.info('Figure file is: %s', mapfile)
 
     diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,
-                 'year1': diag.year1, 'year2': diag.year2}
+                 'year1': diag.year1, 'year2': diag.year2,
+                 'units_list': units_list}
 
-    heatmap_comparison_gm(data_table, mean_table, std_table,
-                          diag_dict, mapfile, addnan=diag.addnan)
+    heatmap_comparison_gm(data_dict=data2plot, mean_dict=obsmean, std_dict=obsstd,
+                          diag=diag_dict, filemap=mapfile, addnan=diag.addnan)
 
     # Print appending one line to table (for tuning)
     if diag.ftable:
@@ -342,7 +334,7 @@ def global_mean(exp, year1, year2,
 
     toc = time()
     # evaluate tic-toc time of postprocessing
-    loggy.info(f"Postproc done in {toc - tic:.4f} seconds")
+    loggy.info("Postproc done in %.4f seconds", toc - tic)
     print('ECmean4 Global Mean succesfully computed!')
 
 
diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py
index 5fb9c97..1df0fc1 100644
--- a/ecmean/libs/plotting.py
+++ b/ecmean/libs/plotting.py
@@ -8,21 +8,24 @@
 ##################
 
 import textwrap
+import logging
 from matplotlib.colors import TwoSlopeNorm
 import matplotlib.pyplot as plt
 from matplotlib.colors import ListedColormap
 import seaborn as sns
 import numpy as np
-from .general import dict_to_dataframe
+from ecmean.libs.general import dict_to_dataframe
 
+loggy = logging.getLogger(__name__)
 
-def heatmap_comparison_pi(relative_table, diag: dict = None, filemap: str = None, size_model=14, **kwargs):
+def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str = None, size_model=14, **kwargs):
     """
     Function to produce a heatmap - seaborn based - for Performance Indices
     based on CMIP6 ratio
 
     Args:
-        relative_table (pandas.DataFrame or dict): table of relative performance indices
+        data_dict (dict): dictionary of absolute performance indices
+        cmip6_dict (dict): dictionary of CMIP6 performance indices
         diag (dict): dictionary containing diagnostic information
         filemap (str): path to save the plot
         size_model (int): size of the PIs in the plot
@@ -30,8 +33,23 @@ def heatmap_comparison_pi(relative_table, diag: dict = None, filemap: str = None
     Keyword Args:
         title (str): title of the plot, overrides default title
     """
-    if isinstance(relative_table, dict):
-        relative_table = dict_to_dataframe(relative_table)
+
+    # convert output dictionary to pandas dataframe
+    data_table = dict_to_dataframe(data_dict)
+    loggy.debug("Data table")
+    loggy.debug(data_table)
+
+    # relative pi with re-ordering of rows
+    relative_table = data_table.div(dict_to_dataframe(cmip6_dict).reindex(diag['longnames']))
+
+    # compute the total PI mean
+    relative_table.loc['Total PI'] = relative_table.mean()
+
+    # reordering columns
+    lll = [(x, y) for x in diag['seasons'] for y in diag['regions']]
+    relative_table = relative_table[lll]
+    loggy.debug("Relative table")
+    loggy.debug(relative_table)
 
     # defining plot size
     myfield = relative_table
@@ -82,16 +100,17 @@ def heatmap_comparison_pi(relative_table, diag: dict = None, filemap: str = None
     plt.close()
 
 
-def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap: str,
+def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: str,
                           addnan=True, size_model=14, size_obs=8, **kwargs):
     """
     Function to produce a heatmap - seaborn based - for Global Mean
     based on season-averaged standard deviation ratio
 
     Args:
-        data_table (pandas.DataFrame or dict): table of model data
-        mean_table (pandas.DataFrame or dict): table of observations
-        std_table (pandas.DataFrame or dict): table of standard deviation
+        data_dict (dict): table of model data
+        mean_dict (dict): table of observations
+        std_dict (dict): table of standard deviation
+        units_list (str): list of units
         diag (dict): dictionary containing diagnostic information
         filemap (str): path to save the plot
         addnan (bool): add to the final plots also fields which cannot be compared against observations
@@ -101,9 +120,17 @@ def heatmap_comparison_gm(data_table, mean_table, std_table, diag: dict, filemap
     Keyword Args:
         title (str): title of the plot, overrides default title
     """
-    for tab in [data_table, mean_table, std_table]:
-        if isinstance(tab, dict):
-            tab = dict_to_dataframe(tab)
+
+    # convert the three dictionary to pandas and then add units
+    data_table = dict_to_dataframe(data_dict)
+    mean_table = dict_to_dataframe(mean_dict)
+    std_table = dict_to_dataframe(std_dict)
+    for table in [data_table, mean_table, std_table]:
+        table.index = table.index + ' [' + diag['units_list'] + ']'
+
+    loggy.debug("Data table")
+    loggy.debug(data_table)
+
 
     # define array
     ratio = (data_table - mean_table) / std_table
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index 12a21a6..5a91a70 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -310,7 +310,7 @@ def performance_indices(exp, year1, year2,
 
     toc = time()
     # evaluate tic-toc time  of execution
-    loggy.info('Done in {:.4f} seconds'.format(toc - tic) + ' with ' + str(diag.numproc) + ' processors')
+    loggy.info('Done in %.4f seconds with %d processors', toc - tic, diag.numproc)
 
     tic = time()
 
@@ -336,40 +336,27 @@ def performance_indices(exp, year1, year2,
                 del filt_piclim[k][f]
 
         # set longname, reorganize the dictionaries
-        plotted = {}
+        data2plot = {}
         cmip6 = {}
         longnames = []
         for var in diag.field_all:
-            plotted[piclim[var]['longname']] = ordered[var]
+            data2plot[piclim[var]['longname']] = ordered[var]
             cmip6[piclim[var]['longname']] = filt_piclim[var]
             longnames = longnames + [piclim[var]['longname']]
 
-        # convert output dictionary to pandas dataframe
-        data_table = dict_to_dataframe(plotted)
-        loggy.debug(data_table)
-
-        # relative pi with re-ordering of rows
-        cmip6_table = data_table.div(dict_to_dataframe(cmip6).reindex(longnames))
-
-        # compute the total PI mean
-        cmip6_table.loc['Total PI'] = cmip6_table.mean()
-
-        # reordering columns
-        lll = [(x, y) for x in diag.seasons for y in diag.regions]
-        cmip6_table = cmip6_table[lll]
-        loggy.debug(cmip6_table)
-
         # call the heatmap routine for a plot
         mapfile = os.path.join(diag.figdir,
                                f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf')
-        # heatmap_comparison_old(data_table, diag, mapfile)
         diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,
-                     'year1': diag.year1, 'year2': diag.year2}
-        heatmap_comparison_pi(cmip6_table, diag=diag_dict, filemap=mapfile)
+                     'year1': diag.year1, 'year2': diag.year2,
+                     'seasons': diag.seasons, 'regions': diag.regions,
+                     'longnames': longnames}
+        heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6,
+                              diag=diag_dict, filemap=mapfile)
 
     toc = time()
     # evaluate tic-toc time of postprocessing
-    loggy.info('Postproc done in {:.4f} seconds'.format(toc - tic))
+    loggy.info("Postproc done in %.4f seconds", toc - tic)
     print('ECmean4 Performance Indices succesfully computed!')
 
 
diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py
index ff46537..037511c 100644
--- a/tests/test_performance_indices.py
+++ b/tests/test_performance_indices.py
@@ -21,7 +21,8 @@
 # test on coupled
 @pytest.mark.parametrize("clim", ['RK08', 'EC23'])
 def test_performance_indices_cpld(clim):
-    performance_indices('cpld', 1990, 1990, numproc=4, climatology=clim, config='tests/config.yml')
+    performance_indices('cpld', 1990, 1990, numproc=4, 
+                        climatology=clim, config='tests/config.yml')
     file1 = 'tests/table/PI4_' + clim + '_cpld_EC-Earth4_r1i1p1f1_1990_1990.yml'
     file2 = 'tests/table/PI4_' + clim + '_cpld_1990_1990.ref'
     with open(file1, 'r', encoding='utf8') as f1, open(file2, 'r', encoding='utf8') as f2:
@@ -34,7 +35,8 @@ def test_performance_indices_cpld(clim):
 # test on amip
 @pytest.mark.parametrize("clim", ['RK08', 'EC23'])
 def test_performance_indices_amip(clim):
-    performance_indices('amip', 1990, 1990, numproc=1, climatology=clim, config='tests/config.yml', outputdir='tests/pluto')
+    performance_indices('amip', 1990, 1990, numproc=1, 
+                        climatology=clim, config='tests/config.yml', outputdir='tests/pluto')
     file1 = 'tests/pluto/YAML/PI4_' + clim + '_amip_EC-Earth4_r1i1p1f1_1990_1990.yml'
     file2 = 'tests/table/PI4_' + clim + '_amip_1990_1990.ref'
     with open(file1, 'r', encoding='utf8') as f1, open(file2, 'r', encoding='utf8') as f2:
@@ -57,6 +59,7 @@ def test_cmd_performance_indices_CMIP6(clim):
         data2 = yaml.safe_load(f2)
 
     assert are_dicts_equal(data1, data2, TOLERANCE), f"YAML files are not identical.\nData1: {data1}\nData2: {data2}"
+
 # test on amip but with access from xarray dataset
 @pytest.mark.parametrize("clim", ['RK08', 'EC23'])
 def test_performance_indices_amip_xdataset(clim):
@@ -78,6 +81,6 @@ def test_pi_plot():
     file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml'
     with open(file1, 'r', encoding='utf8') as f1:
         data1 = yaml.safe_load(f1)
-    heatmap_comparison_pi(data1, title='CMIP6 RELATIVE PI test')
+    heatmap_comparison_pi(data_dict=data1, cmip6_dict=file1, title='CMIP6 RELATIVE PI test')
     assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created."
     os.remove('PI4_heatmap.pdf')

From 4ce96cc6781d57b1bc8d2a901abe1d2def44c9e2 Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Wed, 18 Dec 2024 15:48:09 +0100
Subject: [PATCH 13/26] test

---
 ecmean/global_mean.py             |  4 ++--
 ecmean/libs/plotting.py           | 13 ++++++++-----
 ecmean/performance_indices.py     |  2 +-
 tests/test_performance_indices.py |  6 ++++--
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index b5873f3..aeab6ab 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -24,7 +24,7 @@
 
 from ecmean import Diagnostic, Supporter, UnitsHandler
 from ecmean.libs.general import weight_split, write_tuning_table, get_domain, \
-    check_time_axis, dict_to_dataframe, init_mydict, \
+    check_time_axis, init_mydict, \
         check_var_interface, check_var_climatology, set_multiprocessing_start_method
 from ecmean.libs.files import var_is_there, get_inifiles, load_yaml, make_input_filename
 from ecmean.libs.formula import formula_wrapper
@@ -241,7 +241,7 @@ def global_mean(exp, year1, year2,
     toc = time()
 
     # evaluate tic-toc time  of execution
-    loggy.info('Analysis done in {:.4f} seconds'.format(toc - tic))
+    loggy.info('Analysis done in %.4f seconds', toc - tic)
 
     tic = time()
 
diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py
index 1df0fc1..3ee9749 100644
--- a/ecmean/libs/plotting.py
+++ b/ecmean/libs/plotting.py
@@ -40,14 +40,18 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str
     loggy.debug(data_table)
 
     # relative pi with re-ordering of rows
-    relative_table = data_table.div(dict_to_dataframe(cmip6_dict).reindex(diag['longnames']))
+    cmip6_table = dict_to_dataframe(cmip6_dict)
+    if diag is not None and 'longnames' in diag:
+        cmip6_table = cmip6_table.reindex(diag['longnames'])
+    relative_table = data_table.div(cmip6_table)
 
     # compute the total PI mean
     relative_table.loc['Total PI'] = relative_table.mean()
 
-    # reordering columns
-    lll = [(x, y) for x in diag['seasons'] for y in diag['regions']]
-    relative_table = relative_table[lll]
+    # reordering columns if info is available
+    if diag is not None and 'regions' in diag and 'seasons' in diag:
+        lll = [(x, y) for x in diag['seasons'] for y in diag['regions']]
+        relative_table = relative_table[lll]
     loggy.debug("Relative table")
     loggy.debug(relative_table)
 
@@ -110,7 +114,6 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s
         data_dict (dict): table of model data
         mean_dict (dict): table of observations
         std_dict (dict): table of standard deviation
-        units_list (str): list of units
         diag (dict): dictionary containing diagnostic information
         filemap (str): path to save the plot
         addnan (bool): add to the final plots also fields which cannot be compared against observations
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index 5a91a70..f47cbdd 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -203,7 +203,7 @@ def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist):
 
                         # diagnostic
                         if region == 'Global':
-                            logging.info('PI for', region, season, var, result[season][region])
+                            logging.info('PI for %s %s %s %s', region, season, var, result[season][region])
 
         # nested dictionary, to be redifend as a dict to remove lambdas
         varstat[var] = result
diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py
index 037511c..cb81d71 100644
--- a/tests/test_performance_indices.py
+++ b/tests/test_performance_indices.py
@@ -77,10 +77,12 @@ def test_performance_indices_amip_xdataset(clim):
     assert are_dicts_equal(data1, data2, TOLERANCE),f"YAML files are not identical.\nData1: {data1}\nData2: {data2}"
 
 
-def test_pi_plot():
+def test_pi_plot(tmp_path):
     file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml'
     with open(file1, 'r', encoding='utf8') as f1:
         data1 = yaml.safe_load(f1)
-    heatmap_comparison_pi(data_dict=data1, cmip6_dict=file1, title='CMIP6 RELATIVE PI test')
+    heatmap_comparison_pi(data_dict=data1, cmip6_dict=data1, 
+                          title='CMIP6 RELATIVE PI test', 
+                          mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf'))
     assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created."
     os.remove('PI4_heatmap.pdf')

From 65d5146b10bede10924ac56d22f87ff1ee8aef62 Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Wed, 18 Dec 2024 15:57:29 +0100
Subject: [PATCH 14/26] clean up

---
 ecmean/global_mean.py             |  2 +-
 ecmean/libs/general.py            | 40 +++++++------------------------
 ecmean/performance_indices.py     | 10 ++++----
 tests/test_performance_indices.py |  8 +++----
 4 files changed, 19 insertions(+), 41 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index aeab6ab..e9b9d72 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -313,7 +313,7 @@ def global_mean(exp, year1, year2,
     units_list = []
     for var in diag.var_all:
         data2plot[ref[var]['longname']] = ordered[var]
-        units_list = units_list + [ref[var]['units']]
+        units_list.append(ref[var]['units'])
 
     # call the heatmap routine for a plot
     mapfile = os.path.join(diag.figdir,
diff --git a/ecmean/libs/general.py b/ecmean/libs/general.py
index a8d92bd..d966b58 100644
--- a/ecmean/libs/general.py
+++ b/ecmean/libs/general.py
@@ -17,27 +17,8 @@
 ##################
 
 
-# def is_number(s):
-#     """Check if input is a float type"""
-
-#     try:
-#         float(s)
-#         return True
-#     except ValueError:
-#         return False
-
 loggy = logging.getLogger(__name__)
 
-# def numeric_loglevel(loglevel):
-#     """Define the logging level """
-#     # log level with logging
-#     # currently basic definition trought the text
-#     numeric_level = getattr(logging, loglevel.upper(), None)
-#     if not isinstance(numeric_level, int):
-#         raise ValueError(f'Invalid log level: {loglevel}')
-
-#     return numeric_level
-
 def set_multiprocessing_start_method():
     """Function to set the multiprocessing spawn method to fork"""
     plat = platform.system()
@@ -163,23 +144,20 @@ def get_domain(var, face):
     return domain[comp]
 
 
-# def get_component(face):  # unused function
-#     """Return a dictionary providing the domain associated with a variable
-#     (the interface file specifies the domain for each component instead)"""
-
-#     d = face['component']
-#     p = dict(zip([list(d.values())[x]['domain']
-#              for x in range(len(d.values()))], d.keys()))
-#     return p
-
-
 ####################
 # OUTPUT FUNCTIONS #
 ####################
 
 def dict_to_dataframe(varstat):
-    """very clumsy conversion of the nested 3-level dictionary
-    to a pd.dataframe: NEED TO BE IMPROVED"""
+    """
+    Converts a nested 3-level dictionary to a pandas DataFrame.
+    
+    Parameters:
+    varstat (dict): Nested dictionary with 3 levels.
+
+    Returns:
+    pd.DataFrame: Transformed DataFrame with hierarchical keys.
+    """
     data_table = {}
     for i in varstat.keys():
         pippo = {}
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index f47cbdd..f685c7d 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -19,7 +19,7 @@
 import yaml
 import dask
 from ecmean import Diagnostic, Supporter, UnitsHandler
-from ecmean.libs.general import weight_split, get_domain, dict_to_dataframe, \
+from ecmean.libs.general import weight_split, get_domain, \
    check_time_axis, init_mydict, check_var_interface, check_var_climatology, \
    set_multiprocessing_start_method
 from ecmean.libs.files import var_is_there, get_inifiles, load_yaml, \
@@ -338,11 +338,11 @@ def performance_indices(exp, year1, year2,
         # set longname, reorganize the dictionaries
         data2plot = {}
         cmip6 = {}
-        longnames = []
+        longnames = [piclim[var]['longname'] for var in diag.field_all]
         for var in diag.field_all:
-            data2plot[piclim[var]['longname']] = ordered[var]
-            cmip6[piclim[var]['longname']] = filt_piclim[var]
-            longnames = longnames + [piclim[var]['longname']]
+            longname = piclim[var]['longname']
+            data2plot[longname] = ordered[var]
+            cmip6[longname] = filt_piclim[var]
 
         # call the heatmap routine for a plot
         mapfile = os.path.join(diag.figdir,
diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py
index cb81d71..0bef6ae 100644
--- a/tests/test_performance_indices.py
+++ b/tests/test_performance_indices.py
@@ -13,7 +13,7 @@
 from ecmean.libs.plotting import heatmap_comparison_pi
 
 # set TOLERANCE
-TOLERANCE = 1e-3
+TOLERANCE = 1e-1
 
 # set up coverage env var
 env = {**os.environ, "COVERAGE_PROCESS_START": ".coveragerc"}
@@ -35,7 +35,7 @@ def test_performance_indices_cpld(clim):
 # test on amip
 @pytest.mark.parametrize("clim", ['RK08', 'EC23'])
 def test_performance_indices_amip(clim):
-    performance_indices('amip', 1990, 1990, numproc=1, 
+    performance_indices('amip', 1990, 1990, numproc=1,
                         climatology=clim, config='tests/config.yml', outputdir='tests/pluto')
     file1 = 'tests/pluto/YAML/PI4_' + clim + '_amip_EC-Earth4_r1i1p1f1_1990_1990.yml'
     file2 = 'tests/table/PI4_' + clim + '_amip_1990_1990.ref'
@@ -81,8 +81,8 @@ def test_pi_plot(tmp_path):
     file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml'
     with open(file1, 'r', encoding='utf8') as f1:
         data1 = yaml.safe_load(f1)
-    heatmap_comparison_pi(data_dict=data1, cmip6_dict=data1, 
-                          title='CMIP6 RELATIVE PI test', 
+    heatmap_comparison_pi(data_dict=data1, cmip6_dict=data1,
+                          title='CMIP6 RELATIVE PI test',
                           mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf'))
     assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created."
     os.remove('PI4_heatmap.pdf')

From 4f4497ca1478719683b284552b6c30967b94f84f Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Wed, 18 Dec 2024 16:40:48 +0100
Subject: [PATCH 15/26] preparation functions

---
 ecmean/global_mean.py         | 35 ++++----------
 ecmean/libs/plotting.py       | 90 +++++++++++++++++++++++++++++++++--
 ecmean/performance_indices.py | 22 +++------
 3 files changed, 101 insertions(+), 46 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index e9b9d72..e1130ea 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -32,7 +32,7 @@
 from ecmean.libs.units import units_extra_definition
 from ecmean.libs.ncfixers import xr_preproc
 from ecmean.libs.parser import parse_arguments
-from ecmean.libs.plotting import heatmap_comparison_gm
+from ecmean.libs.plotting import heatmap_comparison_gm, prepare_clim_dictionaries_gm
 from ecmean.libs.loggy import setup_logger
 
 dask.config.set(scheduler="synchronous")
@@ -245,37 +245,22 @@ def global_mean(exp, year1, year2,
 
     tic = time()
 
+
     # loop on the variables to create the output table
     global_table = []
-    obsmean = {}
-    obsstd = {}
-    for var in diag.var_atm + diag.var_oce + diag.var_ice:
-
+    for var in diag.var_all:
         gamma = ref[var]
-        # get the predifined value or the ALL GLobal one
+        # get the predefined value or the ALL Global one
         if isinstance(gamma['obs'], dict):
             tabval = gamma['obs']['ALL']['Global']
             outval = str(tabval['mean']) + '\u00B1' + str(tabval['std'])
         else:
             outval = gamma['obs']
 
-        # extract from yaml table for obs mean and standard deviation
-        mmm = init_mydict(diag.seasons, diag.regions)
-        sss = init_mydict(diag.seasons, diag.regions)
-        # if we have all the obs/std available
-        if isinstance(gamma['obs'], dict):
-            for season in diag.seasons:
-                for region in diag.regions:
-                    mmm[season][region] = gamma['obs'][season][region]['mean']
-                    sss[season][region] = gamma['obs'][season][region]['std']
-        # if only global observation is available
-        else:
-            mmm['ALL']['Global'] = gamma['obs']
-        obsmean[gamma['longname']] = mmm
-        obsstd[gamma['longname']] = sss
-
         if 'year1' in gamma.keys():
             years = str(gamma['year1']) + '-' + str(gamma['year2'])
+        else:
+            raise ValueError('Year1 and Year2 are not defined in the reference file')
 
         out_sequence = [var, gamma['longname'], gamma['units'], varmean[var]['ALL']['Global']]
         if diag.ftrend:
@@ -308,12 +293,8 @@ def global_mean(exp, year1, year2,
     with open(yamlfile, 'w', encoding='utf-8') as file:
         yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False)
 
-    # set longname, get units
-    data2plot = {}
-    units_list = []
-    for var in diag.var_all:
-        data2plot[ref[var]['longname']] = ordered[var]
-        units_list.append(ref[var]['units'])
+    # prepare dictionaries for global mean
+    obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(ordered, ref, diag.var_all, diag.seasons, diag.regions)
 
     # call the heatmap routine for a plot
     mapfile = os.path.join(diag.figdir,
diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py
index 3ee9749..91786cf 100644
--- a/ecmean/libs/plotting.py
+++ b/ecmean/libs/plotting.py
@@ -14,7 +14,7 @@
 from matplotlib.colors import ListedColormap
 import seaborn as sns
 import numpy as np
-from ecmean.libs.general import dict_to_dataframe
+from ecmean.libs.general import dict_to_dataframe, init_mydict
 
 loggy = logging.getLogger(__name__)
 
@@ -94,7 +94,7 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str
 
     if filemap is None:
         if diag is not None:
-            filemap = f'PI4_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf'
+            filemap = f'PI4_{diag["climatology"]}_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf'
         else:
             filemap = 'PI4_heatmap.pdf'
 
@@ -134,7 +134,6 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s
     loggy.debug("Data table")
     loggy.debug(data_table)
 
-
     # define array
     ratio = (data_table - mean_table) / std_table
     if addnan:
@@ -194,7 +193,92 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s
     axs.figure.axes[-1].yaxis.label.set_size(15)
     axs.set(xlabel=None)
 
+    if filemap is None:
+        if diag is not None:
+            filemap = f'Global_Mean_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf'
+        else:
+            filemap = 'Global_Mean_Heatmap.pdf'
+
     # save and close
     plt.savefig(filemap)
     plt.cla()
     plt.close()
+
+def prepare_clim_dictionaries_pi(data, clim, shortnames):
+    """
+    Prepare dictionaries for plotting
+    Args:
+        data: dictionary with data
+        clim: dictionary with climatology
+        shortnames: list of shortnames
+    Returns:    
+        data2plot: dictionary with data for plotting
+        cmip6: dictionary with CMIP6 data
+        longnames: list of longnames
+    """
+
+    # uniform dictionaries
+    filt_piclim = {}
+    for k in clim.keys():
+        filt_piclim[k] = clim[k]['cmip6']
+        for f in ['models', 'year1', 'year2']:
+            del filt_piclim[k][f]
+
+    # set longname, reorganize the dictionaries
+    data2plot = {}
+    cmip6 = {}
+    longnames = [clim[var]['longname'] for var in shortnames]
+    for var in shortnames:
+        longname = clim[var]['longname']
+        data2plot[longname] = data[var]
+        cmip6[longname] = filt_piclim[var]
+        
+    return data2plot, cmip6, longnames
+
+def prepare_clim_dictionaries_gm(data, clim, shortnames, seasons, regions):
+    """
+    Prepare dictionaries for global mean plotting
+
+    Args:
+        data: dictionary with the data
+        clim: dictionary with the climatology
+        shortnames: list of shortnames
+        seasons: list of seasons
+        regions: list of regions
+
+    Returns:
+        obsmean: dictionary with the mean
+        obsstd: dictionary with the standard deviation
+        data2plot: dictionary with the data to plot
+        units_list: list of units
+    """
+
+    # loop on the variables to create obsmean and obsstd
+    obsmean = {}
+    obsstd = {}
+    for var in shortnames:
+        gamma = clim[var]
+
+        # extract from yaml table for obs mean and standard deviation
+        mmm = init_mydict(seasons, regions)
+        sss = init_mydict(seasons, regions)
+        # if we have all the obs/std available
+        if isinstance(gamma['obs'], dict):
+            for season in seasons:
+                for region in regions:
+                    mmm[season][region] = gamma['obs'][season][region]['mean']
+                    sss[season][region] = gamma['obs'][season][region]['std']
+        # if only global observation is available
+        else:
+            mmm['ALL']['Global'] = gamma['obs']
+        obsmean[gamma['longname']] = mmm
+        obsstd[gamma['longname']] = sss
+
+    # set longname, get units
+    data2plot = {}
+    units_list = []
+    for var in shortnames:
+        data2plot[clim[var]['longname']] = data[var]
+        units_list.append(clim[var]['units'])
+
+    return obsmean, obsstd, data2plot, units_list
\ No newline at end of file
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index f685c7d..a331509 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -29,7 +29,7 @@
 from ecmean.libs.areas import guess_bounds
 from ecmean.libs.units import units_extra_definition
 from ecmean.libs.ncfixers import xr_preproc, adjust_clim_file
-from ecmean.libs.plotting import heatmap_comparison_pi
+from ecmean.libs.plotting import heatmap_comparison_pi, prepare_clim_dictionaries_pi
 from ecmean.libs.parser import parse_arguments
 from ecmean.libs.loggy import setup_logger
 
@@ -328,26 +328,15 @@ def performance_indices(exp, year1, year2,
     # to this date, only EC23 support comparison with CMIP6 data
     if diag.climatology == 'EC23':
 
-        # uniform dictionaries
-        filt_piclim = {}
-        for k in piclim.keys():
-            filt_piclim[k] = piclim[k]['cmip6']
-            for f in ['models', 'year1', 'year2']:
-                del filt_piclim[k][f]
-
-        # set longname, reorganize the dictionaries
-        data2plot = {}
-        cmip6 = {}
-        longnames = [piclim[var]['longname'] for var in diag.field_all]
-        for var in diag.field_all:
-            longname = piclim[var]['longname']
-            data2plot[longname] = ordered[var]
-            cmip6[longname] = filt_piclim[var]
+        # prepare the data for the heatmap from the original yaml dictionaries
+        data2plot, cmip6, longnames = prepare_clim_dictionaries_pi(data=varstat, clim=piclim,
+                                                                   shortnames=diag.field_all)
 
         # call the heatmap routine for a plot
         mapfile = os.path.join(diag.figdir,
                                f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf')
         diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,
+                     'climatology': diag.climatology,
                      'year1': diag.year1, 'year2': diag.year2,
                      'seasons': diag.seasons, 'regions': diag.regions,
                      'longnames': longnames}
@@ -360,6 +349,7 @@ def performance_indices(exp, year1, year2,
     print('ECmean4 Performance Indices succesfully computed!')
 
 
+
 def pi_entry_point():
     """
     Command line interface to run the global_mean function

From b30d9e7ef762f839d2d1cac4cf98f033ac9b8f0a Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Wed, 18 Dec 2024 17:04:53 +0100
Subject: [PATCH 16/26] example preprocess

---
 ecmean/global_mean.py             |  2 +-
 tests/test_performance_indices.py | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index e1130ea..e964d76 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -298,7 +298,7 @@ def global_mean(exp, year1, year2,
 
     # call the heatmap routine for a plot
     mapfile = os.path.join(diag.figdir,
-                           f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf')
+                           f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.png')
     loggy.info('Figure file is: %s', mapfile)
 
     diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,
diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py
index 0bef6ae..9feef91 100644
--- a/tests/test_performance_indices.py
+++ b/tests/test_performance_indices.py
@@ -10,7 +10,7 @@
 from ecmean.performance_indices import performance_indices
 from ecmean.libs.ncfixers import xr_preproc
 from ecmean.libs.general import are_dicts_equal
-from ecmean.libs.plotting import heatmap_comparison_pi
+from ecmean.libs.plotting import heatmap_comparison_pi, prepare_clim_dictionaries_pi
 
 # set TOLERANCE
 TOLERANCE = 1e-1
@@ -80,8 +80,12 @@ def test_performance_indices_amip_xdataset(clim):
 def test_pi_plot(tmp_path):
     file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml'
     with open(file1, 'r', encoding='utf8') as f1:
-        data1 = yaml.safe_load(f1)
-    heatmap_comparison_pi(data_dict=data1, cmip6_dict=data1,
+        data = yaml.safe_load(f1)
+    file2 = 'ecmean/climatology/EC23/pi_climatology_EC23.yml'
+    with open(file2, 'r', encoding='utf8') as f1:
+        clim = yaml.safe_load(f1)
+    data_dict, clim_dict, _ = prepare_clim_dictionaries_pi(data=data, clim=clim, shortnames=data.keys())
+    heatmap_comparison_pi(data_dict=data_dict, cmip6_dict=clim_dict,
                           title='CMIP6 RELATIVE PI test',
                           mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf'))
     assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created."

From be862d1efefd51cb03fb21091a0c6196b4368432 Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Fri, 20 Dec 2024 10:16:06 +0100
Subject: [PATCH 17/26] performance indidces as a class

---
 ecmean/__init__.py            |   4 +-
 ecmean/libs/diagnostic.py     |   4 +-
 ecmean/performance_indices.py | 556 ++++++++++++++++------------------
 3 files changed, 264 insertions(+), 300 deletions(-)

diff --git a/ecmean/__init__.py b/ecmean/__init__.py
index a43cb63..916a143 100644
--- a/ecmean/__init__.py
+++ b/ecmean/__init__.py
@@ -7,6 +7,6 @@
 from ecmean.libs.support import Supporter
 from ecmean.libs.units import UnitsHandler
 from ecmean.global_mean import global_mean
-from ecmean.performance_indices import performance_indices
+from ecmean.performance_indices import PerformanceIndices, performance_indices
 
-__all__ = ["global_mean", "performance_indices", "Diagnostic", "Supporter", "UnitsHandler"]
+__all__ = ["global_mean", "PerformanceIndices", "performance_indices", "Diagnostic", "Supporter", "UnitsHandler"]
diff --git a/ecmean/libs/diagnostic.py b/ecmean/libs/diagnostic.py
index 02a6290..3cab468 100644
--- a/ecmean/libs/diagnostic.py
+++ b/ecmean/libs/diagnostic.py
@@ -40,7 +40,7 @@ def __init__(self, args):
         self.resolution = getattr(args, 'resolution', '')
         self.ensemble = getattr(args, 'ensemble', 'r1i1p1f1')
         self.addnan = getattr(args, 'addnan', False)
-        self.funcname = args.funcname.split(".")[1]
+        self.funcname = args.funcname
         self.version = version
         if self.year1 == self.year2:
             self.ftrend = False
@@ -86,7 +86,7 @@ def __init__(self, args):
             self.cfg_global_mean(cfg)
 
         # init for performance indices
-        if self.funcname in 'performance_indices':
+        if self.funcname in 'PerformanceIndices':
             self.cfg_performance_indices(cfg)
 
         # setting up interface file
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index a331509..69a442d 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -35,333 +35,297 @@
 
 dask.config.set(scheduler="synchronous")
 
-
-def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist):
-    """Main parallel diagnostic worker for performance indices
-
-    Args:
-        util: the utility dictionary, including mask, area and remap weights
-        piclim: the reference climatology for the global mean
-        face: the interface to be used to access the data
-        diag: the diagnostic class object
-        field_3d: the list of 3d vars to be handled differently
-        varstat: the dictionary for the variable PI (empty)
-        varlist: the variable on which compute the global mean
-
-    Returns:
-        vartrend under the form of a dictionaries
+class PerformanceIndices:
 
     """
-
-    loggy = logging.getLogger(__name__)
-
-    for var in varlist:
-
-        # store NaN in dict (can't use defaultdict due to multiprocessing)
-        result = init_mydict(diag.seasons, diag.regions)
-
-        if check_var_interface(var, face):
-
-            # get domain
-            domain = get_domain(var, face)
-
-            # get masks
-            domain_mask = getattr(util, domain + 'mask')
-
-            # check if required variables are there: use interface file
-            # check into first file, and load also model variable units
-            infile = make_input_filename(var, face, diag)
-
-            # check if var is available
-            isavail, varunit = var_is_there(infile, var, face)
-
-            # if the variable is available
-            if isavail:
-
-                # perform the unit conversion extracting offset and factor
-                units_handler = UnitsHandler(var, org_units=varunit, clim=piclim, face=face)
-                offset, factor = units_handler.offset, units_handler.factor
-
-                # open file: chunking on time only, might be improved
-                if not isinstance(infile, (xr.DataArray, xr.Dataset)):
-                    xfield = xr.open_mfdataset(infile, preprocess=xr_preproc, chunks={'time': 12})
-                else:
-                    xfield = infile
-
-                # in case of big files with multi year, be sure of having opened the right records
-                xfield = xfield.sel(time=xfield.time.dt.year.isin(diag.years_joined))
-
-                # check time axis
-                check_time_axis(xfield.time, diag.years_joined)
-
-                # get the data-array field for the required var
-                outfield = formula_wrapper(var, face, xfield)
-
-                # mean over time and fixing of the units
-                for season in diag.seasons:
-
-                    loggy.debug(season)
-
-                    # copy of the full field
-                    tmean = outfield.copy(deep=True)
-
-                    # get filenames for climatology
-                    clim, vvvv = get_clim_files(piclim, var, diag, season)
-
-                    # open climatology files, fix their metadata
-                    cfield = adjust_clim_file(
-                        xr.open_mfdataset(clim, preprocess=xr_preproc))
-                    vfield = adjust_clim_file(
-                        xr.open_mfdataset(vvvv, preprocess=xr_preproc), remove_zero=True)
-
-                    # season selection
-                    if season != 'ALL':
-                        tmean = tmean.sel(time=tmean.time.dt.season.isin(season))
-                        cfield = cfield.sel(time=cfield.time.dt.season.isin(season))
-                        vfield = vfield.sel(time=vfield.time.dt.season.isin(season))
-
-                    # averaging
-                    tmean = tmean.mean(dim='time')
-                    cfield = cfield.load()  # this ensure no crash with multiprocessing
-                    vfield = vfield.load()
-
-                    # safe check for old RK08 which has a different format
-                    if diag.climatology != 'RK08':
-                        cfield = cfield.mean(dim='time')
-                        vfield = vfield.mean(dim='time')
-
-                    tmean = tmean * factor + offset
-
-                    # this computation is required to ensure that file access is
-                    # done in a correct way. resource errors found if disabled in some
-                    # specific configuration
-                    tmean = tmean.compute()
-
-                    # apply interpolation, if fixer is availble and with different
-                    # grids
-                    fix = getattr(util, f'{domain}fix')
-                    remap = getattr(util, f'{domain}remap')
-
-                    if fix:
-                        tmean = fix(tmean, keep_attrs=True)
-                    try:
-                        final = remap(tmean, keep_attrs=True)
-                    except ValueError:
-                        loggy.error('Cannot interpolate %s with the current weights...', var)
-                        continue
-
-                    # vertical interpolation
-                    if var in field_3d:
-
-                        # xarray interpolation on plev, forcing to be in Pascal
-                        final = final.metpy.convert_coordinate_units('plev', 'Pa')
-                        if set(final['plev'].data) != set(cfield['plev'].data):
-                            loggy.warning('%s: Need to interpolate vertical levels...', var)
-                            final = final.interp(plev=cfield['plev'].data, method='linear')
-
-                            # safety check for missing values
-                            sample = final.isel(lon=0, lat=0)
-                            if np.sum(np.isnan(sample)) != 0:
-                                loggy.warning('%s: You have NaN after the interpolation, this will affect your PIs...', var)
-                                levnan = cfield['plev'].where(np.isnan(sample))
-                                loggy.warning(levnan[~np.isnan(levnan)].data)
-
-                        # zonal mean
-                        final = final.mean(dim='lon')
-
-                        # compute PI
-                        complete = (final - cfield)**2 / vfield
-
-                        # compute vertical bounds as weights
-                        bounds_lev = guess_bounds(complete['plev'], name='plev')
-                        bounds = abs(bounds_lev[:, 0] - bounds_lev[:, 1])
-                        www = xr.DataArray(
-                            bounds, coords=[
-                                complete['plev']], dims=['plev'])
-
-                        # vertical mean
-                        outarray = complete.weighted(www).mean(dim='plev')
-
-                    # horizontal averaging with land-sea mask
-                    else:
-
-                        complete = (final - cfield)**2 / vfield
-                        outarray = mask_field(xfield=complete,
-                                              mask_type=piclim[var]['mask'],
-                                              dom=domain, mask=domain_mask)
-
-                    # loop on different regions
-                    for region in diag.regions:
-
-                        slicearray = select_region(outarray, region)
-
-                        # latitude-based averaging
-                        weights = np.cos(np.deg2rad(slicearray.lat))
-                        out = slicearray.weighted(weights).mean().data
-                        # store the PI
-                        result[season][region] = round(float(out), 3)
-
-                        # diagnostic
-                        if region == 'Global':
-                            logging.info('PI for %s %s %s %s', region, season, var, result[season][region])
-
-        # nested dictionary, to be redifend as a dict to remove lambdas
-        varstat[var] = result
-
-
-def performance_indices(exp, year1, year2,
-                        config='config.yml',
-                        loglevel='WARNING',
-                        numproc=1,
-                        climatology='EC23',
-                        interface=None, model=None, ensemble='r1i1p1f1',
-                        silent=None, xdataset=None,
-                        outputdir=None):
-    """Main performance indices calculation
-
-    :param exp: Experiment name or ID
-    :param year1: Initial year
-    :param year2: Final year
-    :param config: configuration file, optional (default 'config.yml')
-    :param loglevel: level of logging, optional (default 'WARNING')
-    :param numproc: number of multiprocessing cores, optional (default '1')
-    :param interface: interface file to be used, optional (default as specifified in config file)
-    :param model: model to be analyzed, optional (default as specifified in config file)
-    :param ensemble: ensemble member to be analyzed, optional (default as 'r1i1p1f1')
-    :param silent: do not print anything to std output, optional
-    :param climatology: climatology to be compared. default: EC23. Options: [RK08, EC22, EC23]
-    :param xdataset: xarray dataset - already open - to be used without looking for files
-    :param outputdir: if specified, override the target destination in the configuration files for both tables and figures
-
-    :returns: the performance indices yaml file and heatmap
-
+    Class to compute the performance indices for a given experiment and years.
     """
 
-    # create a name space with all the arguments to feed the Diagnostic class
-    # This is not the neatest option, but it is very compact
-    funcname = __name__
-    argv = argparse.Namespace(**locals())
-
-    # set loglevel
-    loggy = setup_logger(level=argv.loglevel)
-
-    # set dask and multiprocessing fork
-    plat, mprocmethod = set_multiprocessing_start_method()
-    loggy.info('Running on %s and multiprocessing method set as "%s"', plat, mprocmethod)
-
-    # start time
-    tic = time()
-
-    # initialize the diag class, load the inteface and the reference file
-    diag = Diagnostic(argv)
-    face = load_yaml(diag.interface)
-    piclim = load_yaml(diag.climfile)
-
-    # check that everyhing is there
-    check_var_climatology(diag.field_all, piclim.keys())
+    def __init__(self, exp, year1, year2, config='config.yml',
+                 loglevel='WARNING', numproc=1, climatology='EC23',
+                 interface=None, model=None, ensemble='r1i1p1f1',
+                 silent=None, xdataset=None, outputdir=None):
+        """Initialize the PerformanceIndices class with the given parameters."""
+
+        self.exp = exp
+        self.year1 = year1
+        self.year2 = year2
+        self.config = config
+        self.loglevel = loglevel
+        self.numproc = numproc
+        self.climatology = climatology
+        self.interface = interface
+        self.model = model
+        self.ensemble = ensemble
+        self.silent = silent
+        self.xdataset = xdataset
+        self.outputdir = outputdir
+        self.loggy = setup_logger(level=self.loglevel)
+        self.diag = None
+        self.face = None
+        self.piclim = None
+        self.util_dictionary = None
+        self.varstat = None
+        self.funcname = self.__class__.__name__
+        self.start_time = time()
+
+    def toc(self, message):
+        """Update the timer and log the elapsed time."""
+        elapsed_time = time() - self.start_time
+        self.start_time = time()
+        self.loggy.info('%s time: %.2f seconds', message, elapsed_time)
+
+    def prepare(self):
+        """Prepare the necessary components for performance indices calculation."""
+        # set dask and multiprocessing fork
+        plat, mprocmethod = set_multiprocessing_start_method()
+        self.loggy.info('Running on %s and multiprocessing method set as "%s"', plat, mprocmethod)
+
+        # initialize the diag class, load the interface and the reference file
+        self.diag = Diagnostic(argparse.Namespace(**self.__dict__))
+        self.face = load_yaml(self.diag.interface)
+        self.piclim = load_yaml(self.diag.climfile)
+
+        # check that everything is there
+        check_var_climatology(self.diag.field_all, self.piclim.keys())
+
+        # Create missing folders
+        os.makedirs(self.diag.tabdir, exist_ok=True)
+        os.makedirs(self.diag.figdir, exist_ok=True)
+
+        # new bunch of functions to set grids, create correction command, masks and areas
+        comp = self.face['model']['component']  # Get component for each domain
+
+        # all clim have the same grid, read from the first clim available and get target grid
+        clim, _ = get_clim_files(self.piclim, 'tas', self.diag, 'ALL')
+        target_remap_grid = xr.open_dataset(clim)
+
+        # get file info files
+        inifiles = get_inifiles(self.face, self.diag)
+
+        # add missing unit definitions
+        units_extra_definition()
+
+        # create remap dictionary with atm and oce interpolators
+        self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=False, remap=True, targetgrid=target_remap_grid)
+
+    def run(self):
+        """Run the performance indices calculation."""
+        # main loop: manager is required for shared variables
+        mgr = Manager()
+
+        # dictionaries are shared, so they have to be passed as functions
+        self.varstat = mgr.dict()
+        processes = []
+
+        # loop on the variables, create the parallel process
+        for varlist in weight_split(self.diag.field_all, self.diag.numproc):
+            core = Process(target=self.pi_worker, args=(self.util_dictionary, self.piclim, self.face, self.diag, self.diag.field_3d, self.varstat, varlist))
+            core.start()
+            processes.append(core)
+
+        # wait for the processes to finish
+        for proc in processes:
+            proc.join()
+
+    def store(self, yamlfile=None):
+        """Store the performance indices in a yaml file."""
+        # order according to the original request the fields in the yaml file
+        ordered = {}
+        for item in self.diag.field_all:
+            ordered[item] = self.varstat[item]
+
+        # dump the yaml file for PI, including all the seasons (need to copy to avoid mess)
+        if yamlfile is None:
+            yamlfile = self.diag.tabdir / f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.yml'
+        with open(yamlfile, 'w', encoding='utf-8') as file:
+            yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False)
+
+    def plot(self, mapfile=None):
+        """Generate the heatmap for performance indices."""
+        # to this date, only EC23 support comparison with CMIP6 data
+        if self.diag.climatology == 'EC23':
+            # prepare the data for the heatmap from the original yaml dictionaries
+            data2plot, cmip6, longnames = prepare_clim_dictionaries_pi(data=self.varstat,
+                                                                       clim=self.piclim,
+                                                                       shortnames=self.diag.field_all)
+
+            # call the heatmap routine for a plot
+            if mapfile is None:
+                mapfile = os.path.join(self.diag.figdir, f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.pdf')
+            diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname, 'climatology': self.diag.climatology, 'year1': self.diag.year1, 'year2': self.diag.year2, 'seasons': self.diag.seasons, 'regions': self.diag.regions, 'longnames': longnames}
+            heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=diag_dict, filemap=mapfile)
+
+    @staticmethod
+    def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist):
+        """Main parallel diagnostic worker for performance indices."""
+        loggy = logging.getLogger(__name__)
+
+        for var in varlist:
+            # store NaN in dict (can't use defaultdict due to multiprocessing)
+            result = init_mydict(diag.seasons, diag.regions)
+
+            if check_var_interface(var, face):
+                # get domain
+                domain = get_domain(var, face)
+
+                # get masks
+                domain_mask = getattr(util, domain + 'mask')
+
+                # check if required variables are there: use interface file
+                # check into first file, and load also model variable units
+                infile = make_input_filename(var, face, diag)
+
+                # check if var is available
+                isavail, varunit = var_is_there(infile, var, face)
+
+                # if the variable is available
+                if isavail:
+                    # perform the unit conversion extracting offset and factor
+                    units_handler = UnitsHandler(var, org_units=varunit, clim=piclim, face=face)
+                    offset, factor = units_handler.offset, units_handler.factor
+
+                    # open file: chunking on time only, might be improved
+                    if not isinstance(infile, (xr.DataArray, xr.Dataset)):
+                        xfield = xr.open_mfdataset(infile, preprocess=xr_preproc, chunks={'time': 12})
+                    else:
+                        xfield = infile
+
+                    # in case of big files with multi year, be sure of having opened the right records
+                    xfield = xfield.sel(time=xfield.time.dt.year.isin(diag.years_joined))
+
+                    # check time axis
+                    check_time_axis(xfield.time, diag.years_joined)
+
+                    # get the data-array field for the required var
+                    outfield = formula_wrapper(var, face, xfield)
 
-    # Create missing folders
-    os.makedirs(diag.tabdir, exist_ok=True)
-    os.makedirs(diag.figdir, exist_ok=True)
+                    # mean over time and fixing of the units
+                    for season in diag.seasons:
+                        loggy.debug(season)
 
-    # new bunch of functions to set grids, create correction command, masks
-    # and areas
-    comp = face['model']['component']  # Get component for each domain
+                        # copy of the full field
+                        tmean = outfield.copy(deep=True)
 
-    # all clim have the same grid, read from the first clim available and get
-    # target grid
-    clim, _ = get_clim_files(piclim, 'tas', diag, 'ALL')
-    target_remap_grid = xr.open_dataset(clim)
+                        # get filenames for climatology
+                        clim, vvvv = get_clim_files(piclim, var, diag, season)
 
-    # get file info files
-    inifiles = get_inifiles(face, diag)
+                        # open climatology files, fix their metadata
+                        cfield = adjust_clim_file(xr.open_mfdataset(clim, preprocess=xr_preproc))
+                        vfield = adjust_clim_file(xr.open_mfdataset(vvvv, preprocess=xr_preproc), remove_zero=True)
 
-    # add missing unit definitions
-    units_extra_definition()
+                        # season selection
+                        if season != 'ALL':
+                            tmean = tmean.sel(time=tmean.time.dt.season.isin(season))
+                            cfield = cfield.sel(time=cfield.time.dt.season.isin(season))
+                            vfield = vfield.sel(time=vfield.time.dt.season.isin(season))
 
-    # create remap dictionary with atm and oce interpolators
-    util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'],
-                                areas=False, remap=True,
-                                targetgrid=target_remap_grid)
+                        # averaging
+                        tmean = tmean.mean(dim='time')
+                        cfield = cfield.load()  # this ensure no crash with multiprocessing
+                        vfield = vfield.load()
 
-    # main loop: manager is required for shared variables
-    mgr = Manager()
+                        # safe check for old RK08 which has a different format
+                        if diag.climatology != 'RK08':
+                            cfield = cfield.mean(dim='time')
+                            vfield = vfield.mean(dim='time')
 
-    # dictionaries are shared, so they have to be passed as functions
-    varstat = mgr.dict()
-    processes = []
+                        tmean = tmean * factor + offset
 
-    toc = time()
-    loggy.info('Preproc in {:.4f} seconds'.format(toc - tic))
-    tic = time()
+                        # this computation is required to ensure that file access is done in a correct way. resource errors found if disabled in some specific configuration
+                        tmean = tmean.compute()
 
-    # loop on the variables, create the parallel process
-    for varlist in weight_split(diag.field_all, diag.numproc):
-        # print(varlist)
+                        # apply interpolation, if fixer is available and with different grids
+                        fix = getattr(util, f'{domain}fix')
+                        remap = getattr(util, f'{domain}remap')
 
-        core = Process(target=pi_worker, args=(util_dictionary, piclim,
-                       face, diag, diag.field_3d, varstat, varlist))
-        core.start()
-        processes.append(core)
+                        if fix:
+                            tmean = fix(tmean, keep_attrs=True)
+                        try:
+                            final = remap(tmean, keep_attrs=True)
+                        except ValueError:
+                            loggy.error('Cannot interpolate %s with the current weights...', var)
+                            continue
 
-    # wait for the processes to finish
-    for proc in processes:
-        proc.join()
+                        # vertical interpolation
+                        if var in field_3d:
+                            # xarray interpolation on plev, forcing to be in Pascal
+                            final = final.metpy.convert_coordinate_units('plev', 'Pa')
+                            if set(final['plev'].data) != set(cfield['plev'].data):
+                                loggy.warning('%s: Need to interpolate vertical levels...', var)
+                                final = final.interp(plev=cfield['plev'].data, method='linear')
 
-    toc = time()
-    # evaluate tic-toc time  of execution
-    loggy.info('Done in %.4f seconds with %d processors', toc - tic, diag.numproc)
+                                # safety check for missing values
+                                sample = final.isel(lon=0, lat=0)
+                                if np.sum(np.isnan(sample)) != 0:
+                                    loggy.warning('%s: You have NaN after the interpolation, this will affect your PIs...', var)
+                                    levnan = cfield['plev'].where(np.isnan(sample))
+                                    loggy.warning(levnan[~np.isnan(levnan)].data)
 
-    tic = time()
+                            # zonal mean
+                            final = final.mean(dim='lon')
 
-    # order according to the original request the fields in the yaml file
-    ordered = {}
-    for item in diag.field_all:
-        ordered[item] = varstat[item]
+                            # compute PI
+                            complete = (final - cfield)**2 / vfield
 
-    # dump the yaml file for PI, including all the seasons (need to copy to avoid mess)
-    yamlfile = diag.tabdir / \
-        f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.yml'
-    with open(yamlfile, 'w', encoding='utf-8') as file:
-        yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False)
+                            # compute vertical bounds as weights
+                            bounds_lev = guess_bounds(complete['plev'], name='plev')
+                            bounds = abs(bounds_lev[:, 0] - bounds_lev[:, 1])
+                            www = xr.DataArray(bounds, coords=[complete['plev']], dims=['plev'])
 
-    # to this date, only EC23 support comparison with CMIP6 data
-    if diag.climatology == 'EC23':
+                            # vertical mean
+                            outarray = complete.weighted(www).mean(dim='plev')
 
-        # prepare the data for the heatmap from the original yaml dictionaries
-        data2plot, cmip6, longnames = prepare_clim_dictionaries_pi(data=varstat, clim=piclim,
-                                                                   shortnames=diag.field_all)
+                        # horizontal averaging with land-sea mask
+                        else:
+                            complete = (final - cfield)**2 / vfield
+                            outarray = mask_field(xfield=complete, mask_type=piclim[var]['mask'], dom=domain, mask=domain_mask)
 
-        # call the heatmap routine for a plot
-        mapfile = os.path.join(diag.figdir,
-                               f'PI4_{diag.climatology}_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.pdf')
-        diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,
-                     'climatology': diag.climatology,
-                     'year1': diag.year1, 'year2': diag.year2,
-                     'seasons': diag.seasons, 'regions': diag.regions,
-                     'longnames': longnames}
-        heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6,
-                              diag=diag_dict, filemap=mapfile)
+                        # loop on different regions
+                        for region in diag.regions:
+                            slicearray = select_region(outarray, region)
 
-    toc = time()
-    # evaluate tic-toc time of postprocessing
-    loggy.info("Postproc done in %.4f seconds", toc - tic)
-    print('ECmean4 Performance Indices succesfully computed!')
+                            # latitude-based averaging
+                            weights = np.cos(np.deg2rad(slicearray.lat))
+                            out = slicearray.weighted(weights).mean().data
+                            # store the PI
+                            result[season][region] = round(float(out), 3)
 
+                            # diagnostic
+                            if region == 'Global':
+                                logging.info('PI for %s %s %s %s', region, season, var, result[season][region])
+
+            # nested dictionary, to be redefined as a dict to remove lambdas
+            varstat[var] = result
 
 
 def pi_entry_point():
     """
-    Command line interface to run the global_mean function
+    Command line interface to run the performance indices calculation.
     """
-
     # read arguments from command line
     args = parse_arguments(sys.argv[1:], script='pi')
 
     performance_indices(exp=args.exp, year1=args.year1, year2=args.year2,
-                        numproc=args.numproc,
-                        loglevel=args.loglevel,
-                        climatology=args.climatology,
-                        interface=args.interface, config=args.config,
-                        model=args.model, ensemble=args.ensemble,
-                        outputdir=args.outputdir)
+                            numproc=args.numproc, loglevel=args.loglevel, 
+                            climatology=args.climatology,
+                            interface=args.interface, config=args.config,
+                            model=args.model, ensemble=args.ensemble, outputdir=args.outputdir)
+
+def performance_indices(exp, year1, year2, config='config.yml', loglevel='WARNING',
+                        numproc=1, climatology='EC23', interface=None, model=None,
+                        ensemble='r1i1p1f1', silent=None, xdataset=None, outputdir=None):
+    """
+    Wrapper function to compute the performance indices for a given experiment and years.
+    """
+    pi = PerformanceIndices(exp=exp, year1=year1, year2=year2, config=config,
+                            loglevel=loglevel, numproc=numproc, climatology=climatology,
+                            interface=interface, model=model, ensemble=ensemble, silent=silent,
+                            xdataset=xdataset, outputdir=outputdir)
+    pi.prepare()
+    pi.toc('Preparation')
+    pi.run()
+    pi.toc('Calculation')
+    pi.store()
+    pi.plot()
+    pi.toc('Plotting')

From 884137cc82e63b2530edad3824a942c26fbd957a Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Fri, 20 Dec 2024 10:35:15 +0100
Subject: [PATCH 18/26] global mean conversion

---
 ecmean/__init__.py        |   5 +-
 ecmean/global_mean.py     | 506 ++++++++++++++++----------------------
 ecmean/libs/diagnostic.py |   2 +-
 3 files changed, 222 insertions(+), 291 deletions(-)

diff --git a/ecmean/__init__.py b/ecmean/__init__.py
index 916a143..ebb2f0b 100644
--- a/ecmean/__init__.py
+++ b/ecmean/__init__.py
@@ -6,7 +6,8 @@
 from ecmean.libs.diagnostic import Diagnostic
 from ecmean.libs.support import Supporter
 from ecmean.libs.units import UnitsHandler
-from ecmean.global_mean import global_mean
+from ecmean.global_mean import GlobalMean, global_mean
 from ecmean.performance_indices import PerformanceIndices, performance_indices
 
-__all__ = ["global_mean", "PerformanceIndices", "performance_indices", "Diagnostic", "Supporter", "UnitsHandler"]
+__all__ = ["GlobalMean", "global_mean", "PerformanceIndices",
+           "performance_indices", "Diagnostic", "Supporter", "UnitsHandler"]
diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index e964d76..bdeda87 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-   python3 version of ECmean global mean tool.
-   Using a reference file from yaml and Xarray
+    python3 version of ECmean global mean tool.
+    Using a reference file from yaml and Xarray
 
-   @author Paolo Davini (p.davini@isac.cnr.it), Sep 2022.
-   @author Jost von Hardenberg (jost.hardenberg@polito.it), Sep 2022
+    @author Paolo Davini (p.davini@isac.cnr.it), Sep 2022.
+    @author Jost von Hardenberg (jost.hardenberg@polito.it), Sep 2022
 """
 
 __author__ = "Paolo Davini (p.davini@isac.cnr.it), Sep 2022."
@@ -24,8 +24,8 @@
 
 from ecmean import Diagnostic, Supporter, UnitsHandler
 from ecmean.libs.general import weight_split, write_tuning_table, get_domain, \
-    check_time_axis, init_mydict, \
-        check_var_interface, check_var_climatology, set_multiprocessing_start_method
+     check_time_axis, init_mydict, \
+          check_var_interface, check_var_climatology, set_multiprocessing_start_method
 from ecmean.libs.files import var_is_there, get_inifiles, load_yaml, make_input_filename
 from ecmean.libs.formula import formula_wrapper
 from ecmean.libs.masks import masked_meansum, select_region
@@ -38,300 +38,230 @@
 dask.config.set(scheduler="synchronous")
 
 
-def gm_worker(util, ref, face, diag, varmean, vartrend, varlist):
-    """Main parallel diagnostic worker for global mean
-
-    Args:
-        util: the utility dictionary, including mask and weights
-        ref: the reference dictionary for the global mean
-        face: the interface to be used to access the data
-        diag: the diagnostic class object
-        varmean: the dictionary for the global mean (empty)
-        vartrend: the dictionary for the trends (empty)
-        varlist: the variable on which compute the global mean
-
-    Returns:
-        vartrend and varmean under the form of a dictionaries
+class GlobalMean:
     """
-
-    loggy = logging.getLogger(__name__)
-
-    for var in varlist:
-
-        # create empty nested dictionaries
-        result = init_mydict(diag.seasons, diag.regions)
-        trend = init_mydict(diag.seasons, diag.regions)
-
-        # check if the variable is in the interface file
-        if check_var_interface(var, face):
-
-            # get domain
-            domain = get_domain(var, face)
-
-            # compute weights
-            weights = getattr(util, domain + 'area')
-            domain_mask = getattr(util, domain + 'mask')
-
-            # get input files/fielf
-            infile = make_input_filename(var, face, diag)
-
-            # check if variables are available
-            isavail, varunit = var_is_there(infile, var, face)
-
-            if isavail:
-
-                # perform the unit conversion extracting offset and factor
-                units_handler = UnitsHandler(var, org_units=varunit, clim=ref, face=face)
-                offset, factor = units_handler.offset, units_handler.factor
-
-                # load the object
-                if not isinstance(infile, (xr.DataArray, xr.Dataset)):
-                    xfield = xr.open_mfdataset(infile, preprocess=xr_preproc, chunks={'time': 12})
-                else:
-                    xfield = infile
-
-                # in case of big files with multi year, be sure of having opened the right records
-                xfield = xfield.sel(time=xfield.time.dt.year.isin(diag.years_joined))
-
-                # check time axis
-                check_time_axis(xfield.time, diag.years_joined)
-
-                # get the data-array field for the required var
-                cfield = formula_wrapper(var, face, xfield).compute()
-
-                for season in diag.seasons:
-
-                    # copy of the full field
-                    tfield = cfield.copy(deep=True)
-
-                    if season != 'ALL':
-                        tfield = tfield.sel(time=cfield.time.dt.season.isin(season))
-
-                    if diag.ftrend:
-                        # this does not consider continuous seasons for DJF, but JF+D
-                        tfield = tfield.groupby('time.year').mean('time')
+    Class to compute the global mean of an experiment.
+    """
+    def __init__(self, exp, year1, year2, config='config.yml', loglevel='WARNING', numproc=1,
+                      interface=None, model=None, ensemble='r1i1p1f1', addnan=False, silent=None,
+                      trend=None, line=None, outputdir=None, xdataset=None):
+        self.exp = exp
+        self.year1 = year1
+        self.year2 = year2
+        self.config = config
+        self.loglevel = loglevel
+        self.numproc = numproc
+        self.interface = interface
+        self.model = model
+        self.ensemble = ensemble
+        self.addnan = addnan
+        self.silent = silent
+        self.trend = trend
+        self.line = line
+        self.outputdir = outputdir
+        self.xdataset = xdataset
+        self.loggy = setup_logger(level=self.loglevel)
+        self.diag = None
+        self.face = None
+        self.ref = None
+        self.util_dictionary = None
+        self.varmean = None
+        self.vartrend = None
+        self.funcname = self.__class__.__name__
+        self.start_time = time()
+    
+    def toc(self, message):
+        """Update the timer and log the elapsed time."""
+        elapsed_time = time() - self.start_time
+        self.start_time = time()
+        self.loggy.info('%s time: %.2f seconds', message, elapsed_time)
+
+    def prepare(self):
+        plat, mprocmethod = set_multiprocessing_start_method()
+        self.loggy.info('Running on %s and multiprocessing method set as "%s"', plat, mprocmethod)
+
+        self.diag = Diagnostic(argparse.Namespace(**self.__dict__))
+        self.face = load_yaml(self.diag.interface)
+        self.ref = load_yaml(self.diag.reffile)
+
+        check_var_climatology(self.diag.var_all, self.ref.keys())
+
+        os.makedirs(self.diag.tabdir, exist_ok=True)
+        os.makedirs(self.diag.figdir, exist_ok=True)
+
+        comp = self.face['model']['component']
+        inifiles = get_inifiles(self.face, self.diag)
+
+        units_extra_definition()
+
+        self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=True, remap=False)
+
+    def run(self):
+        mgr = Manager()
+        self.varmean = mgr.dict()
+        self.vartrend = mgr.dict()
+        processes = []
+
+        for varlist in weight_split(self.diag.var_all, self.diag.numproc):
+            core = Process(target=self.gm_worker, args=(self.util_dictionary, self.ref, self.face, self.diag,
+                                                                        self.varmean, self.vartrend, varlist))
+            core.start()
+            processes.append(core)
+
+        for proc in processes:
+            proc.join()
+
+    def store(self):
+        global_table = []
+        for var in self.diag.var_all:
+            gamma = self.ref[var]
+            if isinstance(gamma['obs'], dict):
+                    tabval = gamma['obs']['ALL']['Global']
+                    outval = str(tabval['mean']) + '\u00B1' + str(tabval['std'])
+            else:
+                    outval = gamma['obs']
+
+            if 'year1' in gamma.keys():
+                    years = str(gamma['year1']) + '-' + str(gamma['year2'])
+            else:
+                    raise ValueError('Year1 and Year2 are not defined in the reference file')
+
+            out_sequence = [var, gamma['longname'], gamma['units'], self.varmean[var]['ALL']['Global']]
+            if self.diag.ftrend:
+                    out_sequence = out_sequence + [self.vartrend[var]['ALL']['Global']]
+            out_sequence = out_sequence + [outval, gamma.get('dataset', ''), years]
+            global_table.append(out_sequence)
+
+        head = ['Variable', 'Longname', 'Units', self.diag.modelname]
+        if self.diag.ftrend:
+            head = head + ['Trend']
+        head = head + ['Obs.', 'Dataset', 'Years']
+
+        tablefile = os.path.join(self.diag.tabdir,
+                                        f'global_mean_{self.diag.expname}_{self.diag.modelname}_{self.diag.ensemble}_{self.diag.year1}_{self.diag.year2}.txt')
+        self.loggy.info('Table file is: %s', tablefile)
+        with open(tablefile, 'w', encoding='utf-8') as out:
+            out.write(tabulate(global_table, headers=head, stralign='center', tablefmt='orgtbl'))
+
+        ordered = {}
+        for var in self.diag.var_all:
+            ordered[var] = self.varmean[var]
+
+        yamlfile = os.path.join(self.diag.tabdir,
+                                        f'global_mean_{self.diag.expname}_{self.diag.modelname}_{self.diag.ensemble}_{self.diag.year1}_{self.diag.year2}.yml')
+        self.loggy.info('YAML file is: %s', tablefile)
+        with open(yamlfile, 'w', encoding='utf-8') as file:
+            yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False)
+
+    def plot(self, mapfile=None):
+
+        obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref, 
+                                                                              self.diag.var_all, self.diag.seasons, 
+                                                                              self.diag.regions)
+        if mapfile is None:
+            mapfile = os.path.join(self.diag.figdir,
+                                        f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.pdf')
+        self.loggy.info('Figure file is: %s', mapfile)
+
+        diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname,
+                        'year1': self.diag.year1, 'year2': self.diag.year2,
+                        'units_list': units_list}
+
+        heatmap_comparison_gm(data_dict=data2plot, mean_dict=obsmean, std_dict=obsstd,
+                                    diag=diag_dict, filemap=mapfile, addnan=self.diag.addnan)
+
+        if self.diag.ftable:
+            self.loggy.info('Line file is: %s', self.diag.linefile)
+            write_tuning_table(self.diag.linefile, self.varmean, self.diag.var_table, self.diag, self.ref)
+
+    @staticmethod
+    def gm_worker(util, ref, face, diag, varmean, vartrend, varlist):
+        """"
+        Workhorse for the global mean computation.
+
+        """
+        loggy = logging.getLogger(__name__)
+
+        for var in varlist:
+            result = init_mydict(diag.seasons, diag.regions)
+            trend = init_mydict(diag.seasons, diag.regions)
+
+            if check_var_interface(var, face):
+                domain = get_domain(var, face)
+                weights = getattr(util, domain + 'area')
+                domain_mask = getattr(util, domain + 'mask')
+                infile = make_input_filename(var, face, diag)
+                isavail, varunit = var_is_there(infile, var, face)
+
+                if isavail:
+                    units_handler = UnitsHandler(var, org_units=varunit, clim=ref, face=face)
+                    offset, factor = units_handler.offset, units_handler.factor
+
+                    if not isinstance(infile, (xr.DataArray, xr.Dataset)):
+                        xfield = xr.open_mfdataset(infile, preprocess=xr_preproc, chunks={'time': 12})
                     else:
-                        tfield = tfield.mean(dim='time')
-
-                    for region in diag.regions:
+                        xfield = infile
 
-                        slicefield = select_region(tfield, region)
-                        sliceweights = select_region(weights, region)
-                        if isinstance(domain_mask, xr.DataArray):
-                            slicemask = select_region(domain_mask, region)
-                        else:
-                            slicemask = 0.
+                    xfield = xfield.sel(time=xfield.time.dt.year.isin(diag.years_joined))
+                    check_time_axis(xfield.time, diag.years_joined)
+                    cfield = formula_wrapper(var, face, xfield).compute()
 
-                        # final operation on the field
-                        avg = masked_meansum(
-                            xfield=slicefield, weights=sliceweights, mask=slicemask,
-                            operation=ref[var].get('operation', 'mean'),
-                            mask_type=ref[var].get('mask', 'global'),
-                            domain=domain)
+                    for season in diag.seasons:
+                        tfield = cfield.copy(deep=True)
+                        if season != 'ALL':
+                                tfield = tfield.sel(time=cfield.time.dt.season.isin(season))
 
-                        # if dask delayead object, compute
-                        if isinstance(avg, dask.array.core.Array):
-                            avg = avg.compute()
+                        if diag.ftrend:
+                                tfield = tfield.groupby('time.year').mean('time')
+                        else:
+                                tfield = tfield.mean(dim='time')
 
-                        result[season][region] = float((np.nanmean(avg) + offset) * factor)
+                        for region in diag.regions:
+                                slicefield = select_region(tfield, region)
+                                sliceweights = select_region(weights, region)
+                                if isinstance(domain_mask, xr.DataArray):
+                                    slicemask = select_region(domain_mask, region)
+                                else:
+                                    slicemask = 0.
 
-                        if diag.ftrend:
-                            if len(avg) == len(diag.years_joined):
-                                trend[season][region] = np.polyfit(diag.years_joined, avg, 1)[0]
-                        if season == 'ALL' and region == 'Global':
-                            loggy.info('Average: %s %s %s %s', var, season, region, result[season][region])
-
-        # nested dictionary, to be redifend as a dict to remove lambdas
-        varmean[var] = result
-        vartrend[var] = trend
-
-
-def global_mean(exp, year1, year2,
-                config='config.yml',
-                loglevel='WARNING',
-                numproc=1,
-                interface=None, model=None, ensemble='r1i1p1f1',
-                addnan=False,
-                silent=None, trend=None, line=None,
-                outputdir=None, xdataset=None):
-    """The main ECmean4 global mean function
-
-    :param exp: Experiment name or ID
-    :param year1: Initial year
-    :param year2: Final year
-    :param config: configuration file, optional (default 'config.yml')
-    :param loglevel: level of logging, optional (default 'WARNING')
-    :param numproc: number of multiprocessing cores, optional (default '1')
-    :param interface: interface file to be used, optional (default as specifified in config file)
-    :param model: model to be analyzed, optional (default as specifified in config file)
-    :param ensemble: ensemble member to be analyzed, optional (default as 'r1i1p1f1')
-    :param add_nan: add to the final plots also fields which cannot be compared against observations
-    :param silent: do not print anything to std output, optional
-    :param trend: compute yearly trends, optional
-    :param line: appends also single line to a table, optional
-    :param outputdir: output directory for the single line output, optional
-    :param xdataset: xarray dataset - already open - to be used without looking for files
-
-    :returns: the global mean txt table as defined in the output
+                                avg = masked_meansum(
+                                    xfield=slicefield, weights=sliceweights, mask=slicemask,
+                                    operation=ref[var].get('operation', 'mean'),
+                                    mask_type=ref[var].get('mask', 'global'),
+                                    domain=domain)
 
-    """
+                                if isinstance(avg, dask.array.core.Array):
+                                    avg = avg.compute()
 
-    # create a name space with all the arguments to feed the Diagnostic class
-    # This is not the neatest option, but it is very compact
-    funcname = __name__
-    argv = argparse.Namespace(**locals())
+                                result[season][region] = float((np.nanmean(avg) + offset) * factor)
 
-    # set loglevel
-    loggy = setup_logger(level=argv.loglevel)
-
-    # set dask and multiprocessing fork
-    plat, mprocmethod = set_multiprocessing_start_method()
-    loggy.info('Running on %s and multiprocessing method set as "%s"', plat, mprocmethod)
-
-    # start time
-    tic = time()
-
-    # initialize the diag class, load the inteface and the reference file
-    diag = Diagnostic(argv)
-    face = load_yaml(diag.interface)
-    ref = load_yaml(diag.reffile)
-
-    # check that everyhing is there
-    check_var_climatology(diag.var_all, ref.keys())
-
-    # Create missing folders
-    os.makedirs(diag.tabdir, exist_ok=True)
-    os.makedirs(diag.figdir, exist_ok=True)
-
-    # Can probably be cleaned up further
-    comp = face['model']['component']  # Get component for each domain
-
-    # get file info
-    inifiles = get_inifiles(face, diag)
-
-
-    # add missing unit definition
-    units_extra_definition()
-
-    # create util dictionary including mask and weights for both atmosphere
-    # and ocean grids
-    util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'],
-                                areas=True, remap=False)
-
-    # main loop: manager is required for shared variables
-    mgr = Manager()
-
-    # dictionaries are shared, so they have to be passed as functions
-    varmean = mgr.dict()
-    vartrend = mgr.dict()
-    processes = []
-
-    # loop on the variables, create the parallel process
-    for varlist in weight_split(diag.var_all, diag.numproc):
-        core = Process(
-            target=gm_worker, args=(util_dictionary, ref, face, diag,
-                                    varmean, vartrend, varlist))
-        core.start()
-        processes.append(core)
-
-    # wait for the processes to finish
-    for proc in processes:
-        proc.join()
-
-    toc = time()
-
-    # evaluate tic-toc time  of execution
-    loggy.info('Analysis done in %.4f seconds', toc - tic)
-
-    tic = time()
-
-
-    # loop on the variables to create the output table
-    global_table = []
-    for var in diag.var_all:
-        gamma = ref[var]
-        # get the predefined value or the ALL Global one
-        if isinstance(gamma['obs'], dict):
-            tabval = gamma['obs']['ALL']['Global']
-            outval = str(tabval['mean']) + '\u00B1' + str(tabval['std'])
-        else:
-            outval = gamma['obs']
-
-        if 'year1' in gamma.keys():
-            years = str(gamma['year1']) + '-' + str(gamma['year2'])
-        else:
-            raise ValueError('Year1 and Year2 are not defined in the reference file')
-
-        out_sequence = [var, gamma['longname'], gamma['units'], varmean[var]['ALL']['Global']]
-        if diag.ftrend:
-            out_sequence = out_sequence + [vartrend[var]['ALL']['Global']]
-        out_sequence = out_sequence + [outval, gamma.get('dataset', ''), years]
-        global_table.append(out_sequence)
-
-    # prepare the header for the table
-    head = ['Variable', 'Longname', 'Units', diag.modelname]
-    if diag.ftrend:
-        head = head + ['Trend']
-    head = head + ['Obs.', 'Dataset', 'Years']
-
-    # write the file with tabulate
-    tablefile = os.path.join(diag.tabdir,
-                             f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.txt')
-    loggy.info('Table file is: %s', tablefile)
-    with open(tablefile, 'w', encoding='utf-8') as out:
-        out.write(tabulate(global_table, headers=head, stralign='center', tablefmt='orgtbl'))
+                                if diag.ftrend:
+                                    if len(avg) == len(diag.years_joined):
+                                        trend[season][region] = np.polyfit(diag.years_joined, avg, 1)[0]
+                                if season == 'ALL' and region == 'Global':
+                                    loggy.info('Average: %s %s %s %s', var, season, region, result[season][region])
 
-    # required to avoid problem with multiprocessing
-    ordered = {}
-    for var in diag.var_all:
-        ordered[var] = varmean[var]
-
-    # dump the yaml file for global_mean, including all the seasons
-    yamlfile = os.path.join(diag.tabdir,
-                            f'global_mean_{diag.expname}_{diag.modelname}_{diag.ensemble}_{diag.year1}_{diag.year2}.yml')
-    loggy.info('YAML file is: %s', tablefile)
-    with open(yamlfile, 'w', encoding='utf-8') as file:
-        yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False)
-
-    # prepare dictionaries for global mean
-    obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(ordered, ref, diag.var_all, diag.seasons, diag.regions)
-
-    # call the heatmap routine for a plot
-    mapfile = os.path.join(diag.figdir,
-                           f'global_mean_{diag.expname}_{diag.modelname}_r1i1p1f1_{diag.year1}_{diag.year2}.png')
-    loggy.info('Figure file is: %s', mapfile)
-
-    diag_dict = {'modelname': diag.modelname, 'expname': diag.expname,
-                 'year1': diag.year1, 'year2': diag.year2,
-                 'units_list': units_list}
-
-    heatmap_comparison_gm(data_dict=data2plot, mean_dict=obsmean, std_dict=obsstd,
-                          diag=diag_dict, filemap=mapfile, addnan=diag.addnan)
-
-    # Print appending one line to table (for tuning)
-    if diag.ftable:
-        loggy.info('Line file is: %s', diag.linefile)
-        write_tuning_table(diag.linefile, varmean, diag.var_table, diag, ref)
-
-    toc = time()
-    # evaluate tic-toc time of postprocessing
-    loggy.info("Postproc done in %.4f seconds", toc - tic)
-    print('ECmean4 Global Mean succesfully computed!')
+            varmean[var] = result
+            vartrend[var] = trend
 
 
 def gm_entry_point():
-    """
-    Command line interface to run the global_mean function
-    """
-
-    # read arguments from command line
     args = parse_arguments(sys.argv[1:], script='gm')
-
-    global_mean(exp=args.exp, year1=args.year1, year2=args.year2,
-                numproc=args.numproc,
-                trend=args.trend, line=args.line,
-                loglevel=args.loglevel,
-                interface=args.interface, config=args.config,
-                model=args.model, ensemble=args.ensemble,
-                addnan=args.addnan,
-                outputdir=args.outputdir)
+    global_mean(exp=args.exp, year1=args.year1, year2=args.year2, numproc=args.numproc,
+                          trend=args.trend, line=args.line, loglevel=args.loglevel,
+                          interface=args.interface, config=args.config, model=args.model,
+                          ensemble=args.ensemble, addnan=args.addnan, outputdir=args.outputdir)
+    print('ECmean4 Global Mean successfully computed!')
+
+def global_mean(exp, year1, year2, config='config.yml', loglevel='WARNING', numproc=1,
+                interface=None, model=None, ensemble='r1i1p1f1', addnan=False, silent=None,
+                trend=None, line=None, outputdir=None, xdataset=None):
+    gm = GlobalMean(exp, year1, year2, config, loglevel, numproc, interface, model, ensemble, addnan, silent, trend, line, outputdir, xdataset)
+    gm.prepare()
+    gm.toc('Preparation')
+    gm.run()
+    gm.toc('Computation')
+    gm.store()
+    gm.plot()
+    gm.toc('Plotting')
+    print('ECmean4 Global Mean successfully computed!')
diff --git a/ecmean/libs/diagnostic.py b/ecmean/libs/diagnostic.py
index 3cab468..eac7832 100644
--- a/ecmean/libs/diagnostic.py
+++ b/ecmean/libs/diagnostic.py
@@ -82,7 +82,7 @@ def __init__(self, args):
             self.figdir = Path(os.path.join(outputdir, 'PDF'))
 
         # init for global mean
-        if self.funcname == 'global_mean':
+        if self.funcname == 'GlobalMean':
             self.cfg_global_mean(cfg)
 
         # init for performance indices

From 8e5afb640710ecfc85888ec7ef9d4a9f0947664c Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Fri, 20 Dec 2024 10:40:03 +0100
Subject: [PATCH 19/26] polish

---
 ecmean/global_mean.py         | 12 ++++++++----
 ecmean/performance_indices.py | 11 ++++++++---
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index bdeda87..1858506 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -153,14 +153,19 @@ def store(self):
         with open(yamlfile, 'w', encoding='utf-8') as file:
             yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False)
 
-    def plot(self, mapfile=None):
-
+    def plot(self, mapfile=None, figformat='pdf'):
+        """"
+        Plot the global mean values.
+        Args:
+            mapfile: Path to the output file. If None, it will be defined automatically following ECmean syntax
+            figformat: Format of the output file.
+        """
         obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref, 
                                                                               self.diag.var_all, self.diag.seasons, 
                                                                               self.diag.regions)
         if mapfile is None:
             mapfile = os.path.join(self.diag.figdir,
-                                        f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.pdf')
+                                        f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}')
         self.loggy.info('Figure file is: %s', mapfile)
 
         diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname,
@@ -264,4 +269,3 @@ def global_mean(exp, year1, year2, config='config.yml', loglevel='WARNING', nump
     gm.store()
     gm.plot()
     gm.toc('Plotting')
-    print('ECmean4 Global Mean successfully computed!')
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index 69a442d..1dbeb27 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -141,8 +141,13 @@ def store(self, yamlfile=None):
         with open(yamlfile, 'w', encoding='utf-8') as file:
             yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False)
 
-    def plot(self, mapfile=None):
-        """Generate the heatmap for performance indices."""
+    def plot(self, mapfile=None, figformat='pdf'):
+        """Generate the heatmap for performance indices.
+        
+        Args:
+            mapfile: Path to the output file. If None, it will be defined automatically following ECmean syntax
+            figformat: Format of the output file.
+        """
         # to this date, only EC23 support comparison with CMIP6 data
         if self.diag.climatology == 'EC23':
             # prepare the data for the heatmap from the original yaml dictionaries
@@ -152,7 +157,7 @@ def plot(self, mapfile=None):
 
             # call the heatmap routine for a plot
             if mapfile is None:
-                mapfile = os.path.join(self.diag.figdir, f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.pdf')
+                mapfile = os.path.join(self.diag.figdir, f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}')
             diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname, 'climatology': self.diag.climatology, 'year1': self.diag.year1, 'year2': self.diag.year2, 'seasons': self.diag.seasons, 'regions': self.diag.regions, 'longnames': longnames}
             heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=diag_dict, filemap=mapfile)
 

From 145e1113a019ae86c2eb90020e10d0c0cf555c42 Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Fri, 20 Dec 2024 11:13:35 +0100
Subject: [PATCH 20/26] raise

---
 ecmean/libs/files.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ecmean/libs/files.py b/ecmean/libs/files.py
index 6b8d191..d549481 100644
--- a/ecmean/libs/files.py
+++ b/ecmean/libs/files.py
@@ -116,6 +116,8 @@ def get_clim_files(piclim, var, diag, season):
         vvvv = str(
             diag.resclmdir /
             f'{stringname}_variance_{var}_{dataref}_{diag.resolution}_{datayear1}-{datayear2}.nc')
+    else:
+        raise ValueError('Climatology not supported/existing!')
 
     return clim, vvvv
 

From b61130bb9f466cd48c9780458948b295088a5af6 Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Fri, 20 Dec 2024 11:25:19 +0100
Subject: [PATCH 21/26] docstring

---
 ecmean/global_mean.py         | 109 +++++++++++++++++++++++-----------
 ecmean/performance_indices.py |  45 ++++++++++++--
 2 files changed, 113 insertions(+), 41 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index 1858506..827c934 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -40,7 +40,42 @@
 
 class GlobalMean:
     """
-    Class to compute the global mean of an experiment.
+    Attributes:
+        exp (str): Experiment name.
+        year1 (int): Start year of the experiment.
+        year2 (int): End year of the experiment.
+        config (str): Path to the configuration file. Default is 'config.yml'.
+        loglevel (str): Logging level. Default is 'WARNING'.
+        numproc (int): Number of processes to use. Default is 1.
+        interface (str): Path to the interface file. Default is None.
+        model (str): Model name. Default is None.
+        ensemble (str): Ensemble identifier. Default is 'r1i1p1f1'.
+        addnan (bool): Whether to add NaNs. Default is False.
+        silent (bool): Whether to suppress output. Default is None.
+        trend (bool): Whether to compute trends. Default is None.
+        line (str): Line identifier. Default is None.
+        outputdir (str): Output directory. Default is None.
+        xdataset (str): Path to the xdataset. Default is None.
+        loggy (logging.Logger): Logger instance.
+        diag (Diagnostic): Diagnostic instance.
+        face (dict): Interface dictionary.
+        ref (dict): Reference dictionary.
+        util_dictionary (Supporter): Supporter instance.
+        varmean (dict): Dictionary to store variable means.
+        vartrend (dict): Dictionary to store variable trends.
+        funcname (str): Name of the class.
+        start_time (float): Start time for the timer.
+    Methods:
+        toc(message):
+            Update the timer and log the elapsed time.
+        prepare():
+            Prepare the necessary components for the global mean computation.
+        run():
+            Run the global mean computation using multiprocessing.
+        store():
+            Store the computed global mean values in a table and YAML file.
+        plot(mapfile=None, figformat='pdf'):
+        gm_worker(util, ref, face, diag, varmean, vartrend, varlist):
     """
     def __init__(self, exp, year1, year2, config='config.yml', loglevel='WARNING', numproc=1,
                       interface=None, model=None, ensemble='r1i1p1f1', addnan=False, silent=None,
@@ -95,6 +130,7 @@ def prepare(self):
         units_extra_definition()
 
         self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=True, remap=False)
+        self.toc('Computation')
 
     def run(self):
         mgr = Manager()
@@ -110,25 +146,26 @@ def run(self):
 
         for proc in processes:
             proc.join()
+        self.toc('Computation')
 
     def store(self):
         global_table = []
         for var in self.diag.var_all:
             gamma = self.ref[var]
             if isinstance(gamma['obs'], dict):
-                    tabval = gamma['obs']['ALL']['Global']
-                    outval = str(tabval['mean']) + '\u00B1' + str(tabval['std'])
+                tabval = gamma['obs']['ALL']['Global']
+                outval = str(tabval['mean']) + '\u00B1' + str(tabval['std'])
             else:
-                    outval = gamma['obs']
+                outval = gamma['obs']
 
             if 'year1' in gamma.keys():
-                    years = str(gamma['year1']) + '-' + str(gamma['year2'])
+                years = str(gamma['year1']) + '-' + str(gamma['year2'])
             else:
-                    raise ValueError('Year1 and Year2 are not defined in the reference file')
+                raise ValueError('Year1 and Year2 are not defined in the reference file')
 
             out_sequence = [var, gamma['longname'], gamma['units'], self.varmean[var]['ALL']['Global']]
             if self.diag.ftrend:
-                    out_sequence = out_sequence + [self.vartrend[var]['ALL']['Global']]
+                out_sequence = out_sequence + [self.vartrend[var]['ALL']['Global']]
             out_sequence = out_sequence + [outval, gamma.get('dataset', ''), years]
             global_table.append(out_sequence)
 
@@ -178,6 +215,7 @@ def plot(self, mapfile=None, figformat='pdf'):
         if self.diag.ftable:
             self.loggy.info('Line file is: %s', self.diag.linefile)
             write_tuning_table(self.diag.linefile, self.varmean, self.diag.var_table, self.diag, self.ref)
+        self.toc('Plotting')
 
     @staticmethod
     def gm_worker(util, ref, face, diag, varmean, vartrend, varlist):
@@ -214,37 +252,37 @@ def gm_worker(util, ref, face, diag, varmean, vartrend, varlist):
                     for season in diag.seasons:
                         tfield = cfield.copy(deep=True)
                         if season != 'ALL':
-                                tfield = tfield.sel(time=cfield.time.dt.season.isin(season))
+                            tfield = tfield.sel(time=cfield.time.dt.season.isin(season))
 
                         if diag.ftrend:
-                                tfield = tfield.groupby('time.year').mean('time')
+                            tfield = tfield.groupby('time.year').mean('time')
                         else:
-                                tfield = tfield.mean(dim='time')
+                            tfield = tfield.mean(dim='time')
 
                         for region in diag.regions:
-                                slicefield = select_region(tfield, region)
-                                sliceweights = select_region(weights, region)
-                                if isinstance(domain_mask, xr.DataArray):
-                                    slicemask = select_region(domain_mask, region)
-                                else:
-                                    slicemask = 0.
-
-                                avg = masked_meansum(
-                                    xfield=slicefield, weights=sliceweights, mask=slicemask,
-                                    operation=ref[var].get('operation', 'mean'),
-                                    mask_type=ref[var].get('mask', 'global'),
-                                    domain=domain)
-
-                                if isinstance(avg, dask.array.core.Array):
-                                    avg = avg.compute()
-
-                                result[season][region] = float((np.nanmean(avg) + offset) * factor)
-
-                                if diag.ftrend:
-                                    if len(avg) == len(diag.years_joined):
-                                        trend[season][region] = np.polyfit(diag.years_joined, avg, 1)[0]
-                                if season == 'ALL' and region == 'Global':
-                                    loggy.info('Average: %s %s %s %s', var, season, region, result[season][region])
+                            slicefield = select_region(tfield, region)
+                            sliceweights = select_region(weights, region)
+                            if isinstance(domain_mask, xr.DataArray):
+                                slicemask = select_region(domain_mask, region)
+                            else:
+                                slicemask = 0.
+
+                            avg = masked_meansum(
+                                xfield=slicefield, weights=sliceweights, mask=slicemask,
+                                operation=ref[var].get('operation', 'mean'),
+                                mask_type=ref[var].get('mask', 'global'),
+                                domain=domain)
+
+                            if isinstance(avg, dask.array.core.Array):
+                                avg = avg.compute()
+
+                            result[season][region] = float((np.nanmean(avg) + offset) * factor)
+
+                            if diag.ftrend:
+                                if len(avg) == len(diag.years_joined):
+                                    trend[season][region] = np.polyfit(diag.years_joined, avg, 1)[0]
+                            if season == 'ALL' and region == 'Global':
+                                loggy.info('Average: %s %s %s %s', var, season, region, result[season][region])
 
             varmean[var] = result
             vartrend[var] = trend
@@ -263,9 +301,8 @@ def global_mean(exp, year1, year2, config='config.yml', loglevel='WARNING', nump
                 trend=None, line=None, outputdir=None, xdataset=None):
     gm = GlobalMean(exp, year1, year2, config, loglevel, numproc, interface, model, ensemble, addnan, silent, trend, line, outputdir, xdataset)
     gm.prepare()
-    gm.toc('Preparation')
+   
     gm.run()
-    gm.toc('Computation')
     gm.store()
     gm.plot()
-    gm.toc('Plotting')
+
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index 1dbeb27..3af89b9 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -36,9 +36,44 @@
 dask.config.set(scheduler="synchronous")
 
 class PerformanceIndices:
-
     """
-    Class to compute the performance indices for a given experiment and years.
+    Class to compute the performance indices for a given experiment and years
+
+    Attributes:
+        exp (str): Experiment name.
+        year1 (int): Start year of the experiment.
+        year2 (int): End year of the experiment.
+        config (str): Path to the configuration file. Default is 'config.yml'.
+        loglevel (str): Logging level. Default is 'WARNING'.
+        numproc (int): Number of processes to use. Default is 1.
+        climatology (str): Climatology to use. Default is 'EC23'.
+        interface (str): Path to the interface file.
+        model (str): Model name.
+        ensemble (str): Ensemble identifier. Default is 'r1i1p1f1'.
+        silent (bool): If True, suppress output. Default is None.
+        xdataset (xarray.Dataset): Dataset to use.
+        outputdir (str): Directory to store output files.
+        loggy (logging.Logger): Logger instance.
+        diag (Diagnostic): Diagnostic instance.
+        face (dict): Interface dictionary.
+        piclim (dict): Climatology dictionary.
+        util_dictionary (Supporter): Utility dictionary for remapping and masks.
+        varstat (dict): Dictionary to store variable statistics.
+        funcname (str): Name of the class.
+        start_time (float): Start time for performance measurement.
+    Methods:
+        toc(message):
+            Update the timer and log the elapsed time.
+        prepare():
+            Prepare the necessary components for performance indices calculation.
+        run():
+            Run the performance indices calculation.
+        store(yamlfile=None):
+            Store the performance indices in a yaml file.
+        plot(mapfile=None, figformat='pdf'):
+            Generate the heatmap for performance indices.
+        pi_worker(util, piclim, face, diag, field_3d, varstat, varlist):
+            Main parallel diagnostic worker for performance indices.
     """
 
     def __init__(self, exp, year1, year2, config='config.yml',
@@ -108,6 +143,7 @@ def prepare(self):
 
         # create remap dictionary with atm and oce interpolators
         self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=False, remap=True, targetgrid=target_remap_grid)
+        self.toc('Calculation')
 
     def run(self):
         """Run the performance indices calculation."""
@@ -127,6 +163,7 @@ def run(self):
         # wait for the processes to finish
         for proc in processes:
             proc.join()
+        self.toc('Computation')
 
     def store(self, yamlfile=None):
         """Store the performance indices in a yaml file."""
@@ -160,6 +197,7 @@ def plot(self, mapfile=None, figformat='pdf'):
                 mapfile = os.path.join(self.diag.figdir, f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}')
             diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname, 'climatology': self.diag.climatology, 'year1': self.diag.year1, 'year2': self.diag.year2, 'seasons': self.diag.seasons, 'regions': self.diag.regions, 'longnames': longnames}
             heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=diag_dict, filemap=mapfile)
+        self.toc('Plotting')
 
     @staticmethod
     def pi_worker(util, piclim, face, diag, field_3d, varstat, varlist):
@@ -328,9 +366,6 @@ def performance_indices(exp, year1, year2, config='config.yml', loglevel='WARNIN
                             interface=interface, model=model, ensemble=ensemble, silent=silent,
                             xdataset=xdataset, outputdir=outputdir)
     pi.prepare()
-    pi.toc('Preparation')
     pi.run()
-    pi.toc('Calculation')
     pi.store()
     pi.plot()
-    pi.toc('Plotting')

From 307aad229b7adf99e1c8184a2b7e7c9d628dc44f Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Fri, 20 Dec 2024 11:26:42 +0100
Subject: [PATCH 22/26] changelgo

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1ba9d76..6b98d57 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 Unreleased is the current development version, which is currently lying in `main` branch.
 
 - Allowing for configuration file as dictionary (#106)
+- GlobalMean and PerformanceIndices classe introduced (#110)
 
 ## [v0.1.11]
 

From 33368e324798f876892c1eeb7222021b555e9431 Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Fri, 20 Dec 2024 16:04:23 +0100
Subject: [PATCH 23/26] cleaning of plotting

---
 ecmean/global_mean.py         | 20 ++++++++++----------
 ecmean/libs/plotting.py       | 35 ++++++++++++++---------------------
 ecmean/performance_indices.py | 12 +++++++-----
 3 files changed, 31 insertions(+), 36 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index 827c934..b43c65a 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -112,6 +112,7 @@ def toc(self, message):
         self.loggy.info('%s time: %.2f seconds', message, elapsed_time)
 
     def prepare(self):
+        """Prepare the necessary components for the global mean computation."""
         plat, mprocmethod = set_multiprocessing_start_method()
         self.loggy.info('Running on %s and multiprocessing method set as "%s"', plat, mprocmethod)
 
@@ -130,9 +131,10 @@ def prepare(self):
         units_extra_definition()
 
         self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=True, remap=False)
-        self.toc('Computation')
+        self.toc('Preparation')
 
     def run(self):
+        """Run the global mean computaacross all variables on using multiprocessing."""
         mgr = Manager()
         self.varmean = mgr.dict()
         self.vartrend = mgr.dict()
@@ -149,6 +151,7 @@ def run(self):
         self.toc('Computation')
 
     def store(self):
+        """Rearrange the data and save the yaml file and the table."""
         global_table = []
         for var in self.diag.var_all:
             gamma = self.ref[var]
@@ -197,20 +200,17 @@ def plot(self, mapfile=None, figformat='pdf'):
             mapfile: Path to the output file. If None, it will be defined automatically following ECmean syntax
             figformat: Format of the output file.
         """
-        obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref, 
-                                                                              self.diag.var_all, self.diag.seasons, 
+        obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref,
+                                                                              self.diag.var_all, self.diag.seasons,
                                                                               self.diag.regions)
         if mapfile is None:
             mapfile = os.path.join(self.diag.figdir,
-                                        f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}')
+                                    f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}')
         self.loggy.info('Figure file is: %s', mapfile)
 
-        diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname,
-                        'year1': self.diag.year1, 'year2': self.diag.year2,
-                        'units_list': units_list}
-
         heatmap_comparison_gm(data_dict=data2plot, mean_dict=obsmean, std_dict=obsstd,
-                                    diag=diag_dict, filemap=mapfile, addnan=self.diag.addnan)
+                                    diag=self.diag, units_list=units_list,
+                                    filemap=mapfile, addnan=self.diag.addnan)
 
         if self.diag.ftable:
             self.loggy.info('Line file is: %s', self.diag.linefile)
@@ -301,8 +301,8 @@ def global_mean(exp, year1, year2, config='config.yml', loglevel='WARNING', nump
                 trend=None, line=None, outputdir=None, xdataset=None):
     gm = GlobalMean(exp, year1, year2, config, loglevel, numproc, interface, model, ensemble, addnan, silent, trend, line, outputdir, xdataset)
     gm.prepare()
-   
     gm.run()
     gm.store()
     gm.plot()
+    
 
diff --git a/ecmean/libs/plotting.py b/ecmean/libs/plotting.py
index 91786cf..e1c2165 100644
--- a/ecmean/libs/plotting.py
+++ b/ecmean/libs/plotting.py
@@ -18,7 +18,7 @@
 
 loggy = logging.getLogger(__name__)
 
-def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str = None, size_model=14, **kwargs):
+def heatmap_comparison_pi(data_dict, cmip6_dict, diag, longnames, filemap: str = None, size_model=14, **kwargs):
     """
     Function to produce a heatmap - seaborn based - for Performance Indices
     based on CMIP6 ratio
@@ -26,7 +26,8 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str
     Args:
         data_dict (dict): dictionary of absolute performance indices
         cmip6_dict (dict): dictionary of CMIP6 performance indices
-        diag (dict): dictionary containing diagnostic information
+        diag (object): Diagnostic object
+        units_list (list): list of units
         filemap (str): path to save the plot
         size_model (int): size of the PIs in the plot
 
@@ -40,18 +41,15 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str
     loggy.debug(data_table)
 
     # relative pi with re-ordering of rows
-    cmip6_table = dict_to_dataframe(cmip6_dict)
-    if diag is not None and 'longnames' in diag:
-        cmip6_table = cmip6_table.reindex(diag['longnames'])
+    cmip6_table = dict_to_dataframe(cmip6_dict).reindex(longnames)
     relative_table = data_table.div(cmip6_table)
 
     # compute the total PI mean
     relative_table.loc['Total PI'] = relative_table.mean()
 
     # reordering columns if info is available
-    if diag is not None and 'regions' in diag and 'seasons' in diag:
-        lll = [(x, y) for x in diag['seasons'] for y in diag['regions']]
-        relative_table = relative_table[lll]
+    lll = [(x, y) for x in diag.seasons for y in diag.regions]
+    relative_table = relative_table[lll]
     loggy.debug("Relative table")
     loggy.debug(relative_table)
 
@@ -70,7 +68,7 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str
         title = kwargs['title']
     else:
         title = 'CMIP6 RELATIVE PI'
-        title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + str(diag['year1']) + ' ' + str(diag['year2'])
+        title += f" {diag.modelname} {diag.expname} {diag.year1} {diag.year2}"
 
     tot = len(myfield.columns)
     sss = len(set([tup[1] for tup in myfield.columns]))
@@ -93,10 +91,7 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str
     axs.set(xlabel=None)
 
     if filemap is None:
-        if diag is not None:
-            filemap = f'PI4_{diag["climatology"]}_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf'
-        else:
-            filemap = 'PI4_heatmap.pdf'
+        filemap = 'PI4_heatmap.pdf'
 
     # save and close
     plt.savefig(filemap)
@@ -104,7 +99,7 @@ def heatmap_comparison_pi(data_dict, cmip6_dict, diag: dict = None, filemap: str
     plt.close()
 
 
-def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: str,
+def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag, units_list, filemap=None,
                           addnan=True, size_model=14, size_obs=8, **kwargs):
     """
     Function to produce a heatmap - seaborn based - for Global Mean
@@ -114,7 +109,8 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s
         data_dict (dict): table of model data
         mean_dict (dict): table of observations
         std_dict (dict): table of standard deviation
-        diag (dict): dictionary containing diagnostic information
+        diag (dict): diagnostic object
+        units_list (list): list of units
         filemap (str): path to save the plot
         addnan (bool): add to the final plots also fields which cannot be compared against observations
         size_model (int): size of the model values in the plot
@@ -129,7 +125,7 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s
     mean_table = dict_to_dataframe(mean_dict)
     std_table = dict_to_dataframe(std_dict)
     for table in [data_table, mean_table, std_table]:
-        table.index = table.index + ' [' + diag['units_list'] + ']'
+        table.index = table.index + ' [' + units_list + ']'
 
     loggy.debug("Data table")
     loggy.debug(data_table)
@@ -151,7 +147,7 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s
         title = kwargs['title']
     else:
         title = 'GLOBAL MEAN'
-        title += ' ' + diag['modelname'] + ' ' + diag['expname'] + ' ' + str(diag['year1']) + ' ' + str(diag['year2'])
+        title += f" {diag.modelname} {diag.expname} {diag.year1} {diag.year2}"
 
     # set color range and palette
     thr = 10
@@ -194,10 +190,7 @@ def heatmap_comparison_gm(data_dict, mean_dict, std_dict, diag: dict, filemap: s
     axs.set(xlabel=None)
 
     if filemap is None:
-        if diag is not None:
-            filemap = f'Global_Mean_{diag["expname"]}_{diag["modelname"]}_{diag["year1"]}_{diag["year2"]}.pdf'
-        else:
-            filemap = 'Global_Mean_Heatmap.pdf'
+        filemap = 'Global_Mean_Heatmap.pdf'
 
     # save and close
     plt.savefig(filemap)
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index 3af89b9..05a88e2 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -143,7 +143,7 @@ def prepare(self):
 
         # create remap dictionary with atm and oce interpolators
         self.util_dictionary = Supporter(comp, inifiles['atm'], inifiles['oce'], areas=False, remap=True, targetgrid=target_remap_grid)
-        self.toc('Calculation')
+        self.toc('Preparation')
 
     def run(self):
         """Run the performance indices calculation."""
@@ -156,7 +156,9 @@ def run(self):
 
         # loop on the variables, create the parallel process
         for varlist in weight_split(self.diag.field_all, self.diag.numproc):
-            core = Process(target=self.pi_worker, args=(self.util_dictionary, self.piclim, self.face, self.diag, self.diag.field_3d, self.varstat, varlist))
+            core = Process(target=self.pi_worker, args=(self.util_dictionary, self.piclim, 
+                                                        self.face, self.diag, self.diag.field_3d, 
+                                                        self.varstat, varlist))
             core.start()
             processes.append(core)
 
@@ -194,9 +196,9 @@ def plot(self, mapfile=None, figformat='pdf'):
 
             # call the heatmap routine for a plot
             if mapfile is None:
-                mapfile = os.path.join(self.diag.figdir, f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}')
-            diag_dict = {'modelname': self.diag.modelname, 'expname': self.diag.expname, 'climatology': self.diag.climatology, 'year1': self.diag.year1, 'year2': self.diag.year2, 'seasons': self.diag.seasons, 'regions': self.diag.regions, 'longnames': longnames}
-            heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=diag_dict, filemap=mapfile)
+                mapfile = os.path.join(self.diag.figdir,
+                                    f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}')
+            heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=self.diag, longnames=longnames, filemap=mapfile)
         self.toc('Plotting')
 
     @staticmethod

From 41fe87c12d2dc01acb6af613ff8927b15da0a022 Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Fri, 20 Dec 2024 16:15:58 +0100
Subject: [PATCH 24/26] remove test for maps

---
 tests/test_performance_indices.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py
index 9feef91..db60696 100644
--- a/tests/test_performance_indices.py
+++ b/tests/test_performance_indices.py
@@ -7,10 +7,10 @@
 import pytest
 import xarray as xr
 import yaml
-from ecmean.performance_indices import performance_indices
+from ecmean import performance_indices
 from ecmean.libs.ncfixers import xr_preproc
 from ecmean.libs.general import are_dicts_equal
-from ecmean.libs.plotting import heatmap_comparison_pi, prepare_clim_dictionaries_pi
+#from ecmean.libs.plotting import heatmap_comparison_pi, prepare_clim_dictionaries_pi
 
 # set TOLERANCE
 TOLERANCE = 1e-1
@@ -77,16 +77,16 @@ def test_performance_indices_amip_xdataset(clim):
     assert are_dicts_equal(data1, data2, TOLERANCE),f"YAML files are not identical.\nData1: {data1}\nData2: {data2}"
 
 
-def test_pi_plot(tmp_path):
-    file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml'
-    with open(file1, 'r', encoding='utf8') as f1:
-        data = yaml.safe_load(f1)
-    file2 = 'ecmean/climatology/EC23/pi_climatology_EC23.yml'
-    with open(file2, 'r', encoding='utf8') as f1:
-        clim = yaml.safe_load(f1)
-    data_dict, clim_dict, _ = prepare_clim_dictionaries_pi(data=data, clim=clim, shortnames=data.keys())
-    heatmap_comparison_pi(data_dict=data_dict, cmip6_dict=clim_dict,
-                          title='CMIP6 RELATIVE PI test',
-                          mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf'))
-    assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created."
-    os.remove('PI4_heatmap.pdf')
+# def test_pi_plot(tmp_path):
+#     file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml'
+#     with open(file1, 'r', encoding='utf8') as f1:
+#         data = yaml.safe_load(f1)
+#     file2 = 'ecmean/climatology/EC23/pi_climatology_EC23.yml'
+#     with open(file2, 'r', encoding='utf8') as f1:
+#         clim = yaml.safe_load(f1)
+#     data_dict, clim_dict, longnames = prepare_clim_dictionaries_pi(data=data, clim=clim, shortnames=data.keys())
+    
+#     heatmap_comparison_pi(data_dict=data_dict, cmip6_dict=clim_dict,
+#                           title='CMIP6 RELATIVE PI test', longnames=longnames,
+#                           mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf'))
+#     assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created."

From 65427a639aab3e717cb65ba2f8b8703b35edd12d Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Fri, 20 Dec 2024 16:44:34 +0100
Subject: [PATCH 25/26] independent plots

---
 ecmean/global_mean.py             | 34 ++++++++++++++++++++++++-------
 ecmean/performance_indices.py     | 30 ++++++++++++++++++++-------
 ecmean/utils/clim-create.py       |  1 +
 tests/test_global_mean.py         |  9 +++++++-
 tests/test_performance_indices.py | 21 +++++++------------
 5 files changed, 66 insertions(+), 29 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index b43c65a..a27d23a 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -150,9 +150,17 @@ def run(self):
             proc.join()
         self.toc('Computation')
 
-    def store(self):
+    def yamlfile(self):
+        """Define the output YAML filename"""
+
+        yamlfile= self.diag.tabdir / f'global_mean_{self.diag.expname}_{self.diag.modelname}_{self.diag.ensemble}_{self.diag.year1}_{self.diag.year2}.yml'
+        return yamlfile
+
+    def store(self, yamlfile=None):
         """Rearrange the data and save the yaml file and the table."""
         global_table = []
+
+        # reorder the data to be stored
         for var in self.diag.var_all:
             gamma = self.ref[var]
             if isinstance(gamma['obs'], dict):
@@ -177,21 +185,23 @@ def store(self):
             head = head + ['Trend']
         head = head + ['Obs.', 'Dataset', 'Years']
 
+        # save table
         tablefile = os.path.join(self.diag.tabdir,
                                         f'global_mean_{self.diag.expname}_{self.diag.modelname}_{self.diag.ensemble}_{self.diag.year1}_{self.diag.year2}.txt')
         self.loggy.info('Table file is: %s', tablefile)
         with open(tablefile, 'w', encoding='utf-8') as out:
             out.write(tabulate(global_table, headers=head, stralign='center', tablefmt='orgtbl'))
 
-        ordered = {}
-        for var in self.diag.var_all:
-            ordered[var] = self.varmean[var]
+        # reaorder
+        self.varmean = {var: self.varmean[var] for var in self.diag.var_all}
 
-        yamlfile = os.path.join(self.diag.tabdir,
-                                        f'global_mean_{self.diag.expname}_{self.diag.modelname}_{self.diag.ensemble}_{self.diag.year1}_{self.diag.year2}.yml')
+        # save yaml fil
+        if yamlfile is None:
+            yamlfile = self.yamlfile()
+        
         self.loggy.info('YAML file is: %s', tablefile)
         with open(yamlfile, 'w', encoding='utf-8') as file:
-            yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False)
+            yaml.safe_dump(self.varmean, file, default_flow_style=False, sort_keys=False)
 
     def plot(self, mapfile=None, figformat='pdf'):
         """"
@@ -200,6 +210,15 @@ def plot(self, mapfile=None, figformat='pdf'):
             mapfile: Path to the output file. If None, it will be defined automatically following ECmean syntax
             figformat: Format of the output file.
         """
+
+        # load yaml file if is missing
+        if not self.varmean:
+            yamlfile = self.yamlfile()
+            self.loggy.info('Loading the stored data from the yaml file %s', yamlfile)
+            with open(yamlfile, 'r', encoding='utf-8') as file:
+                self.varmean = yaml.safe_load(file)
+
+        # prepare the dictionaries for the plotting
         obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref,
                                                                               self.diag.var_all, self.diag.seasons,
                                                                               self.diag.regions)
@@ -208,6 +227,7 @@ def plot(self, mapfile=None, figformat='pdf'):
                                     f'global_mean_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}')
         self.loggy.info('Figure file is: %s', mapfile)
 
+        # call the heatmap for plottinh
         heatmap_comparison_gm(data_dict=data2plot, mean_dict=obsmean, std_dict=obsstd,
                                     diag=self.diag, units_list=units_list,
                                     filemap=mapfile, addnan=self.diag.addnan)
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index 05a88e2..d0553b3 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -156,8 +156,8 @@ def run(self):
 
         # loop on the variables, create the parallel process
         for varlist in weight_split(self.diag.field_all, self.diag.numproc):
-            core = Process(target=self.pi_worker, args=(self.util_dictionary, self.piclim, 
-                                                        self.face, self.diag, self.diag.field_3d, 
+            core = Process(target=self.pi_worker, args=(self.util_dictionary, self.piclim,
+                                                        self.face, self.diag, self.diag.field_3d,
                                                         self.varstat, varlist))
             core.start()
             processes.append(core)
@@ -167,18 +167,23 @@ def run(self):
             proc.join()
         self.toc('Computation')
 
+    def yamlfile(self):
+        """Define the output YAML filename"""
+
+        yamlfile= self.diag.tabdir / f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.yml'
+        return yamlfile
+    
     def store(self, yamlfile=None):
         """Store the performance indices in a yaml file."""
+
         # order according to the original request the fields in the yaml file
-        ordered = {}
-        for item in self.diag.field_all:
-            ordered[item] = self.varstat[item]
+        self.varstat = {var: self.varstat[var] for var in self.diag.field_all}
 
         # dump the yaml file for PI, including all the seasons (need to copy to avoid mess)
         if yamlfile is None:
-            yamlfile = self.diag.tabdir / f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.yml'
+            yamlfile = self.yamlfile()
         with open(yamlfile, 'w', encoding='utf-8') as file:
-            yaml.safe_dump(ordered, file, default_flow_style=False, sort_keys=False)
+            yaml.safe_dump(self.varstat, file, default_flow_style=False, sort_keys=False)
 
     def plot(self, mapfile=None, figformat='pdf'):
         """Generate the heatmap for performance indices.
@@ -189,6 +194,14 @@ def plot(self, mapfile=None, figformat='pdf'):
         """
         # to this date, only EC23 support comparison with CMIP6 data
         if self.diag.climatology == 'EC23':
+
+            # load yaml file if is missing
+            if not self.varstat:
+                yamlfile = self.yamlfile()
+                self.loggy.info('Loading the stored data from the yaml file %s', yamlfile)
+                with open(yamlfile, 'r', encoding='utf-8') as file:
+                    self.varstat = yaml.safe_load(file)
+
             # prepare the data for the heatmap from the original yaml dictionaries
             data2plot, cmip6, longnames = prepare_clim_dictionaries_pi(data=self.varstat,
                                                                        clim=self.piclim,
@@ -199,6 +212,9 @@ def plot(self, mapfile=None, figformat='pdf'):
                 mapfile = os.path.join(self.diag.figdir,
                                     f'PI4_{self.diag.climatology}_{self.diag.expname}_{self.diag.modelname}_r1i1p1f1_{self.diag.year1}_{self.diag.year2}.{figformat}')
             heatmap_comparison_pi(data_dict=data2plot, cmip6_dict=cmip6, diag=self.diag, longnames=longnames, filemap=mapfile)
+        else:
+            self.loggy.warning('Only EC23 climatology is supported for comparison with CMIP6 data.')
+        
         self.toc('Plotting')
 
     @staticmethod
diff --git a/ecmean/utils/clim-create.py b/ecmean/utils/clim-create.py
index dab436a..4a16f95 100755
--- a/ecmean/utils/clim-create.py
+++ b/ecmean/utils/clim-create.py
@@ -9,6 +9,7 @@
 __author__ = "Paolo Davini (p.davini@isac.cnr.it), Sep 2022."
 
 import tempfile
+import sys
 from ecmean.libs.files import load_yaml
 from ecmean.libs.units import units_extra_definition
 from ecmean.libs.ncfixers import xr_preproc
diff --git a/tests/test_global_mean.py b/tests/test_global_mean.py
index 1200750..a924649 100644
--- a/tests/test_global_mean.py
+++ b/tests/test_global_mean.py
@@ -3,7 +3,7 @@
 import os
 import subprocess
 import xarray as xr
-from ecmean.global_mean import global_mean
+from ecmean.global_mean import global_mean, GlobalMean
 from ecmean.libs.ncfixers import xr_preproc
 from ecmean.libs.general import are_dicts_equal
 from ecmean.libs.files import load_yaml
@@ -97,3 +97,10 @@ def test_global_mean_CMIP6():
     data2 = load_gm_txt_files(file2)
 
     assert are_dicts_equal(data1, data2, TOLERANCE), "TXT files are not identical."
+
+def test_gm_plot(tmp_path):
+    outputfile = tmp_path / 'Global_Mean_Heatmap.pdf'
+    gm = GlobalMean('amip', 1990, 1990, config='tests/config.yml', loglevel='info')
+    gm.prepare()
+    gm.plot(mapfile=outputfile)
+    assert os.path.isfile(outputfile), "Plot not created."
diff --git a/tests/test_performance_indices.py b/tests/test_performance_indices.py
index db60696..1e14dd8 100644
--- a/tests/test_performance_indices.py
+++ b/tests/test_performance_indices.py
@@ -7,7 +7,7 @@
 import pytest
 import xarray as xr
 import yaml
-from ecmean import performance_indices
+from ecmean import performance_indices, PerformanceIndices
 from ecmean.libs.ncfixers import xr_preproc
 from ecmean.libs.general import are_dicts_equal
 #from ecmean.libs.plotting import heatmap_comparison_pi, prepare_clim_dictionaries_pi
@@ -77,16 +77,9 @@ def test_performance_indices_amip_xdataset(clim):
     assert are_dicts_equal(data1, data2, TOLERANCE),f"YAML files are not identical.\nData1: {data1}\nData2: {data2}"
 
 
-# def test_pi_plot(tmp_path):
-#     file1 = 'tests/table/PI4_EC23_amip_EC-Earth4_r1i1p1f1_1990_1990.yml'
-#     with open(file1, 'r', encoding='utf8') as f1:
-#         data = yaml.safe_load(f1)
-#     file2 = 'ecmean/climatology/EC23/pi_climatology_EC23.yml'
-#     with open(file2, 'r', encoding='utf8') as f1:
-#         clim = yaml.safe_load(f1)
-#     data_dict, clim_dict, longnames = prepare_clim_dictionaries_pi(data=data, clim=clim, shortnames=data.keys())
-    
-#     heatmap_comparison_pi(data_dict=data_dict, cmip6_dict=clim_dict,
-#                           title='CMIP6 RELATIVE PI test', longnames=longnames,
-#                           mapfile=os.path.join(tmp_path, 'PI4_heatmap.pdf'))
-#     assert os.path.isfile('PI4_heatmap.pdf'), "Plot not created."
+def test_pi_plot(tmp_path):
+    outputfile = tmp_path / 'PI4_heatmap.png'
+    pi = PerformanceIndices('amip', 1990, 1990, config='tests/config.yml')
+    pi.prepare()
+    pi.plot(mapfile=outputfile)
+    assert os.path.isfile(outputfile), "Plot not created."

From 894a4aad8d48a44622437b4442b7ae7802a0d8cb Mon Sep 17 00:00:00 2001
From: oloapinivad <p.davini@isac.cnr.it>
Date: Fri, 20 Dec 2024 16:47:59 +0100
Subject: [PATCH 26/26] add raise

---
 ecmean/global_mean.py         | 7 +++++--
 ecmean/performance_indices.py | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/ecmean/global_mean.py b/ecmean/global_mean.py
index a27d23a..89bbe35 100755
--- a/ecmean/global_mean.py
+++ b/ecmean/global_mean.py
@@ -215,8 +215,11 @@ def plot(self, mapfile=None, figformat='pdf'):
         if not self.varmean:
             yamlfile = self.yamlfile()
             self.loggy.info('Loading the stored data from the yaml file %s', yamlfile)
-            with open(yamlfile, 'r', encoding='utf-8') as file:
-                self.varmean = yaml.safe_load(file)
+            if os.path.isfile(yamlfile):
+                with open(yamlfile, 'r', encoding='utf-8') as file:
+                    self.varmean = yaml.safe_load(file)
+            else:
+                raise FileNotFoundError(f'YAML file {yamlfile} not found')
 
         # prepare the dictionaries for the plotting
         obsmean, obsstd, data2plot, units_list = prepare_clim_dictionaries_gm(self.varmean, self.ref,
diff --git a/ecmean/performance_indices.py b/ecmean/performance_indices.py
index d0553b3..91372a8 100755
--- a/ecmean/performance_indices.py
+++ b/ecmean/performance_indices.py
@@ -199,8 +199,11 @@ def plot(self, mapfile=None, figformat='pdf'):
             if not self.varstat:
                 yamlfile = self.yamlfile()
                 self.loggy.info('Loading the stored data from the yaml file %s', yamlfile)
-                with open(yamlfile, 'r', encoding='utf-8') as file:
-                    self.varstat = yaml.safe_load(file)
+                if os.path.isfile(yamlfile):
+                    with open(yamlfile, 'r', encoding='utf-8') as file:
+                        self.varstat = yaml.safe_load(file)
+                else:
+                    raise FileNotFoundError(f'File {yamlfile} not found.')
 
             # prepare the data for the heatmap from the original yaml dictionaries
             data2plot, cmip6, longnames = prepare_clim_dictionaries_pi(data=self.varstat,