From c5042aa0499f4b288f46ebb157968dfccd391eb4 Mon Sep 17 00:00:00 2001 From: JuliaGast Date: Thu, 30 May 2024 19:50:09 -0400 Subject: [PATCH] mini updates for x-labels on figures --- stats_figures/create_edges_figures.py | 32 ++++++++---------------- stats_figures/create_relation_figures.py | 14 +++++++---- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/stats_figures/create_edges_figures.py b/stats_figures/create_edges_figures.py index 1b6140d..c1c0d7d 100644 --- a/stats_figures/create_edges_figures.py +++ b/stats_figures/create_edges_figures.py @@ -15,17 +15,17 @@ # specify params -names = [ 'tkgl-polecat', 'tkgl-icews', 'tkgl-smallpedia', 'tkgl-wikidata', 'thgl-myket', 'thgl-github', 'thgl-forum', 'thgl-software'] +names = ['thgl-github', 'tkgl-polecat', 'tkgl-icews', 'tkgl-smallpedia', 'tkgl-wikidata', 'thgl-myket', 'thgl-forum', 'thgl-software'] granularity ={} #for labels granularity['tkgl-polecat'] = 'days' granularity['tkgl-icews'] = 'days' granularity['tkgl-smallpedia'] = 'years' granularity['tkgl-wikidata'] = 'years' granularity['tkgl-yago'] = 'years' -granularity['thgl-myket'] = 'sec.' -granularity['thgl-github'] = 'sec.' -granularity['thgl-software'] = 'sec.' -granularity['thgl-forum'] = 'sec.' +granularity['thgl-myket'] = 's.' +granularity['thgl-github'] = 's.' +granularity['thgl-software'] = 's.' +granularity['thgl-forum'] = 's.' # colors from tgb logo colortgb = '#60ab84' @@ -74,23 +74,8 @@ capthick=1 elinewidth=1 ts_discretized_mean, ts_discretized_sum, ts_discretized_min, ts_discretized_max, start_indices, end_indices, mid_indices = du.discretize_values(n_edges_list, num_bars) - # different version of the figures - # plt.figure() - # plt.tick_params(axis='both', which='major', labelsize=labelsize) - # # plt.bar(mid_indices, ts_discretized_mean, width=(len(n_edges_list) // num_bars), label ='Mean Value', color =colortgb) - # plt.step(mid_indices, ts_discretized_mean, where='mid', linestyle='-', label ='Mean Value', color=colortgb) - # plt.scatter(mid_indices, ts_discretized_min, label ='min value') - # plt.scatter(mid_indices, ts_discretized_max, label ='max value') - # plt.xlabel(f'Timestep [{granularity[dataset_name]}] from {start_date} to {end_date}', fontsize=fontsize) - # plt.ylabel('Number of Edges', fontsize=fontsize) - # plt.legend() - # #plt.title(dataset_name+ ' - Number of Edges aggregated across multiple timesteps') - # save_path = (os.path.join(figs_dir, f"num_edges_discretized_{num_bars}_{dataset_name}.png")) - # plt.savefig(save_path, bbox_inches='tight') - # save_path = (os.path.join(figs_dir, f"num_edges_discretized_{num_bars}_{dataset_name}.pdf")) - # plt.savefig(save_path, bbox_inches='tight') - + # line chart plt.figure() plt.tick_params(axis='both', which='major', labelsize=labelsize) mins = np.array(ts_discretized_min) @@ -100,9 +85,10 @@ plt.step(mid_indices, ts_discretized_mean, where='mid', linestyle='-', label ='Mean Value', color=colortgb, linewidth=2) #plt.scatter(mid_indices, ts_discretized_mean, label ='Mean Value', color=colortgb) plt.errorbar(mid_indices, maxs, yerr=[maxs-mins, maxs-maxs], fmt='none', alpha=0.9, color='grey',capsize=capsize, capthick=capthick, elinewidth=elinewidth, label='Min-Max Range') - plt.xlabel(f'Timestep [{granularity[dataset_name]}] from {start_date} to {end_date}', fontsize=fontsize) + plt.xlabel(f'Ts. [{granularity[dataset_name]}] from {start_date} to {end_date}', fontsize=fontsize) plt.ylabel('Number of Edges', fontsize=fontsize) plt.legend() + plt.tight_layout() #plt.title(dataset_name+ ' - Number of Edges aggregated across multiple timesteps') plt.show() save_path2 = (os.path.join(figs_dir,f"num_edges_discretized_{num_bars}_{dataset_name}2.png")) @@ -110,6 +96,7 @@ save_path2 = (os.path.join(figs_dir,f"num_edges_discretized_{num_bars}_{dataset_name}2.pdf")) plt.savefig(save_path2, bbox_inches='tight') + # bar chart plt.figure() plt.tick_params(axis='both', which='major', labelsize=labelsize) mins = np.array(ts_discretized_min) @@ -130,6 +117,7 @@ try: + # try log scale plt.figure() plt.tick_params(axis='both', which='major', labelsize=labelsize) mins = np.array(ts_discretized_min) diff --git a/stats_figures/create_relation_figures.py b/stats_figures/create_relation_figures.py index 29e937c..796c232 100644 --- a/stats_figures/create_relation_figures.py +++ b/stats_figures/create_relation_figures.py @@ -17,13 +17,16 @@ # specify params +# which datasets names = [ 'tkgl-polecat', 'tkgl-icews', 'tkgl-wikidata', 'tkgl-smallpedia','tkgl-polecat'] #'tkgl-polecat','tkgl-smallpedia', 'tkgl-yago', 'tkgl-icews' ,'tkgl-smallpedia','thgl-myket','tkgl-yago', 'tkgl-icews','thgl-github', 'thgl-forum', 'tkgl-wikidata'] +# which methods for the mrr_per_rel figures methods = ['recurrency', 'regcn', 'cen'] #'recurrency' colortgb = '#60ab84' #tgb logo colors colortgb2 = '#eeb641' colortgb3 = '#dd613a' head_tail_flag = False # if true, the head and tail of the relation are shown in the plot, otherwise just the mean across both directions +# pie chart colors colors = [colortgb,colortgb2,colortgb3] # from tgb logo colors2= ['#8e0152', '#c51b7d', '#de77ae', '#f1b6da', '#fde0ef', '#f7f7f7', '#e6f5d0', '#b8e186', '#7fbc41', '#4d9221', '#276419'] # from https://colorbrewer2.org/#type=diverging&scheme=PiYG&n=11 color blind friendly @@ -43,6 +46,7 @@ plot_values_list = [] plot_names_multi_line_list =[] for dataset_name in names: + print('dataset_name:', dataset_name) # some directory stuff modified_dataset_name = dataset_name.replace('-', '_') current_dir = os.path.dirname(os.path.abspath(__file__)) @@ -59,7 +63,7 @@ if not os.path.exists(stats_dir): os.makedirs(stats_dir) - ### A pie charts #plot top k relations accordung to the number of occurences plus a slice for "others" + ### A) pie charts #plot top k relations accordung to the number of occurences plus a slice for "others" plot_names = list(stats_df['rel_string_word'].iloc[:k]) plot_values = list(stats_df['number_total_occurences'].iloc[:k]) all_others = np.sum(stats_df['number_total_occurences'].iloc[k:]) #slice for "others" (sum of all other relations occurences) @@ -107,7 +111,7 @@ if dataset_name == 'tkgl-wikidata': #then we do not want to plot the mrr for the relations continue - ### B plot the mrr for each relation for each method, different color for different number of occurences or for different recurrency degree + ### B) plot the mrr for each relation for each method, different color for different number of occurences or for different recurrency degree # prepare the dataframe: only take the top ten relations according to number of occurences and sort by recurrency degree # we use selected_df_sorted to plot the relations in the order of recurrency degree @@ -195,8 +199,7 @@ plt.savefig(save_path, bbox_inches='tight') save_path = (os.path.join(figs_dir, f"rel_mrrperrel_recdeg_{dataset_name}.pdf")) plt.savefig(save_path, bbox_inches='tight') - print('saved') - + print('saved in ', save_path) # version 2) colors are the number of occurences plt.figure() @@ -220,7 +223,8 @@ save_path = (os.path.join(figs_dir, f"rel_mrrperrel_occ_{dataset_name}.png")) plt.savefig(save_path, bbox_inches='tight') - ### now we plot all sorts of correlation matrix. I specify different columns for the different plots + + ### C) plot all sorts of correlation matrix. I specify different columns for the different plots df = stats_df[['recurrency_degree', 'direct_recurrency-degree', 'recurrency_tail', 'recurrency_head', 'regcn_tail', 'regcn_head', 'cen_tail', 'cen_head']] corrmat= df.corr() f = plt.figure(figsize=(19, 15))