Skip to content

Commit

Permalink
mini updates for x-labels on figures
Browse files Browse the repository at this point in the history
  • Loading branch information
JuliaGast committed May 30, 2024
1 parent 29f675a commit c5042aa
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 27 deletions.
32 changes: 10 additions & 22 deletions stats_figures/create_edges_figures.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@


# specify params
names = [ 'tkgl-polecat', 'tkgl-icews', 'tkgl-smallpedia', 'tkgl-wikidata', 'thgl-myket', 'thgl-github', 'thgl-forum', 'thgl-software']
names = ['thgl-github', 'tkgl-polecat', 'tkgl-icews', 'tkgl-smallpedia', 'tkgl-wikidata', 'thgl-myket', 'thgl-forum', 'thgl-software']
granularity ={} #for labels
granularity['tkgl-polecat'] = 'days'
granularity['tkgl-icews'] = 'days'
granularity['tkgl-smallpedia'] = 'years'
granularity['tkgl-wikidata'] = 'years'
granularity['tkgl-yago'] = 'years'
granularity['thgl-myket'] = 'sec.'
granularity['thgl-github'] = 'sec.'
granularity['thgl-software'] = 'sec.'
granularity['thgl-forum'] = 'sec.'
granularity['thgl-myket'] = 's.'
granularity['thgl-github'] = 's.'
granularity['thgl-software'] = 's.'
granularity['thgl-forum'] = 's.'

# colors from tgb logo
colortgb = '#60ab84'
Expand Down Expand Up @@ -74,23 +74,8 @@
capthick=1
elinewidth=1
ts_discretized_mean, ts_discretized_sum, ts_discretized_min, ts_discretized_max, start_indices, end_indices, mid_indices = du.discretize_values(n_edges_list, num_bars)
# different version of the figures
# plt.figure()
# plt.tick_params(axis='both', which='major', labelsize=labelsize)
# # plt.bar(mid_indices, ts_discretized_mean, width=(len(n_edges_list) // num_bars), label ='Mean Value', color =colortgb)
# plt.step(mid_indices, ts_discretized_mean, where='mid', linestyle='-', label ='Mean Value', color=colortgb)
# plt.scatter(mid_indices, ts_discretized_min, label ='min value')
# plt.scatter(mid_indices, ts_discretized_max, label ='max value')
# plt.xlabel(f'Timestep [{granularity[dataset_name]}] from {start_date} to {end_date}', fontsize=fontsize)
# plt.ylabel('Number of Edges', fontsize=fontsize)
# plt.legend()
# #plt.title(dataset_name+ ' - Number of Edges aggregated across multiple timesteps')
# save_path = (os.path.join(figs_dir, f"num_edges_discretized_{num_bars}_{dataset_name}.png"))
# plt.savefig(save_path, bbox_inches='tight')
# save_path = (os.path.join(figs_dir, f"num_edges_discretized_{num_bars}_{dataset_name}.pdf"))
# plt.savefig(save_path, bbox_inches='tight')


# line chart
plt.figure()
plt.tick_params(axis='both', which='major', labelsize=labelsize)
mins = np.array(ts_discretized_min)
Expand All @@ -100,16 +85,18 @@
plt.step(mid_indices, ts_discretized_mean, where='mid', linestyle='-', label ='Mean Value', color=colortgb, linewidth=2)
#plt.scatter(mid_indices, ts_discretized_mean, label ='Mean Value', color=colortgb)
plt.errorbar(mid_indices, maxs, yerr=[maxs-mins, maxs-maxs], fmt='none', alpha=0.9, color='grey',capsize=capsize, capthick=capthick, elinewidth=elinewidth, label='Min-Max Range')
plt.xlabel(f'Timestep [{granularity[dataset_name]}] from {start_date} to {end_date}', fontsize=fontsize)
plt.xlabel(f'Ts. [{granularity[dataset_name]}] from {start_date} to {end_date}', fontsize=fontsize)
plt.ylabel('Number of Edges', fontsize=fontsize)
plt.legend()
plt.tight_layout()
#plt.title(dataset_name+ ' - Number of Edges aggregated across multiple timesteps')
plt.show()
save_path2 = (os.path.join(figs_dir,f"num_edges_discretized_{num_bars}_{dataset_name}2.png"))
plt.savefig(save_path2, bbox_inches='tight')
save_path2 = (os.path.join(figs_dir,f"num_edges_discretized_{num_bars}_{dataset_name}2.pdf"))
plt.savefig(save_path2, bbox_inches='tight')

# bar chart
plt.figure()
plt.tick_params(axis='both', which='major', labelsize=labelsize)
mins = np.array(ts_discretized_min)
Expand All @@ -130,6 +117,7 @@


try:
# try log scale
plt.figure()
plt.tick_params(axis='both', which='major', labelsize=labelsize)
mins = np.array(ts_discretized_min)
Expand Down
14 changes: 9 additions & 5 deletions stats_figures/create_relation_figures.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@


# specify params
# which datasets
names = [ 'tkgl-polecat', 'tkgl-icews', 'tkgl-wikidata', 'tkgl-smallpedia','tkgl-polecat'] #'tkgl-polecat','tkgl-smallpedia', 'tkgl-yago', 'tkgl-icews' ,'tkgl-smallpedia','thgl-myket','tkgl-yago', 'tkgl-icews','thgl-github', 'thgl-forum', 'tkgl-wikidata']
# which methods for the mrr_per_rel figures
methods = ['recurrency', 'regcn', 'cen'] #'recurrency'
colortgb = '#60ab84' #tgb logo colors
colortgb2 = '#eeb641'
colortgb3 = '#dd613a'
head_tail_flag = False # if true, the head and tail of the relation are shown in the plot, otherwise just the mean across both directions

# pie chart colors
colors = [colortgb,colortgb2,colortgb3] # from tgb logo
colors2= ['#8e0152', '#c51b7d', '#de77ae', '#f1b6da', '#fde0ef', '#f7f7f7', '#e6f5d0', '#b8e186', '#7fbc41', '#4d9221', '#276419']
# from https://colorbrewer2.org/#type=diverging&scheme=PiYG&n=11 color blind friendly
Expand All @@ -43,6 +46,7 @@
plot_values_list = []
plot_names_multi_line_list =[]
for dataset_name in names:
print('dataset_name:', dataset_name)
# some directory stuff
modified_dataset_name = dataset_name.replace('-', '_')
current_dir = os.path.dirname(os.path.abspath(__file__))
Expand All @@ -59,7 +63,7 @@
if not os.path.exists(stats_dir):
os.makedirs(stats_dir)

### A pie charts #plot top k relations accordung to the number of occurences plus a slice for "others"
### A) pie charts: plot the top k relations according to the number of occurrences, plus a slice for "others"
plot_names = list(stats_df['rel_string_word'].iloc[:k])
plot_values = list(stats_df['number_total_occurences'].iloc[:k])
all_others = np.sum(stats_df['number_total_occurences'].iloc[k:]) #slice for "others" (sum of all other relations occurences)
Expand Down Expand Up @@ -107,7 +111,7 @@
if dataset_name == 'tkgl-wikidata': #then we do not want to plot the mrr for the relations
continue

### B plot the mrr for each relation for each method, different color for different number of occurences or for different recurrency degree
### B) plot the mrr for each relation for each method, different color for different number of occurrences or for different recurrency degree

# prepare the dataframe: only take the top ten relations according to number of occurrences and sort by recurrency degree
# we use selected_df_sorted to plot the relations in the order of recurrency degree
Expand Down Expand Up @@ -195,8 +199,7 @@
plt.savefig(save_path, bbox_inches='tight')
save_path = (os.path.join(figs_dir, f"rel_mrrperrel_recdeg_{dataset_name}.pdf"))
plt.savefig(save_path, bbox_inches='tight')
print('saved')

print('saved in ', save_path)

# version 2) colors are the number of occurrences
plt.figure()
Expand All @@ -220,7 +223,8 @@
save_path = (os.path.join(figs_dir, f"rel_mrrperrel_occ_{dataset_name}.png"))
plt.savefig(save_path, bbox_inches='tight')

### now we plot all sorts of correlation matrix. I specify different columns for the different plots

### C) plot several correlation matrices. I specify different columns for the different plots
df = stats_df[['recurrency_degree', 'direct_recurrency-degree', 'recurrency_tail', 'recurrency_head', 'regcn_tail', 'regcn_head', 'cen_tail', 'cen_head']]
corrmat= df.corr()
f = plt.figure(figsize=(19, 15))
Expand Down

0 comments on commit c5042aa

Please sign in to comment.