From 5e72e0bee46280dda626cbde675ed11b26d080c1 Mon Sep 17 00:00:00 2001
From: Shenyang Huang
Date: Wed, 29 May 2024 16:31:13 -0400
Subject: [PATCH 1/5] adding recurrency baseline to run

---
 examples/linkproppred/thgl-github/recurrencybaseline.py | 1 +
 tgb_modules/tkg_utils.py                                | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/linkproppred/thgl-github/recurrencybaseline.py b/examples/linkproppred/thgl-github/recurrencybaseline.py
index f42b1f2..5f764fe 100644
--- a/examples/linkproppred/thgl-github/recurrencybaseline.py
+++ b/examples/linkproppred/thgl-github/recurrencybaseline.py
@@ -9,6 +9,7 @@
 organization={International Joint Conferences on Artificial Intelligence Organization}
 }
+python recurrencybaseline.py --seed 1 --num_processes 1 -tr False
 """
 
 ## imports
diff --git a/tgb_modules/tkg_utils.py b/tgb_modules/tkg_utils.py
index 00352ac..1c45c69 100644
--- a/tgb_modules/tkg_utils.py
+++ b/tgb_modules/tkg_utils.py
@@ -2,7 +2,7 @@
 from itertools import groupby
 from operator import itemgetter
 from collections import defaultdict
-import dgl
+# import dgl
 import sys
 import argparse
 import torch

From 64bf43a28103a82bbb6194e7c0486210a3d4aca5 Mon Sep 17 00:00:00 2001
From: Shenyang Huang
Date: Mon, 3 Jun 2024 11:18:18 -0400
Subject: [PATCH 2/5] adding gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index d21b06e..33157d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 !requirements*.txt
+get_croissant.py
 #dataset
 *.xz
 *.dict

From 40174df33ef848ac747be16a851f60fca608a801 Mon Sep 17 00:00:00 2001
From: Shenyang Huang
Date: Mon, 10 Jun 2024 15:43:45 -0400
Subject: [PATCH 3/5] small change

---
 stats_figures/create_relation_figures.py | 550 +++++++++++------------
 1 file changed, 275 insertions(+), 275 deletions(-)

diff --git a/stats_figures/create_relation_figures.py b/stats_figures/create_relation_figures.py
index 16164f4..6b136f3 100644
--- a/stats_figures/create_relation_figures.py
+++ b/stats_figures/create_relation_figures.py
@@ -1,275 +1,275 @@
-""" pie charts, mrr per relation charts
-"""
-
-## imports
-import numpy as np
-import sys
-import os
-import os.path as osp
-tgb_modules_path = osp.abspath(os.path.join(os.path.dirname(__file__), '..'))
-sys.path.append(tgb_modules_path)
-import matplotlib.pyplot as plt
-from matplotlib.colors import LogNorm
-from matplotlib.colors import Normalize
-import numpy as np
-import pandas as pd
-import stats_figures.dataset_utils as du
-
-
-# specify params
-# which datasets
-names = ['thgl-software']#['tkgl-smallpedia','tkgl-polecat', 'tkgl-icews']#[ 'tkgl-polecat', 'tkgl-icews', 'tkgl-wikidata', 'tkgl-smallpedia', 'thgl-software'] #['thgl-software']#
-# names = [ 'tkgl-polecat', 'tkgl-icews', 'tkgl-wikidata', 'tkgl-smallpedia','tkgl-polecat'] #'tkgl-polecat','tkgl-smallpedia', 'tkgl-yago', 'tkgl-icews' ,'tkgl-smallpedia','thgl-myket','tkgl-yago', 'tkgl-icews','thgl-github', 'thgl-forum', 'tkgl-wikidata']
-# which methods for the mrr_per_rel figures
-methods = ['recurrency', 'regcn', 'cen'] #'recurrency'
-colortgb = '#60ab84' #tgb logo colors
-colortgb2 = '#eeb641'
-colortgb3 = '#dd613a'
-head_tail_flag = False # if true, the head and tail of the relation are shown in the plot, otherwise just the mean across both directions
-
-# pie chart colors
-colors = [colortgb,colortgb2,colortgb3] # from tgb logo
-colors2= ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c', '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ffff99'] #from https://colorbrewer2.org/#type=qualitative&scheme=Paired&n=11
-
-# colors2= ['#8e0152', '#c51b7d', '#de77ae', '#f1b6da', '#fde0ef', '#f7f7f7', '#e6f5d0', '#b8e186', '#7fbc41', '#4d9221', '#276419']
-# from https://colorbrewer2.org/#type=diverging&scheme=PiYG&n=11 color blind friendly
-
-capsize=1.5
-capthick=1.5
-elinewidth=1.5
-occ_threshold = 5
-k=10 # how many slices in the pie, +1 for 'Others'
-# k = 14
-plots_flag = True
-ylimdict = {'tkgl-polecat': 0.25, 'tkgl-icews':0.6, 'tkgl-smallpedia': 1.01} # the upper mrr limit for the mrr charts
-
-overall_min = -1 # for the correlation matrix colorbar
-overall_max =1 # for the correlation matrix colorbar
-num_rels_plot = 10 # how many relations we want to plot in the mrr chart
-i = 0
-plot_values_list = []
-plot_names_multi_line_list =[]
-for dataset_name in names:
-    print('dataset_name:', dataset_name)
-    # some directory stuff
-    modified_dataset_name = dataset_name.replace('-', '_')
-    current_dir = os.path.dirname(os.path.abspath(__file__))
-
-    stats_dir = os.path.join( current_dir,dataset_name,'stats')
-    tgb_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
-    figs_dir = os.path.join(current_dir,dataset_name,'figs_rel')
-    stats_df = pd.read_csv(os.path.join(stats_dir, f"relation_statistics_{dataset_name}.csv"))
-
-    # Create the 'figs' directory if it doesn't exist
-    if not os.path.exists(figs_dir):
-        os.makedirs(figs_dir)
-    stats_dir = os.path.join( current_dir,dataset_name,'stats')
-    if not os.path.exists(stats_dir):
-        os.makedirs(stats_dir)
-
-    ### A) pie charts #plot top k relations according to the number of occurrences plus a slice for "others"
-    plot_names = list(stats_df['rel_string_word'].iloc[:k])
-    plot_values = list(stats_df['number_total_occurences'].iloc[:k])
-    all_others = np.sum(stats_df['number_total_occurences'].iloc[k:]) #slice for "others" (sum of all other relations occurrences)
-    plot_values.append(all_others)
-    plot_names.append('Others')
-    # for the pie chart labels to be more readable (i.e. force line break if words are long)
-    plot_names_multi_line= []
-    for name in plot_names: # add some \n so the labels fit better into the pie chart
-        if type(name) == str:
-            words = name.split()
-            newname = words[0]
-            if len(words) > 1:
-                for i in range(len(words)-1):
-                    if not '(' in words[i+1]:
-                        if len(words[i]) > 3:
-                            newname+='\n'
-                        else:
-                            newname+=' '
-                        newname+=words[i+1]
-        else:
-            newname = str(name) #then only plot the int as is.
-        plot_names_multi_line.append(newname)
-
-    num_slices = len(plot_names)
-    plt.figure(figsize=(7, 7))
-    wedges, texts, autotexts =plt.pie(plot_values,autopct=lambda pct: f"{pct:.0f}%" if pct > 1.5 else '', startangle=140, colors=colors2, labeldistance=2.2) #repeated_colors)
-    # Increase the font size of the percentage values
-    for autotext in autotexts:
-        autotext.set_fontsize(20) #15
-    plt.axis('equal')
-    # Move the percentage labels further outside
-    for autotext, wedge in zip(autotexts, wedges):
-        angle = (wedge.theta2 - wedge.theta1) / 2 + wedge.theta1
-        x = np.cos(np.deg2rad(angle))
-        y = np.sin(np.deg2rad(angle))
-        distance = 0.85 # Adjust this value to move the labels further or closer to the center
-        autotext.set_position((x * distance, y * distance))
-    # Set the labels for each pie slice
-    # plt.legend(wedges, plot_names_multi_line, loc="center left", bbox_to_anchor=(1, 0, 0.5, 1), fontsize=14)
-    plt.legend(wedges, plot_names_multi_line, loc="center left", bbox_to_anchor=(1, 0, 0.5, 1), fontsize=14)
-    save_path = (os.path.join(figs_dir, f"rel_pie_{dataset_name}.png"))
-    plt.savefig(save_path, bbox_inches='tight')
-    save_path = (os.path.join(figs_dir, f"rel_pie_{dataset_name}.pdf"))
-    plt.savefig(save_path, bbox_inches='tight')
-
-    if dataset_name == 'tkgl-wikidata': #then we do not want to plot the mrr for the relations
-        continue
-
-    ### B) plot the mrr for each relation for each method, different color for different number of occurrences or for different recurrency degree
-
-    # prepare the dataframe: only take the top ten relations according to number of occurrences and sort by recurrency degree
-    # we use selected_df_sorted to plot the relations in the order of recurrency degree
-    rels_sorted = np.array(stats_df['relation'])[0:num_rels_plot]
-    mask = stats_df['relation'].isin(rels_sorted)
-    selected_df = stats_df[mask] #only the parts of the dataframe that contain the top ten relations according to number of occurrences
-    selected_df_sorted = selected_df.sort_values(by='recurrency_degree', ascending=False) # Sort selected_df by 'recurrency_degree' column in descending order
-    rels_to_plot = list(selected_df_sorted['relation'])
-    labels = np.array(selected_df_sorted['relation'])# only plotting the id for space reasons
-    mrr_per_rel_freq = [] # list of mrr values for each relation - three lists for three methods
-    mrr_per_rel_freq2 = []
-    mrr_per_rel_freq3 = []
-    lab = []
-    lab_ht = []
-    lab_rel = []
-    # rel_oc_dict[rel] = count_occurrences
-    count_occurrences_sorted = []
-    rec_degree_sorted = []
-    for index, r in enumerate(rels_to_plot):
-        if head_tail_flag:
-            lab_ht.append('h')
-            lab_ht.append('t')
-            lab_rel.append(str(labels[index])+' ') # add spaces to make the labels longer
-        else:
-            lab_rel.append(str(labels[index])+'') # add spaces to make the labels longer
-
-        lab.append(labels[index])
-        if head_tail_flag: # if we do head and tail separately we need the value for head and tail direction
-            mrr_per_rel_freq.append(selected_df_sorted['recurrency_head'].iloc[index])
-            mrr_per_rel_freq.append(selected_df_sorted['recurrency_tail'].iloc[index])
-            mrr_per_rel_freq2.append(selected_df_sorted['regcn_head'].iloc[index])
-            mrr_per_rel_freq2.append(selected_df_sorted['regcn_tail'].iloc[index])
-            mrr_per_rel_freq3.append(selected_df_sorted['cen_head'].iloc[index])
-            mrr_per_rel_freq3.append(selected_df_sorted['cen_tail'].iloc[index])
-            count_occurrences_sorted.append(selected_df_sorted['number_total_occurences'].iloc[index])#append twice for head and tail
-            count_occurrences_sorted.append(selected_df_sorted['number_total_occurences'].iloc[index])
-            rec_degree_sorted.append(selected_df_sorted['recurrency_degree'].iloc[index]) #append twice for head and tail
-            rec_degree_sorted.append(selected_df_sorted['recurrency_degree'].iloc[index])
-        else:# if we do NOT head and tail separately we need the mean value for head and tail direction
-            mrr_per_rel_freq.append(np.mean([selected_df_sorted['recurrency_head'].iloc[index], selected_df_sorted['recurrency_tail'].iloc[index]]))
-            mrr_per_rel_freq2.append(np.mean([selected_df_sorted['regcn_head'].iloc[index],selected_df_sorted['regcn_tail'].iloc[index]]))
-            mrr_per_rel_freq3.append(np.mean([selected_df_sorted['cen_head'].iloc[index], selected_df_sorted['cen_tail'].iloc[index]]))
-            count_occurrences_sorted.append(selected_df_sorted['number_total_occurences'].iloc[index])#append twice for head and tail
-            rec_degree_sorted.append(selected_df_sorted['recurrency_degree'].iloc[index])
-
-    # these are the x-values of the ticks. in case we plot head and tail separately, we need to have two ticks per relation
-    x_values = []
-    x_values_rel = []
-    for i in range(0,num_rels_plot):
-        if head_tail_flag:
-            x_values.append(i*2+0.4)
-            x_values.append(i*2+0.8)
-        else:
-            x_values.append(i*2+0.4)
-        x_values_rel.append(i*2+0.4)
-
-    lab_lines = lab_rel #labels, for now
-    a = count_occurrences_sorted
-
-    # version 1) colors are based on the recurrency degree
-    plt.figure()
-    sca = plt.scatter(x_values, mrr_per_rel_freq2, marker='p',s=150, c = rec_degree_sorted, alpha=1, edgecolor='grey', cmap='jet', norm=Normalize(vmin=0, vmax=1), label='REGCN') # cmap='gist_rainbow',
-    sca = plt.scatter(x_values, mrr_per_rel_freq3 , marker='*',s=150, c = rec_degree_sorted, alpha=1, edgecolor='grey', cmap='jet', norm=Normalize(vmin=0, vmax=1), label='CEN')
-    sca = plt.scatter(x_values, mrr_per_rel_freq, marker='o',s=60, c = rec_degree_sorted, alpha=1, edgecolor='grey', cmap='jet', norm=Normalize(vmin=0, vmax=1), label='Recurrency Baseline')
-    plt.ylabel('MRR', fontsize=14)
-    plt.xlabel('Relation', fontsize=14)
-    plt.legend(fontsize=14)
-    cbar =plt.colorbar(sca)
-    plt.ylim([0,ylimdict[dataset_name]])
-    cbar.ax.yaxis.label.set_color('gray')
-
-    if head_tail_flag:
-        plt.xticks(x_values, lab_ht, size=13) #, verticalalignment="center") # ha='right',
-        plt.xticks(x_values_rel, lab_lines, size=14, minor=True)
-        plt.tick_params(axis='x', which='minor', rotation=90, length=0)
-    else:
-        plt.xticks(x_values_rel, lab_lines, size=14)
-        plt.tick_params(axis='x', rotation=90, length=0)
-    plt.yticks(size=13)
-    # Create a locator for the second set of x-ticks
-    # plt.secondary_xaxis('top', x_values_rel)
-
-    plt.grid()
-    save_path = (os.path.join(figs_dir, f"rel_mrrperrel_recdeg_{dataset_name}.png"))
-    plt.savefig(save_path, bbox_inches='tight')
-    save_path = (os.path.join(figs_dir, f"rel_mrrperrel_recdeg_{dataset_name}.pdf"))
-    plt.savefig(save_path, bbox_inches='tight')
-    print('saved in ', save_path)
-
-    # version 2) colors are the number of occurrences
-    plt.figure()
-    sca = plt.scatter(x_values, mrr_per_rel_freq2, marker='p',s=150, c = a, alpha=1, edgecolor='grey', norm=LogNorm(), cmap='jet', label='REGCN')
-    sca = plt.scatter(x_values, mrr_per_rel_freq3 , marker='*',s=150, c = a, alpha=1, edgecolor='grey', norm=LogNorm(), cmap='jet', label='CEN')
-    sca = plt.scatter(x_values, mrr_per_rel_freq, marker='o',s=60, c = a, alpha=1, edgecolor='grey', norm=LogNorm(), cmap='jet', label='Recurrency Baseline')
-    plt.ylabel('MRR', fontsize=14)
-    plt.xlabel('Relation', fontsize=14)
-    plt.legend(fontsize=14)
-    cbar =plt.colorbar(sca)
-    plt.ylim([0,ylimdict[dataset_name]])
-    cbar.ax.yaxis.label.set_color('gray')
-
-    plt.xticks(x_values, lab_ht, size=13) #, verticalalignment="center") # ha='right',
-    plt.yticks(size=13)
-    # Create a locator for the second set of x-ticks
-    # plt.secondary_xaxis('top', x_values_rel)
-    plt.xticks(x_values_rel, lab_lines, size=14, minor=True)
-    plt.tick_params(axis='x', which='minor', rotation=90, length=0)
-    plt.grid()
-    save_path = (os.path.join(figs_dir, f"rel_mrrperrel_occ_{dataset_name}.png"))
-    plt.savefig(save_path, bbox_inches='tight')
-
-
-    ### C) plot various correlation matrices. I specify different columns for the different plots
-    df = stats_df[['recurrency_degree', 'direct_recurrency-degree', 'recurrency_tail', 'recurrency_head', 'regcn_tail', 'regcn_head', 'cen_tail', 'cen_head']]
-    corrmat= df.corr()
-    f = plt.figure(figsize=(19, 15))
-    plt.matshow(corrmat, fignum=f.number, vmin=overall_min, vmax=overall_max)
-    plt.xticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16, rotation=90)
-    plt.yticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16)
-    cb = plt.colorbar()
-    cb.ax.tick_params(labelsize=16)
-    save_path = (os.path.join(figs_dir, f"corr_rec_meth_{dataset_name}.png"))
-    plt.savefig(save_path, bbox_inches='tight')
-
-    df = stats_df[['consecutiveness_value', 'recurrency_tail', 'recurrency_head', 'regcn_tail', 'regcn_head', 'cen_tail', 'cen_head']]
-    corrmat= df.corr()
-    f = plt.figure(figsize=(19, 15))
-    plt.matshow(corrmat, fignum=f.number, vmin=overall_min, vmax=overall_max)
-    plt.xticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16, rotation=90)
-    plt.yticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16)
-    cb = plt.colorbar()
-    cb.ax.tick_params(labelsize=16)
-    save_path = (os.path.join(figs_dir, f"corr_con_meth_{dataset_name}.png"))
-    plt.savefig(save_path, bbox_inches='tight')
-
-    df = stats_df[['recurrency_degree', 'direct_recurrency-degree', 'consecutiveness_value', 'mean_occurence_per_triple','number_total_occurences', 'recurrency_tail', 'recurrency_head', 'regcn_tail', 'regcn_head', 'cen_tail', 'cen_head']]
-    corrmat= df.corr()
-    f = plt.figure(figsize=(19, 15))
-    plt.matshow(corrmat, fignum=f.number, vmin=overall_min, vmax=overall_max)
-    plt.xticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16, rotation=90)
-    plt.yticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16)
-    for i in range(corrmat.shape[0]):
-        for j in range(corrmat.shape[1]):
-            plt.text(j, i, "{:.2f}".format(corrmat.iloc[i, j]), ha='center', va='center', color='black', fontsize=16)
-    cb = plt.colorbar()
-    # fig.colorbar(cax, ticks=[-1,0,1], shrink=0.8)
-    cb.ax.tick_params(labelsize=16)
-    # Plot the correlation matrix
-    save_path = (os.path.join(figs_dir, f"corr_all_meth_{dataset_name}.png"))
-    plt.savefig(save_path, bbox_inches='tight')
-    plt.close('all')
-
-
-
-print('done with creating the figs')
-
+""" pie charts, mrr per relation charts
+"""
+
+## imports
+import numpy as np
+import sys
+import os
+import os.path as osp
+tgb_modules_path = osp.abspath(os.path.join(os.path.dirname(__file__), '..'))
+sys.path.append(tgb_modules_path)
+import matplotlib.pyplot as plt
+from matplotlib.colors import LogNorm
+from matplotlib.colors import Normalize
+import numpy as np
+import pandas as pd
+import stats_figures.dataset_utils as du
+
+
+# specify params
+# which datasets
+names = ['thgl-software']#['tkgl-smallpedia','tkgl-polecat', 'tkgl-icews']#[ 'tkgl-polecat', 'tkgl-icews', 'tkgl-wikidata', 'tkgl-smallpedia', 'thgl-software'] #['thgl-software']#
+# names = [ 'tkgl-polecat', 'tkgl-icews', 'tkgl-wikidata', 'tkgl-smallpedia','tkgl-polecat'] #'tkgl-polecat','tkgl-smallpedia', 'tkgl-yago', 'tkgl-icews' ,'tkgl-smallpedia','thgl-myket','tkgl-yago', 'tkgl-icews','thgl-github', 'thgl-forum', 'tkgl-wikidata']
+# which methods for the mrr_per_rel figures
+methods = ['recurrency', 'regcn', 'cen'] #'recurrency'
+colortgb = '#60ab84' #tgb logo colors
+colortgb2 = '#eeb641'
+colortgb3 = '#dd613a'
+head_tail_flag = False # if true, the head and tail of the relation are shown in the plot, otherwise just the mean across both directions
+
+# pie chart colors
+colors = [colortgb,colortgb2,colortgb3] # from tgb logo
+colors2= ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c', '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ffff99'] #from https://colorbrewer2.org/#type=qualitative&scheme=Paired&n=11
+
+# colors2= ['#8e0152', '#c51b7d', '#de77ae', '#f1b6da', '#fde0ef', '#f7f7f7', '#e6f5d0', '#b8e186', '#7fbc41', '#4d9221', '#276419']
+# from https://colorbrewer2.org/#type=diverging&scheme=PiYG&n=11 color blind friendly
+
+capsize=1.5
+capthick=1.5
+elinewidth=1.5
+occ_threshold = 5
+k=10 # how many slices in the pie, +1 for 'Others'
+# k = 14
+plots_flag = True
+ylimdict = {'tkgl-polecat': 0.25, 'tkgl-icews':0.6, 'tkgl-smallpedia': 1.01} # the upper mrr limit for the mrr charts
+
+overall_min = -1 # for the correlation matrix colorbar
+overall_max =1 # for the correlation matrix colorbar
+num_rels_plot = 10 # how many relations we want to plot in the mrr chart
+i = 0
+plot_values_list = []
+plot_names_multi_line_list =[]
+for dataset_name in names:
+    print('dataset_name:', dataset_name)
+    # some directory stuff
+    modified_dataset_name = dataset_name.replace('-', '_')
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+
+    stats_dir = os.path.join( current_dir,dataset_name,'stats')
+    tgb_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+    figs_dir = os.path.join(current_dir,dataset_name,'figs_rel')
+    stats_df = pd.read_csv(os.path.join(stats_dir, f"relation_statistics_{dataset_name}.csv"))
+
+    # Create the 'figs' directory if it doesn't exist
+    if not os.path.exists(figs_dir):
+        os.makedirs(figs_dir)
+    stats_dir = os.path.join( current_dir,dataset_name,'stats')
+    if not os.path.exists(stats_dir):
+        os.makedirs(stats_dir)
+
+    ### A) pie charts #plot top k relations according to the number of occurrences plus a slice for "others"
+    plot_names = list(stats_df['rel_string_word'].iloc[:k])
+    plot_values = list(stats_df['number_total_occurences'].iloc[:k])
+    all_others = np.sum(stats_df['number_total_occurences'].iloc[k:]) #slice for "others" (sum of all other relations occurrences)
+    plot_values.append(all_others)
+    plot_names.append('Others')
+    # for the pie chart labels to be more readable (i.e. force line break if words are long)
+    plot_names_multi_line= []
+    for name in plot_names: # add some \n so the labels fit better into the pie chart
+        if type(name) == str:
+            words = name.split()
+            newname = words[0]
+            if len(words) > 1:
+                for i in range(len(words)-1):
+                    if not '(' in words[i+1]:
+                        if len(words[i]) > 3:
+                            newname+='\n'
+                        else:
+                            newname+=' '
+                        newname+=words[i+1]
+        else:
+            newname = str(name) #then only plot the int as is.
+        plot_names_multi_line.append(newname)
+
+    num_slices = len(plot_names)
+    plt.figure(figsize=(7, 7))
+    wedges, texts, autotexts =plt.pie(plot_values,autopct=lambda pct: f"{pct:.0f}%" if pct > 1.5 else '', startangle=140, colors=colors2, labeldistance=2.2) #repeated_colors)
+    # Increase the font size of the percentage values
+    for autotext in autotexts:
+        autotext.set_fontsize(20) #15
+    plt.axis('equal')
+    # Move the percentage labels further outside
+    for autotext, wedge in zip(autotexts, wedges):
+        angle = (wedge.theta2 - wedge.theta1) / 2 + wedge.theta1
+        x = np.cos(np.deg2rad(angle))
+        y = np.sin(np.deg2rad(angle))
+        distance = 0.85 # Adjust this value to move the labels further or closer to the center
+        autotext.set_position((x * distance, y * distance))
+    # Set the labels for each pie slice
+    # plt.legend(wedges, plot_names_multi_line, loc="center left", bbox_to_anchor=(1, 0, 0.5, 1), fontsize=14)
+    plt.legend(wedges, plot_names_multi_line, loc="center left", bbox_to_anchor=(1, 0, 0.5, 1), fontsize=14)
+    save_path = (os.path.join(figs_dir, f"rel_pie_{dataset_name}.png"))
+    plt.savefig(save_path, bbox_inches='tight')
+    save_path = (os.path.join(figs_dir, f"rel_pie_{dataset_name}.pdf"))
+    plt.savefig(save_path, bbox_inches='tight')
+
+    if dataset_name == 'tkgl-wikidata': #then we do not want to plot the mrr for the relations
+        continue
+
+    ### B) plot the mrr for each relation for each method, different color for different number of occurrences or for different recurrency degree
+
+    # prepare the dataframe: only take the top ten relations according to number of occurrences and sort by recurrency degree
+    # we use selected_df_sorted to plot the relations in the order of recurrency degree
+    rels_sorted = np.array(stats_df['relation'])[0:num_rels_plot]
+    mask = stats_df['relation'].isin(rels_sorted)
+    selected_df = stats_df[mask] #only the parts of the dataframe that contain the top ten relations according to number of occurrences
+    selected_df_sorted = selected_df.sort_values(by='recurrency_degree', ascending=False) # Sort selected_df by 'recurrency_degree' column in descending order
+    rels_to_plot = list(selected_df_sorted['relation'])
+    labels = np.array(selected_df_sorted['relation'])# only plotting the id for space reasons
+    mrr_per_rel_freq = [] # list of mrr values for each relation - three lists for three methods
+    mrr_per_rel_freq2 = []
+    mrr_per_rel_freq3 = []
+    lab = []
+    lab_ht = []
+    lab_rel = []
+    # rel_oc_dict[rel] = count_occurrences
+    count_occurrences_sorted = []
+    rec_degree_sorted = []
+    for index, r in enumerate(rels_to_plot):
+        if head_tail_flag:
+            lab_ht.append('h')
+            lab_ht.append('t')
+            lab_rel.append(str(labels[index])+' ') # add spaces to make the labels longer
+        else:
+            lab_rel.append(str(labels[index])+'') # add spaces to make the labels longer
+
+        lab.append(labels[index])
+        if head_tail_flag: # if we do head and tail separately we need the value for head and tail direction
+            mrr_per_rel_freq.append(selected_df_sorted['recurrency_head'].iloc[index])
+            mrr_per_rel_freq.append(selected_df_sorted['recurrency_tail'].iloc[index])
+            mrr_per_rel_freq2.append(selected_df_sorted['regcn_head'].iloc[index])
+            mrr_per_rel_freq2.append(selected_df_sorted['regcn_tail'].iloc[index])
+            mrr_per_rel_freq3.append(selected_df_sorted['cen_head'].iloc[index])
+            mrr_per_rel_freq3.append(selected_df_sorted['cen_tail'].iloc[index])
+            count_occurrences_sorted.append(selected_df_sorted['number_total_occurences'].iloc[index])#append twice for head and tail
+            count_occurrences_sorted.append(selected_df_sorted['number_total_occurences'].iloc[index])
+            rec_degree_sorted.append(selected_df_sorted['recurrency_degree'].iloc[index]) #append twice for head and tail
+            rec_degree_sorted.append(selected_df_sorted['recurrency_degree'].iloc[index])
+        else:# if we do NOT head and tail separately we need the mean value for head and tail direction
+            mrr_per_rel_freq.append(np.mean([selected_df_sorted['recurrency_head'].iloc[index], selected_df_sorted['recurrency_tail'].iloc[index]]))
+            mrr_per_rel_freq2.append(np.mean([selected_df_sorted['regcn_head'].iloc[index],selected_df_sorted['regcn_tail'].iloc[index]]))
+            mrr_per_rel_freq3.append(np.mean([selected_df_sorted['cen_head'].iloc[index], selected_df_sorted['cen_tail'].iloc[index]]))
+            count_occurrences_sorted.append(selected_df_sorted['number_total_occurences'].iloc[index])#append twice for head and tail
+            rec_degree_sorted.append(selected_df_sorted['recurrency_degree'].iloc[index])
+
+    # these are the x-values of the ticks. in case we plot head and tail separately, we need to have two ticks per relation
+    x_values = []
+    x_values_rel = []
+    for i in range(0,num_rels_plot):
+        if head_tail_flag:
+            x_values.append(i*2+0.4)
+            x_values.append(i*2+0.8)
+        else:
+            x_values.append(i*2+0.4)
+        x_values_rel.append(i*2+0.4)
+
+    lab_lines = lab_rel #labels, for now
+    a = count_occurrences_sorted
+
+    # version 1) colors are based on the recurrency degree
+    plt.figure()
+    sca = plt.scatter(x_values, mrr_per_rel_freq2, marker='p',s=150, c = rec_degree_sorted, alpha=1, edgecolor='grey', cmap='jet', norm=Normalize(vmin=0, vmax=1), label='REGCN') # cmap='gist_rainbow',
+    sca = plt.scatter(x_values, mrr_per_rel_freq3 , marker='*',s=150, c = rec_degree_sorted, alpha=1, edgecolor='grey', cmap='jet', norm=Normalize(vmin=0, vmax=1), label='CEN')
+    sca = plt.scatter(x_values, mrr_per_rel_freq, marker='o',s=60, c = rec_degree_sorted, alpha=1, edgecolor='grey', cmap='jet', norm=Normalize(vmin=0, vmax=1), label='Recurrency Baseline')
+    plt.ylabel('MRR', fontsize=14)
+    plt.xlabel('Relation', fontsize=14)
+    plt.legend(fontsize=14)
+    cbar =plt.colorbar(sca)
+    plt.ylim([0,ylimdict[dataset_name]])
+    cbar.ax.yaxis.label.set_color('gray')
+
+    if head_tail_flag:
+        plt.xticks(x_values, lab_ht, size=13) #, verticalalignment="center") # ha='right',
+        plt.xticks(x_values_rel, lab_lines, size=14, minor=True)
+        plt.tick_params(axis='x', which='minor', rotation=90, length=0)
+    else:
+        plt.xticks(x_values_rel, lab_lines, size=14)
+        plt.tick_params(axis='x', rotation=90, length=0)
+    plt.yticks(size=13)
+    # Create a locator for the second set of x-ticks
+    # plt.secondary_xaxis('top', x_values_rel)
+
+    plt.grid()
+    save_path = (os.path.join(figs_dir, f"rel_mrrperrel_recdeg_{dataset_name}.png"))
+    plt.savefig(save_path, bbox_inches='tight')
+    save_path = (os.path.join(figs_dir, f"rel_mrrperrel_recdeg_{dataset_name}.pdf"))
+    plt.savefig(save_path, bbox_inches='tight')
+    print('saved in ', save_path)
+
+    # version 2) colors are the number of occurrences
+    plt.figure()
+    sca = plt.scatter(x_values, mrr_per_rel_freq2, marker='p',s=150, c = a, alpha=1, edgecolor='grey', norm=LogNorm(), cmap='jet', label='REGCN')
+    sca = plt.scatter(x_values, mrr_per_rel_freq3 , marker='*',s=150, c = a, alpha=1, edgecolor='grey', norm=LogNorm(), cmap='jet', label='CEN')
+    sca = plt.scatter(x_values, mrr_per_rel_freq, marker='o',s=60, c = a, alpha=1, edgecolor='grey', norm=LogNorm(), cmap='jet', label='Recurrency Baseline')
+    plt.ylabel('MRR', fontsize=14)
+    plt.xlabel('Relation', fontsize=14)
+    plt.legend(fontsize=14)
+    cbar =plt.colorbar(sca)
+    plt.ylim([0,ylimdict[dataset_name]])
+    cbar.ax.yaxis.label.set_color('gray')
+
+    plt.xticks(x_values, lab_ht, size=13) #, verticalalignment="center") # ha='right',
+    plt.yticks(size=13)
+    # Create a locator for the second set of x-ticks
+    # plt.secondary_xaxis('top', x_values_rel)
+    plt.xticks(x_values_rel, lab_lines, size=14, minor=True)
+    plt.tick_params(axis='x', which='minor', rotation=90, length=0)
+    plt.grid()
+    save_path = (os.path.join(figs_dir, f"rel_mrrperrel_occ_{dataset_name}.png"))
+    plt.savefig(save_path, bbox_inches='tight')
+
+
+    ### C) plot various correlation matrices. I specify different columns for the different plots
+    df = stats_df[['recurrency_degree', 'direct_recurrency-degree', 'recurrency_tail', 'recurrency_head', 'regcn_tail', 'regcn_head', 'cen_tail', 'cen_head']]
+    corrmat= df.corr()
+    f = plt.figure(figsize=(19, 15))
+    plt.matshow(corrmat, fignum=f.number, vmin=overall_min, vmax=overall_max)
+    plt.xticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16, rotation=90)
+    plt.yticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16)
+    cb = plt.colorbar()
+    cb.ax.tick_params(labelsize=16)
+    save_path = (os.path.join(figs_dir, f"corr_rec_meth_{dataset_name}.png"))
+    plt.savefig(save_path, bbox_inches='tight')
+
+    df = stats_df[['consecutiveness_value', 'recurrency_tail', 'recurrency_head', 'regcn_tail', 'regcn_head', 'cen_tail', 'cen_head']]
+    corrmat= df.corr()
+    f = plt.figure(figsize=(19, 15))
+    plt.matshow(corrmat, fignum=f.number, vmin=overall_min, vmax=overall_max)
+    plt.xticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16, rotation=90)
+    plt.yticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16)
+    cb = plt.colorbar()
+    cb.ax.tick_params(labelsize=16)
+    save_path = (os.path.join(figs_dir, f"corr_con_meth_{dataset_name}.png"))
+    plt.savefig(save_path, bbox_inches='tight')
+
+    df = stats_df[['recurrency_degree', 'direct_recurrency-degree', 'consecutiveness_value', 'mean_occurence_per_triple','number_total_occurences', 'recurrency_tail', 'recurrency_head', 'regcn_tail', 'regcn_head', 'cen_tail', 'cen_head']]
+    corrmat= df.corr()
+    f = plt.figure(figsize=(19, 15))
+    plt.matshow(corrmat, fignum=f.number, vmin=overall_min, vmax=overall_max)
+    plt.xticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16, rotation=90)
+    plt.yticks(range(df.select_dtypes(['number']).shape[1]), df.select_dtypes(['number']).columns, fontsize=16)
+    for i in range(corrmat.shape[0]):
+        for j in range(corrmat.shape[1]):
+            plt.text(j, i, "{:.2f}".format(corrmat.iloc[i, j]), ha='center', va='center', color='black', fontsize=16)
+    cb = plt.colorbar()
+    # fig.colorbar(cax, ticks=[-1,0,1], shrink=0.8)
+    cb.ax.tick_params(labelsize=16)
+    # Plot the correlation matrix
f"corr_all_meth_{dataset_name}.png")) + plt.savefig(save_path, bbox_inches='tight') + plt.close('all') + + + +print('done with creating the figs') + From 47bf81db2a3123ee99fe43bd57c788cc05cfad69 Mon Sep 17 00:00:00 2001 From: Shenyang Huang Date: Wed, 12 Jun 2024 10:42:04 -0400 Subject: [PATCH 4/5] moving files --- .../linkproppred/thgl-software/STHN_README.md | 0 stats_figures/create_relation_figures.py | 2 +- tgb/linkproppred/evaluate.py | 4 ++-- sampler_core.cpp => tgb_modules/sampler_core.cpp | 0 sthn_sampler_setup.py => tgb_modules/sthn_sampler_setup.py | 0 5 files changed, 3 insertions(+), 3 deletions(-) rename STHN_README.md => examples/linkproppred/thgl-software/STHN_README.md (100%) rename sampler_core.cpp => tgb_modules/sampler_core.cpp (100%) rename sthn_sampler_setup.py => tgb_modules/sthn_sampler_setup.py (100%) diff --git a/STHN_README.md b/examples/linkproppred/thgl-software/STHN_README.md similarity index 100% rename from STHN_README.md rename to examples/linkproppred/thgl-software/STHN_README.md diff --git a/stats_figures/create_relation_figures.py b/stats_figures/create_relation_figures.py index 6b136f3..5f4dd72 100644 --- a/stats_figures/create_relation_figures.py +++ b/stats_figures/create_relation_figures.py @@ -18,7 +18,7 @@ # specify params # which datasets -names = ['thgl-software']#['tkgl-smallpedia','tkgl-polecat', 'tkgl-icews']#[ 'tkgl-polecat', 'tkgl-icews', 'tkgl-wikidata', 'tkgl-smallpedia', 'thgl-software'] #['thgl-software']# +names = ['tkgl-wikidata']#['tkgl-smallpedia','tkgl-polecat', 'tkgl-icews']#[ 'tkgl-polecat', 'tkgl-icews', 'tkgl-wikidata', 'tkgl-smallpedia', 'thgl-software'] #['thgl-software']# # names = [ 'tkgl-polecat', 'tkgl-icews', 'tkgl-wikidata', 'tkgl-smallpedia','tkgl-polecat'] #'tkgl-polecat','tkgl-smallpedia', 'tkgl-yago', 'tkgl-icews' ,'tkgl-smallpedia','thgl-myket','tkgl-yago', 'tkgl-icews','thgl-github', 'thgl-forum', 'tkgl-wikidata'] # which methods for the mrr_per_rel figures methods = ['recurrency', 'regcn', 'cen'] #'recurrency' diff --git a/tgb/linkproppred/evaluate.py b/tgb/linkproppred/evaluate.py index b4ca20e..91553ef 100644 --- a/tgb/linkproppred/evaluate.py +++ b/tgb/linkproppred/evaluate.py @@ -108,8 +108,8 @@ def _eval_hits_and_mrr(self, y_pred_pos, y_pred_neg, type_info, k_value): else: y_pred_pos = y_pred_pos.reshape(-1, 1) - optimistic_rank = (y_pred_neg >= y_pred_pos).sum(axis=1) - pessimistic_rank = (y_pred_neg > y_pred_pos).sum(axis=1) + optimistic_rank = (y_pred_neg > y_pred_pos).sum(axis=1) + pessimistic_rank = (y_pred_neg >= y_pred_pos).sum(axis=1) ranking_list = 0.5 * (optimistic_rank + pessimistic_rank) + 1 hitsK_list = (ranking_list <= k_value).astype(np.float32) mrr_list = 1./ranking_list.astype(np.float32) diff --git a/sampler_core.cpp b/tgb_modules/sampler_core.cpp similarity index 100% rename from sampler_core.cpp rename to tgb_modules/sampler_core.cpp diff --git a/sthn_sampler_setup.py b/tgb_modules/sthn_sampler_setup.py similarity index 100% rename from sthn_sampler_setup.py rename to tgb_modules/sthn_sampler_setup.py From 20bb069b31852a3d5dfcf19d1bb448d033310ca7 Mon Sep 17 00:00:00 2001 From: Shenyang Huang Date: Wed, 19 Jun 2024 17:47:36 -0400 Subject: [PATCH 5/5] adding the docs --- docs/api/tgb.linkproppred.md | 4 + tgb/linkproppred/thg_negative_generator.py | 12 +- tgb/linkproppred/thg_negative_sampler.py | 12 +- tgb/linkproppred/tkg_negative_generator.py | 132 +-------------------- tgb/linkproppred/tkg_negative_sampler.py | 12 +- 5 files changed, 14 insertions(+), 158 deletions(-) diff --git 
a/docs/api/tgb.linkproppred.md b/docs/api/tgb.linkproppred.md index 21b2106..e47dd75 100644 --- a/docs/api/tgb.linkproppred.md +++ b/docs/api/tgb.linkproppred.md @@ -5,3 +5,7 @@ ::: tgb.linkproppred.evaluate ::: tgb.linkproppred.negative_sampler ::: tgb.linkproppred.negative_generator +::: tgb.linkproppred.tkg_negative_generator +::: tgb.linkproppred.tkg_negative_sampler +::: tgb.linkproppred.thg_negative_generator +::: tgb.linkproppred.thg_negative_sampler diff --git a/tgb/linkproppred/thg_negative_generator.py b/tgb/linkproppred/thg_negative_generator.py index 82c84be..da7832b 100644 --- a/tgb/linkproppred/thg_negative_generator.py +++ b/tgb/linkproppred/thg_negative_generator.py @@ -29,7 +29,7 @@ def __init__( edge_data: TemporalData = None, ) -> None: r""" - Negative Edge Sampler class + Negative Edge Generator class for Temporal Heterogeneous Graphs this is a class for generating negative samples for a specific datasets the set of the positive samples are provided, the negative samples are generated with specific strategies and are saved for consistent evaluation across different methods @@ -39,11 +39,10 @@ def __init__( first_node_id: the first node id last_node_id: the last node id node_type: the node type of each node - num_neg_e: number of negative edges being generated per each positive edge - strategy: specifies which strategy should be used for generating the negatives - rnd_seed: random seed for reproducibility - edge_data: the positive edges to generate the negatives for, assuming sorted temporally - + strategy: the strategy to generate negative samples + num_neg_e: number of negative samples to generate + rnd_seed: random seed + edge_data: the edge data object containing the positive edges Returns: None """ @@ -72,7 +71,6 @@ def get_destinations_based_on_node_type(self, node_type: np.ndarray) -> dict: r""" get the destination node id arrays based on the node type - Parameters: first_node_id: the first node id last_node_id: the last node id diff --git a/tgb/linkproppred/thg_negative_sampler.py b/tgb/linkproppred/thg_negative_sampler.py index 1ab281b..9b4ffa0 100644 --- a/tgb/linkproppred/thg_negative_sampler.py +++ b/tgb/linkproppred/thg_negative_sampler.py @@ -26,7 +26,7 @@ def __init__( r""" Negative Edge Sampler Loads and query the negative batches based on the positive batches provided. - constructor for the negative edge sampler class + constructor for the negative edge sampler class Parameters: dataset_name: name of the dataset @@ -124,16 +124,6 @@ def query_batch(self, neg_samples.append( neg_d_arr ) - - # conflict_set, d_node_type = conflict_dict[(pos_t, pos_s, e_type)] - - # all_dst = self.node_type_dict[d_node_type] - # # filtered_all_dst = np.delete(all_dst, conflict_set, axis=0) - # filtered_all_dst = np.setdiff1d(all_dst, conflict_set) - # neg_d_arr = filtered_all_dst - # neg_samples.append( - # neg_d_arr - # ) #? 
can't convert to numpy array due to different lengths of negative samples return neg_samples diff --git a/tgb/linkproppred/tkg_negative_generator.py b/tgb/linkproppred/tkg_negative_generator.py index 2f22525..6f2da6f 100644 --- a/tgb/linkproppred/tkg_negative_generator.py +++ b/tgb/linkproppred/tkg_negative_generator.py @@ -28,13 +28,8 @@ def __init__( edge_data: TemporalData = None, ) -> None: r""" - Negative Edge Sampler class - this is a class for generating negative samples for a specific datasets - the set of the positive samples are provided, the negative samples are generated with specific strategies - and are saved for consistent evaluation across different methods - negative edges are sampled with 'oen_vs_many' strategy. - it is assumed that the destination nodes are indexed sequentially with 'first_dst_id' - and 'last_dst_id' being the first and last index, respectively. + Negative Edge Generator class for Temporal Knowledge Graphs + constructor for the negative edge generator class Parameters: dataset_name: name of the dataset @@ -121,13 +116,6 @@ def generate_dst_dict(self, edge_data: TemporalData, dst_name: str) -> dict: edge_type_size = [] for key in dst_track_dict: dst = np.array(list(dst_track_dict[key].keys())) - # #* if there are too few dst, sample up to 1000 - # if len(dst) < 1000: - # dst_sampled = np.random.choice(np.arange(min_dst_idx, max_dst_idx+1), 1000, replace=False) - # while np.intersect1d(dst, dst_sampled).shape[0] != 0: - # dst_sampled = np.random.choice(np.arange(min_dst_idx, max_dst_idx+1), 1000, replace=False) - # dst_sampled[0:len(dst)] = dst[:] - # dst = dst_sampled edge_type_size.append(len(dst)) dst_dict[key] = dst print ('destination candidates generated for all edge types ', len(dst_dict)) @@ -401,118 +389,4 @@ def generate_negative_samples_random(self, evaluation_set[(pos_t, pos_s, edge_type)] = neg_d_arr save_pkl(evaluation_set, filename) - - - - - # def generate_negative_samples_ftr(self, - # data: TemporalData, - # split_mode: str, - # filename: str, - # ) -> None: - # r""" - # now we consider (s, d, t, edge_type) as a unique edge - # Generate negative samples based on the random strategy: - # - for each positive edge, sample a batch of negative edges from all possible edges with the same source node - # - filter actual positive edges at the same timestamp with the same edge type - - # Parameters: - # data: an object containing positive edges information - # split_mode: specifies whether to generate negative edges for 'validation' or 'test' splits - # filename: name of the file containing the generated negative edges - # """ - # print( - # f"INFO: Negative Sampling Strategy: {self.strategy}, Data Split: {split_mode}" - # ) - # assert split_mode in [ - # "val", - # "test", - # ], "Invalid split-mode! It should be `val` or `test`!" - - # if os.path.exists(filename): - # print( - # f"INFO: negative samples for '{split_mode}' evaluation are already generated!" 
- # ) - # else: - # print(f"INFO: Generating negative samples for '{split_mode}' evaluation!") - # # retrieve the information from the batch - # pos_src, pos_dst, pos_timestamp, edge_type = ( - # data.src.cpu().numpy(), - # data.dst.cpu().numpy(), - # data.t.cpu().numpy(), - # data.edge_type.cpu().numpy(), - # ) - - # # all possible destinations - # all_dst = np.arange(self.first_dst_id, self.last_dst_id + 1) - # evaluation_set = {} - # # generate a list of negative destinations for each positive edge - # pos_edge_tqdm = tqdm( - # zip(pos_src, pos_dst, pos_timestamp, edge_type), total=len(pos_src) - # ) - - # edge_t_dict = {} # {(t, u, edge_type): {v_1, v_2, ..} } - # #! iterate once to put all edges into a dictionary for reference - # for ( - # pos_s, - # pos_d, - # pos_t, - # edge_type, - # ) in pos_edge_tqdm: - # if (pos_t, pos_s, edge_type) not in edge_t_dict: - # edge_t_dict[(pos_t, pos_s, edge_type)] = {pos_d:1} - # else: - # edge_t_dict[(pos_t, pos_s, edge_type)][pos_d] = 1 - - # conflict_dict = {} - # for key in edge_t_dict: - # conflict_dict[key] = np.array(list(edge_t_dict[key].keys())) - - # print ("conflict sets for ns samples for ", len(conflict_dict), " positive edges are generated") - - # # save the generated evaluation set to disk - # save_pkl(conflict_dict, filename) - - # # pos_src, pos_dst, pos_timestamp, edge_type = ( - # # data.src.cpu().numpy(), - # # data.dst.cpu().numpy(), - # # data.t.cpu().numpy(), - # # data.edge_type.cpu().numpy(), - # # ) - - - # # # generate a list of negative destinations for each positive edge - # # pos_edge_tqdm = tqdm( - # # zip(pos_src, pos_dst, pos_timestamp, edge_type), total=len(pos_src) - # # ) - - - # # for ( - # # pos_s, - # # pos_d, - # # pos_t, - # # edge_type, - # # ) in pos_edge_tqdm: - - # # #! generate all negatives unless restricted - # # conflict_set = list(edge_t_dict[(pos_t, pos_s, edge_type)].keys()) - - # # # filter out positive destination - # # conflict_set = np.array(conflict_set) - # # filtered_all_dst = np.setdiff1d(all_dst, conflict_set) - - # # ''' - # # when num_neg_e is larger than all possible destinations simple return all possible destinations - # # ''' - # # if (self.num_neg_e < 0): - # # neg_d_arr = filtered_all_dst - # # elif (self.num_neg_e > len(filtered_all_dst)): - # # neg_d_arr = filtered_all_dst - # # else: - # # neg_d_arr = np.random.choice( - # # filtered_all_dst, self.num_neg_e, replace=False) #never replace negatives - - # # evaluation_set[(pos_s, pos_d, pos_t, edge_type)] = neg_d_arr - - # # # save the generated evaluation set to disk - # # save_pkl(evaluation_set, filename) + \ No newline at end of file diff --git a/tgb/linkproppred/tkg_negative_sampler.py b/tgb/linkproppred/tkg_negative_sampler.py index 1e6fd0d..38106d1 100644 --- a/tgb/linkproppred/tkg_negative_sampler.py +++ b/tgb/linkproppred/tkg_negative_sampler.py @@ -44,17 +44,7 @@ def __init__( self.last_dst_id = last_dst_id self.strategy = strategy self.dst_dict = None - # if self.strategy in ["dst-time-filtered"]: - # dst_dict_name = ( - # partial_path - # + "_" - # + "dst_dict" - # + ".pkl" - # ) - # if not os.path.exists(dst_dict_name): - # raise FileNotFoundError(f"File not found at {dst_dict_name}, dst_time_filtered strategy requires the dst_dict file") - # self.dst_dict = load_pkl(dst_dict_name) - + def load_eval_set( self, fname: str,
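Note on the ranking change in tgb/linkproppred/evaluate.py (PATCH 4/5): after the swap, optimistic_rank counts only the negatives that score strictly higher than the positive edge (the best rank the positive can receive under ties), while pessimistic_rank also counts ties against it; the reported rank is the average of the two, so the order in which ties happen to be broken cannot bias Hits@K or MRR. A minimal sketch of that logic, assuming numpy-only inputs and a hypothetical standalone helper (hits_and_mrr is not part of the TGB API):

import numpy as np

def hits_and_mrr(y_pred_pos, y_pred_neg, k_value):
    # y_pred_pos: (num_edges,) positive scores; y_pred_neg: (num_edges, num_neg) negative scores
    y_pred_pos = y_pred_pos.reshape(-1, 1)
    optimistic_rank = (y_pred_neg > y_pred_pos).sum(axis=1)    # strictly better negatives only
    pessimistic_rank = (y_pred_neg >= y_pred_pos).sum(axis=1)  # ties also count against the positive
    ranking_list = 0.5 * (optimistic_rank + pessimistic_rank) + 1
    hitsK_list = (ranking_list <= k_value).astype(np.float32)
    mrr_list = 1. / ranking_list.astype(np.float32)
    return hitsK_list.mean(), mrr_list.mean()

# One positive scored 0.9 against negatives [0.9, 0.5]: optimistic rank 1,
# pessimistic rank 2, averaged rank 1.5, reciprocal rank 1/1.5.
print(hits_and_mrr(np.array([0.9]), np.array([[0.9, 0.5]]), k_value=10))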
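Similarly, the destination filtering shared by the tkg/thg negative generators and samplers above (and sketched in the commented-out code the patch deletes) reduces to: group positives by (timestamp, source, edge_type), collect the true destinations under that key as a conflict set, and draw negatives from the candidate destinations minus that set via np.setdiff1d, returning the whole filtered pool when num_neg_e is negative or exceeds its size. A toy sketch under assumed inputs; all names here are illustrative, not the TGB API:

import numpy as np

all_dst = np.arange(0, 10)                     # candidate destination ids
conflict_dict = {(5, 0, 2): np.array([1, 4])}  # {(t, src, edge_type): true destinations}

def query_negatives(t, src, edge_type, num_neg_e=-1, rnd_seed=1):
    rng = np.random.default_rng(rnd_seed)
    # remove every destination observed as a true positive for this key
    filtered = np.setdiff1d(all_dst, conflict_dict[(t, src, edge_type)])
    if num_neg_e < 0 or num_neg_e > len(filtered):
        return filtered                        # too few candidates: return them all
    return rng.choice(filtered, num_neg_e, replace=False)

print(query_negatives(5, 0, 2))     # every destination except the true ones (1 and 4)
print(query_negatives(5, 0, 2, 3))  # a reproducible sample of 3 filtered negatives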