Merge pull request #51 from JuliaGast/julia_new
Julia new: update comments for methods and modules
shenyangHuang committed Jun 10, 2024
2 parents 09d9cf5 + 9a6e939 commit e3737f6
Showing 33 changed files with 409 additions and 271 deletions.
16 changes: 14 additions & 2 deletions examples/linkproppred/thgl-forum/recurrencybaseline.py
@@ -33,6 +33,11 @@

def predict(num_processes, data_c_rel, all_data_c_rel, alpha, lmbda_psi,
perf_list_all, hits_list_all, window, neg_sampler, split_mode):
""" create predictions for each relation on test or valid set and compute mrr
:return perf_list_all: list of mrrs for each test query
:return hits_list_all: list of hits for each test query
"""

first_ts = data_c_rel[0][3]
## use this if you want to use ray:
num_queries = len(data_c_rel) // num_processes
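For orientation (not part of this commit): a minimal sketch of how a per-query reciprocal-rank list such as perf_list_all can be computed when the true object is scored against sampled negatives. mrr_per_query and its arguments are illustrative names, not this repository's API.

import numpy as np

def mrr_per_query(pos_scores, neg_scores):
    """Return one reciprocal rank per query.
    pos_scores: shape (num_queries,), score of the true object per query.
    neg_scores: shape (num_queries, num_negatives), scores of the sampled negatives."""
    pos = np.asarray(pos_scores)[:, None]
    neg = np.asarray(neg_scores)
    # rank = 1 + number of negatives scoring strictly higher, ties counted half
    ranks = (neg > pos).sum(axis=1) + 0.5 * (neg == pos).sum(axis=1) + 1.0
    return (1.0 / ranks).tolist()

# two queries, three negatives each; the mean of the list is the MRR
perf_list = mrr_per_query([0.9, 0.2], [[0.1, 0.5, 0.3], [0.4, 0.6, 0.1]])
print(perf_list, sum(perf_list) / len(perf_list))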
@@ -78,7 +83,7 @@ def predict(num_processes, data_c_rel, all_data_c_rel, alpha, lmbda_psi,

## test
def test(best_config, all_relations,test_data_prel, all_data_prel, neg_sampler, num_processes, window, split_mode='test'):
""" create predictions for each relation on test or valid set and compute mrr
""" create predictions by looping through all relations on the test or valid set and compute mrr
:return perf_list_all: list of mrrs for each test query
:return hits_list_all: list of hits for each test query
"""
@@ -121,6 +126,10 @@ def test(best_config, all_relations,test_data_prel, all_data_prel, neg_sampler,
return perf_list_all, hits_list_all

def read_dict_compute_mrr(split_mode='test'):
""" read the results per relation from a previously created file and compute mrr
:return mrr_per_rel: dictionary of mrrs for each relation
:return all_mrrs: list of mrrs for all relations
"""
csv_file = f'{perrel_results_path}/{MODEL_NAME}_NONE_{DATA}_results_{SEED}'+split_mode+'.csv'
# Initialize an empty dictionary to store the data
results_per_rel_dict = {}
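A hedged sketch of the kind of reader read_dict_compute_mrr describes, assuming (the diff does not show it) that each CSV row holds a relation id followed by one reciprocal rank; read_results_csv is an illustrative name.

import csv
from collections import defaultdict

def read_results_csv(csv_file):
    """Collect reciprocal ranks per relation and return per-relation MRRs plus all ranks."""
    per_rel = defaultdict(list)
    with open(csv_file, newline='') as f:
        for row in csv.reader(f):
            if not row:
                continue
            rel, rr = row[0], row[1]  # assumed column layout
            per_rel[int(rel)].append(float(rr))
    mrr_per_rel = {rel: sum(v) / len(v) for rel, v in per_rel.items()}
    all_rrs = [rr for v in per_rel.values() for rr in v]
    return mrr_per_rel, all_rrs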
@@ -153,7 +162,9 @@ def read_dict_compute_mrr(split_mode='test'):

## train
def train(params_dict, rels,val_data_prel, trainval_data_prel, neg_sampler, num_processes, window):
""" optional, find best values for lambda and alpha
""" optional, find best values for lambda and alpha by looping through all relations and testing a fixed set of params
based on validation mrr
:return best_config: dictionary of best params for each relation
"""
best_config= {}
best_mrr = 0
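A hypothetical sketch of the per-relation parameter search the train docstring describes; evaluate_relation stands in for the actual prediction code and is not part of the repository.

from itertools import product

def grid_search_per_relation(rels, params_dict, evaluate_relation):
    """params_dict is assumed to map 'alpha' and 'lmbda_psi' to lists of candidate values;
    evaluate_relation(rel, alpha, lmbda_psi) is assumed to return a validation MRR."""
    best_config = {}
    for rel in rels:
        best_mrr = -1.0
        for alpha, lmbda_psi in product(params_dict['alpha'], params_dict['lmbda_psi']):
            mrr = evaluate_relation(rel, alpha, lmbda_psi)
            if mrr > best_mrr:  # keep the best-scoring combination for this relation
                best_mrr = mrr
                best_config[rel] = {'alpha': alpha, 'lmbda_psi': lmbda_psi, 'mrr': mrr}
    return best_config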
@@ -243,6 +254,7 @@ def train(params_dict, rels,val_data_prel, trainval_data_prel, neg_sampler, num_

## args
def get_args():
"""parse all arguments for the script"""
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", "-d", default="thgl-forum", type=str)
parser.add_argument("--window", "-w", default=0, type=int) # set to e.g. 200 if only the most recent 200 timesteps should be considered. set to -2 if multistep
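The comment on --window suggests restricting history to the most recent timesteps; a hypothetical illustration of such a filter (apply_window is an illustrative name, and the repository's actual handling may differ).

def apply_window(history_quads, current_ts, window):
    """history_quads: iterable of (subject, relation, object, timestamp) tuples.
    window > 0 keeps only quadruples from the last `window` timesteps; window <= 0 keeps everything."""
    if window <= 0:
        return list(history_quads)
    return [q for q in history_quads if current_ts - q[3] < window]

print(apply_window([(0, 1, 2, 5), (0, 1, 3, 9)], current_ts=10, window=2))  # keeps only the second quad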
16 changes: 14 additions & 2 deletions examples/linkproppred/thgl-github/recurrencybaseline.py
@@ -33,6 +33,11 @@

def predict(num_processes, data_c_rel, all_data_c_rel, alpha, lmbda_psi,
perf_list_all, hits_list_all, window, neg_sampler, split_mode):
""" create predictions for each relation on test or valid set and compute mrr
:return perf_list_all: list of mrrs for each test query
:return hits_list_all: list of hits for each test query
"""

first_ts = data_c_rel[0][3]
## use this if you want to use ray:
num_queries = len(data_c_rel) // num_processes
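The num_queries = len(data_c_rel) // num_processes line and the ray comment point at chunked parallel prediction; a minimal, hypothetical sketch of that pattern with the standard-library multiprocessing module (score_chunk stands in for the real per-query scoring).

from multiprocessing import Pool

def split_into_chunks(queries, num_processes):
    """Split queries into num_processes near-equal chunks; the last chunk absorbs the remainder."""
    size = len(queries) // num_processes
    return [queries[i * size: (i + 1) * size] if i < num_processes - 1 else queries[i * size:]
            for i in range(num_processes)]

def score_chunk(chunk):
    return [1.0 for _ in chunk]  # dummy reciprocal rank per query

if __name__ == "__main__":
    with Pool(2) as pool:
        parts = pool.map(score_chunk, split_into_chunks(list(range(10)), 2))
    perf_list_all = [rr for part in parts for rr in part]
    print(len(perf_list_all))  # 10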
@@ -78,7 +83,7 @@ def predict(num_processes, data_c_rel, all_data_c_rel, alpha, lmbda_psi,

## test
def test(best_config, all_relations,test_data_prel, all_data_prel, neg_sampler, num_processes, window, split_mode='test'):
""" create predictions for each relation on test or valid set and compute mrr
""" create predictions by looping through all relations on the test or valid set and compute mrr
:return perf_list_all: list of mrrs for each test query
:return hits_list_all: list of hits for each test query
"""
@@ -121,6 +126,10 @@ def test(best_config, all_relations,test_data_prel, all_data_prel, neg_sampler,
return perf_list_all, hits_list_all

def read_dict_compute_mrr(split_mode='test'):
""" read the results per relation from a previously created file and compute mrr
:return mrr_per_rel: dictionary of mrrs for each relation
:return all_mrrs: list of mrrs for all relations
"""
csv_file = f'{perrel_results_path}/{MODEL_NAME}_NONE_{DATA}_results_{SEED}'+split_mode+'.csv'
# Initialize an empty dictionary to store the data
results_per_rel_dict = {}
@@ -153,7 +162,9 @@ def read_dict_compute_mrr(split_mode='test'):

## train
def train(params_dict, rels,val_data_prel, trainval_data_prel, neg_sampler, num_processes, window):
""" optional, find best values for lambda and alpha
""" optional, find best values for lambda and alpha by looping through all relations and testing a fixed set of params
based on validation mrr
:return best_config: dictionary of best params for each relation
"""
best_config= {}
best_mrr = 0
@@ -243,6 +254,7 @@ def train(params_dict, rels,val_data_prel, trainval_data_prel, neg_sampler, num_

## args
def get_args():
"""parse all arguments for the script"""
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", "-d", default="thgl-github", type=str)
parser.add_argument("--window", "-w", default=0, type=int) # set to e.g. 200 if only the most recent 200 timesteps should be considered. set to -2 if multistep
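Alongside the MRR lists, the functions above also return hits lists; a generic hits@k sketch for reference (hits_at_k is an illustrative name, not this repository's API).

import numpy as np

def hits_at_k(pos_scores, neg_scores, k=10):
    """Fraction of queries whose true object ranks within the top k (pessimistic tie handling)."""
    pos = np.asarray(pos_scores)[:, None]
    neg = np.asarray(neg_scores)
    ranks = (neg >= pos).sum(axis=1) + 1
    return float((ranks <= k).mean())

print(hits_at_k([0.9, 0.2], [[0.1, 0.5, 0.3], [0.4, 0.6, 0.1]], k=1))  # 0.5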
16 changes: 14 additions & 2 deletions examples/linkproppred/thgl-myket/recurrencybaseline.py
@@ -33,6 +33,11 @@

def predict(num_processes, data_c_rel, all_data_c_rel, alpha, lmbda_psi,
perf_list_all, hits_list_all, window, neg_sampler, split_mode):
""" create predictions for each relation on test or valid set and compute mrr
:return perf_list_all: list of mrrs for each test query
:return hits_list_all: list of hits for each test query
"""

first_ts = data_c_rel[0][3]
## use this if you want to use ray:
num_queries = len(data_c_rel) // num_processes
@@ -78,7 +83,7 @@ def predict(num_processes, data_c_rel, all_data_c_rel, alpha, lmbda_psi,

## test
def test(best_config, all_relations,test_data_prel, all_data_prel, neg_sampler, num_processes, window, split_mode='test'):
""" create predictions for each relation on test or valid set and compute mrr
""" create predictions by looping through all relations on the test or valid set and compute mrr
:return perf_list_all: list of mrrs for each test query
:return hits_list_all: list of hits for each test query
"""
@@ -121,6 +126,10 @@ def test(best_config, all_relations,test_data_prel, all_data_prel, neg_sampler,
return perf_list_all, hits_list_all

def read_dict_compute_mrr(split_mode='test'):
""" read the results per relation from a previously created file and compute mrr
:return mrr_per_rel: dictionary of mrrs for each relation
:return all_mrrs: list of mrrs for all relations
"""
csv_file = f'{perrel_results_path}/{MODEL_NAME}_NONE_{DATA}_results_{SEED}'+split_mode+'.csv'
# Initialize an empty dictionary to store the data
results_per_rel_dict = {}
@@ -153,7 +162,9 @@ def read_dict_compute_mrr(split_mode='test'):

## train
def train(params_dict, rels,val_data_prel, trainval_data_prel, neg_sampler, num_processes, window):
""" optional, find best values for lambda and alpha
""" optional, find best values for lambda and alpha by looping through all relations and testing a fixed set of params
based on validation mrr
:return best_config: dictionary of best params for each relation
"""
best_config= {}
best_mrr = 0
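For context, alpha and lmbda_psi in a recurrency baseline typically weight a time-decayed strict-recurrency score against a relaxed one; a rough, hypothetical sketch of that idea (the exact scoring in this repository may differ).

def recurrency_score(strict_ts, relaxed_count, current_ts, alpha, lmbda_psi):
    """strict_ts: timestamps at which this exact (subject, relation, object) was seen before;
    relaxed_count: how often the candidate object occurred with this relation at all (any subject)."""
    psi = min(1.0, sum(2 ** (-lmbda_psi * (current_ts - t)) for t in strict_ts))  # decayed strict recurrency
    xi = 1.0 if relaxed_count > 0 else 0.0  # indicator-style relaxed recurrency
    return alpha * psi + (1 - alpha) * xi

print(recurrency_score([8, 9], relaxed_count=3, current_ts=10, alpha=0.99, lmbda_psi=0.1))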
@@ -243,6 +254,7 @@ def train(params_dict, rels,val_data_prel, trainval_data_prel, neg_sampler, num_

## args
def get_args():
"""parse all arguments for the script"""
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", "-d", default="thgl-myket", type=str)
parser.add_argument("--window", "-w", default=0, type=int) # set to e.g. 200 if only the most recent 200 timesteps should be considered. set to -2 if multistep
16 changes: 14 additions & 2 deletions examples/linkproppred/thgl-software/recurrencybaseline.py
@@ -33,6 +33,11 @@

def predict(num_processes, data_c_rel, all_data_c_rel, alpha, lmbda_psi,
perf_list_all, hits_list_all, window, neg_sampler, split_mode):
""" create predictions for each relation on test or valid set and compute mrr
:return perf_list_all: list of mrrs for each test query
:return hits_list_all: list of hits for each test query
"""

first_ts = data_c_rel[0][3]
## use this if you want to use ray:
num_queries = len(data_c_rel) // num_processes
@@ -78,7 +83,7 @@ def predict(num_processes, data_c_rel, all_data_c_rel, alpha, lmbda_psi,

## test
def test(best_config, all_relations,test_data_prel, all_data_prel, neg_sampler, num_processes, window, split_mode='test'):
""" create predictions for each relation on test or valid set and compute mrr
""" create predictions by looping through all relations on the test or valid set and compute mrr
:return perf_list_all: list of mrrs for each test query
:return hits_list_all: list of hits for each test query
"""
@@ -121,6 +126,10 @@ def test(best_config, all_relations,test_data_prel, all_data_prel, neg_sampler,
return perf_list_all, hits_list_all

def read_dict_compute_mrr(split_mode='test'):
""" read the results per relation from a previously created file and compute mrr
:return mrr_per_rel: dictionary of mrrs for each relation
:return all_mrrs: list of mrrs for all relations
"""
csv_file = f'{perrel_results_path}/{MODEL_NAME}_NONE_{DATA}_results_{SEED}'+split_mode+'.csv'
# Initialize an empty dictionary to store the data
results_per_rel_dict = {}
@@ -153,7 +162,9 @@ def read_dict_compute_mrr(split_mode='test'):

## train
def train(params_dict, rels,val_data_prel, trainval_data_prel, neg_sampler, num_processes, window):
""" optional, find best values for lambda and alpha
""" optional, find best values for lambda and alpha by looping through all relations and testing a fixed set of params
based on validation mrr
:return best_config: dictionary of best params for each relation
"""
best_config= {}
best_mrr = 0
@@ -243,6 +254,7 @@ def train(params_dict, rels,val_data_prel, trainval_data_prel, neg_sampler, num_

## args
def get_args():
"""parse all arguments for the script"""
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", "-d", default="thgl-software", type=str)
parser.add_argument("--window", "-w", default=0, type=int) # set to e.g. 200 if only the most recent 200 timesteps should be considered. set to -2 if multistep
10 changes: 9 additions & 1 deletion examples/linkproppred/tkgl-icews/cen.py
@@ -27,6 +27,7 @@

def test(model, history_len, history_list, test_list, num_rels, num_nodes, use_cuda, model_name, mode, split_mode):
"""
Test the model
:param model: model used to test
:param history_list: all input history snapshot list; does not include the output label train list or valid list
:param test_list: test triple snapshot list
@@ -101,7 +102,14 @@ def test(model, history_len, history_list, test_list, num_rels, num_nodes, use_c

def run_experiment(args, trainvalidtest_id=0, n_hidden=None, n_layers=None, dropout=None, n_bases=None):
'''
trainvalidtest_id: -1: pretrainig, 0: curriculum training (to find best test history len), 1: test on valid set, 2: test on test set
Run experiment for CEN model
:param args: arguments for the model
:param trainvalidtest_id: -1: pretraining, 0: curriculum training (to find best test history len), 1: test on valid set, 2: test on test set
:param n_hidden: number of hidden units
:param n_layers: number of layers
:param dropout: dropout rate
:param n_bases: number of bases
:return: mrr, perf_per_rel (mean reciprocal rank, performance per relation)
'''
# 1) load configuration for grid searching the best configuration
if n_hidden:
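The trainvalidtest_id stages documented above imply a driver that calls run_experiment several times; a hypothetical sketch of that flow (run_all_stages is not part of the repository).

def run_all_stages(args, run_experiment):
    """Pretrain, curriculum-train, then evaluate on the valid and test splits."""
    run_experiment(args, trainvalidtest_id=-1)                          # pretraining
    run_experiment(args, trainvalidtest_id=0)                           # curriculum training (best history len)
    val_mrr, _ = run_experiment(args, trainvalidtest_id=1)              # evaluate on the validation set
    test_mrr, perf_per_rel = run_experiment(args, trainvalidtest_id=2)  # evaluate on the test set
    return val_mrr, test_mrr, perf_per_rel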
16 changes: 14 additions & 2 deletions examples/linkproppred/tkgl-icews/recurrencybaseline.py
@@ -33,6 +33,11 @@

def predict(num_processes, data_c_rel, all_data_c_rel, alpha, lmbda_psi,
perf_list_all, hits_list_all, window, neg_sampler, split_mode):
""" create predictions for each relation on test or valid set and compute mrr
:return perf_list_all: list of mrrs for each test query
:return hits_list_all: list of hits for each test query
"""

first_ts = data_c_rel[0][3]
## use this if you want to use ray:
num_queries = len(data_c_rel) // num_processes
Expand Down Expand Up @@ -78,7 +83,7 @@ def predict(num_processes, data_c_rel, all_data_c_rel, alpha, lmbda_psi,

## test
def test(best_config, all_relations,test_data_prel, all_data_prel, neg_sampler, num_processes, window, split_mode='test'):
""" create predictions for each relation on test or valid set and compute mrr
""" create predictions by looping through all relations on the test or valid set and compute mrr
:return perf_list_all: list of mrrs for each test query
:return hits_list_all: list of hits for each test query
"""
@@ -121,6 +126,10 @@ def test(best_config, all_relations,test_data_prel, all_data_prel, neg_sampler,
return perf_list_all, hits_list_all

def read_dict_compute_mrr(split_mode='test'):
""" read the results per relation from a previously created file and compute mrr
:return mrr_per_rel: dictionary of mrrs for each relation
:return all_mrrs: list of mrrs for all relations
"""
csv_file = f'{perrel_results_path}/{MODEL_NAME}_NONE_{DATA}_results_{SEED}'+split_mode+'.csv'
# Initialize an empty dictionary to store the data
results_per_rel_dict = {}
@@ -153,7 +162,9 @@ def read_dict_compute_mrr(split_mode='test'):

## train
def train(params_dict, rels,val_data_prel, trainval_data_prel, neg_sampler, num_processes, window):
""" optional, find best values for lambda and alpha
""" optional, find best values for lambda and alpha by looping through all relations and testing a fixed set of params
based on validation mrr
:return best_config: dictionary of best params for each relation
"""
best_config= {}
best_mrr = 0
@@ -243,6 +254,7 @@ def train(params_dict, rels,val_data_prel, trainval_data_prel, neg_sampler, num_

## args
def get_args():
"""parse all arguments for the script"""
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", "-d", default="tkgl-icews", type=str)
parser.add_argument("--window", "-w", default=0, type=int) # set to e.g. 200 if only the most recent 200 timesteps should be considered. set to -2 if multistep
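read_dict_compute_mrr in the files above expects a per-relation results CSV under perrel_results_path; a hypothetical sketch of producing such a file, assuming the same simple relation-id/reciprocal-rank row layout as the reader sketch earlier.

import csv

def write_results_csv(csv_file, per_query_results):
    """per_query_results: iterable of (relation_id, reciprocal_rank) pairs (assumed layout)."""
    with open(csv_file, 'w', newline='') as f:
        writer = csv.writer(f)
        for rel, rr in per_query_results:
            writer.writerow([rel, rr])

write_results_csv('example_results_valid.csv', [(0, 1.0), (0, 0.5), (3, 0.25)])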
46 changes: 17 additions & 29 deletions examples/linkproppred/tkgl-icews/regcn.py
@@ -28,6 +28,7 @@

def test(model, history_list, test_list, num_rels, num_nodes, use_cuda, model_name, static_graph, mode, split_mode):
"""
Test the model on either test or validation set
:param model: model used to test
:param history_list: all input history snapshot list; does not include the output label train list or valid list
:param test_list: test triple snapshot list
@@ -75,7 +76,7 @@ def test(model, history_list, test_list, num_rels, num_nodes, use_cuda, model_na
pos_samples_batch = test_triples_input[:,2]

_, perf_list = model.predict(history_glist, num_rels, static_graph, test_triples_input, use_cuda, neg_samples_batch, pos_samples_batch,
evaluator, METRIC) # TODO: num_rels, static_graph different!
evaluator, METRIC)

perf_list_all.extend(perf_list)
if split_mode == "test":
@@ -100,6 +101,15 @@ def test(model, history_list, test_list, num_rels, num_nodes, use_cuda, model_na


def run_experiment(args, n_hidden=None, n_layers=None, dropout=None, n_bases=None):
"""
Run the experiment with the given configuration
:param args: arguments
:param n_hidden: hidden dimension
:param n_layers: number of layers
:param dropout: dropout rate
:param n_bases: number of bases
:return: mrr, perf_per_rel (mean reciprocal rank, performance per relation)
"""
# load configuration for grid searching the best configuration
if n_hidden:
args.n_hidden = n_hidden
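run_experiment accepts optional hyperparameters that override args, which is what a grid-search driver could use (the script itself currently only prints that grid search is not implemented); a hypothetical sketch with example values.

from itertools import product

def grid_search(args, run_experiment,
                hidden_dims=(100, 200), layer_counts=(1, 2), dropouts=(0.2,), bases=(100,)):
    """Try each combination and keep the one with the best validation MRR (values are examples only)."""
    best_params, best_mrr = None, -1.0
    for n_hidden, n_layers, dropout, n_bases in product(hidden_dims, layer_counts, dropouts, bases):
        mrr, _ = run_experiment(args, n_hidden=n_hidden, n_layers=n_layers,
                                dropout=dropout, n_bases=n_bases)
        if mrr > best_mrr:
            best_params, best_mrr = dict(n_hidden=n_hidden, n_layers=n_layers,
                                         dropout=dropout, n_bases=n_bases), mrr
    return best_params, best_mrr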
Expand All @@ -120,19 +130,10 @@ def run_experiment(args, n_hidden=None, n_layers=None, dropout=None, n_bases=Non
perf_per_rel = {}
use_cuda = args.gpu >= 0 and torch.cuda.is_available()


# if args.add_static_graph:
# static_triples = np.array(_read_triplets_as_list("../data/" + args.dataset + "/e-w-graph.txt", {}, {}, load_time=False))
# num_static_rels = len(np.unique(static_triples[:, 1]))
# num_words = len(np.unique(static_triples[:, 2]))
# static_triples[:, 2] = static_triples[:, 2] + num_nodes
# static_node_id = torch.from_numpy(np.arange(num_words + data.num_nodes)).view(-1, 1).long().cuda(args.gpu) \
# if use_cuda else torch.from_numpy(np.arange(num_words + data.num_nodes)).view(-1, 1).long()
# else:
num_static_rels, num_words, static_triples, static_graph = 0, 0, [], None

# create stat
model = RecurrentRGCNREGCN(args.decoder, #TODO: this has slightly different args than CEN
model = RecurrentRGCNREGCN(args.decoder,
args.encoder,
num_nodes,
int(num_rels/2),
@@ -166,9 +167,6 @@ def run_experiment(args, n_hidden=None, n_layers=None, dropout=None, n_bases=Non
torch.cuda.set_device(args.gpu)
model.cuda()

# if args.add_static_graph: # TODO: what to do about this part:
# static_graph = build_sub_graph(len(static_node_id), num_static_rels, static_triples, use_cuda, args.gpu)

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)

@@ -248,16 +246,6 @@ def run_experiment(args, n_hidden=None, n_layers=None, dropout=None, n_bases=Non
best_mrr = mrr
torch.save({'state_dict': model.state_dict(), 'epoch': epoch}, model_state_file)

# mrr = test(model,
# train_list+valid_list,
# test_list,
# num_rels,
# num_nodes,
# use_cuda,
# model_state_file,
# static_graph,
# mode="test", split_mode='test')

return best_mrr, perf_per_rel
# ==================
# ==================
@@ -311,20 +299,20 @@ def run_experiment(args, n_hidden=None, n_layers=None, dropout=None, n_bases=Non
dataset.load_val_ns()
dataset.load_test_ns()

## run training and testing
val_mrr, test_mrr = 0, 0
if args.grid_search:
print("TODO: implement hyperparameter grid search")
print("hyperparameter grid search not implemented. Exiting.")
# single run
else:
#TODO: differentiate between train, valid, test
start_train = timeit.default_timer()
if args.test == False:
if args.test == False: # if args.test is True, directly test on a previously trained and stored model
print('start training')
val_mrr, perf_per_rel = run_experiment(args)
val_mrr, perf_per_rel = run_experiment(args) # do training
start_test = timeit.default_timer()
args.test = True
print('start testing')
test_mrr, perf_per_rel = run_experiment(args)
test_mrr, perf_per_rel = run_experiment(args) # do testing


test_time = timeit.default_timer() - start_test
(remaining changed files not loaded)
