From 1fc6110e2d0c5298ad653b09ebd9b6654eb0af68 Mon Sep 17 00:00:00 2001
From: An Guangyan
Date: Thu, 1 Dec 2022 10:42:21 +0800
Subject: [PATCH] Add More MOEA methods

---
 DTLZ_problem/dataset.py   |  27 ++++-
 main.py                   | 248 ++++++++++++++++++++++++++++----------
 nas/nas.py                |  70 ++++++++++-
 problem_config/example.py |   2 +
 utils.py                  |   2 +-
 5 files changed, 276 insertions(+), 73 deletions(-)

diff --git a/DTLZ_problem/dataset.py b/DTLZ_problem/dataset.py
index 4b54731..6adb456 100644
--- a/DTLZ_problem/dataset.py
+++ b/DTLZ_problem/dataset.py
@@ -75,8 +75,16 @@ def create_dataset_inner_1d(x, n_dim: Tuple[int, int], delta: Tuple[List[int | f
     return x, y
 
 
-def create_dataset(problem_dim: Tuple[int, int], problem_name: str, x=None, n_problem=None, spt_qry=None, delta=None,
-                   normalize_targets=True, dim: Literal[0, 1] = 0, pf_ratio: float = 0.5, **_) -> Tuple[
+def create_dataset(problem_dim: Tuple[int, int],
+                   problem_name: str,
+                   x=None,
+                   n_problem=None,
+                   spt_qry=None,
+                   delta=None,
+                   normalize_targets=True,
+                   dim: Literal[0, 1] = 0,
+                   pf_ratio: float = 0.5,
+                   **_) -> Tuple[
     Tuple[
         Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
         Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
@@ -101,6 +109,8 @@ def create_dataset(problem_dim: Tuple[int, int], problem_name: str, x=None, n_pr
         Whether to normalize the targets
     dim : int
         The dimension of the problem
+    pf_ratio : float
+        The fraction of dataset samples drawn from the Pareto front
 
     Returns
     -------
@@ -267,7 +277,12 @@ def get_pf(n_objectives: int, problem: Any,
     return pf
 
 
-def get_moea_data(n_var: int, n_objectives: int, delta: Tuple[int, int], algorithm: Any, n_gen: int, metric: Any, problem_name: str,
+def get_moea_data(n_var: int,
+                  n_objectives: int,
+                  delta: Tuple[int, int],
+                  algorithm: Any,
+                  n_eval: int,
+                  metric: Any, problem_name: str,
                   min_max: Tuple[float | None, float | None]) -> Tuple[
     np.ndarray, list, list
 ]:
@@ -282,8 +297,8 @@ def get_moea_data(n_var: int, n_objectives: int, delta: Tuple[int, int], algorit
         The delta1 and delta2
     algorithm:
         MOEA algorithm
-    n_gen: int
-        number of generation
+    n_eval: int
+        The number of true function evaluations allowed
     metric:
         The metric to calculate the IGD
     problem_name : str
@@ -301,7 +316,7 @@ def get_moea_data(n_var: int, n_objectives: int, delta: Tuple[int, int], algorit
     problem = get_custom_problem(name=problem_name, n_var=n_var, n_obj=n_objectives, delta1=delta[0], delta2=delta[1])  # change delta here
     res = minimize(problem,
                    algorithm,
-                   termination=('n_gen', n_gen),
+                   termination=('n_eval', n_eval),
                    save_history=True,
                    verbose=False)
     moea_pf = res.F
diff --git a/main.py b/main.py
index dd1f693..58c5a3a 100644
--- a/main.py
+++ b/main.py
@@ -7,9 +7,11 @@ import torch
 from matplotlib import pyplot as plt
 from pymoo.algorithms.moo.nsga2 import NSGA2
+from pymoo.algorithms.moo.rvea import RVEA
 from pymoo.indicators.igd import IGD
 from pymoo.operators.sampling.lhs import sampling_lhs
 from pymoo.optimize import minimize
+from pymoo.util.ref_dirs import get_reference_directions
 
 from DTLZ_problem import DTLZbProblem, get_custom_problem
 from DTLZ_problem import evaluate, get_pf, get_moea_data
 
@@ -25,39 +27,19 @@ def cprint(*args, do_print=True, **kwargs):
         print(*args, **kwargs)
 
 
-def test():
-    # see Sol.__init__ for more information
-    args = get_args()
-    network_structure = get_network_structure(args)
-    dataset, _ = get_dataset(args, normalize_targets=True, problem_name='DTLZ4c')
-    sol = MamlWrapper(dataset, args, network_structure)
-    # train_loss = sol.train(explicit=1)
-    test_loss = sol.test(return_single_loss=False)
-    mean_test_loss = np.mean(test_loss, axis=0)
-    print(f'Test loss: {mean_test_loss[-1]:.4f}')
-    x_test = dataset[1][2][1]
-    y_true = dataset[1][3][1]
-    y_pred = [sol(x)[1] for x in x_test]
-    print(y_true[:10])
-    print(y_pred[:10])
-    x_test = np.array([i * 0.09 for i in range(1, 1 + 10)], np.float32)
-    y_pred = sol(x_test)
-    y_true = [y + 1 for y in y_pred]  # add some noise for testing
-    sol.test_continue(x_test, np.array(y_true, np.float32).reshape((3, 1)))
-    y_pred_1 = sol(x_test)
-    print(f'Prediction: {y_pred}')
-    print(f'Prediction after continue: {y_pred_1}')
-
-    # args.update_step_test = int(1.5 * args.update_step_test)
-    sol = MamlWrapper(dataset, args, network_structure)
-    random_loss = sol.test(pretrain=True, return_single_loss=False)
-    mean_random_loss = np.mean(random_loss, axis=0)
-    print(f'Random loss: {mean_random_loss[-1]:.4f}')
-
-    visualize_loss(test_loss, random_loss)
+def main(problem_name: str,
+         print_progress=False,
+         do_plot=False,
+         do_train=True,
+         gpu_id: int | None = None,
+         return_none_train_igd=False,
+         additional_data: dict | None = None):
+    if return_none_train_igd:
+        print_progress = False
+        do_plot = False
+        do_train = False
 
 
-def main(problem_name: str, print_progress=False, do_plot=False, do_train=True, gpu_id: int | None = None):
     args = get_args()
     dim = args.dim if 'dim' in args else 1
     if gpu_id is not None:
@@ -67,28 +49,35 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
     igd = []
     fn_eval = args.k_spt
     fn_eval_limit = 300 + 2
-    max_pts_num = 10
+    max_pts_num = 20
     moea_pop_size = 50
-    proxy_n_gen = 50
-    proxy_pop_size = 50
+    proxy_n_gen = 100
+    proxy_pop_size = 100
 
     network_structure = get_network_structure(args)
     # generate delta
-    delta = []
-    for i in range(2):
-        delta.append([np.random.rand(args.train_test[i])*8, np.random.rand(args.train_test[i])*8])
-    x = [None, None, None, None]
-    x[2] = sampling_lhs(n_samples=11 * n_var - 1, n_var=n_var, xl=0, xu=1)
-    # sample 'arg.k_spt' from x[2]
-    x[2] = x[2][np.random.choice(x[2].shape[0], args.k_spt, replace=False), :]
-    dataset, min_max = get_dataset(
-        args,
-        normalize_targets=True,
-        delta=delta,
-        problem_name=problem_name,
-        pf_ratio=0,
-        dim=dim
-    )
+    if additional_data is None:
+        delta = []
+        for i in range(2):
+            delta.append([np.random.rand(args.train_test[i]) * 20, np.random.rand(args.train_test[i]) * 20])
+        x = [None, None, None, None]
+        x[2] = sampling_lhs(n_samples=11 * n_var - 1, n_var=n_var, xl=0, xu=1)
+        # sample 'args.k_spt' points from x[2]
+        x[2] = x[2][np.random.choice(x[2].shape[0], args.k_spt, replace=False), :]
+        dataset, min_max = get_dataset(
+            args,
+            normalize_targets=True,
+            delta=delta,
+            problem_name=problem_name,
+            pf_ratio=0.5,
+            dim=dim
+        )
+    else:
+        delta = additional_data['delta']
+        x = additional_data['x']
+        dataset = additional_data['dataset']
+        min_max = additional_data['min_max']
+
     sol = MamlWrapper(dataset, args, network_structure)
     cprint('dataset init complete', do_print=print_progress)
     if do_train:
@@ -104,7 +93,10 @@ def main(problem_name: str,
     problem = get_custom_problem(name=problem_name, n_var=n_var, n_obj=n_objectives, delta1=delta_finetune[0],
                                  delta2=delta_finetune[1])
-    pf_true = get_pf(n_objectives, problem, min_max)
+    if additional_data is None:
+        pf_true = get_pf(n_objectives, problem, min_max)
+    else:
+        pf_true = additional_data['pf_true']
 
     res = minimize(problem=problem,
                    algorithm=NSGA2(pop_size=proxy_pop_size, sampling=init_x),
@@ -113,8 +105,9 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
     history_x, history_f = res.X, res.F
     history_x = history_x.astype(np.float32)
     history_f = history_f.astype(np.float32)
-    history_f -= min_max[0]
-    history_f /= min_max[1]
+    if min_max[0] is not None:
+        history_f -= min_max[0]
+        history_f /= min_max[1]
 
     metric = IGD(pf_true, zero_to_one=True)
     igd.append(metric.do(history_f))
@@ -134,7 +127,9 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
     while fn_eval < fn_eval_limit:
         cprint(f'fn_eval: {fn_eval}', do_print=print_progress)
 
-        algorithm_surrogate = NSGA2(pop_size=args.k_spt, sampling=history_x)
+        # algorithm_surrogate = NSGA2(pop_size=args.k_spt, sampling=history_x)
+        ref_dirs = get_reference_directions("das-dennis", n_objectives, n_partitions=8)
+        algorithm_surrogate = RVEA(pop_size=proxy_pop_size, sampling=history_x, ref_dirs=ref_dirs)
         problem_surrogate = DTLZbProblem(n_var=n_var, n_obj=n_objectives, sol=sol)
 
         res = minimize(problem_surrogate,
@@ -161,8 +156,9 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
 
         history_f = np.vstack((history_f, y_true))
 
-        cont_loss = sol.test_continue(history_x, history_f, return_single_loss=True)
-        # cont_loss = sol.test_continue(X, y_true.T, return_single_loss=True)
+        for _ in range(5):
+            cont_loss = sol.test_continue(history_x, history_f, return_single_loss=True)
+        # cont_loss = sol.test_continue(train_x, train_y, return_single_loss=True)
         cprint(f'continue loss: {cont_loss}', do_print=print_progress)
 
         # metric = IGD(pf_true, zero_to_one=True)
@@ -181,12 +177,16 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
                          scale=scale, pf_true=pf_true, show=True)
             # pf = evaluate(res.X, delta_finetune, n_objectives, min_max=min_max)
 
+    if return_none_train_igd:
+        return func_eval_igd, igd
+
     cprint('Algorithm complete', do_print=print_progress)
 
     pf = history_f
-    moea_problem = NSGA2(pop_size=moea_pop_size, sampling=init_x)
+    # moea_problem = NSGA2(pop_size=moea_pop_size, sampling=init_x)
+    ref_dirs = get_reference_directions("das-dennis", n_objectives, n_partitions=8)
+    moea_problem = RVEA(pop_size=moea_pop_size, sampling=init_x, ref_dirs=ref_dirs)
     moea_pf, n_evals_moea, igd_moea = get_moea_data(n_var, n_objectives, delta_finetune, moea_problem,
-                                                    int(fn_eval_limit / moea_pop_size),
+                                                    fn_eval_limit,
                                                     metric, problem_name, min_max)
@@ -208,21 +208,138 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
         scale.append(data)
     visualize_pf(pf=moea_pf, label='NSGA-II PF', color='blue', scale=scale, pf_true=pf_true)
 
-    func_evals = [func_eval_igd, n_evals_moea, func_eval_igd]
-    igds = [igd, igd_moea, Y_igd]
-    colors = ['black', 'blue', 'green']
-    labels = ['Our Surrogate Model', 'NSGA-II', 'Test']
     if do_plot:
+        additional_data = {
+            'delta': delta,
+            'x': x,
+            'dataset': dataset,
+            'min_max': min_max,
+            'pf_true': pf_true,
+        }
+        nt_func_eval_igd, nt_igd = main(problem_name, return_none_train_igd=True, additional_data=additional_data)
+
+        func_evals = [func_eval_igd, n_evals_moea, func_eval_igd, nt_func_eval_igd]
+        igds = [igd, igd_moea, Y_igd, nt_igd]
+        colors = ['black', 'blue', 'green', 'orange']
+        labels = ['Our Algorithm with Meta', 'MOEA', 'Surrogate IGD per update', 'Our Algorithm without Meta']
+
         visualize_igd(func_evals, igds, colors, labels)
         plt.show()
-    cprint(f'IGD of Proxy: {igd[-3:-1]}', do_print=print_progress)
-    cprint(f'IGD of MOEA: {igd_moea[-3:-1]}', do_print=print_progress)
+    cprint(f'IGD of Proxy: {igd[-2:]}', do_print=print_progress)
+    cprint(f'IGD of MOEA: {igd_moea[-2:]}', do_print=print_progress)
 
     # deallocate memory
     del sol
 
     return igd[-1]
 
 
+def train_with_moea_data(problem_name: str):
+    args = get_args()
+    dim = args.dim if 'dim' in args else 1
+    n_var = args.problem_dim[0]
+    n_objectives = args.problem_dim[1]
+    igd = []
+    fn_eval = args.k_spt
+    fn_eval_limit = 300 + 2
+    max_pts_num = 5
+    moea_pop_size = 50
+    proxy_n_gen = 50
+    proxy_pop_size = 50
+
+    network_structure = get_network_structure(args)
+    # generate delta
+    delta = []
+    for i in range(2):
+        delta.append([np.random.rand(args.train_test[i]) * 8, np.random.rand(args.train_test[i]) * 8])
+    x = [None, None, None, None]
+    x[2] = sampling_lhs(n_samples=11 * n_var - 1, n_var=n_var, xl=0, xu=1)
+    # sample 'args.k_spt' points from x[2]
+    x[2] = x[2][np.random.choice(x[2].shape[0], args.k_spt, replace=False), :]
+    dataset, min_max = get_dataset(
+        args,
+        normalize_targets=True,
+        delta=delta,
+        problem_name=problem_name,
+        pf_ratio=0,
+        dim=dim
+    )
+    sol = MamlWrapper(dataset, args, network_structure)
+    cprint('dataset init complete', do_print=True)
+    train_loss = sol.train(explicit=2)
+    print(train_loss[-1])
+    test_loss = sol.test(return_single_loss=False)
+    delta_finetune = np.array(delta[1])[:, -1]
+    init_x = dataset[1][0][0]  # test spt set (100, 8)
+    problem = get_custom_problem(name=problem_name, n_var=n_var, n_obj=n_objectives, delta1=delta_finetune[0],
+                                 delta2=delta_finetune[1])
+    pf_true = get_pf(n_objectives, problem, min_max)
+    ref_dirs = get_reference_directions("das-dennis", n_objectives, n_partitions=8)
+    moea_problem = RVEA(pop_size=moea_pop_size, sampling=init_x, ref_dirs=ref_dirs)
+    res = minimize(problem, moea_problem, save_history=True, termination=('n_eval', fn_eval_limit), seed=None, verbose=True)
+    hist = res.history
+    hist_F, n_evals = [], []
+    hist_X = []
+    for algo in hist:
+        n_evals.append(algo.evaluator.n_eval)
+        opt = algo.opt
+        # feas = np.where(opt.get("feasible"))[0]
+        # hist_F.append(opt.get("F")[feas])
+        feas_pop = np.where(algo.pop.get("feasible"))[0]
+        feas_off = np.where(algo.off.get("feasible"))[0]
+        hist_F.append(np.concatenate([algo.pop.get("F")[feas_pop], algo.off.get("F")[feas_off]], axis=0))
+        hist_X.append(np.concatenate([algo.pop.get("X")[feas_pop], algo.off.get("X")[feas_off]], axis=0))
+        if len(hist_F) > 1:
+            # keep (X, F) rows paired: deduplicating X and F independently with np.unique would reorder them and desynchronize the surrogate's training pairs
+            hist_F[-1] = np.concatenate([hist_F[-2], hist_F[-1]], axis=0)
+            hist_X[-1] = np.concatenate([hist_X[-2], hist_X[-1]], axis=0)
+    if min_max[0] is not None:
+        for _F in hist_F:
+            _F -= min_max[0]
+            _F /= min_max[1]
+        # min_max holds objective-space statistics; decision variables from
+        # LHS sampling already lie in [0, 1], so hist_X is left unscaled
+        # (rescaling X with objective min/max would corrupt surrogate inputs)
+    moea_pf = hist_F[-1].astype(np.float32)
+    hist_F = np.concatenate(hist_F, axis=0)
+    hist_X = np.concatenate(hist_X, axis=0)
+    hist_x = hist_X.astype(np.float32)
+    hist_y = hist_F.astype(np.float32)
+
+    # train surrogate model
+    for _ in range(20):
+        print(f'Loss: {sol.test_continue(hist_x, hist_y, return_single_loss=True)}')
+
+    ref_dirs = get_reference_directions("das-dennis", n_objectives, n_partitions=8)
+    algorithm_surrogate = RVEA(pop_size=proxy_pop_size, ref_dirs=ref_dirs)
+    problem_surrogate = DTLZbProblem(n_var=n_var, n_obj=n_objectives, sol=sol)
+
+    res = minimize(problem_surrogate,
+                   algorithm_surrogate,
+                   ('n_gen', proxy_n_gen),
+                   verbose=False)
+
+    # calculate igd
+    sur_pf = res.F
+    metric = IGD(pf_true, zero_to_one=True)
+    igd.append(metric.do(sur_pf))
+    print(f'IGD of Proxy: {igd[-1]}')
+    # moea_pf = hist_F[-1]
+    igd.append(metric.do(moea_pf))
+    print(f'IGD of MOEA: {igd[-1]}')
+    scale = []
+    for i in range(n_objectives):
+        concatenated = np.concatenate([moea_pf[:, i], pf_true[:, i]])
+        data = [np.min(concatenated), np.max(concatenated)]
+        scale.append(data)
+    visualize_pf(pf=moea_pf, label='RVEA PF', color='blue', scale=scale, pf_true=pf_true)
+    scale = []
+    for i in range(n_objectives):
+        concatenated = np.concatenate([sur_pf[:, i], pf_true[:, i]])
+        data = [np.min(concatenated), np.max(concatenated)]
+        scale.append(data)
+    visualize_pf(pf=sur_pf, label='SURR PF', color='g', scale=scale, pf_true=pf_true)
+    plt.show()
+
+
 def post_mean_std(data: list | np.ndarray):
     return np.mean(data), np.std(data)
@@ -310,5 +427,6 @@ def fast_seed(seed: int) -> None:
         'd4': 'DTLZ4c',
         'd7': 'DTLZ7b',
     })
-    main(problems.d7, do_plot=True, print_progress=True, do_train=True)
+    main(problems.d1, do_plot=True, print_progress=True, do_train=True)
+    # train_with_moea_data(problems.d4)
     # main_benchmark(problems.d1)
diff --git a/nas/nas.py b/nas/nas.py
index 93e6599..9403ca1 100644
--- a/nas/nas.py
+++ b/nas/nas.py
@@ -5,6 +5,9 @@ from multiprocessing import Pool
 from multiprocessing import Manager as LockManager
 from typing import List
 
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
 
 sys.path.append('..')
 
@@ -299,5 +302,70 @@ def main():
             print(f'Pickle Dump Error: {e}')
 
 
+def single_model_run():
+    n_args = (10, 3)
+
+    class Net(nn.Module):
+        def __init__(self, s):
+            super(Net, self).__init__()
+            net = [
+                nn.Linear(n_args[0], s[0]),
+            ]
+            for i in range(len(s) - 1):
+                net.append(nn.ReLU())
+                net.append(nn.Linear(s[i], s[i + 1]))
+            net.append(nn.ReLU())
+            net.append(nn.Linear(s[-1], n_args[1]))
+            self.net = nn.Sequential(*net)
+
+        def forward(self, x):
+            return self.net(x)
+
+    net = Net([100, 200, 200, 200, 100])
+    args = get_args()
+    args.k_spt = 2000
+    args.k_qry = 2000
+    dataset, norm = get_dataset(args,
+                                normalize_targets=True,
+                                problem_name='DTLZ4c',
+                                pf_ratio=0,
+                                dim=1)
+    train_x, train_y, test_x, test_y = dataset[1]
+    dev = torch.device('cuda:0')
+
+    def remove_one_from_shape(x):
+        s = list(x.shape)
+        s = [ss for ss in s if ss != 1]
+        x = x.reshape(s)
+        return torch.from_numpy(x).float().to(dev)
+
+    train_x = remove_one_from_shape(train_x)
+    train_y = remove_one_from_shape(train_y)
+    test_x = remove_one_from_shape(test_x)
+    test_y = remove_one_from_shape(test_y)
+
+    net = net.to(dev)
+
+    # train
+    net.train()
+    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
+    for i in range(207):
+        optimizer.zero_grad()
+        random_idx = np.random.choice(train_x.shape[0], 100)
+        x = train_x[random_idx]
+        y = train_y[random_idx]
+        y_pred = net(x)
+        loss = F.mse_loss(y_pred, y)
+        loss.backward()
+        optimizer.step()
+        print(f'Step {i}: {loss.item()}')
+
+    # test
+    net.eval()
+    y_pred = net(test_x)
+    loss = F.mse_loss(y_pred, test_y)
+    print(f'Test Loss: {loss.item()}')
+
+
 if __name__ == '__main__':
-    main()
+    single_model_run()
diff --git a/problem_config/example.py b/problem_config/example.py
index f0d6532..828a876 100644
--- a/problem_config/example.py
+++ b/problem_config/example.py
@@ -50,6 +50,8 @@ def get_network_structure(args):
         ('relu', [True]),
         ('linear', [200, 200]),
         ('relu', [True]),
+        ('linear', [200, 200]),
+        ('relu', [True]),
         ('linear', [100, 200]),
         ('relu', [True]),
         ('linear', [n_args_out, 100]),
diff --git a/utils.py b/utils.py
index 507dc60..7e0f00d 100644
--- a/utils.py
+++ b/utils.py
@@ -94,7 +94,7 @@ def draw_curve(n, m):
 
 
 if __name__ == '__main__':
-    _n, _m = 300, 50
+    _n, _m = 2000, 100
     # draw_curve(_n, _m)
     v = calculate_confidence_k(_n, _m)
     print(v)
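
Usage sketch (illustrative, not part of the patch): the MOEA baseline now stops
on an evaluation budget, termination=('n_eval', ...), instead of a generation
count, so it consumes the same number of true function evaluations as the
surrogate-assisted loop in main() (fn_eval_limit = 302). The snippet below
mirrors that setup under two assumptions: pymoo >= 0.6, and pymoo's built-in
DTLZ2 standing in for this repo's get_custom_problem(); the constants are the
ones used in main().

    from pymoo.algorithms.moo.rvea import RVEA
    from pymoo.indicators.igd import IGD
    from pymoo.optimize import minimize
    from pymoo.problems import get_problem
    from pymoo.util.ref_dirs import get_reference_directions

    # stand-in for get_custom_problem(): 10 variables, 3 objectives
    problem = get_problem("dtlz2", n_var=10, n_obj=3)

    # RVEA is steered by reference directions; "das-dennis" with
    # n_partitions=8 on a 3-objective simplex yields 45 directions
    ref_dirs = get_reference_directions("das-dennis", 3, n_partitions=8)
    algorithm = RVEA(ref_dirs=ref_dirs, pop_size=50)

    # budget-based termination: stop after ~302 true evaluations rather
    # than after a fixed number of generations
    res = minimize(problem, algorithm, termination=('n_eval', 302),
                   save_history=True, verbose=False)

    # same IGD bookkeeping as get_moea_data(): distance to the true
    # Pareto front, normalized to [0, 1]
    metric = IGD(problem.pareto_front(ref_dirs), zero_to_one=True)
    print(f'evals: {res.algorithm.evaluator.n_eval}, IGD: {metric.do(res.F):.4f}')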