From 1fc6110e2d0c5298ad653b09ebd9b6654eb0af68 Mon Sep 17 00:00:00 2001
From: An Guangyan
Date: Thu, 1 Dec 2022 10:42:21 +0800
Subject: [PATCH] Add More MOEA methods

---
 DTLZ_problem/dataset.py   |  27 ++++-
 main.py                   | 248 ++++++++++++++++++++++++++++----------
 nas/nas.py                |  70 ++++++++++-
 problem_config/example.py |   2 +
 utils.py                  |   2 +-
 5 files changed, 276 insertions(+), 73 deletions(-)

diff --git a/DTLZ_problem/dataset.py b/DTLZ_problem/dataset.py
index 4b54731..6adb456 100644
--- a/DTLZ_problem/dataset.py
+++ b/DTLZ_problem/dataset.py
@@ -75,8 +75,16 @@ def create_dataset_inner_1d(x, n_dim: Tuple[int, int], delta: Tuple[List[int | f
     return x, y
 
 
-def create_dataset(problem_dim: Tuple[int, int], problem_name: str, x=None, n_problem=None, spt_qry=None, delta=None,
-                   normalize_targets=True, dim: Literal[0, 1] = 0, pf_ratio: float = 0.5, **_) -> Tuple[
+def create_dataset(problem_dim: Tuple[int, int],
+                   problem_name: str,
+                   x=None,
+                   n_problem=None,
+                   spt_qry=None,
+                   delta=None,
+                   normalize_targets=True,
+                   dim: Literal[0, 1] = 0,
+                   pf_ratio: float = 0.5,
+                   **_) -> Tuple[
     Tuple[
         Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
         Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
@@ -101,6 +109,8 @@ def create_dataset(problem_dim: Tuple[int, int], problem_name: str, x=None, n_pr
         Whether to normalize the targets
     dim : int
         The dimension of the problem
+    pf_ratio : float
+        The fraction of dataset samples drawn from the Pareto front
 
     Returns
     -------
@@ -267,7 +277,12 @@ def get_pf(n_objectives: int, problem: Any,
     return pf
 
 
-def get_moea_data(n_var: int, n_objectives: int, delta: Tuple[int, int], algorithm: Any, n_gen: int, metric: Any, problem_name: str,
+def get_moea_data(n_var: int,
+                  n_objectives: int,
+                  delta: Tuple[int, int],
+                  algorithm: Any,
+                  n_eval: int,
+                  metric: Any, problem_name: str,
                   min_max: Tuple[float | None, float | None]) -> Tuple[
     np.ndarray, list, list
 ]:
@@ -282,8 +297,8 @@ def get_moea_data(n_var: int, n_objectives: int, delta: Tuple[int, int], algorit
         The delta1 and delta2
     algorithm:
         MOEA algorithm
-    n_gen: int
-        number of generation
+    n_eval: int
+        The number of true function evaluations allowed
     metric:
         The metric to calculate the IGD
     problem_name : str
@@ -301,7 +316,7 @@ def get_moea_data(n_var: int, n_objectives: int, delta: Tuple[int, int], algorit
     problem = get_custom_problem(name=problem_name, n_var=n_var, n_obj=n_objectives, delta1=delta[0], delta2=delta[1])  # change delta here
     res = minimize(problem,
                    algorithm,
-                   termination=('n_gen', n_gen),
+                   termination=('n_eval', n_eval),
                    save_history=True,
                    verbose=False)
     moea_pf = res.F
diff --git a/main.py b/main.py
index dd1f693..58c5a3a 100644
--- a/main.py
+++ b/main.py
@@ -7,9 +7,11 @@ import torch
 from matplotlib import pyplot as plt
 from pymoo.algorithms.moo.nsga2 import NSGA2
+from pymoo.algorithms.moo.rvea import RVEA
 from pymoo.indicators.igd import IGD
 from pymoo.operators.sampling.lhs import sampling_lhs
 from pymoo.optimize import minimize
+from pymoo.util.ref_dirs import get_reference_directions
 
 from DTLZ_problem import DTLZbProblem, get_custom_problem
 from DTLZ_problem import evaluate, get_pf, get_moea_data
 
@@ -25,39 +27,19 @@ def cprint(*args, do_print=True, **kwargs):
         print(*args, **kwargs)
 
 
-def test():
-    # see Sol.__init__ for more information
-    args = get_args()
-    network_structure = get_network_structure(args)
-    dataset, _ = get_dataset(args, normalize_targets=True, problem_name='DTLZ4c')
-    sol = MamlWrapper(dataset, args, network_structure)
-    # train_loss = sol.train(explicit=1)
-    test_loss = sol.test(return_single_loss=False)
-    mean_test_loss = np.mean(test_loss, axis=0)
-    print(f'Test loss: {mean_test_loss[-1]:.4f}')
-    x_test = dataset[1][2][1]
-    y_true = dataset[1][3][1]
-    y_pred = [sol(x)[1] for x in x_test]
-    print(y_true[:10])
-    print(y_pred[:10])
-    x_test = np.array([i * 0.09 for i in range(1, 1 + 10)], np.float32)
-    y_pred = sol(x_test)
-    y_true = [y + 1 for y in y_pred]  # add some noise for testing
-    sol.test_continue(x_test, np.array(y_true, np.float32).reshape((3, 1)))
-    y_pred_1 = sol(x_test)
-    print(f'Prediction: {y_pred}')
-    print(f'Prediction after continue: {y_pred_1}')
-
-    # args.update_step_test = int(1.5 * args.update_step_test)
-    sol = MamlWrapper(dataset, args, network_structure)
-    random_loss = sol.test(pretrain=True, return_single_loss=False)
-    mean_random_loss = np.mean(random_loss, axis=0)
-    print(f'Random loss: {mean_random_loss[-1]:.4f}')
-
-    visualize_loss(test_loss, random_loss)
+def main(problem_name: str,
+         print_progress=False,
+         do_plot=False,
+         do_train=True,
+         gpu_id: int | None = None,
+         return_none_train_igd=False,
+         additional_data: dict | None = None):
+    if return_none_train_igd:
+        print_progress = False
+        do_plot = False
+        do_train = False
 
 
-def main(problem_name: str, print_progress=False, do_plot=False, do_train=True, gpu_id: int | None = None):
     args = get_args()
     dim = args.dim if 'dim' in args else 1
     if gpu_id is not None:
@@ -67,28 +49,35 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
     igd = []
     fn_eval = args.k_spt
     fn_eval_limit = 300 + 2
-    max_pts_num = 10
+    max_pts_num = 20
     moea_pop_size = 50
-    proxy_n_gen = 50
-    proxy_pop_size = 50
+    proxy_n_gen = 100
+    proxy_pop_size = 100
 
     network_structure = get_network_structure(args)
     # generate delta
-    delta = []
-    for i in range(2):
-        delta.append([np.random.rand(args.train_test[i])*8, np.random.rand(args.train_test[i])*8])
-    x = [None, None, None, None]
-    x[2] = sampling_lhs(n_samples=11 * n_var - 1, n_var=n_var, xl=0, xu=1)
-    # sample 'arg.k_spt' from x[2]
-    x[2] = x[2][np.random.choice(x[2].shape[0], args.k_spt, replace=False), :]
-    dataset, min_max = get_dataset(
-        args,
-        normalize_targets=True,
-        delta=delta,
-        problem_name=problem_name,
-        pf_ratio=0,
-        dim=dim
-    )
+    if additional_data is None:
+        delta = []
+        for i in range(2):
+            delta.append([np.random.rand(args.train_test[i]) * 20, np.random.rand(args.train_test[i]) * 20])
+        x = [None, None, None, None]
+        x[2] = sampling_lhs(n_samples=11 * n_var - 1, n_var=n_var, xl=0, xu=1)
+        # sample 'args.k_spt' points from x[2]
+        x[2] = x[2][np.random.choice(x[2].shape[0], args.k_spt, replace=False), :]
+        dataset, min_max = get_dataset(
+            args,
+            normalize_targets=True,
+            delta=delta,
+            problem_name=problem_name,
+            pf_ratio=0.5,
+            dim=dim
+        )
+    else:
+        delta = additional_data['delta']
+        x = additional_data['x']
+        dataset = additional_data['dataset']
+        min_max = additional_data['min_max']
+
     sol = MamlWrapper(dataset, args, network_structure)
     cprint('dataset init complete', do_print=print_progress)
     if do_train:
@@ -104,7 +93,10 @@ def main(problem_name: str,
     problem = get_custom_problem(name=problem_name, n_var=n_var, n_obj=n_objectives, delta1=delta_finetune[0],
                                  delta2=delta_finetune[1])
-    pf_true = get_pf(n_objectives, problem, min_max)
+    if additional_data is None:
+        pf_true = get_pf(n_objectives, problem, min_max)
+    else:
+        pf_true = additional_data['pf_true']
 
     res = minimize(problem=problem,
                    algorithm=NSGA2(pop_size=proxy_pop_size, sampling=init_x),
@@ -113,8 +105,9 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
     history_x, history_f = res.X, res.F
     history_x = history_x.astype(np.float32)
     history_f = history_f.astype(np.float32)
-    history_f -= min_max[0]
-    history_f /= min_max[1]
+    if min_max[0] is not None:
+        history_f -= min_max[0]
+        history_f /= min_max[1]
 
     metric = IGD(pf_true, zero_to_one=True)
     igd.append(metric.do(history_f))
@@ -134,7 +127,9 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
     while fn_eval < fn_eval_limit:
         cprint(f'fn_eval: {fn_eval}', do_print=print_progress)
 
-        algorithm_surrogate = NSGA2(pop_size=args.k_spt, sampling=history_x)
+        # algorithm_surrogate = NSGA2(pop_size=args.k_spt, sampling=history_x)
+        ref_dirs = get_reference_directions("das-dennis", n_objectives, n_partitions=8)
+        algorithm_surrogate = RVEA(pop_size=proxy_pop_size, sampling=history_x, ref_dirs=ref_dirs)
         problem_surrogate = DTLZbProblem(n_var=n_var, n_obj=n_objectives, sol=sol)
 
         res = minimize(problem_surrogate,
@@ -161,8 +156,9 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
 
         history_f = np.vstack((history_f, y_true))
 
-        cont_loss = sol.test_continue(history_x, history_f, return_single_loss=True)
-        # cont_loss = sol.test_continue(X, y_true.T, return_single_loss=True)
+        for _ in range(5):
+            cont_loss = sol.test_continue(history_x, history_f, return_single_loss=True)
+        # cont_loss = sol.test_continue(train_x, train_y, return_single_loss=True)
         cprint(f'continue loss: {cont_loss}', do_print=print_progress)
 
         # metric = IGD(pf_true, zero_to_one=True)
@@ -181,12 +177,16 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
                          scale=scale, pf_true=pf_true, show=True)
             # pf = evaluate(res.X, delta_finetune, n_objectives, min_max=min_max)
 
+    if return_none_train_igd:
+        return func_eval_igd, igd
+
     cprint('Algorithm complete', do_print=print_progress)
 
     pf = history_f
-    moea_problem = NSGA2(pop_size=moea_pop_size, sampling=init_x)
+    # moea_problem = NSGA2(pop_size=moea_pop_size, sampling=init_x)
+    ref_dirs = get_reference_directions("das-dennis", n_objectives, n_partitions=8)
+    moea_problem = RVEA(pop_size=moea_pop_size, sampling=init_x, ref_dirs=ref_dirs)
     moea_pf, n_evals_moea, igd_moea = get_moea_data(n_var, n_objectives, delta_finetune, moea_problem,
-                                                    int(fn_eval_limit / moea_pop_size),
+                                                    fn_eval_limit,
                                                     metric, problem_name, min_max)
@@ -208,21 +208,138 @@ def main(problem_name: str, print_progress=False, do_plot=False, do_train=True,
         scale.append(data)
     visualize_pf(pf=moea_pf, label='NSGA-II PF', color='blue', scale=scale, pf_true=pf_true)
 
-    func_evals = [func_eval_igd, n_evals_moea, func_eval_igd]
-    igds = [igd, igd_moea, Y_igd]
-    colors = ['black', 'blue', 'green']
-    labels = ['Our Surrogate Model', 'NSGA-II', 'Test']
     if do_plot:
+        additional_data = {
+            'delta': delta,
+            'x': x,
+            'dataset': dataset,
+            'min_max': min_max,
+            'pf_true': pf_true,
+        }
+        nt_func_eval_igd, nt_igd = main(problem_name, return_none_train_igd=True, additional_data=additional_data)
+
+        func_evals = [func_eval_igd, n_evals_moea, func_eval_igd, nt_func_eval_igd]
+        igds = [igd, igd_moea, Y_igd, nt_igd]
+        colors = ['black', 'blue', 'green', 'orange']
+        labels = ['Our Algorithm with Meta', 'MOEA', 'Surrogate IGD per update', 'Our Algorithm without Meta']
+
         visualize_igd(func_evals, igds, colors, labels)
         plt.show()
-    cprint(f'IGD of Proxy: {igd[-3:-1]}', do_print=print_progress)
-    cprint(f'IGD of MOEA: {igd_moea[-3:-1]}', do_print=print_progress)
+    cprint(f'IGD of Proxy: {igd[-2:]}', do_print=print_progress)
+    cprint(f'IGD of MOEA: {igd_moea[-2:]}', do_print=print_progress)
 
     # deallocate memory
     del sol
 
     return igd[-1]
 
 
+def train_with_moea_data(problem_name: str):
+    args = get_args()
+    dim = args.dim if 'dim' in args else 1
+    n_var = args.problem_dim[0]
+    n_objectives = args.problem_dim[1]
+    igd = []
+    fn_eval = args.k_spt
+    fn_eval_limit = 300 + 2
+    max_pts_num = 5
+    moea_pop_size = 50
+    proxy_n_gen = 50
+    proxy_pop_size = 50
+
+    network_structure = get_network_structure(args)
+    # generate delta
+    delta = []
+    for i in range(2):
+        delta.append([np.random.rand(args.train_test[i]) * 8, np.random.rand(args.train_test[i]) * 8])
+    x = [None, None, None, None]
+    x[2] = sampling_lhs(n_samples=11 * n_var - 1, n_var=n_var, xl=0, xu=1)
+    # sample 'args.k_spt' points from x[2]
+    x[2] = x[2][np.random.choice(x[2].shape[0], args.k_spt, replace=False), :]
+    dataset, min_max = get_dataset(
+        args,
+        normalize_targets=True,
+        delta=delta,
+        problem_name=problem_name,
+        pf_ratio=0,
+        dim=dim
+    )
+    sol = MamlWrapper(dataset, args, network_structure)
+    cprint('dataset init complete', do_print=True)
+    train_loss = sol.train(explicit=2)
+    print(train_loss[-1])
+    test_loss = sol.test(return_single_loss=False)
+    delta_finetune = np.array(delta[1])[:, -1]
+    init_x = dataset[1][0][0]  # test spt set (100, 8)
+    problem = get_custom_problem(name=problem_name, n_var=n_var, n_obj=n_objectives, delta1=delta_finetune[0],
+                                 delta2=delta_finetune[1])
+    pf_true = get_pf(n_objectives, problem, min_max)
+    ref_dirs = get_reference_directions("das-dennis", n_objectives, n_partitions=8)
+    moea_problem = RVEA(pop_size=moea_pop_size, sampling=init_x, ref_dirs=ref_dirs)
+    res = minimize(problem, moea_problem, save_history=True, termination=('n_eval', fn_eval_limit), seed=None, verbose=True)
+    hist = res.history
+    hist_F, n_evals = [], []
+    hist_X = []
+    for algo in hist:
+        n_evals.append(algo.evaluator.n_eval)
+        opt = algo.opt
+        # feas = np.where(opt.get("feasible"))[0]
+        # hist_F.append(opt.get("F")[feas])
+        feas_pop = np.where(algo.pop.get("feasible"))[0]
+        feas_off = np.where(algo.off.get("feasible"))[0]
+        hist_F.append(np.concatenate([algo.pop.get("F")[feas_pop], algo.off.get("F")[feas_off]], axis=0))
+        hist_X.append(np.concatenate([algo.pop.get("X")[feas_pop], algo.off.get("X")[feas_off]], axis=0))
+        if len(hist_F) > 1:
+            # keep (X, F) rows paired: deduplicating X and F independently with np.unique would reorder them and desynchronize the surrogate's training pairs
+            hist_F[-1] = np.concatenate([hist_F[-2], hist_F[-1]], axis=0)
+            hist_X[-1] = np.concatenate([hist_X[-2], hist_X[-1]], axis=0)
+    if min_max[0] is not None:
+        for _F in hist_F:
+            _F -= min_max[0]
+            _F /= min_max[1]
+        # min_max holds objective-space statistics; decision variables from
+        # LHS sampling already lie in [0, 1], so hist_X is left unscaled
+        # (rescaling X with objective min/max would corrupt surrogate inputs)
+    moea_pf = hist_F[-1].astype(np.float32)
+    hist_F = np.concatenate(hist_F, axis=0)
+    hist_X = np.concatenate(hist_X, axis=0)
+    hist_x = hist_X.astype(np.float32)
+    hist_y = hist_F.astype(np.float32)
+
+    # train surrogate model
+    for _ in range(20):
+        print(f'Loss: {sol.test_continue(hist_x, hist_y, return_single_loss=True)}')
+
+    ref_dirs = get_reference_directions("das-dennis", n_objectives, n_partitions=8)
+    algorithm_surrogate = RVEA(pop_size=proxy_pop_size, ref_dirs=ref_dirs)
+    problem_surrogate = DTLZbProblem(n_var=n_var, n_obj=n_objectives, sol=sol)
+
+    res = minimize(problem_surrogate,
+                   algorithm_surrogate,
+                   ('n_gen', proxy_n_gen),
+                   verbose=False)
+
+    # calculate igd
+    sur_pf = res.F
+    metric = IGD(pf_true, zero_to_one=True)
+    igd.append(metric.do(sur_pf))
+    print(f'IGD of Proxy: {igd[-1]}')
+    # moea_pf = hist_F[-1]
+    igd.append(metric.do(moea_pf))
+    print(f'IGD of MOEA: {igd[-1]}')
+    scale = []
+    for i in range(n_objectives):
+        concatenated = np.concatenate([moea_pf[:, i], pf_true[:, i]])
+        data = [np.min(concatenated), np.max(concatenated)]
+        scale.append(data)
+    visualize_pf(pf=moea_pf, label='RVEA PF', color='blue', scale=scale, pf_true=pf_true)
+    scale = []
+    for i in range(n_objectives):
+        concatenated = np.concatenate([sur_pf[:, i], pf_true[:, i]])
+        data = [np.min(concatenated), np.max(concatenated)]
+        scale.append(data)
+    visualize_pf(pf=sur_pf, label='SURR PF', color='g', scale=scale, pf_true=pf_true)
+    plt.show()
+
+
 def post_mean_std(data: list | np.ndarray):
     return np.mean(data), np.std(data)
@@ -310,5 +427,6 @@ def fast_seed(seed: int) -> None:
         'd4': 'DTLZ4c',
         'd7': 'DTLZ7b',
     })
-    main(problems.d7, do_plot=True, print_progress=True, do_train=True)
+    main(problems.d1, do_plot=True, print_progress=True, do_train=True)
+    # train_with_moea_data(problems.d4)
     # main_benchmark(problems.d1)
diff --git a/nas/nas.py b/nas/nas.py
index 93e6599..9403ca1 100644
--- a/nas/nas.py
+++ b/nas/nas.py
@@ -5,6 +5,9 @@ from multiprocessing import Pool
 from multiprocessing import Manager as LockManager
 from typing import List
 
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
 
 sys.path.append('..')
 
@@ -299,5 +302,70 @@ def main():
             print(f'Pickle Dump Error: {e}')
 
 
+def single_model_run():
+    n_args = (10, 3)
+
+    class Net(nn.Module):
+        def __init__(self, s):
+            super(Net, self).__init__()
+            net = [
+                nn.Linear(n_args[0], s[0]),
+            ]
+            for i in range(len(s) - 1):
+                net.append(nn.ReLU())
+                net.append(nn.Linear(s[i], s[i + 1]))
+            net.append(nn.ReLU())
+            net.append(nn.Linear(s[-1], n_args[1]))
+            self.net = nn.Sequential(*net)
+
+        def forward(self, x):
+            return self.net(x)
+
+    net = Net([100, 200, 200, 200, 100])
+    args = get_args()
+    args.k_spt = 2000
+    args.k_qry = 2000
+    dataset, norm = get_dataset(args,
+                                normalize_targets=True,
+                                problem_name='DTLZ4c',
+                                pf_ratio=0,
+                                dim=1)
+    train_x, train_y, test_x, test_y = dataset[1]
+    dev = torch.device('cuda:0')
+
+    def remove_one_from_shape(x):
+        s = list(x.shape)
+        s = [ss for ss in s if ss != 1]
+        x = x.reshape(s)
+        return torch.from_numpy(x).float().to(dev)
+
+    train_x = remove_one_from_shape(train_x)
+    train_y = remove_one_from_shape(train_y)
+    test_x = remove_one_from_shape(test_x)
+    test_y = remove_one_from_shape(test_y)
+
+    net = net.to(dev)
+
+    # train
+    net.train()
+    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
+    for i in range(207):
+        optimizer.zero_grad()
+        random_idx = np.random.choice(train_x.shape[0], 100)
+        x = train_x[random_idx]
+        y = train_y[random_idx]
+        y_pred = net(x)
+        loss = F.mse_loss(y_pred, y)
+        loss.backward()
+        optimizer.step()
+        print(f'Step {i}: {loss.item()}')
+
+    # test
+    net.eval()
+    y_pred = net(test_x)
+    loss = F.mse_loss(y_pred, test_y)
+    print(f'Test Loss: {loss.item()}')
+
+
 if __name__ == '__main__':
-    main()
+    single_model_run()
diff --git a/problem_config/example.py b/problem_config/example.py
index f0d6532..828a876 100644
--- a/problem_config/example.py
+++ b/problem_config/example.py
@@ -50,6 +50,8 @@ def get_network_structure(args):
         ('relu', [True]),
         ('linear', [200, 200]),
         ('relu', [True]),
+        ('linear', [200, 200]),
+        ('relu', [True]),
         ('linear', [100, 200]),
         ('relu', [True]),
         ('linear', [n_args_out, 100]),
diff --git a/utils.py b/utils.py
index 507dc60..7e0f00d 100644
--- a/utils.py
+++ b/utils.py
@@ -94,7 +94,7 @@ def draw_curve(n, m):
 
 
 if __name__ == '__main__':
-    _n, _m = 300, 50
+    _n, _m = 2000, 100
     # draw_curve(_n, _m)
     v = calculate_confidence_k(_n, _m)
     print(v)
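
Usage sketch (illustrative, not part of the patch): the MOEA baseline now stops
on an evaluation budget, termination=('n_eval', ...), instead of a generation
count, so it consumes the same number of true function evaluations as the
surrogate-assisted loop in main() (fn_eval_limit = 302). The snippet below
mirrors that setup under two assumptions: pymoo >= 0.6, and pymoo's built-in
DTLZ2 standing in for this repo's get_custom_problem(); the constants are the
ones used in main().

    from pymoo.algorithms.moo.rvea import RVEA
    from pymoo.indicators.igd import IGD
    from pymoo.optimize import minimize
    from pymoo.problems import get_problem
    from pymoo.util.ref_dirs import get_reference_directions

    # stand-in for get_custom_problem(): 10 variables, 3 objectives
    problem = get_problem("dtlz2", n_var=10, n_obj=3)

    # RVEA is steered by reference directions; "das-dennis" with
    # n_partitions=8 on a 3-objective simplex yields 45 directions
    ref_dirs = get_reference_directions("das-dennis", 3, n_partitions=8)
    algorithm = RVEA(ref_dirs=ref_dirs, pop_size=50)

    # budget-based termination: stop after ~302 true evaluations rather
    # than after a fixed number of generations
    res = minimize(problem, algorithm, termination=('n_eval', 302),
                   save_history=True, verbose=False)

    # same IGD bookkeeping as get_moea_data(): distance to the true
    # Pareto front, normalized to [0, 1]
    metric = IGD(problem.pareto_front(ref_dirs), zero_to_one=True)
    print(f'evals: {res.algorithm.evaluator.n_eval}, IGD: {metric.do(res.F):.4f}')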