From 7f4eb319a0e1682fc81591763d078dd2ac025055 Mon Sep 17 00:00:00 2001 From: ZDandsomSP <1115854107@qq.com> Date: Tue, 24 Oct 2023 22:02:49 +0800 Subject: [PATCH] init code --- data_provider/data_factory.py | 94 ++ data_provider/data_loader.py | 856 ++++++++++++++++++ data_provider/m4.py | 138 +++ data_provider/uea.py | 125 +++ experiments/exp_basic.py | 50 + experiments/exp_long_term_forecasting.py | 275 ++++++ .../exp_long_term_forecasting_partial.py | 320 +++++++ layers/Embed.py | 190 ++++ layers/SelfAttention_Family.py | 420 +++++++++ layers/Transformer_EncDec.py | 135 +++ model/Flashformer.py | 136 +++ model/Flowformer.py | 145 +++ model/Informer.py | 166 ++++ model/Reformer.py | 163 ++++ model/Transformer.py | 146 +++ model/iFlashformer.py | 186 ++++ model/iFlowformer.py | 185 ++++ model/iInformer.py | 186 ++++ model/iReformer.py | 185 ++++ model/iTransformer.py | 165 ++++ run.py | 180 ++++ scripts/boost_performance/ECL/iFlowformer.sh | 174 ++++ scripts/boost_performance/ECL/iInformer.sh | 174 ++++ scripts/boost_performance/ECL/iReformer.sh | 174 ++++ scripts/boost_performance/ECL/iTransformer.sh | 174 ++++ .../boost_performance/Traffic/iFlowformer.sh | 182 ++++ .../boost_performance/Traffic/iInformer.sh | 182 ++++ .../boost_performance/Traffic/iReformer.sh | 182 ++++ .../boost_performance/Traffic/iTransformer.sh | 182 ++++ .../boost_performance/Weather/iFlowformer.sh | 183 ++++ .../boost_performance/Weather/iInformer.sh | 183 ++++ .../boost_performance/Weather/iReformer.sh | 183 ++++ .../boost_performance/Weather/iTransformer.sh | 183 ++++ .../ECL/iFlashTransformer.sh | 173 ++++ .../Traffic/iFlashTransformer.sh | 181 ++++ .../Weather/iFlashTransformer.sh | 183 ++++ .../efficient_attentions/iFlashTransformer.sh | 0 .../increasing_lookback/ECL/iFlowformer.sh | 128 +++ scripts/increasing_lookback/ECL/iInformer.sh | 128 +++ scripts/increasing_lookback/ECL/iReformer.sh | 128 +++ .../increasing_lookback/ECL/iTransformer.sh | 128 +++ .../Traffic/iFlowformer.sh | 128 +++ .../increasing_lookback/Traffic/iInformer.sh | 128 +++ .../increasing_lookback/Traffic/iReformer.sh | 128 +++ .../Traffic/iTransformer.sh | 128 +++ .../multivariate_forecast/ECL/iTransformer.sh | 105 +++ .../ETTh2/iTransformer.sh | 95 ++ .../Pems/iTransformer.sh | 101 +++ .../Solar/iTransformer.sh | 99 ++ .../Traffic/iTransformer.sh | 103 +++ .../Weather/iTransformer.sh | 98 ++ .../Electricity/iFlowformer.sh | 53 ++ .../Electricity/iInformer.sh | 53 ++ .../Electricity/iReformer.sh | 53 ++ .../Electricity/iTransformer.sh | 53 ++ .../Solar/iFlowformer.sh | 55 ++ .../variate_generalization/Solar/iInformer.sh | 55 ++ .../variate_generalization/Solar/iReformer.sh | 55 ++ .../Solar/iTransformer.sh | 55 ++ .../Traffic/iFlowformer.sh | 53 ++ .../Traffic/iInformer.sh | 53 ++ .../Traffic/iReformer.sh | 54 ++ .../Traffic/iTransformer.sh | 53 ++ utils/masking.py | 26 + utils/metrics.py | 41 + utils/timefeatures.py | 148 +++ utils/tools.py | 115 +++ 67 files changed, 9739 insertions(+) create mode 100644 data_provider/data_factory.py create mode 100644 data_provider/data_loader.py create mode 100644 data_provider/m4.py create mode 100644 data_provider/uea.py create mode 100644 experiments/exp_long_term_forecasting.py create mode 100644 experiments/exp_long_term_forecasting_partial.py create mode 100644 layers/Embed.py create mode 100644 layers/SelfAttention_Family.py create mode 100644 layers/Transformer_EncDec.py create mode 100644 model/Flashformer.py create mode 100644 model/Flowformer.py create mode 100644 model/Informer.py create 
mode 100644 model/Reformer.py create mode 100644 model/Transformer.py create mode 100644 model/iFlashformer.py create mode 100644 model/iFlowformer.py create mode 100644 model/iInformer.py create mode 100644 model/iReformer.py create mode 100644 model/iTransformer.py create mode 100644 run.py create mode 100644 scripts/boost_performance/ECL/iFlowformer.sh create mode 100644 scripts/boost_performance/ECL/iInformer.sh create mode 100644 scripts/boost_performance/ECL/iReformer.sh create mode 100644 scripts/boost_performance/ECL/iTransformer.sh create mode 100644 scripts/boost_performance/Traffic/iFlowformer.sh create mode 100644 scripts/boost_performance/Traffic/iInformer.sh create mode 100644 scripts/boost_performance/Traffic/iReformer.sh create mode 100644 scripts/boost_performance/Traffic/iTransformer.sh create mode 100644 scripts/boost_performance/Weather/iFlowformer.sh create mode 100644 scripts/boost_performance/Weather/iInformer.sh create mode 100644 scripts/boost_performance/Weather/iReformer.sh create mode 100644 scripts/efficient_attentions/ECL/iFlashTransformer.sh create mode 100644 scripts/efficient_attentions/Traffic/iFlashTransformer.sh create mode 100644 scripts/efficient_attentions/Weather/iFlashTransformer.sh delete mode 100644 scripts/efficient_attentions/iFlashTransformer.sh create mode 100644 scripts/increasing_lookback/ECL/iFlowformer.sh create mode 100644 scripts/increasing_lookback/ECL/iInformer.sh create mode 100644 scripts/increasing_lookback/ECL/iReformer.sh create mode 100644 scripts/increasing_lookback/ECL/iTransformer.sh create mode 100644 scripts/increasing_lookback/Traffic/iFlowformer.sh create mode 100644 scripts/increasing_lookback/Traffic/iInformer.sh create mode 100644 scripts/increasing_lookback/Traffic/iReformer.sh create mode 100644 scripts/multivariate_forecast/ECL/iTransformer.sh create mode 100644 scripts/multivariate_forecast/ETTh2/iTransformer.sh create mode 100644 scripts/multivariate_forecast/Pems/iTransformer.sh create mode 100644 scripts/multivariate_forecast/Solar/iTransformer.sh create mode 100644 scripts/multivariate_forecast/Weather/iTransformer.sh create mode 100644 scripts/variate_generalization/Electricity/iFlowformer.sh create mode 100644 scripts/variate_generalization/Electricity/iInformer.sh create mode 100644 scripts/variate_generalization/Electricity/iReformer.sh create mode 100644 scripts/variate_generalization/Solar/iFlowformer.sh create mode 100644 scripts/variate_generalization/Solar/iInformer.sh create mode 100644 scripts/variate_generalization/Solar/iReformer.sh create mode 100644 scripts/variate_generalization/Solar/iTransformer.sh create mode 100644 scripts/variate_generalization/Traffic/iFlowformer.sh create mode 100644 scripts/variate_generalization/Traffic/iInformer.sh create mode 100644 scripts/variate_generalization/Traffic/iReformer.sh create mode 100644 scripts/variate_generalization/Traffic/iTransformer.sh create mode 100644 utils/masking.py create mode 100644 utils/metrics.py create mode 100644 utils/timefeatures.py create mode 100644 utils/tools.py diff --git a/data_provider/data_factory.py b/data_provider/data_factory.py new file mode 100644 index 000000000..56820ca9b --- /dev/null +++ b/data_provider/data_factory.py @@ -0,0 +1,94 @@ +from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4, PSMSegLoader, \ + MSLSegLoader, SMAPSegLoader, SMDSegLoader, SWATSegLoader, UEAloader, Dataset_Solar, Dataset_PEMS +from data_provider.uea import collate_fn +from torch.utils.data 
import DataLoader + +data_dict = { + 'ETTh1': Dataset_ETT_hour, + 'ETTh2': Dataset_ETT_hour, + 'ETTm1': Dataset_ETT_minute, + 'ETTm2': Dataset_ETT_minute, + 'Solar': Dataset_Solar, + 'PEMS':Dataset_PEMS, + 'custom': Dataset_Custom, + 'm4': Dataset_M4, + 'PSM': PSMSegLoader, + 'MSL': MSLSegLoader, + 'SMAP': SMAPSegLoader, + 'SMD': SMDSegLoader, + 'SWAT': SWATSegLoader, + 'UEA': UEAloader +} + + +def data_provider(args, flag): + Data = data_dict[args.data] + timeenc = 0 if args.embed != 'timeF' else 1 + + if flag == 'test': + shuffle_flag = False + drop_last = True + if args.task_name == 'anomaly_detection' or args.task_name == 'classification': + batch_size = args.batch_size + else: + batch_size = 1 # bsz=1 for evaluation + freq = args.freq + else: + shuffle_flag = True + drop_last = True + batch_size = args.batch_size # bsz for train and valid + freq = args.freq + + if args.task_name == 'anomaly_detection': + drop_last = False + data_set = Data( + root_path=args.root_path, + win_size=args.seq_len, + flag=flag, + ) + print(flag, len(data_set)) + data_loader = DataLoader( + data_set, + batch_size=batch_size, + shuffle=shuffle_flag, + num_workers=args.num_workers, + drop_last=drop_last) + return data_set, data_loader + elif args.task_name == 'classification': + drop_last = False + data_set = Data( + root_path=args.root_path, + flag=flag, + ) + + data_loader = DataLoader( + data_set, + batch_size=batch_size, + shuffle=shuffle_flag, + num_workers=args.num_workers, + drop_last=drop_last, + collate_fn=lambda x: collate_fn(x, max_len=args.seq_len) + ) + return data_set, data_loader + else: + if args.data == 'm4': + drop_last = False + data_set = Data( + root_path=args.root_path, + data_path=args.data_path, + flag=flag, + size=[args.seq_len, args.label_len, args.pred_len], + features=args.features, + target=args.target, + timeenc=timeenc, + freq=freq, + seasonal_patterns=args.seasonal_patterns + ) + print(flag, len(data_set)) + data_loader = DataLoader( + data_set, + batch_size=batch_size, + shuffle=shuffle_flag, + num_workers=args.num_workers, + drop_last=drop_last) + return data_set, data_loader diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py new file mode 100644 index 000000000..4dbe726a6 --- /dev/null +++ b/data_provider/data_loader.py @@ -0,0 +1,856 @@ +import os +import numpy as np +import pandas as pd +import glob +import re +import torch +from torch.utils.data import Dataset, DataLoader +from sklearn.preprocessing import StandardScaler +from utils.timefeatures import time_features +from data_provider.m4 import M4Dataset, M4Meta +from data_provider.uea import subsample, interpolate_missing, Normalizer +from sktime.datasets import load_from_tsfile_to_dataframe +import warnings + +warnings.filterwarnings('ignore') + + +class Dataset_ETT_hour(Dataset): + def __init__(self, root_path, flag='train', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + # info + if size == None: + self.seq_len = 24 * 4 * 4 + self.label_len = 24 * 4 + self.pred_len = 24 * 4 + else: + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + self.data_path = data_path + 
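
For reference, a minimal sketch of how the data_provider factory defined in data_provider/data_factory.py above is typically driven. Only the field names are taken from the factory code; the SimpleNamespace wrapper and the concrete values are illustrative assumptions, not part of this patch.

    from types import SimpleNamespace
    from data_provider.data_factory import data_provider

    # Hypothetical arguments; the field names mirror those read inside data_provider above.
    args = SimpleNamespace(
        task_name='long_term_forecast', data='ETTh1', embed='timeF', freq='h',
        root_path='./dataset/ETT-small/', data_path='ETTh1.csv',
        features='M', target='OT', seq_len=96, label_len=48, pred_len=96,
        batch_size=32, num_workers=0, seasonal_patterns=None)

    train_set, train_loader = data_provider(args, flag='train')  # shuffled, batch_size=32
    test_set, test_loader = data_provider(args, flag='test')     # batch_size forced to 1 for forecasting
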
self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + df_raw = pd.read_csv(os.path.join(self.root_path, + self.data_path)) + + border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len] + border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24] + border1 = border1s[self.set_type] + border2 = border2s[self.set_type] + + if self.features == 'M' or self.features == 'MS': + cols_data = df_raw.columns[1:] + df_data = df_raw[cols_data] + elif self.features == 'S': + df_data = df_raw[[self.target]] + + if self.scale: + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data.values) + data = self.scaler.transform(df_data.values) + else: + data = df_data.values + + df_stamp = df_raw[['date']][border1:border2] + df_stamp['date'] = pd.to_datetime(df_stamp.date) + if self.timeenc == 0: + df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) + df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) + df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) + df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) + data_stamp = df_stamp.drop(['date'], 1).values + elif self.timeenc == 1: + data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) + data_stamp = data_stamp.transpose(1, 0) + + self.data_x = data[border1:border2] + self.data_y = data[border1:border2] + self.data_stamp = data_stamp + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = self.data_stamp[s_begin:s_end] + seq_y_mark = self.data_stamp[r_begin:r_end] + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + +class Dataset_ETT_minute(Dataset): + def __init__(self, root_path, flag='train', size=None, + features='S', data_path='ETTm1.csv', + target='OT', scale=True, timeenc=0, freq='t', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + # info + if size == None: + self.seq_len = 24 * 4 * 4 + self.label_len = 24 * 4 + self.pred_len = 24 * 4 + else: + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + self.data_path = data_path + self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + df_raw = pd.read_csv(os.path.join(self.root_path, + self.data_path)) + + border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len] + border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4] + border1 = border1s[self.set_type] + border2 = border2s[self.set_type] + + if self.features == 'M' or self.features == 'MS': + cols_data = df_raw.columns[1:] + df_data = df_raw[cols_data] + elif self.features == 'S': + df_data = df_raw[[self.target]] + + if self.scale: + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data.values) + data = self.scaler.transform(df_data.values) + else: + 
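
To make the sliding-window indexing of Dataset_ETT_hour.__getitem__ above concrete, here is a self-contained sketch of the same arithmetic with illustrative window sizes (the class defaults above are seq_len=384, label_len=96, pred_len=96):

    import numpy as np

    seq_len, label_len, pred_len = 96, 48, 96      # illustrative sizes
    data = np.arange(1000).reshape(-1, 1)          # stand-in for the scaled series

    index = 10
    s_begin, s_end = index, index + seq_len                  # encoder window: rows 10..105
    r_begin, r_end = s_end - label_len, s_end + pred_len     # decoder window: rows 58..201

    seq_x = data[s_begin:s_end]    # shape (96, 1): model input
    seq_y = data[r_begin:r_end]    # shape (144, 1): 48 overlapping "label" steps + 96 steps to predict
    assert len(data) - seq_len - pred_len + 1 == 809   # the count returned by __len__ for this toy series
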
data = df_data.values + + df_stamp = df_raw[['date']][border1:border2] + df_stamp['date'] = pd.to_datetime(df_stamp.date) + if self.timeenc == 0: + df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) + df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) + df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) + df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) + df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute, 1) + df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15) + data_stamp = df_stamp.drop(['date'], 1).values + elif self.timeenc == 1: + data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) + data_stamp = data_stamp.transpose(1, 0) + + self.data_x = data[border1:border2] + self.data_y = data[border1:border2] + self.data_stamp = data_stamp + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = self.data_stamp[s_begin:s_end] + seq_y_mark = self.data_stamp[r_begin:r_end] + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + +class Dataset_Custom(Dataset): + def __init__(self, root_path, flag='train', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + # info + if size == None: + self.seq_len = 24 * 4 * 4 + self.label_len = 24 * 4 + self.pred_len = 24 * 4 + else: + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + self.data_path = data_path + self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + df_raw = pd.read_csv(os.path.join(self.root_path, + self.data_path)) + + ''' + df_raw.columns: ['date', ...(other features), target feature] + ''' + cols = list(df_raw.columns) + cols.remove(self.target) + cols.remove('date') + df_raw = df_raw[['date'] + cols + [self.target]] + num_train = int(len(df_raw) * 0.7) + num_test = int(len(df_raw) * 0.2) + num_vali = len(df_raw) - num_train - num_test + border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] + border2s = [num_train, num_train + num_vali, len(df_raw)] + border1 = border1s[self.set_type] + border2 = border2s[self.set_type] + + if self.features == 'M' or self.features == 'MS': + cols_data = df_raw.columns[1:] + df_data = df_raw[cols_data] + elif self.features == 'S': + df_data = df_raw[[self.target]] + + if self.scale: + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data.values) + data = self.scaler.transform(df_data.values) + else: + data = df_data.values + + df_stamp = df_raw[['date']][border1:border2] + df_stamp['date'] = pd.to_datetime(df_stamp.date) + if self.timeenc == 0: + df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) + df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) + df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 
+ df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) + data_stamp = df_stamp.drop(['date'], 1).values + elif self.timeenc == 1: + data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) + data_stamp = data_stamp.transpose(1, 0) + + self.data_x = data[border1:border2] + self.data_y = data[border1:border2] + self.data_stamp = data_stamp + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = self.data_stamp[s_begin:s_end] + seq_y_mark = self.data_stamp[r_begin:r_end] + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + +class Dataset_PEMS(Dataset): + def __init__(self, root_path, flag='train', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + # info + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + self.data_path = data_path + self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + data_file = os.path.join(self.root_path, self.data_path) + data = np.load(data_file, allow_pickle=True) + data = data['data'][:, :, 0] + + train_ratio = 0.6 + valid_ratio = 0.2 + train_data = data[:int(train_ratio * len(data))] + valid_data = data[int(train_ratio * len(data)): int((train_ratio + valid_ratio) * len(data))] + test_data = data[int((train_ratio + valid_ratio) * len(data)):] + total_data = [train_data, valid_data, test_data] + data = total_data[self.set_type] + + if self.scale: + self.scaler.fit(train_data) + data = self.scaler.transform(data) + + df = pd.DataFrame(data) + df = df.fillna(method='ffill', limit=len(df)).fillna(method='bfill', limit=len(df)).values + + self.data_x = df + self.data_y = df + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = torch.zeros((seq_x.shape[0], 1)) + seq_y_mark = torch.zeros((seq_x.shape[0], 1)) + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + +class Dataset_Solar(Dataset): + def __init__(self, root_path, flag='train', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + # info + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + 
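
The ratio-based split used by Dataset_Custom above (and, with different ratios, by Dataset_PEMS and Dataset_Solar) is easiest to see with concrete numbers; the series length below is hypothetical:

    n, seq_len = 10000, 96                        # hypothetical series length and lookback
    num_train = int(n * 0.7)                      # 7000
    num_test = int(n * 0.2)                       # 2000
    num_vali = n - num_train - num_test           # 1000

    border1s = [0, num_train - seq_len, n - num_test - seq_len]   # [0, 6904, 7904]
    border2s = [num_train, num_train + num_vali, n]               # [7000, 8000, 10000]
    # val/test start seq_len rows early so their first window is fully observed,
    # while the scaler is fit only on the training slice border1s[0]:border2s[0].
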
self.data_path = data_path + self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + df_raw = [] + with open(os.path.join(self.root_path, self.data_path), "r", encoding='utf-8') as f: + for line in f.readlines(): + line = line.strip('\n').split(',') + data_line = np.stack([float(i) for i in line]) + df_raw.append(data_line) + df_raw = np.stack(df_raw, 0) + df_raw = pd.DataFrame(df_raw) + + num_train = int(len(df_raw) * 0.7) + num_test = int(len(df_raw) * 0.2) + num_valid = int(len(df_raw) * 0.1) + border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] + border2s = [num_train, num_train + num_valid, len(df_raw)] + border1 = border1s[self.set_type] + border2 = border2s[self.set_type] + + df_data = df_raw.values + + if self.scale: + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data) + data = self.scaler.transform(df_data) + else: + data = df_data + + self.data_x = data[border1:border2] + self.data_y = data[border1:border2] + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = torch.zeros((seq_x.shape[0], 1)) + seq_y_mark = torch.zeros((seq_x.shape[0], 1)) + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + +class Dataset_M4(Dataset): + def __init__(self, root_path, flag='pred', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=False, inverse=False, timeenc=0, freq='15min', + seasonal_patterns='Yearly'): + # size [seq_len, label_len, pred_len] + # init + self.features = features + self.target = target + self.scale = scale + self.inverse = inverse + self.timeenc = timeenc + self.root_path = root_path + + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + + self.seasonal_patterns = seasonal_patterns + self.history_size = M4Meta.history_size[seasonal_patterns] + self.window_sampling_limit = int(self.history_size * self.pred_len) + self.flag = flag + + self.__read_data__() + + def __read_data__(self): + # M4Dataset.initialize() + if self.flag == 'train': + dataset = M4Dataset.load(training=True, dataset_file=self.root_path) + else: + dataset = M4Dataset.load(training=False, dataset_file=self.root_path) + training_values = np.array( + [v[~np.isnan(v)] for v in + dataset.values[dataset.groups == self.seasonal_patterns]]) # split different frequencies + self.ids = np.array([i for i in dataset.ids[dataset.groups == self.seasonal_patterns]]) + self.timeseries = [ts for ts in training_values] + + def __getitem__(self, index): + insample = np.zeros((self.seq_len, 1)) + insample_mask = np.zeros((self.seq_len, 1)) + outsample = np.zeros((self.pred_len + self.label_len, 1)) + outsample_mask = np.zeros((self.pred_len + self.label_len, 1)) # m4 dataset + + sampled_timeseries = self.timeseries[index] + cut_point = np.random.randint(low=max(1, len(sampled_timeseries) - self.window_sampling_limit), + high=len(sampled_timeseries), + size=1)[0] + + insample_window = sampled_timeseries[max(0, cut_point - self.seq_len):cut_point] + insample[-len(insample_window):, 0] = insample_window + insample_mask[-len(insample_window):, 0] = 1.0 + outsample_window = sampled_timeseries[ + cut_point - 
self.label_len:min(len(sampled_timeseries), cut_point + self.pred_len)] + outsample[:len(outsample_window), 0] = outsample_window + outsample_mask[:len(outsample_window), 0] = 1.0 + return insample, outsample, insample_mask, outsample_mask + + def __len__(self): + return len(self.timeseries) + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + def last_insample_window(self): + """ + The last window of insample size of all timeseries. + This function does not support batching and does not reshuffle timeseries. + + :return: Last insample window of all timeseries. Shape "timeseries, insample size" + """ + insample = np.zeros((len(self.timeseries), self.seq_len)) + insample_mask = np.zeros((len(self.timeseries), self.seq_len)) + for i, ts in enumerate(self.timeseries): + ts_last_window = ts[-self.seq_len:] + insample[i, -len(ts):] = ts_last_window + insample_mask[i, -len(ts):] = 1.0 + return insample, insample_mask + + +class PSMSegLoader(Dataset): + def __init__(self, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = pd.read_csv(os.path.join(root_path, 'train.csv')) + data = data.values[:, 1:] + data = np.nan_to_num(data) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = pd.read_csv(os.path.join(root_path, 'test.csv')) + test_data = test_data.values[:, 1:] + test_data = np.nan_to_num(test_data) + self.test = self.scaler.transform(test_data) + self.train = data + self.val = self.test + self.test_labels = pd.read_csv(os.path.join(root_path, 'test_label.csv')).values[:, 1:] + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class MSLSegLoader(Dataset): + def __init__(self, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = np.load(os.path.join(root_path, "MSL_train.npy")) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = np.load(os.path.join(root_path, "MSL_test.npy")) + self.test = self.scaler.transform(test_data) + self.train = data + self.val = self.test + self.test_labels = np.load(os.path.join(root_path, "MSL_test_label.npy")) + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // 
self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class SMAPSegLoader(Dataset): + def __init__(self, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = np.load(os.path.join(root_path, "SMAP_train.npy")) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = np.load(os.path.join(root_path, "SMAP_test.npy")) + self.test = self.scaler.transform(test_data) + self.train = data + self.val = self.test + self.test_labels = np.load(os.path.join(root_path, "SMAP_test_label.npy")) + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class SMDSegLoader(Dataset): + def __init__(self, root_path, win_size, step=100, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = np.load(os.path.join(root_path, "SMD_train.npy")) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = np.load(os.path.join(root_path, "SMD_test.npy")) + self.test = self.scaler.transform(test_data) + self.train = data + data_len = len(self.train) + self.val = self.train[(int)(data_len * 0.8):] + self.test_labels = np.load(os.path.join(root_path, "SMD_test_label.npy")) + + def __len__(self): + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif 
(self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class SWATSegLoader(Dataset): + def __init__(self, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + + train_data = pd.read_csv(os.path.join(root_path, 'swat_train2.csv')) + test_data = pd.read_csv(os.path.join(root_path, 'swat2.csv')) + labels = test_data.values[:, -1:] + train_data = train_data.values[:, :-1] + test_data = test_data.values[:, :-1] + + self.scaler.fit(train_data) + train_data = self.scaler.transform(train_data) + test_data = self.scaler.transform(test_data) + self.train = train_data + self.test = test_data + self.val = test_data + self.test_labels = labels + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + """ + Number of images in the object dataset. + """ + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class UEAloader(Dataset): + """ + Dataset class for datasets included in: + Time Series Classification Archive (www.timeseriesclassification.com) + Argument: + limit_size: float in (0, 1) for debug + Attributes: + all_df: (num_samples * seq_len, num_columns) dataframe indexed by integer indices, with multiple rows corresponding to the same index (sample). + Each row is a time step; Each column contains either metadata (e.g. timestamp) or a feature. 
+ feature_df: (num_samples * seq_len, feat_dim) dataframe; contains the subset of columns of `all_df` which correspond to selected features + feature_names: names of columns contained in `feature_df` (same as feature_df.columns) + all_IDs: (num_samples,) series of IDs contained in `all_df`/`feature_df` (same as all_df.index.unique() ) + labels_df: (num_samples, num_labels) pd.DataFrame of label(s) for each sample + max_seq_len: maximum sequence (time series) length. If None, script argument `max_seq_len` will be used. + (Moreover, script argument overrides this attribute) + """ + + def __init__(self, root_path, file_list=None, limit_size=None, flag=None): + self.root_path = root_path + self.all_df, self.labels_df = self.load_all(root_path, file_list=file_list, flag=flag) + self.all_IDs = self.all_df.index.unique() # all sample IDs (integer indices 0 ... num_samples-1) + + if limit_size is not None: + if limit_size > 1: + limit_size = int(limit_size) + else: # interpret as proportion if in (0, 1] + limit_size = int(limit_size * len(self.all_IDs)) + self.all_IDs = self.all_IDs[:limit_size] + self.all_df = self.all_df.loc[self.all_IDs] + + # use all features + self.feature_names = self.all_df.columns + self.feature_df = self.all_df + + # pre_process + normalizer = Normalizer() + self.feature_df = normalizer.normalize(self.feature_df) + print(len(self.all_IDs)) + + def load_all(self, root_path, file_list=None, flag=None): + """ + Loads datasets from csv files contained in `root_path` into a dataframe, optionally choosing from `pattern` + Args: + root_path: directory containing all individual .csv files + file_list: optionally, provide a list of file paths within `root_path` to consider. + Otherwise, entire `root_path` contents will be used. + Returns: + all_df: a single (possibly concatenated) dataframe with all data corresponding to specified files + labels_df: dataframe containing label(s) for each sample + """ + # Select paths for training and evaluation + if file_list is None: + data_paths = glob.glob(os.path.join(root_path, '*')) # list of all paths + else: + data_paths = [os.path.join(root_path, p) for p in file_list] + if len(data_paths) == 0: + raise Exception('No files found using: {}'.format(os.path.join(root_path, '*'))) + if flag is not None: + data_paths = list(filter(lambda x: re.search(flag, x), data_paths)) + input_paths = [p for p in data_paths if os.path.isfile(p) and p.endswith('.ts')] + if len(input_paths) == 0: + raise Exception("No .ts files found using pattern: '{}'".format(pattern)) + + all_df, labels_df = self.load_single(input_paths[0]) # a single file contains dataset + + return all_df, labels_df + + def load_single(self, filepath): + df, labels = load_from_tsfile_to_dataframe(filepath, return_separate_X_and_y=True, + replace_missing_vals_with='NaN') + labels = pd.Series(labels, dtype="category") + self.class_names = labels.cat.categories + labels_df = pd.DataFrame(labels.cat.codes, + dtype=np.int8) # int8-32 gives an error when using nn.CrossEntropyLoss + + lengths = df.applymap( + lambda x: len(x)).values # (num_samples, num_dimensions) array containing the length of each series + + horiz_diffs = np.abs(lengths - np.expand_dims(lengths[:, 0], -1)) + + if np.sum(horiz_diffs) > 0: # if any row (sample) has varying length across dimensions + df = df.applymap(subsample) + + lengths = df.applymap(lambda x: len(x)).values + vert_diffs = np.abs(lengths - np.expand_dims(lengths[0, :], 0)) + if np.sum(vert_diffs) > 0: # if any column (dimension) has varying length across 
samples + self.max_seq_len = int(np.max(lengths[:, 0])) + else: + self.max_seq_len = lengths[0, 0] + + # First create a (seq_len, feat_dim) dataframe for each sample, indexed by a single integer ("ID" of the sample) + # Then concatenate into a (num_samples * seq_len, feat_dim) dataframe, with multiple rows corresponding to the + # sample index (i.e. the same scheme as all datasets in this project) + + df = pd.concat((pd.DataFrame({col: df.loc[row, col] for col in df.columns}).reset_index(drop=True).set_index( + pd.Series(lengths[row, 0] * [row])) for row in range(df.shape[0])), axis=0) + + # Replace NaN values + grp = df.groupby(by=df.index) + df = grp.transform(interpolate_missing) + + return df, labels_df + + def instance_norm(self, case): + if self.root_path.count('EthanolConcentration') > 0: # special process for numerical stability + mean = case.mean(0, keepdim=True) + case = case - mean + stdev = torch.sqrt(torch.var(case, dim=1, keepdim=True, unbiased=False) + 1e-5) + case /= stdev + return case + else: + return case + + def __getitem__(self, ind): + return self.instance_norm(torch.from_numpy(self.feature_df.loc[self.all_IDs[ind]].values)), \ + torch.from_numpy(self.labels_df.loc[self.all_IDs[ind]].values) + + def __len__(self): + return len(self.all_IDs) diff --git a/data_provider/m4.py b/data_provider/m4.py new file mode 100644 index 000000000..8a2e3f1a0 --- /dev/null +++ b/data_provider/m4.py @@ -0,0 +1,138 @@ +# This source code is provided for the purposes of scientific reproducibility +# under the following limited license from Element AI Inc. The code is an +# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis +# expansion analysis for interpretable time series forecasting, +# https://arxiv.org/abs/1905.10437). The copyright to the source code is +# licensed under the Creative Commons - Attribution-NonCommercial 4.0 +# International license (CC BY-NC 4.0): +# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether +# for the benefit of third parties or internally in production) requires an +# explicit license. The subject-matter of the N-BEATS model and associated +# materials are the property of Element AI Inc. and may be subject to patent +# protection. No license to patents is granted hereunder (whether express or +# implied). Copyright © 2020 Element AI Inc. All rights reserved. + +""" +M4 Dataset +""" +import logging +import os +from collections import OrderedDict +from dataclasses import dataclass +from glob import glob + +import numpy as np +import pandas as pd +import patoolib +from tqdm import tqdm +import logging +import os +import pathlib +import sys +from urllib import request + + +def url_file_name(url: str) -> str: + """ + Extract file name from url. + + :param url: URL to extract file name from. + :return: File name. + """ + return url.split('/')[-1] if len(url) > 0 else '' + + +def download(url: str, file_path: str) -> None: + """ + Download a file to the given path. + + :param url: URL to download + :param file_path: Where to download the content. 
+ """ + + def progress(count, block_size, total_size): + progress_pct = float(count * block_size) / float(total_size) * 100.0 + sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(url, file_path, progress_pct)) + sys.stdout.flush() + + if not os.path.isfile(file_path): + opener = request.build_opener() + opener.addheaders = [('User-agent', 'Mozilla/5.0')] + request.install_opener(opener) + pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True) + f, _ = request.urlretrieve(url, file_path, progress) + sys.stdout.write('\n') + sys.stdout.flush() + file_info = os.stat(f) + logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.') + else: + file_info = os.stat(file_path) + logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.') + + +@dataclass() +class M4Dataset: + ids: np.ndarray + groups: np.ndarray + frequencies: np.ndarray + horizons: np.ndarray + values: np.ndarray + + @staticmethod + def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset': + """ + Load cached dataset. + + :param training: Load training part if_inverted training is True, test part otherwise. + """ + info_file = os.path.join(dataset_file, 'M4-info.csv') + train_cache_file = os.path.join(dataset_file, 'training.npz') + test_cache_file = os.path.join(dataset_file, 'test.npz') + m4_info = pd.read_csv(info_file) + return M4Dataset(ids=m4_info.M4id.values, + groups=m4_info.SP.values, + frequencies=m4_info.Frequency.values, + horizons=m4_info.Horizon.values, + values=np.load( + train_cache_file if training else test_cache_file, + allow_pickle=True)) + + +@dataclass() +class M4Meta: + seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly'] + horizons = [6, 8, 18, 13, 14, 48] + frequencies = [1, 4, 12, 1, 1, 24] + horizons_map = { + 'Yearly': 6, + 'Quarterly': 8, + 'Monthly': 18, + 'Weekly': 13, + 'Daily': 14, + 'Hourly': 48 + } # different predict length + frequency_map = { + 'Yearly': 1, + 'Quarterly': 4, + 'Monthly': 12, + 'Weekly': 1, + 'Daily': 1, + 'Hourly': 24 + } + history_size = { + 'Yearly': 1.5, + 'Quarterly': 1.5, + 'Monthly': 1.5, + 'Weekly': 10, + 'Daily': 10, + 'Hourly': 10 + } # from interpretable.gin + + +def load_m4_info() -> pd.DataFrame: + """ + Load M4Info file. + + :return: Pandas DataFrame of M4Info. + """ + return pd.read_csv(INFO_FILE_PATH) diff --git a/data_provider/uea.py b/data_provider/uea.py new file mode 100644 index 000000000..f0dd0ab9e --- /dev/null +++ b/data_provider/uea.py @@ -0,0 +1,125 @@ +import os +import numpy as np +import pandas as pd +import torch + + +def collate_fn(data, max_len=None): + """Build mini-batch tensors from a list of (X, mask) tuples. Mask input. Create + Args: + data: len(batch_size) list of tuples (X, y). + - X: torch tensor of shape (seq_length, feat_dim); variable seq_length. + - y: torch tensor of shape (num_labels,) : class indices or numerical targets + (for classification or regression, respectively). num_labels > 1 for multi-task models + max_len: global fixed sequence length. Used for architectures requiring fixed length input, + where the batch length cannot vary dynamically. 
Longer sequences are clipped, shorter are padded with 0s + Returns: + X: (batch_size, padded_length, feat_dim) torch tensor of masked features (input) + targets: (batch_size, padded_length, feat_dim) torch tensor of unmasked features (output) + target_masks: (batch_size, padded_length, feat_dim) boolean torch tensor + 0 indicates masked values to be predicted, 1 indicates unaffected/"active" feature values + padding_masks: (batch_size, padded_length) boolean tensor, 1 means keep vector at this position, 0 means padding + """ + + batch_size = len(data) + features, labels = zip(*data) + + # Stack and pad features and masks (convert 2D to 3D tensors, i.e. add batch dimension) + lengths = [X.shape[0] for X in features] # original sequence length for each time series + if max_len is None: + max_len = max(lengths) + + X = torch.zeros(batch_size, max_len, features[0].shape[-1]) # (batch_size, padded_length, feat_dim) + for i in range(batch_size): + end = min(lengths[i], max_len) + X[i, :end, :] = features[i][:end, :] + + targets = torch.stack(labels, dim=0) # (batch_size, num_labels) + + padding_masks = padding_mask(torch.tensor(lengths, dtype=torch.int16), + max_len=max_len) # (batch_size, padded_length) boolean tensor, "1" means keep + + return X, targets, padding_masks + + +def padding_mask(lengths, max_len=None): + """ + Used to mask padded positions: creates a (batch_size, max_len) boolean mask from a tensor of sequence lengths, + where 1 means keep element at this position (time step) + """ + batch_size = lengths.numel() + max_len = max_len or lengths.max_val() # trick works because of overloading of 'or' operator for non-boolean types + return (torch.arange(0, max_len, device=lengths.device) + .type_as(lengths) + .repeat(batch_size, 1) + .lt(lengths.unsqueeze(1))) + + +class Normalizer(object): + """ + Normalizes dataframe across ALL contained rows (time steps). Different from per-sample normalization. + """ + + def __init__(self, norm_type='standardization', mean=None, std=None, min_val=None, max_val=None): + """ + Args: + norm_type: choose from: + "standardization", "minmax": normalizes dataframe across ALL contained rows (time steps) + "per_sample_std", "per_sample_minmax": normalizes each sample separately (i.e. 
across only its own rows) + mean, std, min_val, max_val: optional (num_feat,) Series of pre-computed values + """ + + self.norm_type = norm_type + self.mean = mean + self.std = std + self.min_val = min_val + self.max_val = max_val + + def normalize(self, df): + """ + Args: + df: input dataframe + Returns: + df: normalized dataframe + """ + if self.norm_type == "standardization": + if self.mean is None: + self.mean = df.mean() + self.std = df.std() + return (df - self.mean) / (self.std + np.finfo(float).eps) + + elif self.norm_type == "minmax": + if self.max_val is None: + self.max_val = df.max() + self.min_val = df.min() + return (df - self.min_val) / (self.max_val - self.min_val + np.finfo(float).eps) + + elif self.norm_type == "per_sample_std": + grouped = df.groupby(by=df.index) + return (df - grouped.transform('mean')) / grouped.transform('std') + + elif self.norm_type == "per_sample_minmax": + grouped = df.groupby(by=df.index) + min_vals = grouped.transform('min') + return (df - min_vals) / (grouped.transform('max') - min_vals + np.finfo(float).eps) + + else: + raise (NameError(f'Normalize method "{self.norm_type}" not implemented')) + + +def interpolate_missing(y): + """ + Replaces NaN values in pd.Series `y` using linear interpolation + """ + if y.isna().any(): + y = y.interpolate(method='linear', limit_direction='both') + return y + + +def subsample(y, limit=256, factor=2): + """ + If a given Series is longer than `limit`, returns subsampled sequence by the specified integer factor + """ + if len(y) > limit: + return y[::factor].reset_index(drop=True) + return y diff --git a/experiments/exp_basic.py b/experiments/exp_basic.py index e69de29bb..935d65ef6 100644 --- a/experiments/exp_basic.py +++ b/experiments/exp_basic.py @@ -0,0 +1,50 @@ +import os +import torch +from model import Transformer, Informer, Reformer, Flowformer, Flashformer, \ + iTransformer, iInformer, iReformer, iFlowformer, iFlashformer + + +class Exp_Basic(object): + def __init__(self, args): + self.args = args + self.model_dict = { + 'Transformer': Transformer, + 'Informer': Informer, + 'Reformer': Reformer, + 'Flowformer': Flowformer, + 'Flashformer': Flashformer, + 'iTransformer': iTransformer, + 'iInformer': iInformer, + 'iReformer': iReformer, + 'iFlowformer': iFlowformer, + 'iFlashformer': iFlashformer, + } + self.device = self._acquire_device() + self.model = self._build_model().to(self.device) + + def _build_model(self): + raise NotImplementedError + return None + + def _acquire_device(self): + if self.args.use_gpu: + os.environ["CUDA_VISIBLE_DEVICES"] = str( + self.args.gpu) if not self.args.use_multi_gpu else self.args.devices + device = torch.device('cuda:{}'.format(self.args.gpu)) + print('Use GPU: cuda:{}'.format(self.args.gpu)) + else: + device = torch.device('cpu') + print('Use CPU') + return device + + def _get_data(self): + pass + + def vali(self): + pass + + def train(self): + pass + + def test(self): + pass diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py new file mode 100644 index 000000000..667e1f37b --- /dev/null +++ b/experiments/exp_long_term_forecasting.py @@ -0,0 +1,275 @@ +from data_provider.data_factory import data_provider +from experiments.exp_basic import Exp_Basic +from utils.tools import EarlyStopping, adjust_learning_rate, visual +from utils.metrics import metric +import torch +import torch.nn as nn +from torch import optim +import os +import time +import warnings +import numpy as np + +warnings.filterwarnings('ignore') + + +class 
Exp_Long_Term_Forecast(Exp_Basic): + def __init__(self, args): + super(Exp_Long_Term_Forecast, self).__init__(args) + + def _build_model(self): + model = self.model_dict[self.args.model].Model(self.args).float() + + if self.args.use_multi_gpu and self.args.use_gpu: + model = nn.DataParallel(model, device_ids=self.args.device_ids) + return model + + def _get_data(self, flag): + data_set, data_loader = data_provider(self.args, flag) + return data_set, data_loader + + def _select_optimizer(self): + model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) + return model_optim + + def _select_criterion(self): + criterion = nn.MSELoss() + return criterion + + def vali(self, vali_data, vali_loader, criterion): + total_loss = [] + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): + batch_x = batch_x.float().to(self.device) + batch_y = batch_y.float() + + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + else: + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + + pred = outputs.detach().cpu() + true = batch_y.detach().cpu() + + loss = criterion(pred, true) + + total_loss.append(loss) + total_loss = np.average(total_loss) + self.model.train() + return total_loss + + def train(self, setting): + train_data, train_loader = self._get_data(flag='train') + vali_data, vali_loader = self._get_data(flag='val') + test_data, test_loader = self._get_data(flag='test') + + path = os.path.join(self.args.checkpoints, setting) + if not os.path.exists(path): + os.makedirs(path) + + time_now = time.time() + + train_steps = len(train_loader) + early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) + + model_optim = self._select_optimizer() + criterion = self._select_criterion() + + if self.args.use_amp: + scaler = torch.cuda.amp.GradScaler() + + for epoch in range(self.args.train_epochs): + iter_count = 0 + train_loss = [] + + self.model.train() + epoch_time = time.time() + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): + iter_count += 1 + model_optim.zero_grad() + batch_x = batch_x.float().to(self.device) + + batch_y = batch_y.float().to(self.device) + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) 
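
The decoder-input construction used in vali and train above keeps label_len observed steps and masks the pred_len forecast horizon with zeros; a shape sketch with illustrative sizes:

    import torch

    B, label_len, pred_len, N = 4, 48, 96, 7                  # illustrative sizes
    batch_y = torch.randn(B, label_len + pred_len, N)         # ground truth: history overlap + horizon

    dec_inp = torch.zeros_like(batch_y[:, -pred_len:, :])                # horizon part zeroed out
    dec_inp = torch.cat([batch_y[:, :label_len, :], dec_inp], dim=1)     # (B, label_len + pred_len, N)
    assert dec_inp.shape == (4, 144, 7)
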
+ + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + loss = criterion(outputs, batch_y) + train_loss.append(loss.item()) + else: + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + loss = criterion(outputs, batch_y) + train_loss.append(loss.item()) + + if (i + 1) % 100 == 0: + print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) + speed = (time.time() - time_now) / iter_count + left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) + print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) + iter_count = 0 + time_now = time.time() + + if self.args.use_amp: + scaler.scale(loss).backward() + scaler.step(model_optim) + scaler.update() + else: + loss.backward() + model_optim.step() + + print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) + train_loss = np.average(train_loss) + vali_loss = self.vali(vali_data, vali_loader, criterion) + test_loss = self.vali(test_data, test_loader, criterion) + + print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( + epoch + 1, train_steps, train_loss, vali_loss, test_loss)) + early_stopping(vali_loss, self.model, path) + if early_stopping.early_stop: + print("Early stopping") + break + + adjust_learning_rate(model_optim, epoch + 1, self.args) + + # get_cka(self.args, setting, self.model, train_loader, self.device, epoch) + + best_model_path = path + '/' + 'checkpoint.pth' + self.model.load_state_dict(torch.load(best_model_path)) + + return self.model + + def test(self, setting, test=0): + test_data, test_loader = self._get_data(flag='test') + if test: + print('loading model') + self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) + + preds = [] + trues = [] + folder_path = './test_results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): + batch_x = batch_x.float().to(self.device) + batch_y = batch_y.float().to(self.device) + + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + else: + if self.args.output_attention: + 
outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + outputs = outputs.detach().cpu().numpy() + batch_y = batch_y.detach().cpu().numpy() + if test_data.scale and self.args.inverse: + outputs = test_data.inverse_transform(outputs) + batch_y = test_data.inverse_transform(batch_y) + + pred = outputs + true = batch_y + + preds.append(pred) + trues.append(true) + if i % 20 == 0: + input = batch_x.detach().cpu().numpy() + gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0) + pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0) + visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) + + preds = np.array(preds) + trues = np.array(trues) + print('test shape:', preds.shape, trues.shape) + preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) + trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1]) + print('test shape:', preds.shape, trues.shape) + + # result save + folder_path = './results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + mae, mse, rmse, mape, mspe = metric(preds, trues) + print('mse:{}, mae:{}'.format(mse, mae)) + f = open("result_long_term_forecast.txt", 'a') + f.write(setting + " \n") + f.write('mse:{}, mae:{}'.format(mse, mae)) + f.write('\n') + f.write('\n') + f.close() + + np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe])) + np.save(folder_path + 'pred.npy', preds) + np.save(folder_path + 'true.npy', trues) + + return diff --git a/experiments/exp_long_term_forecasting_partial.py b/experiments/exp_long_term_forecasting_partial.py new file mode 100644 index 000000000..1080c2960 --- /dev/null +++ b/experiments/exp_long_term_forecasting_partial.py @@ -0,0 +1,320 @@ +from data_provider.data_factory import data_provider +from experiments.exp_basic import Exp_Basic +from utils.tools import EarlyStopping, adjust_learning_rate, visual +from utils.metrics import metric +import torch +import torch.nn as nn +from torch import optim +import os +import time +import warnings +import pdb +import numpy as np +import random + +warnings.filterwarnings('ignore') + + +class Exp_Long_Term_Forecast_Partial(Exp_Basic): + def __init__(self, args): + super(Exp_Long_Term_Forecast_Partial, self).__init__(args) + + def _build_model(self): + model = self.model_dict[self.args.model].Model(self.args).float() + + if self.args.use_multi_gpu and self.args.use_gpu: + model = nn.DataParallel(model, device_ids=self.args.device_ids) + return model + + def _get_data(self, flag): + data_set, data_loader = data_provider(self.args, flag) + return data_set, data_loader + + def _select_optimizer(self): + model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) + return model_optim + + def _select_criterion(self): + criterion = nn.MSELoss() + return criterion + + def vali(self, vali_data, vali_loader, criterion, partial_train=False): + total_loss = [] + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): + batch_x = batch_x.float().to(self.device) + batch_y = batch_y.float() + + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = 
batch_y_mark.float().to(self.device) + + if partial_train: # 使用部分变量训练 + batch_x = batch_x[:,:,-self.args.enc_in:] + batch_y = batch_y[:,:,-self.args.enc_in:] + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + else: + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + elif self.args.channel_independent: + B, Tx, N = batch_x.shape + _, Ty, _ =dec_inp.shape + if batch_x_mark==None: + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark,\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark).reshape(B,N,-1).permute(0,2,1) + else: + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark.repeat(N,1,1),\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark.repeat(N,1,1))\ + .reshape(B,N,-1).permute(0,2,1) + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + + pred = outputs.detach().cpu() + true = batch_y.detach().cpu() + + loss = criterion(pred, true) + + total_loss.append(loss) + total_loss = np.average(total_loss) + self.model.train() + return total_loss + + def train(self, setting): + train_data, train_loader = self._get_data(flag='train') + vali_data, vali_loader = self._get_data(flag='val') + test_data, test_loader = self._get_data(flag='test') + + path = os.path.join(self.args.checkpoints, setting) + if not os.path.exists(path): + os.makedirs(path) + + time_now = time.time() + + train_steps = len(train_loader) + early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) + + model_optim = self._select_optimizer() + criterion = self._select_criterion() + + if self.args.use_amp: + scaler = torch.cuda.amp.GradScaler() + + for epoch in range(self.args.train_epochs): + iter_count = 0 + train_loss = [] + + self.model.train() + epoch_time = time.time() + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): + iter_count += 1 + model_optim.zero_grad() + batch_x = batch_x.float().to(self.device) + + batch_y = batch_y.float().to(self.device) + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + batch_x = batch_x[:,:,-self.args.enc_in:] + batch_y = batch_y[:,:,-self.args.enc_in:] + if self.args.random_train: # 使用随机的部分变量训练 + _,_,N = batch_x.shape + index = np.stack(random.sample(range(N),N))[-self.args.enc_in:] + batch_x = batch_x[:,:,index] + batch_y = batch_y[:,:,index] + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, 
batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + loss = criterion(outputs, batch_y) + train_loss.append(loss.item()) + else: + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + elif self.args.channel_independent: + B, Tx, N = batch_x.shape + _, Ty, _ =dec_inp.shape + if batch_x_mark==None: + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark,\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark).reshape(B,N,-1).permute(0,2,1) + else: + a = batch_x.permute(0,2,1) + b = batch_x.permute(0,2,1).reshape(B*N,Tx,1) + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark.repeat(N,1,1),\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark.repeat(N,1,1))\ + .reshape(B,N,-1).permute(0,2,1) + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + loss = criterion(outputs, batch_y) + train_loss.append(loss.item()) + + if (i + 1) % 100 == 0: + print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) + speed = (time.time() - time_now) / iter_count + left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) + print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) + iter_count = 0 + time_now = time.time() + + if self.args.use_amp: + scaler.scale(loss).backward() + scaler.step(model_optim) + scaler.update() + else: + loss.backward() + model_optim.step() + + print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) + train_loss = np.average(train_loss) + vali_loss = self.vali(vali_data, vali_loader, criterion, partial_train=True) + test_loss = self.vali(test_data, test_loader, criterion, partial_train=False) + + print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( + epoch + 1, train_steps, train_loss, vali_loss, test_loss)) + early_stopping(vali_loss, self.model, path) + if early_stopping.early_stop: + print("Early stopping") + break + + adjust_learning_rate(model_optim, epoch + 1, self.args) + + best_model_path = path + '/' + 'checkpoint.pth' + self.model.load_state_dict(torch.load(best_model_path)) + + return self.model + + def test(self, setting, test=0): + test_data, test_loader = self._get_data(flag='test') + if test: + print('loading model') + self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) + + preds = [] + trues = [] + folder_path = './test_results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): + batch_x = batch_x.float().to(self.device) + batch_y = batch_y.float().to(self.device) + + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + # 
encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + else: + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + elif self.args.channel_independent: + B, Tx, N = batch_x.shape + _, Ty, _ =dec_inp.shape + if batch_x_mark==None: + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark,\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark).reshape(B,N,-1).permute(0,2,1) + else: + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark.repeat(N,1,1),\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark.repeat(N,1,1))\ + .reshape(B,N,-1).permute(0,2,1) + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + outputs = outputs.detach().cpu().numpy() + batch_y = batch_y.detach().cpu().numpy() + if test_data.scale and self.args.inverse: + outputs = test_data.inverse_transform(outputs) + batch_y = test_data.inverse_transform(batch_y) + + pred = outputs + true = batch_y + + preds.append(pred) + trues.append(true) + if i % 20 == 0: + # if i == 4440: + input = batch_x.detach().cpu().numpy() + gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0) + pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0) + visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) + # pdb.set_trace() + + preds = np.array(preds) + trues = np.array(trues) + print('test shape:', preds.shape, trues.shape) + preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) + trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1]) + print('test shape:', preds.shape, trues.shape) + + # result save + folder_path = './results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + mae, mse, rmse, mape, mspe = metric(preds, trues) + print('mse:{}, mae:{}'.format(mse, mae)) + f = open("result_long_term_forecast.txt", 'a') + f.write(setting + " \n") + f.write('mse:{}, mae:{}'.format(mse, mae)) + f.write('\n') + f.write('\n') + f.close() + + np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe])) + np.save(folder_path + 'pred.npy', preds) + np.save(folder_path + 'true.npy', trues) + + return diff --git a/layers/Embed.py b/layers/Embed.py new file mode 100644 index 000000000..977e25568 --- /dev/null +++ b/layers/Embed.py @@ -0,0 +1,190 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.utils import weight_norm +import math + + +class PositionalEmbedding(nn.Module): + def __init__(self, d_model, max_len=5000): + super(PositionalEmbedding, self).__init__() + # Compute the positional encodings once in log space. 
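+ # sinusoidal table: pe[pos, 2i] = sin(pos * 10000^(-2i/d_model)), pe[pos, 2i+1] = cos(pos * 10000^(-2i/d_model))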
+ pe = torch.zeros(max_len, d_model).float() + pe.require_grad = False + + position = torch.arange(0, max_len).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() + * -(math.log(10000.0) / d_model)).exp() + + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + + pe = pe.unsqueeze(0) + self.register_buffer('pe', pe) + + def forward(self, x): + return self.pe[:, :x.size(1)] + + +class TokenEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(TokenEmbedding, self).__init__() + padding = 1 if torch.__version__ >= '1.5.0' else 2 + self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, + kernel_size=3, padding=padding, padding_mode='circular', bias=False) + for m in self.modules(): + if isinstance(m, nn.Conv1d): + nn.init.kaiming_normal_( + m.weight, mode='fan_in', nonlinearity='leaky_relu') + + def forward(self, x): + x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) + return x + + +class FixedEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(FixedEmbedding, self).__init__() + + w = torch.zeros(c_in, d_model).float() + w.require_grad = False + + position = torch.arange(0, c_in).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() + * -(math.log(10000.0) / d_model)).exp() + + w[:, 0::2] = torch.sin(position * div_term) + w[:, 1::2] = torch.cos(position * div_term) + + self.emb = nn.Embedding(c_in, d_model) + self.emb.weight = nn.Parameter(w, requires_grad=False) + + def forward(self, x): + return self.emb(x).detach() + + +class TemporalEmbedding(nn.Module): + def __init__(self, d_model, embed_type='fixed', freq='h'): + super(TemporalEmbedding, self).__init__() + + minute_size = 4 + hour_size = 24 + weekday_size = 7 + day_size = 32 + month_size = 13 + + Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding + if freq == 't': + self.minute_embed = Embed(minute_size, d_model) + self.hour_embed = Embed(hour_size, d_model) + self.weekday_embed = Embed(weekday_size, d_model) + self.day_embed = Embed(day_size, d_model) + self.month_embed = Embed(month_size, d_model) + + def forward(self, x): + x = x.long() + minute_x = self.minute_embed(x[:, :, 4]) if hasattr( + self, 'minute_embed') else 0. 
+ hour_x = self.hour_embed(x[:, :, 3]) + weekday_x = self.weekday_embed(x[:, :, 2]) + day_x = self.day_embed(x[:, :, 1]) + month_x = self.month_embed(x[:, :, 0]) + + return hour_x + weekday_x + day_x + month_x + minute_x + + +class TimeFeatureEmbedding(nn.Module): + def __init__(self, d_model, embed_type='timeF', freq='h'): + super(TimeFeatureEmbedding, self).__init__() + + freq_map = {'h': 4, 't': 5, 's': 6, + 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} + d_inp = freq_map[freq] + self.embed = nn.Linear(d_inp, d_model, bias=False) + + def forward(self, x): + return self.embed(x) + + +class DataEmbedding(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, + freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( + d_model=d_model, embed_type=embed_type, freq=freq) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + if x_mark is None: + x = self.value_embedding(x) + self.position_embedding(x) + else: + x = self.value_embedding( + x) + self.temporal_embedding(x_mark) + self.position_embedding(x) + return self.dropout(x) + + +class DataEmbedding_inverted(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding_inverted, self).__init__() + self.value_embedding = nn.Linear(c_in, d_model) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + x = x.permute(0, 2, 1) + # x: [Batch Variate Time] + if x_mark is None: + x = self.value_embedding(x) + else: + x = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1)) + # x: [Batch Variate d_model] + return self.dropout(x) + + +class DataEmbedding_wo_pos(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding_wo_pos, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, + freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( + d_model=d_model, embed_type=embed_type, freq=freq) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + if x_mark is None: + x = self.value_embedding(x) + else: + x = self.value_embedding(x) + self.temporal_embedding(x_mark) + return self.dropout(x) + + +class PatchEmbedding(nn.Module): + def __init__(self, d_model, patch_len, stride, padding, dropout): + super(PatchEmbedding, self).__init__() + # Patching + self.patch_len = patch_len + self.stride = stride + self.padding_patch_layer = nn.ReplicationPad1d((0, padding)) + + # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space + self.value_embedding = nn.Linear(patch_len, d_model, bias=False) + + # Positional embedding + self.position_embedding = PositionalEmbedding(d_model) + + # Residual dropout + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + # do patching + n_vars = x.shape[1] + x = self.padding_patch_layer(x) + x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) + x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) + # Input encoding + x = self.value_embedding(x) + self.position_embedding(x) + return self.dropout(x), n_vars 
diff --git a/layers/SelfAttention_Family.py b/layers/SelfAttention_Family.py new file mode 100644 index 000000000..f3d2d16d6 --- /dev/null +++ b/layers/SelfAttention_Family.py @@ -0,0 +1,420 @@ +import torch +import torch.nn as nn +import numpy as np +from math import sqrt +from utils.masking import TriangularCausalMask, ProbMask +from reformer_pytorch import LSHSelfAttention +from einops import rearrange, repeat + +class FlowAttention(nn.Module): + def __init__(self, attention_dropout=0.1): + super(FlowAttention, self).__init__() + self.dropout = nn.Dropout(attention_dropout) + + def kernel_method(self, x): + return torch.sigmoid(x) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + queries = queries.transpose(1, 2) + keys = keys.transpose(1, 2) + values = values.transpose(1, 2) + # kernel + queries = self.kernel_method(queries) + keys = self.kernel_method(keys) + # incoming and outgoing + normalizer_row = 1.0 / (torch.einsum("nhld,nhd->nhl", queries + 1e-6, keys.sum(dim=2) + 1e-6)) + normalizer_col = 1.0 / (torch.einsum("nhsd,nhd->nhs", keys + 1e-6, queries.sum(dim=2) + 1e-6)) + # reweighting + normalizer_row_refine = ( + torch.einsum("nhld,nhd->nhl", queries + 1e-6, (keys * normalizer_col[:, :, :, None]).sum(dim=2) + 1e-6)) + normalizer_col_refine = ( + torch.einsum("nhsd,nhd->nhs", keys + 1e-6, (queries * normalizer_row[:, :, :, None]).sum(dim=2) + 1e-6)) + # competition and allocation + normalizer_row_refine = torch.sigmoid( + normalizer_row_refine * (float(queries.shape[2]) / float(keys.shape[2]))) + normalizer_col_refine = torch.softmax(normalizer_col_refine, dim=-1) * keys.shape[2] # B h L vis + # multiply + kv = keys.transpose(-2, -1) @ (values * normalizer_col_refine[:, :, :, None]) + x = (((queries @ kv) * normalizer_row[:, :, :, None]) * normalizer_row_refine[:, :, :, None]).transpose(1, 2).contiguous() + return x, None + + +class DSAttention(nn.Module): + '''De-stationary Attention''' + + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(DSAttention, self).__init__() + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, H, E = queries.shape + _, S, _, D = values.shape + scale = self.scale or 1. 
/ sqrt(E) + + tau = 1.0 if tau is None else tau.unsqueeze( + 1).unsqueeze(1) # B x 1 x 1 x 1 + delta = 0.0 if delta is None else delta.unsqueeze( + 1).unsqueeze(1) # B x 1 x 1 x S + + # De-stationary Attention, rescaling pre-softmax score with learned de-stationary factors + scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta + + if self.mask_flag: + if attn_mask is None: + attn_mask = TriangularCausalMask(B, L, device=queries.device) + + scores.masked_fill_(attn_mask.mask, -np.inf) + + A = self.dropout(torch.softmax(scale * scores, dim=-1)) + V = torch.einsum("bhls,bshd->blhd", A, values) + + if self.output_attention: + return (V.contiguous(), A) + else: + return (V.contiguous(), None) + + + +class FlashAttention(nn.Module): + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(FlashAttention, self).__init__() + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def flash_attention_forward(self, Q, K, V, mask=None): + BLOCK_SIZE = 32 + NEG_INF = -1e10 # -infinity + EPSILON = 1e-10 + # mask = torch.randint(0, 2, (128, 8)).to(device='cuda') + O = torch.zeros_like(Q, requires_grad=True) + l = torch.zeros(Q.shape[:-1])[..., None] + m = torch.ones(Q.shape[:-1])[..., None] * NEG_INF + + O = O.to(device='cuda') + l = l.to(device='cuda') + m = m.to(device='cuda') + + Q_BLOCK_SIZE = min(BLOCK_SIZE, Q.shape[-1]) + KV_BLOCK_SIZE = BLOCK_SIZE + + Q_BLOCKS = torch.split(Q, Q_BLOCK_SIZE, dim=2) + K_BLOCKS = torch.split(K, KV_BLOCK_SIZE, dim=2) + V_BLOCKS = torch.split(V, KV_BLOCK_SIZE, dim=2) + if mask is not None: + mask_BLOCKS = list(torch.split(mask, KV_BLOCK_SIZE, dim=1)) + + Tr = len(Q_BLOCKS) + Tc = len(K_BLOCKS) + + O_BLOCKS = list(torch.split(O, Q_BLOCK_SIZE, dim=2)) + l_BLOCKS = list(torch.split(l, Q_BLOCK_SIZE, dim=2)) + m_BLOCKS = list(torch.split(m, Q_BLOCK_SIZE, dim=2)) + + for j in range(Tc): + Kj = K_BLOCKS[j] + Vj = V_BLOCKS[j] + if mask is not None: + maskj = mask_BLOCKS[j] + + for i in range(Tr): + Qi = Q_BLOCKS[i] + Oi = O_BLOCKS[i] + li = l_BLOCKS[i] + mi = m_BLOCKS[i] + + scale = 1 / np.sqrt(Q.shape[-1]) + Qi_scaled = Qi * scale + + S_ij = torch.einsum('... i d, ... j d -> ... i j', Qi_scaled, Kj) + if mask is not None: + # Masking + maskj_temp = rearrange(maskj, 'b j -> b 1 1 j') + S_ij = torch.where(maskj_temp > 0, S_ij, NEG_INF) + + m_block_ij, _ = torch.max(S_ij, dim=-1, keepdims=True) + P_ij = torch.exp(S_ij - m_block_ij) + if mask is not None: + # Masking + P_ij = torch.where(maskj_temp > 0, P_ij, 0.) + + l_block_ij = torch.sum(P_ij, dim=-1, keepdims=True) + EPSILON + + P_ij_Vj = torch.einsum('... i j, ... j d -> ... 
i d', P_ij, Vj) + + mi_new = torch.maximum(m_block_ij, mi) + li_new = torch.exp(mi - mi_new) * li + torch.exp(m_block_ij - mi_new) * l_block_ij + + O_BLOCKS[i] = (li / li_new) * torch.exp(mi - mi_new) * Oi + ( + torch.exp(m_block_ij - mi_new) / li_new) * P_ij_Vj + l_BLOCKS[i] = li_new + m_BLOCKS[i] = mi_new + + O = torch.cat(O_BLOCKS, dim=2) + l = torch.cat(l_BLOCKS, dim=2) + m = torch.cat(m_BLOCKS, dim=2) + return O, l, m + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + res = self.flash_attention_forward(queries.permute(0, 2, 1, 3), keys.permute(0, 2, 1, 3), values.permute(0, 2, 1, 3), attn_mask)[0] + return res.permute(0, 2, 1, 3).contiguous(), None + +class FullAttention(nn.Module): + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(FullAttention, self).__init__() + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, H, E = queries.shape + _, S, _, D = values.shape + scale = self.scale or 1. / sqrt(E) + + scores = torch.einsum("blhe,bshe->bhls", queries, keys) + + if self.mask_flag: + if attn_mask is None: + attn_mask = TriangularCausalMask(B, L, device=queries.device) + + scores.masked_fill_(attn_mask.mask, -np.inf) + + A = self.dropout(torch.softmax(scale * scores, dim=-1)) + V = torch.einsum("bhls,bshd->blhd", A, values) + + if self.output_attention: + return (V.contiguous(), A) + else: + return (V.contiguous(), None) + + +class ProbAttention(nn.Module): + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(ProbAttention, self).__init__() + self.factor = factor + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) + # Q [B, H, L, D] + B, H, L_K, E = K.shape + _, _, L_Q, _ = Q.shape + + # calculate the sampled Q_K + K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) + # real U = U_part(factor*ln(L_k))*L_q + index_sample = torch.randint(L_K, (L_Q, sample_k)) + K_sample = K_expand[:, :, torch.arange( + L_Q).unsqueeze(1), index_sample, :] + Q_K_sample = torch.matmul( + Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() + + # find the Top_k query with sparisty measurement + M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) + M_top = M.topk(n_top, sorted=False)[1] + + # use the reduced Q to calculate Q_K + Q_reduce = Q[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + M_top, :] # factor*ln(L_q) + Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k + + return Q_K, M_top + + def _get_initial_context(self, V, L_Q): + B, H, L_V, D = V.shape + if not self.mask_flag: + # V_sum = V.sum(dim=-2) + V_sum = V.mean(dim=-2) + contex = V_sum.unsqueeze(-2).expand(B, H, + L_Q, V_sum.shape[-1]).clone() + else: # use mask + # requires that L_Q == L_V, i.e. 
for self-attention only + assert (L_Q == L_V) + contex = V.cumsum(dim=-2) + return contex + + def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): + B, H, L_V, D = V.shape + + if self.mask_flag: + attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) + scores.masked_fill_(attn_mask.mask, -np.inf) + + attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) + + context_in[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + index, :] = torch.matmul(attn, V).type_as(context_in) + if self.output_attention: + attns = (torch.ones([B, H, L_V, L_V]) / + L_V).type_as(attn).to(attn.device) + attns[torch.arange(B)[:, None, None], torch.arange(H)[ + None, :, None], index, :] = attn + return (context_in, attns) + else: + return (context_in, None) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L_Q, H, D = queries.shape + _, L_K, _, _ = keys.shape + + queries = queries.transpose(2, 1) + keys = keys.transpose(2, 1) + values = values.transpose(2, 1) + + U_part = self.factor * \ + np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) + u = self.factor * \ + np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) + + U_part = U_part if U_part < L_K else L_K + u = u if u < L_Q else L_Q + + scores_top, index = self._prob_QK( + queries, keys, sample_k=U_part, n_top=u) + + # add scale factor + scale = self.scale or 1. / sqrt(D) + if scale is not None: + scores_top = scores_top * scale + # get the context + context = self._get_initial_context(values, L_Q) + # update the context with selected top_k queries + context, attn = self._update_context( + context, values, scores_top, index, L_Q, attn_mask) + + return context.contiguous(), attn + + +class AttentionLayer(nn.Module): + def __init__(self, attention, d_model, n_heads, d_keys=None, + d_values=None): + super(AttentionLayer, self).__init__() + + d_keys = d_keys or (d_model // n_heads) + d_values = d_values or (d_model // n_heads) + + self.inner_attention = attention + self.query_projection = nn.Linear(d_model, d_keys * n_heads) + self.key_projection = nn.Linear(d_model, d_keys * n_heads) + self.value_projection = nn.Linear(d_model, d_values * n_heads) + self.out_projection = nn.Linear(d_values * n_heads, d_model) + self.n_heads = n_heads + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, _ = queries.shape + _, S, _ = keys.shape + H = self.n_heads + + queries = self.query_projection(queries).view(B, L, H, -1) + keys = self.key_projection(keys).view(B, S, H, -1) + values = self.value_projection(values).view(B, S, H, -1) + + out, attn = self.inner_attention( + queries, + keys, + values, + attn_mask, + tau=tau, + delta=delta + ) + out = out.view(B, L, -1) + + return self.out_projection(out), attn + + +class ReformerLayer(nn.Module): + def __init__(self, attention, d_model, n_heads, d_keys=None, + d_values=None, causal=False, bucket_size=4, n_hashes=4): + super().__init__() + self.bucket_size = bucket_size + self.attn = LSHSelfAttention( + dim=d_model, + heads=n_heads, + bucket_size=bucket_size, + n_hashes=n_hashes, + causal=causal + ) + + def fit_length(self, queries): + # inside reformer: assert N % (bucket_size * 2) == 0 + B, N, C = queries.shape + if N % (self.bucket_size * 2) == 0: + return queries + else: + # fill the time series + fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2)) + return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1) + + def forward(self, queries, keys, values, attn_mask, 
tau, delta): + # in Reformer: defalut queries=keys + B, N, C = queries.shape + queries = self.attn(self.fit_length(queries))[:, :N, :] + return queries, None + + +class TwoStageAttentionLayer(nn.Module): + ''' + The Two Stage Attention (TSA) Layer + input/output shape: [batch_size, Data_dim(D), Seg_num(L), d_model] + ''' + + def __init__(self, configs, + seg_num, factor, d_model, n_heads, d_ff=None, dropout=0.1): + super(TwoStageAttentionLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.time_attention = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), d_model, n_heads) + self.dim_sender = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), d_model, n_heads) + self.dim_receiver = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), d_model, n_heads) + self.router = nn.Parameter(torch.randn(seg_num, factor, d_model)) + + self.dropout = nn.Dropout(dropout) + + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.norm4 = nn.LayerNorm(d_model) + + self.MLP1 = nn.Sequential(nn.Linear(d_model, d_ff), + nn.GELU(), + nn.Linear(d_ff, d_model)) + self.MLP2 = nn.Sequential(nn.Linear(d_model, d_ff), + nn.GELU(), + nn.Linear(d_ff, d_model)) + + def forward(self, x, attn_mask=None, tau=None, delta=None): + # Cross Time Stage: Directly apply MSA to each dimension + batch = x.shape[0] + time_in = rearrange(x, 'b ts_d seg_num d_model -> (b ts_d) seg_num d_model') + time_enc, attn = self.time_attention( + time_in, time_in, time_in, attn_mask=None, tau=None, delta=None + ) + dim_in = time_in + self.dropout(time_enc) + dim_in = self.norm1(dim_in) + dim_in = dim_in + self.dropout(self.MLP1(dim_in)) + dim_in = self.norm2(dim_in) + + # Cross Dimension Stage: use a small set of learnable vectors to aggregate and distribute messages to build the D-to-D connection + dim_send = rearrange(dim_in, '(b ts_d) seg_num d_model -> (b seg_num) ts_d d_model', b=batch) + batch_router = repeat(self.router, 'seg_num factor d_model -> (repeat seg_num) factor d_model', repeat=batch) + dim_buffer, attn = self.dim_sender(batch_router, dim_send, dim_send, attn_mask=None, tau=None, delta=None) + dim_receive, attn = self.dim_receiver(dim_send, dim_buffer, dim_buffer, attn_mask=None, tau=None, delta=None) + dim_enc = dim_send + self.dropout(dim_receive) + dim_enc = self.norm3(dim_enc) + dim_enc = dim_enc + self.dropout(self.MLP2(dim_enc)) + dim_enc = self.norm4(dim_enc) + + final_out = rearrange(dim_enc, '(b seg_num) ts_d d_model -> b ts_d seg_num d_model', b=batch) + + return final_out \ No newline at end of file diff --git a/layers/Transformer_EncDec.py b/layers/Transformer_EncDec.py new file mode 100644 index 000000000..dabf4c2a5 --- /dev/null +++ b/layers/Transformer_EncDec.py @@ -0,0 +1,135 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ConvLayer(nn.Module): + def __init__(self, c_in): + super(ConvLayer, self).__init__() + self.downConv = nn.Conv1d(in_channels=c_in, + out_channels=c_in, + kernel_size=3, + padding=2, + padding_mode='circular') + self.norm = nn.BatchNorm1d(c_in) + self.activation = nn.ELU() + self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) + + def forward(self, x): + x = self.downConv(x.permute(0, 2, 1)) + x = self.norm(x) + x = self.activation(x) + x = 
self.maxPool(x) + x = x.transpose(1, 2) + return x + + +class EncoderLayer(nn.Module): + def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): + super(EncoderLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.attention = attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, attn_mask=None, tau=None, delta=None): + new_x, attn = self.attention( + x, x, x, + attn_mask=attn_mask, + tau=tau, delta=delta + ) + x = x + self.dropout(new_x) + + y = x = self.norm1(x) + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + + return self.norm2(x + y), attn + + +class Encoder(nn.Module): + def __init__(self, attn_layers, conv_layers=None, norm_layer=None): + super(Encoder, self).__init__() + self.attn_layers = nn.ModuleList(attn_layers) + self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None + self.norm = norm_layer + + def forward(self, x, attn_mask=None, tau=None, delta=None): + # x [B, L, D] + attns = [] + if self.conv_layers is not None: + for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)): + delta = delta if i == 0 else None + x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) + x = conv_layer(x) + attns.append(attn) + x, attn = self.attn_layers[-1](x, tau=tau, delta=None) + attns.append(attn) + else: + for attn_layer in self.attn_layers: + x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) + attns.append(attn) + + if self.norm is not None: + x = self.norm(x) + + return x, attns + + +class DecoderLayer(nn.Module): + def __init__(self, self_attention, cross_attention, d_model, d_ff=None, + dropout=0.1, activation="relu"): + super(DecoderLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.self_attention = self_attention + self.cross_attention = cross_attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): + x = x + self.dropout(self.self_attention( + x, x, x, + attn_mask=x_mask, + tau=tau, delta=None + )[0]) + x = self.norm1(x) + + x = x + self.dropout(self.cross_attention( + x, cross, cross, + attn_mask=cross_mask, + tau=tau, delta=delta + )[0]) + + y = x = self.norm2(x) + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + + return self.norm3(x + y) + + +class Decoder(nn.Module): + def __init__(self, layers, norm_layer=None, projection=None): + super(Decoder, self).__init__() + self.layers = nn.ModuleList(layers) + self.norm = norm_layer + self.projection = projection + + def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): + for layer in self.layers: + x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta) + + if self.norm is not None: + x = self.norm(x) + + if 
self.projection is not None: + x = self.projection(x) + return x diff --git a/model/Flashformer.py b/model/Flashformer.py new file mode 100644 index 000000000..45d8aca96 --- /dev/null +++ b/model/Flashformer.py @@ -0,0 +1,136 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer +from layers.SelfAttention_Family import FlashAttention, AttentionLayer, FullAttention +from layers.Embed import DataEmbedding +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FlashAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer( + FullAttention(True, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.dec_embedding(x_dec, x_mark_dec) + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) + return dec_out + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = 
self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/Flowformer.py b/model/Flowformer.py new file mode 100644 index 000000000..f624c0acd --- /dev/null +++ b/model/Flowformer.py @@ -0,0 +1,145 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer +from layers.SelfAttention_Family import FullAttention, AttentionLayer, FlowAttention +from layers.Embed import DataEmbedding +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + + if configs.channel_independent: + self.enc_in = 1 + self.dec_in = 1 + self.c_out = 1 + else: + self.enc_in = configs.enc_in + self.dec_in = configs.dec_in + self.c_out = configs.c_out + + # Embedding + self.enc_embedding = DataEmbedding(self.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FlowAttention(attention_dropout=configs.dropout), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.dec_embedding = DataEmbedding(self.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer( + FullAttention(True, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + 
output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.dec_embedding(x_dec, x_mark_dec) + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) + return dec_out + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/Informer.py b/model/Informer.py new file mode 100644 index 000000000..7e632b2aa --- /dev/null +++ b/model/Informer.py @@ -0,0 +1,166 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer +from layers.SelfAttention_Family import ProbAttention, AttentionLayer +from layers.Embed import DataEmbedding + + +class Model(nn.Module): + 
""" + Informer with Propspare attention in O(LlogL) complexity + Paper link: https://ojs.aaai.org/index.php/AAAI/article/view/17325/17132 + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.label_len = configs.label_len + + if configs.channel_independent: + self.enc_in = 1 + self.dec_in = 1 + self.c_out = 1 + else: + self.enc_in = configs.enc_in + self.dec_in = configs.dec_in + self.c_out = configs.c_out + + # Embedding + self.enc_embedding = DataEmbedding(self.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.dec_embedding = DataEmbedding(self.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + ProbAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + [ + ConvLayer( + configs.d_model + ) for l in range(configs.e_layers - 1) + ] if configs.distil and ('forecast' in configs.task_name) else None, + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer( + ProbAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + enc_out = self.enc_embedding(x_enc, x_mark_enc) + dec_out = self.dec_embedding(x_dec, x_mark_dec) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) + + return dec_out # [B, L, D] + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + enc_out = self.enc_embedding(x_enc, x_mark_enc) + + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() # B x 1 x E + x_enc = x_enc / std_enc + + enc_out = self.enc_embedding(x_enc, x_mark_enc) + dec_out = self.dec_embedding(x_dec, x_mark_dec) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.decoder(dec_out, enc_out, x_mask=None, 
cross_mask=None) + + dec_out = dec_out * std_enc + mean_enc + return dec_out # [B, L, D] + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # enc + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # final + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # final + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast': + dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'short_term_forecast': + dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/Reformer.py b/model/Reformer.py new file mode 100644 index 000000000..d11bffeb0 --- /dev/null +++ b/model/Reformer.py @@ -0,0 +1,163 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import ReformerLayer +from layers.Embed import DataEmbedding + + +class Model(nn.Module): + """ + Reformer with O(LlogL) complexity + Paper link: https://openreview.net/forum?id=rkgNKkHtvB + """ + + def __init__(self, configs, bucket_size=4, n_hashes=4): + """ + bucket_size: int, + n_hashes: int, + """ + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.seq_len = configs.seq_len + + if configs.channel_independent: + self.enc_in = 1 + self.dec_in = 1 + self.c_out = 1 + else: + self.enc_in = configs.enc_in + self.dec_in = configs.dec_in + self.c_out = configs.c_out + + + self.enc_embedding = DataEmbedding(self.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + ReformerLayer(None, configs.d_model, configs.n_heads, + bucket_size=bucket_size, n_hashes=n_hashes), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear( + configs.d_model * configs.seq_len, configs.num_class) + else: + self.projection = nn.Linear( + 
configs.d_model, configs.c_out, bias=True) + + def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # add placeholder + x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1) + if x_mark_enc is not None: + x_mark_enc = torch.cat( + [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1) + + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + enc_out, attns = self.encoder(enc_out, attn_mask=None) + dec_out = self.projection(enc_out) + + return dec_out # [B, L, D] + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # add placeholder + x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1) + if x_mark_enc is not None: + x_mark_enc = torch.cat( + [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1) + + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # add placeholder + x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1) + if x_mark_enc is not None: + x_mark_enc = torch.cat( + [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1) + + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() # B x 1 x E + x_enc = x_enc / std_enc + + # add placeholder + x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1) + if x_mark_enc is not None: + x_mark_enc = torch.cat( + [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1) + + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + enc_out, attns = self.encoder(enc_out, attn_mask=None) + dec_out = self.projection(enc_out) + + dec_out = dec_out * std_enc + mean_enc + return dec_out # [B, L, D] + + def imputation(self, x_enc, x_mark_enc): + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + + enc_out, attns = self.encoder(enc_out) + enc_out = self.projection(enc_out) + + return enc_out # [B, L, D] + + def anomaly_detection(self, x_enc): + enc_out = self.enc_embedding(x_enc, None) # [B,T,C] + + enc_out, attns = self.encoder(enc_out) + enc_out = self.projection(enc_out) + + return enc_out # [B, L, D] + + def classification(self, x_enc, x_mark_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out) + + # Output + # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.act(enc_out) + output = self.dropout(output) + # zero-out padding embeddings + output = output * x_mark_enc.unsqueeze(-1) + # (batch_size, seq_length * d_model) + output = output.reshape(output.shape[0], -1) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast': + dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'short_term_forecast': + dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return 
dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/Transformer.py b/model/Transformer.py new file mode 100644 index 000000000..de336f1c8 --- /dev/null +++ b/model/Transformer.py @@ -0,0 +1,146 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer +from layers.SelfAttention_Family import FullAttention, AttentionLayer +from layers.Embed import DataEmbedding +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + + if configs.channel_independent: + self.enc_in = 1 + self.dec_in = 1 + self.c_out = 1 + else: + self.enc_in = configs.enc_in + self.dec_in = configs.dec_in + self.c_out = configs.c_out + + # Embedding + self.enc_embedding = DataEmbedding(self.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.dec_embedding = DataEmbedding(self.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer( + FullAttention(True, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.dec_embedding(x_dec, x_mark_dec) + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) + return dec_out + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = 
self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/iFlashformer.py b/model/iFlashformer.py new file mode 100644 index 000000000..2acb4a449 --- /dev/null +++ b/model/iFlashformer.py @@ -0,0 +1,186 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import FlashAttention, AttentionLayer +from layers.Embed import DataEmbedding_inverted +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FlashAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = 
nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + if self.class_strategy == 'average': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include 
non-linearity + output = self.dropout(output) + output = torch.mean(output, dim=1) # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + elif self.class_strategy == 'cls_token': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out = torch.cat((self.cls_token.expand(enc_out.shape[0], -1, -1), enc_out), dim=1) # (batch_size, N + 1, d_model) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output[:, 0, :] # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + else: # projection + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/iFlowformer.py b/model/iFlowformer.py new file mode 100644 index 000000000..6abf60ff4 --- /dev/null +++ b/model/iFlowformer.py @@ -0,0 +1,185 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import FlowAttention, AttentionLayer +from layers.Embed import DataEmbedding_inverted +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FlowAttention(attention_dropout=configs.dropout), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = 
nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + if self.class_strategy == 'average': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include 
non-linearity + output = self.dropout(output) + output = torch.mean(output, dim=1) # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + elif self.class_strategy == 'cls_token': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out = torch.cat((self.cls_token.expand(enc_out.shape[0], -1, -1), enc_out), dim=1) # (batch_size, N + 1, d_model) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output[:, 0, :] # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + else: # projection + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/iInformer.py b/model/iInformer.py new file mode 100644 index 000000000..c4824fd8a --- /dev/null +++ b/model/iInformer.py @@ -0,0 +1,186 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import ProbAttention, AttentionLayer +from layers.Embed import DataEmbedding_inverted +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + ProbAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if 
self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + if self.class_strategy == 'average': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the 
output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = torch.mean(output, dim=1) # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + elif self.class_strategy == 'cls_token': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out = torch.cat((self.cls_token.expand(enc_out.shape[0], -1, -1), enc_out), dim=1) # (batch_size, N + 1, d_model) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output[:, 0, :] # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + else: # projection + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/iReformer.py b/model/iReformer.py new file mode 100644 index 000000000..e7e7ef86e --- /dev/null +++ b/model/iReformer.py @@ -0,0 +1,185 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import ReformerLayer +from layers.Embed import DataEmbedding_inverted +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + ReformerLayer(None, configs.d_model, configs.n_heads, + bucket_size=4, n_hashes=4), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'anomaly_detection': + 
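# As with the forecasting and imputation heads above, this anomaly-detection head maps +
# each variate token of width d_model back onto the time axis; a rough shape sketch +
# (B = batch, N = variates; purely illustrative): +
#   enc_out [B, N, d_model] --projection--> [B, N, seq_len] --permute(0, 2, 1)--> [B, seq_len, N] +
# The later [:, :, :N] slice keeps only the N variate tokens in case covariate (time-mark) +
# tokens were appended by the inverted embedding. +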
self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + if self.class_strategy == 'average': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings 
don't include non-linearity + output = self.dropout(output) + output = torch.mean(output, dim=1) # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + elif self.class_strategy == 'cls_token': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out = torch.cat((self.cls_token.expand(enc_out.shape[0], -1, -1), enc_out), dim=1) # (batch_size, N + 1, d_model) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output[:, 0, :] # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + else: # projection + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/iTransformer.py b/model/iTransformer.py new file mode 100644 index 000000000..c17e8498c --- /dev/null +++ b/model/iTransformer.py @@ -0,0 +1,165 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import FullAttention, AttentionLayer +from layers.Embed import DataEmbedding_inverted +import numpy as np + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/abs/2310.06625 + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.class_strategy = configs.class_strategy + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 
'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + if self.class_strategy == 'average': + self.projection = nn.Linear(configs.d_model, configs.num_class) + elif self.class_strategy == 'cls_token': + self.cls_token = nn.Parameter(torch.zeros(1, 1, configs.d_model)) + self.projection = nn.Linear(configs.d_model, configs.num_class) + nn.init.normal_(self.cls_token, std=1e-6) + else: + self.projection = nn.Linear(configs.d_model * configs.enc_in, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + if self.class_strategy == 'average': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = torch.mean(output, dim=1) # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + elif self.class_strategy == 'cls_token': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out = torch.cat((self.cls_token.expand(enc_out.shape[0], -1, -1), enc_out), dim=1) # (batch_size, N + 1, d_model) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # 
the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output[:, 0, :] # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + else: # projection + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/run.py b/run.py new file mode 100644 index 000000000..420f0bd32 --- /dev/null +++ b/run.py @@ -0,0 +1,180 @@ +import argparse +import torch +from experiments.exp_long_term_forecasting import Exp_Long_Term_Forecast +from experiments.exp_long_term_forecasting_partial import Exp_Long_Term_Forecast_Partial +import random +import numpy as np + +if __name__ == '__main__': + fix_seed = 2021 + random.seed(fix_seed) + torch.manual_seed(fix_seed) + np.random.seed(fix_seed) + + parser = argparse.ArgumentParser(description='TimesNet') + + # basic config + parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast', + help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]') + parser.add_argument('--is_training', type=int, required=True, default=1, help='status') + parser.add_argument('--model_id', type=str, required=True, default='test', help='model id') + parser.add_argument('--model', type=str, required=True, default='Autoformer', + help='model name, options: [Autoformer, Transformer, TimesNet]') + + # data loader + parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type') + parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root path of the data file') + parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file') + parser.add_argument('--features', type=str, default='M', + help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate') + parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task') + parser.add_argument('--freq', type=str, default='h', + help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h') + parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints') + + # forecasting task + parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') + 
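# With the defaults below, each training window is typically split as (illustrative sketch): +
#   encoder input  : x[t : t + seq_len]                                   (96 steps) +
#   decoder input  : x[t + seq_len - label_len : t + seq_len + pred_len]  (48 known + 96 placeholder steps) +
#   forecast target: x[t + seq_len : t + seq_len + pred_len]              (96 steps) +
# The inverted, encoder-only models in this patch do not consume the decoder primer, so +
# label_len mainly matters for the encoder-decoder baselines (e.g. Transformer, Informer). +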
parser.add_argument('--label_len', type=int, default=48, help='start token length') + parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length') + parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4') + + # imputation task + parser.add_argument('--mask_rate', type=float, default=0.25, help='mask ratio') + + # anomaly detection task + parser.add_argument('--anomaly_ratio', type=float, default=0.25, help='prior anomaly ratio (%)') + + # model define + parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock') + parser.add_argument('--num_kernels', type=int, default=6, help='for Inception') + parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') + parser.add_argument('--dec_in', type=int, default=7, help='decoder input size') + parser.add_argument('--c_out', type=int, default=7, help='output size') + parser.add_argument('--d_model', type=int, default=512, help='dimension of model') + parser.add_argument('--n_heads', type=int, default=8, help='num of heads') + parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers') + parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers') + parser.add_argument('--d_ff', type=int, default=2048, help='dimension of feed-forward network') + parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average') + parser.add_argument('--factor', type=int, default=1, help='attn factor') + parser.add_argument('--distil', action='store_false', + help='whether to use distilling in encoder, using this argument means not using distilling', + default=True) + parser.add_argument('--dropout', type=float, default=0.1, help='dropout') + parser.add_argument('--embed', type=str, default='timeF', + help='time features encoding, options:[timeF, fixed, learned]') + parser.add_argument('--activation', type=str, default='gelu', help='activation') + parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder') + + # optimization + parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers') + parser.add_argument('--itr', type=int, default=1, help='number of experiment repetitions') + parser.add_argument('--train_epochs', type=int, default=10, help='train epochs') + parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data') + parser.add_argument('--patience', type=int, default=3, help='early stopping patience') + parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate') + parser.add_argument('--des', type=str, default='test', help='exp description') + parser.add_argument('--loss', type=str, default='MSE', help='loss function') + parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate') + parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False) + + # GPU + parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu') + parser.add_argument('--gpu', type=int, default=0, help='gpu') + parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False) + parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multiple gpus') + + # de-stationary projector params + parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128], + help='hidden layer dimensions of projector (List)') +
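# nargs='+' reads space-separated integers, e.g. --p_hidden_dims 128 128 (illustrative; +
# equivalent to the default two-layer projector). +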
parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector') + + # iTransformer + parser.add_argument('--exp_name', type=str, required=True, default='None', + help='task name, options:[partial_train, zero_shot]') + + # parser.add_argument('--partial_train', type=bool, default=False, help='partial_train') + parser.add_argument('--random_train', type=bool, default=False, help='random_train') + parser.add_argument('--channel_independent', type=bool, default=False, help='channel_independent') + parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False) + parser.add_argument('--class_strategy', type=str, default='projection', help='projection/average/cls_token') + parser.add_argument('--target_root_path', type=str, default='./data/ETT/', help='root path of the data file') + parser.add_argument('--target_data_path', type=str, default='ETTh1.csv', help='data file') + + + args = parser.parse_args() + args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False + + if args.use_gpu and args.use_multi_gpu: + args.devices = args.devices.replace(' ', '') + device_ids = args.devices.split(',') + args.device_ids = [int(id_) for id_ in device_ids] + args.gpu = args.device_ids[0] + + print('Args in experiment:') + print(args) + + if args.task_name == 'long_term_forecast': + if args.exp_name == 'partial_train': + Exp = Exp_Long_Term_Forecast_Partial + else: + Exp = Exp_Long_Term_Forecast + else: + Exp = Exp_Long_Term_Forecast + + if args.is_training: + for ii in range(args.itr): + # setting record of experiments + setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( + args.task_name, + args.model_id, + args.model, + args.data, + args.features, + args.seq_len, + args.label_len, + args.pred_len, + args.d_model, + args.n_heads, + args.e_layers, + args.d_layers, + args.d_ff, + args.factor, + args.embed, + args.distil, + args.des, + args.class_strategy, ii) + + exp = Exp(args) # set experiments + print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting)) + exp.train(setting) + + print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) + exp.test(setting) + torch.cuda.empty_cache() + else: + ii = 0 + setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( + args.task_name, + args.model_id, + args.model, + args.data, + args.features, + args.seq_len, + args.label_len, + args.pred_len, + args.d_model, + args.n_heads, + args.e_layers, + args.d_layers, + args.d_ff, + args.factor, + args.embed, + args.distil, + args.des, + args.class_strategy, ii) + + exp = Exp(args) # set experiments + print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) + exp.test(setting, test=1) + torch.cuda.empty_cache() diff --git a/scripts/boost_performance/ECL/iFlowformer.sh b/scripts/boost_performance/ECL/iFlowformer.sh new file mode 100644 index 000000000..4eac8a143 --- /dev/null +++ b/scripts/boost_performance/ECL/iFlowformer.sh @@ -0,0 +1,174 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ 
+ --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/ECL/iInformer.sh b/scripts/boost_performance/ECL/iInformer.sh new file mode 100644 index 000000000..c693c73b5 --- /dev/null +++ b/scripts/boost_performance/ECL/iInformer.sh @@ -0,0 +1,174 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 
1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/ECL/iReformer.sh b/scripts/boost_performance/ECL/iReformer.sh new file mode 100644 index 000000000..c693c73b5 --- /dev/null +++ b/scripts/boost_performance/ECL/iReformer.sh @@ -0,0 +1,174 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ 
+ --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/ECL/iTransformer.sh b/scripts/boost_performance/ECL/iTransformer.sh new file mode 100644 index 000000000..4c3618dc2 --- /dev/null +++ b/scripts/boost_performance/ECL/iTransformer.sh @@ -0,0 +1,174 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path 
electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/Traffic/iFlowformer.sh b/scripts/boost_performance/Traffic/iFlowformer.sh new file mode 100644 index 000000000..d2dd3d2be --- /dev/null +++ b/scripts/boost_performance/Traffic/iFlowformer.sh @@ -0,0 +1,182 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Flowformer + +python -u run.py \ + 
--task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/boost_performance/Traffic/iInformer.sh 
b/scripts/boost_performance/Traffic/iInformer.sh new file mode 100644 index 000000000..3263a97b4 --- /dev/null +++ b/scripts/boost_performance/Traffic/iInformer.sh @@ -0,0 +1,182 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Informer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +
+model_name=iInformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/boost_performance/Traffic/iReformer.sh b/scripts/boost_performance/Traffic/iReformer.sh new file mode 100644 index 000000000..c0b889344 --- /dev/null +++ b/scripts/boost_performance/Traffic/iReformer.sh @@ -0,0 +1,182 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Reformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +
+model_name=iReformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast
\ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/boost_performance/Traffic/iTransformer.sh b/scripts/boost_performance/Traffic/iTransformer.sh new file mode 100644 index 000000000..782456565 --- /dev/null +++ b/scripts/boost_performance/Traffic/iTransformer.sh @@ -0,0 +1,182 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + 
--seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/boost_performance/Weather/iFlowformer.sh b/scripts/boost_performance/Weather/iFlowformer.sh new file mode 100644 index 000000000..c490ae7b7 --- /dev/null +++ b/scripts/boost_performance/Weather/iFlowformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Flowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' 
\ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/Weather/iInformer.sh b/scripts/boost_performance/Weather/iInformer.sh new file mode 100644 index 000000000..52062b726 --- /dev/null +++ b/scripts/boost_performance/Weather/iInformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Informer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+model_name=iInformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/Weather/iReformer.sh b/scripts/boost_performance/Weather/iReformer.sh new file mode 100644 index 000000000..efc0fdddd --- /dev/null +++ b/scripts/boost_performance/Weather/iReformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Reformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+model_name=iReformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ +
--enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/Weather/iTransformer.sh b/scripts/boost_performance/Weather/iTransformer.sh index e69de29bb..e63372c97 100644 --- a/scripts/boost_performance/Weather/iTransformer.sh +++ b/scripts/boost_performance/Weather/iTransformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +model_name=iTransformer + +python -u run.py \ + --task_name 
long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/efficient_attentions/ECL/iFlashTransformer.sh b/scripts/efficient_attentions/ECL/iFlashTransformer.sh new file mode 100644 index 000000000..f0f8ef5b6 --- /dev/null +++ b/scripts/efficient_attentions/ECL/iFlashTransformer.sh @@ -0,0 +1,173 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + 
--pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +model_name=iFlashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 diff --git a/scripts/efficient_attentions/Traffic/iFlashTransformer.sh b/scripts/efficient_attentions/Traffic/iFlashTransformer.sh new file mode 100644 index 000000000..1752e6804 --- /dev/null +++ b/scripts/efficient_attentions/Traffic/iFlashTransformer.sh @@ -0,0 +1,181 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Flashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + 
--data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +model_name=iFlashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 diff --git a/scripts/efficient_attentions/Weather/iFlashTransformer.sh b/scripts/efficient_attentions/Weather/iFlashTransformer.sh new file mode 100644 index 000000000..410832341 --- /dev/null +++ b/scripts/efficient_attentions/Weather/iFlashTransformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + 
--e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +model_name=iFlashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/efficient_attentions/iFlashTransformer.sh b/scripts/efficient_attentions/iFlashTransformer.sh deleted file mode 100644 index e69de29bb..000000000 diff --git a/scripts/increasing_lookback/ECL/iFlowformer.sh b/scripts/increasing_lookback/ECL/iFlowformer.sh new file mode 100644 index 000000000..33cf730df --- /dev/null +++ b/scripts/increasing_lookback/ECL/iFlowformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ 
+ --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ diff --git a/scripts/increasing_lookback/ECL/iInformer.sh b/scripts/increasing_lookback/ECL/iInformer.sh new file mode 100644 index 000000000..f60581620 --- /dev/null +++ b/scripts/increasing_lookback/ECL/iInformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iInformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + 
--root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ diff --git a/scripts/increasing_lookback/ECL/iReformer.sh b/scripts/increasing_lookback/ECL/iReformer.sh new file mode 100644 index 000000000..0a2423750 --- /dev/null +++ b/scripts/increasing_lookback/ECL/iReformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + 
--d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ diff --git a/scripts/increasing_lookback/ECL/iTransformer.sh b/scripts/increasing_lookback/ECL/iTransformer.sh new file mode 100644 index 000000000..7b074bc1e --- /dev/null +++ b/scripts/increasing_lookback/ECL/iTransformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ diff --git a/scripts/increasing_lookback/Traffic/iFlowformer.sh b/scripts/increasing_lookback/Traffic/iFlowformer.sh new file mode 100644 index 000000000..65ea6ba4e --- /dev/null +++ b/scripts/increasing_lookback/Traffic/iFlowformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + 
--d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ diff --git a/scripts/increasing_lookback/Traffic/iInformer.sh b/scripts/increasing_lookback/Traffic/iInformer.sh new file mode 100644 index 000000000..246526849 --- /dev/null +++ b/scripts/increasing_lookback/Traffic/iInformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iInformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 
48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ diff --git a/scripts/increasing_lookback/Traffic/iReformer.sh b/scripts/increasing_lookback/Traffic/iReformer.sh new file mode 100644 index 000000000..9b0681a0c --- /dev/null +++ b/scripts/increasing_lookback/Traffic/iReformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + 
--data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ diff --git a/scripts/increasing_lookback/Traffic/iTransformer.sh b/scripts/increasing_lookback/Traffic/iTransformer.sh index e69de29bb..c32fc8568 100644 --- a/scripts/increasing_lookback/Traffic/iTransformer.sh +++ b/scripts/increasing_lookback/Traffic/iTransformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ diff --git a/scripts/multivariate_forecast/ECL/iTransformer.sh b/scripts/multivariate_forecast/ECL/iTransformer.sh new file mode 100644 index 000000000..0192f6d53 --- /dev/null +++ b/scripts/multivariate_forecast/ECL/iTransformer.sh @@ -0,0 +1,105 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path 
./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ No newline at end of file diff --git a/scripts/multivariate_forecast/ETTh2/iTransformer.sh b/scripts/multivariate_forecast/ETTh2/iTransformer.sh new file mode 100644 index 000000000..40c5ecda8 --- /dev/null +++ b/scripts/multivariate_forecast/ETTh2/iTransformer.sh @@ -0,0 +1,95 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 128\ + --d_ff 128\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 128\ + --d_ff 128\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 128\ + --d_ff 128\ + --itr 1 \ + +python -u run.py \ + 
--task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 128\ + --d_ff 128\ + --itr 1 \ No newline at end of file diff --git a/scripts/multivariate_forecast/Pems/iTransformer.sh b/scripts/multivariate_forecast/Pems/iTransformer.sh new file mode 100644 index 000000000..32ec1cbd9 --- /dev/null +++ b/scripts/multivariate_forecast/Pems/iTransformer.sh @@ -0,0 +1,101 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/PEMS/ \ + --data_path PEMS03.npz \ + --model_id PEMS03_96_12 \ + --model $model_name \ + --data PEMS \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 12 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 358 \ + --dec_in 358 \ + --c_out 358 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.001\ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/PEMS/ \ + --data_path PEMS03.npz \ + --model_id PEMS03_96_24 \ + --model $model_name \ + --data PEMS \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 24 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 358 \ + --dec_in 358 \ + --c_out 358 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.001\ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/PEMS/ \ + --data_path PEMS03.npz \ + --model_id PEMS03_96_48 \ + --model $model_name \ + --data PEMS \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 48 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 358 \ + --dec_in 358 \ + --c_out 358 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.001\ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/PEMS/ \ + --data_path PEMS03.npz \ + --model_id PEMS03_96_96 \ + --model $model_name \ + --data PEMS \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 358 \ + --dec_in 358 \ + --c_out 358 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.001\ + --itr 1 \ No newline at end of file diff --git a/scripts/multivariate_forecast/Solar/iTransformer.sh b/scripts/multivariate_forecast/Solar/iTransformer.sh new file mode 100644 index 000000000..dad19e0e6 --- /dev/null +++ b/scripts/multivariate_forecast/Solar/iTransformer.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 137 \ + --dec_in 137 \ + --c_out 137 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.0005\ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_192 \ + --model $model_name \ + 
--data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 137 \ + --dec_in 137 \ + --c_out 137 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.0005\ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_336 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 137 \ + --dec_in 137 \ + --c_out 137 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.0005\ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_720 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 137 \ + --dec_in 137 \ + --c_out 137 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.0005\ + --itr 1 diff --git a/scripts/multivariate_forecast/Traffic/iTransformer.sh b/scripts/multivariate_forecast/Traffic/iTransformer.sh index e69de29bb..c8a476269 100644 --- a/scripts/multivariate_forecast/Traffic/iTransformer.sh +++ b/scripts/multivariate_forecast/Traffic/iTransformer.sh @@ -0,0 +1,103 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ No newline at end of file diff --git 
a/scripts/multivariate_forecast/Weather/iTransformer.sh b/scripts/multivariate_forecast/Weather/iTransformer.sh new file mode 100644 index 000000000..f2e3792a5 --- /dev/null +++ b/scripts/multivariate_forecast/Weather/iTransformer.sh @@ -0,0 +1,98 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Electricity/iFlowformer.sh b/scripts/variate_generalization/Electricity/iFlowformer.sh new file mode 100644 index 000000000..259d7cde6 --- /dev/null +++ b/scripts/variate_generalization/Electricity/iFlowformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 diff --git a/scripts/variate_generalization/Electricity/iInformer.sh b/scripts/variate_generalization/Electricity/iInformer.sh new file mode 100644 
index 000000000..3a105dbc9 --- /dev/null +++ b/scripts/variate_generalization/Electricity/iInformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Informer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iInformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Electricity/iReformer.sh b/scripts/variate_generalization/Electricity/iReformer.sh new file mode 100644 index 000000000..64494f9f8 --- /dev/null +++ b/scripts/variate_generalization/Electricity/iReformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Electricity/iTransformer.sh b/scripts/variate_generalization/Electricity/iTransformer.sh index e69de29bb..6a81dd6d4 100644 --- a/scripts/variate_generalization/Electricity/iTransformer.sh +++ b/scripts/variate_generalization/Electricity/iTransformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=3 + +#model_name=Transformer +# +#python -u run.py \ +# --task_name long_term_forecast \ +# --is_training 1 \ +# --root_path ./dataset/electricity/ \ +# --data_path electricity.csv \ +# --model_id ECL_96_96 \ +# --model $model_name \ +# --data custom \ +# --features M \ +# --seq_len 96 \ +# --label_len 48 \ +# --pred_len 96 \ +# --e_layers 2 \ +# --d_layers 1 \ +# --factor 3 \ +# --enc_in 64 \ +# --dec_in 64 \ +# --c_out 64 \ +# --des 'Exp' \ +# --channel_independent true \ +# --exp_name partial_train \ +# --batch_size 8 \ +# --d_model 32 \ +# --d_ff 64 \ +# --itr 1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path 
./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Solar/iFlowformer.sh b/scripts/variate_generalization/Solar/iFlowformer.sh new file mode 100644 index 000000000..cd026533c --- /dev/null +++ b/scripts/variate_generalization/Solar/iFlowformer.sh @@ -0,0 +1,55 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --d_model 32 \ + --d_ff 64 \ + --learning_rate 0.0005 \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --itr 1 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --learning_rate 0.0005 \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Solar/iInformer.sh b/scripts/variate_generalization/Solar/iInformer.sh new file mode 100644 index 000000000..968d47a0c --- /dev/null +++ b/scripts/variate_generalization/Solar/iInformer.sh @@ -0,0 +1,55 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Informer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --d_model 32 \ + --d_ff 64 \ + --learning_rate 0.0005 \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --itr 1 + +model_name=iInformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --learning_rate 0.0005 \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Solar/iReformer.sh b/scripts/variate_generalization/Solar/iReformer.sh new file mode 100644 index 000000000..4660cd2ff --- /dev/null +++ b/scripts/variate_generalization/Solar/iReformer.sh @@ -0,0 +1,55 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name 
\ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --d_model 32 \ + --d_ff 64 \ + --learning_rate 0.0005 \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --itr 1 + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --learning_rate 0.0005 \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Solar/iTransformer.sh b/scripts/variate_generalization/Solar/iTransformer.sh new file mode 100644 index 000000000..c19304d94 --- /dev/null +++ b/scripts/variate_generalization/Solar/iTransformer.sh @@ -0,0 +1,55 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --d_model 32 \ + --d_ff 64 \ + --learning_rate 0.0005 \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --itr 1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --learning_rate 0.0005 \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Traffic/iFlowformer.sh b/scripts/variate_generalization/Traffic/iFlowformer.sh new file mode 100644 index 000000000..0c4580f33 --- /dev/null +++ b/scripts/variate_generalization/Traffic/iFlowformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 4 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 diff --git 
a/scripts/variate_generalization/Traffic/iInformer.sh b/scripts/variate_generalization/Traffic/iInformer.sh new file mode 100644 index 000000000..1c03c1350 --- /dev/null +++ b/scripts/variate_generalization/Traffic/iInformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Informer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 4 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iInformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 diff --git a/scripts/variate_generalization/Traffic/iReformer.sh b/scripts/variate_generalization/Traffic/iReformer.sh new file mode 100644 index 000000000..811e78599 --- /dev/null +++ b/scripts/variate_generalization/Traffic/iReformer.sh @@ -0,0 +1,54 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 4 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iReformer + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Traffic/iTransformer.sh b/scripts/variate_generalization/Traffic/iTransformer.sh new file mode 100644 index 000000000..04b6b9137 --- /dev/null +++ b/scripts/variate_generalization/Traffic/iTransformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path 
./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 diff --git a/utils/masking.py b/utils/masking.py new file mode 100644 index 000000000..a19cbf63b --- /dev/null +++ b/utils/masking.py @@ -0,0 +1,26 @@ +import torch + + +class TriangularCausalMask(): + def __init__(self, B, L, device="cpu"): + mask_shape = [B, 1, L, L] + with torch.no_grad(): + self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) + + @property + def mask(self): + return self._mask + + +class ProbMask(): + def __init__(self, B, H, L, index, scores, device="cpu"): + _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) + _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) + indicator = _mask_ex[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + index, :].to(device) + self._mask = indicator.view(scores.shape).to(device) + + @property + def mask(self): + return self._mask diff --git a/utils/metrics.py b/utils/metrics.py new file mode 100644 index 000000000..b4f5a76b8 --- /dev/null +++ b/utils/metrics.py @@ -0,0 +1,41 @@ +import numpy as np + + +def RSE(pred, true): + return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2)) + + +def CORR(pred, true): + u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0) + d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0)) + return (u / d).mean(-1) + + +def MAE(pred, true): + return np.mean(np.abs(pred - true)) + + +def MSE(pred, true): + return np.mean((pred - true) ** 2) + + +def RMSE(pred, true): + return np.sqrt(MSE(pred, true)) + + +def MAPE(pred, true): + return np.mean(np.abs((pred - true) / true)) + + +def MSPE(pred, true): + return np.mean(np.square((pred - true) / true)) + + +def metric(pred, true): + mae = MAE(pred, true) + mse = MSE(pred, true) + rmse = RMSE(pred, true) + mape = MAPE(pred, true) + mspe = MSPE(pred, true) + + return mae, mse, rmse, mape, mspe diff --git a/utils/timefeatures.py b/utils/timefeatures.py new file mode 100644 index 000000000..7c1297293 --- /dev/null +++ b/utils/timefeatures.py @@ -0,0 +1,148 @@ +# From: gluonts/src/gluonts/time_feature/_base.py +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. 
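[Editor's note, not part of the patch] For context, a minimal sketch of how the masking and metric helpers defined just above (utils/masking.py and utils/metrics.py) are typically consumed; the tensor shapes, the toy arrays, and the softmax step are illustrative assumptions, not code from this commit.

import numpy as np
import torch

from utils.masking import TriangularCausalMask
from utils.metrics import metric

# Build a causal mask for a toy batch of attention scores (shapes are illustrative).
B, H, L = 2, 4, 96
scores = torch.randn(B, H, L, L)
causal = TriangularCausalMask(B, L)                      # bool mask of shape [B, 1, L, L]
scores = scores.masked_fill(causal.mask, float('-inf'))  # hide attention to future positions
attn = torch.softmax(scores, dim=-1)

# Score a toy forecast with the metric helpers (random placeholders, not real results).
pred = np.random.randn(32, 96, 7)
true = np.random.randn(32, 96, 7)
mae, mse, rmse, mape, mspe = metric(pred, true)
print(f'MAE={mae:.4f}, MSE={mse:.4f}')
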
+ +from typing import List + +import numpy as np +import pandas as pd +from pandas.tseries import offsets +from pandas.tseries.frequencies import to_offset + + +class TimeFeature: + def __init__(self): + pass + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + pass + + def __repr__(self): + return self.__class__.__name__ + "()" + + +class SecondOfMinute(TimeFeature): + """Minute of hour encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.second / 59.0 - 0.5 + + +class MinuteOfHour(TimeFeature): + """Minute of hour encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.minute / 59.0 - 0.5 + + +class HourOfDay(TimeFeature): + """Hour of day encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.hour / 23.0 - 0.5 + + +class DayOfWeek(TimeFeature): + """Hour of day encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.dayofweek / 6.0 - 0.5 + + +class DayOfMonth(TimeFeature): + """Day of month encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.day - 1) / 30.0 - 0.5 + + +class DayOfYear(TimeFeature): + """Day of year encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.dayofyear - 1) / 365.0 - 0.5 + + +class MonthOfYear(TimeFeature): + """Month of year encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.month - 1) / 11.0 - 0.5 + + +class WeekOfYear(TimeFeature): + """Week of year encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.isocalendar().week - 1) / 52.0 - 0.5 + + +def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: + """ + Returns a list of time features that will be appropriate for the given frequency string. + Parameters + ---------- + freq_str + Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. 
+ """ + + features_by_offsets = { + offsets.YearEnd: [], + offsets.QuarterEnd: [MonthOfYear], + offsets.MonthEnd: [MonthOfYear], + offsets.Week: [DayOfMonth, WeekOfYear], + offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], + offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], + offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], + offsets.Minute: [ + MinuteOfHour, + HourOfDay, + DayOfWeek, + DayOfMonth, + DayOfYear, + ], + offsets.Second: [ + SecondOfMinute, + MinuteOfHour, + HourOfDay, + DayOfWeek, + DayOfMonth, + DayOfYear, + ], + } + + offset = to_offset(freq_str) + + for offset_type, feature_classes in features_by_offsets.items(): + if isinstance(offset, offset_type): + return [cls() for cls in feature_classes] + + supported_freq_msg = f""" + Unsupported frequency {freq_str} + The following frequencies are supported: + Y - yearly + alias: A + M - monthly + W - weekly + D - daily + B - business days + H - hourly + T - minutely + alias: min + S - secondly + """ + raise RuntimeError(supported_freq_msg) + + +def time_features(dates, freq='h'): + return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]) diff --git a/utils/tools.py b/utils/tools.py new file mode 100644 index 000000000..a357cc2c4 --- /dev/null +++ b/utils/tools.py @@ -0,0 +1,115 @@ +import os + +import numpy as np +import torch +import matplotlib.pyplot as plt +import pandas as pd + +plt.switch_backend('agg') + + +def adjust_learning_rate(optimizer, epoch, args): + # lr = args.learning_rate * (0.2 ** (epoch // 2)) + if args.lradj == 'type1': + lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))} + elif args.lradj == 'type2': + lr_adjust = { + 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, + 10: 5e-7, 15: 1e-7, 20: 5e-8 + } + if epoch in lr_adjust.keys(): + lr = lr_adjust[epoch] + for param_group in optimizer.param_groups: + param_group['lr'] = lr + print('Updating learning rate to {}'.format(lr)) + + +class EarlyStopping: + def __init__(self, patience=7, verbose=False, delta=0): + self.patience = patience + self.verbose = verbose + self.counter = 0 + self.best_score = None + self.early_stop = False + self.val_loss_min = np.Inf + self.delta = delta + + def __call__(self, val_loss, model, path): + score = -val_loss + if self.best_score is None: + self.best_score = score + self.save_checkpoint(val_loss, model, path) + elif score < self.best_score + self.delta: + self.counter += 1 + print(f'EarlyStopping counter: {self.counter} out of {self.patience}') + if self.counter >= self.patience: + self.early_stop = True + else: + self.best_score = score + self.save_checkpoint(val_loss, model, path) + self.counter = 0 + + def save_checkpoint(self, val_loss, model, path): + if self.verbose: + print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). 
Saving model ...') + torch.save(model.state_dict(), path + '/' + 'checkpoint.pth') + self.val_loss_min = val_loss + + +class dotdict(dict): + """dot.notation access to dictionary attributes""" + __getattr__ = dict.get + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ + + +class StandardScaler(): + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def transform(self, data): + return (data - self.mean) / self.std + + def inverse_transform(self, data): + return (data * self.std) + self.mean + + +def visual(true, preds=None, name='./pic/test.pdf'): + """ + Results visualization + """ + plt.figure() + plt.plot(true, label='GroundTruth', linewidth=2) + if preds is not None: + plt.plot(preds, label='Prediction', linewidth=2) + plt.legend() + plt.savefig(name, bbox_inches='tight') + + +def adjustment(gt, pred): + anomaly_state = False + for i in range(len(gt)): + if gt[i] == 1 and pred[i] == 1 and not anomaly_state: + anomaly_state = True + for j in range(i, 0, -1): + if gt[j] == 0: + break + else: + if pred[j] == 0: + pred[j] = 1 + for j in range(i, len(gt)): + if gt[j] == 0: + break + else: + if pred[j] == 0: + pred[j] = 1 + elif gt[i] == 0: + anomaly_state = False + if anomaly_state: + pred[i] = 1 + return gt, pred + + +def cal_accuracy(y_pred, y_true): + return np.mean(y_pred == y_true) \ No newline at end of file
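[Editor's note, not part of the patch] A minimal sketch of how EarlyStopping and adjust_learning_rate from utils/tools.py are usually wired into a training loop; the SimpleNamespace args, the toy model, and the placeholder validation loss are assumptions standing in for what run.py and the experiment classes construct.

import os
from types import SimpleNamespace

import torch

from utils.tools import EarlyStopping, adjust_learning_rate

# Hypothetical stand-ins for what run.py builds from its CLI arguments.
args = SimpleNamespace(learning_rate=1e-3, lradj='type1', train_epochs=10, patience=3)
model = torch.nn.Linear(96, 96)
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

path = './checkpoints/demo'
os.makedirs(path, exist_ok=True)
early_stopping = EarlyStopping(patience=args.patience, verbose=True)

for epoch in range(1, args.train_epochs + 1):
    # ... one training epoch would run here; use a placeholder validation loss ...
    vali_loss = 1.0 / epoch
    early_stopping(vali_loss, model, path)   # writes checkpoint.pth whenever the loss improves
    if early_stopping.early_stop:
        print('Early stopping')
        break
    adjust_learning_rate(optimizer, epoch + 1, args)  # 'type1' halves the learning rate each epoch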