From 7f4eb319a0e1682fc81591763d078dd2ac025055 Mon Sep 17 00:00:00 2001 From: ZDandsomSP <1115854107@qq.com> Date: Tue, 24 Oct 2023 22:02:49 +0800 Subject: [PATCH] init code --- data_provider/data_factory.py | 94 ++ data_provider/data_loader.py | 856 ++++++++++++++++++ data_provider/m4.py | 138 +++ data_provider/uea.py | 125 +++ experiments/exp_basic.py | 50 + experiments/exp_long_term_forecasting.py | 275 ++++++ .../exp_long_term_forecasting_partial.py | 320 +++++++ layers/Embed.py | 190 ++++ layers/SelfAttention_Family.py | 420 +++++++++ layers/Transformer_EncDec.py | 135 +++ model/Flashformer.py | 136 +++ model/Flowformer.py | 145 +++ model/Informer.py | 166 ++++ model/Reformer.py | 163 ++++ model/Transformer.py | 146 +++ model/iFlashformer.py | 186 ++++ model/iFlowformer.py | 185 ++++ model/iInformer.py | 186 ++++ model/iReformer.py | 185 ++++ model/iTransformer.py | 165 ++++ run.py | 180 ++++ scripts/boost_performance/ECL/iFlowformer.sh | 174 ++++ scripts/boost_performance/ECL/iInformer.sh | 174 ++++ scripts/boost_performance/ECL/iReformer.sh | 174 ++++ scripts/boost_performance/ECL/iTransformer.sh | 174 ++++ .../boost_performance/Traffic/iFlowformer.sh | 182 ++++ .../boost_performance/Traffic/iInformer.sh | 182 ++++ .../boost_performance/Traffic/iReformer.sh | 182 ++++ .../boost_performance/Traffic/iTransformer.sh | 182 ++++ .../boost_performance/Weather/iFlowformer.sh | 183 ++++ .../boost_performance/Weather/iInformer.sh | 183 ++++ .../boost_performance/Weather/iReformer.sh | 183 ++++ .../boost_performance/Weather/iTransformer.sh | 183 ++++ .../ECL/iFlashTransformer.sh | 173 ++++ .../Traffic/iFlashTransformer.sh | 181 ++++ .../Weather/iFlashTransformer.sh | 183 ++++ .../efficient_attentions/iFlashTransformer.sh | 0 .../increasing_lookback/ECL/iFlowformer.sh | 128 +++ scripts/increasing_lookback/ECL/iInformer.sh | 128 +++ scripts/increasing_lookback/ECL/iReformer.sh | 128 +++ .../increasing_lookback/ECL/iTransformer.sh | 128 +++ .../Traffic/iFlowformer.sh | 128 +++ .../increasing_lookback/Traffic/iInformer.sh | 128 +++ .../increasing_lookback/Traffic/iReformer.sh | 128 +++ .../Traffic/iTransformer.sh | 128 +++ .../multivariate_forecast/ECL/iTransformer.sh | 105 +++ .../ETTh2/iTransformer.sh | 95 ++ .../Pems/iTransformer.sh | 101 +++ .../Solar/iTransformer.sh | 99 ++ .../Traffic/iTransformer.sh | 103 +++ .../Weather/iTransformer.sh | 98 ++ .../Electricity/iFlowformer.sh | 53 ++ .../Electricity/iInformer.sh | 53 ++ .../Electricity/iReformer.sh | 53 ++ .../Electricity/iTransformer.sh | 53 ++ .../Solar/iFlowformer.sh | 55 ++ .../variate_generalization/Solar/iInformer.sh | 55 ++ .../variate_generalization/Solar/iReformer.sh | 55 ++ .../Solar/iTransformer.sh | 55 ++ .../Traffic/iFlowformer.sh | 53 ++ .../Traffic/iInformer.sh | 53 ++ .../Traffic/iReformer.sh | 54 ++ .../Traffic/iTransformer.sh | 53 ++ utils/masking.py | 26 + utils/metrics.py | 41 + utils/timefeatures.py | 148 +++ utils/tools.py | 115 +++ 67 files changed, 9739 insertions(+) create mode 100644 data_provider/data_factory.py create mode 100644 data_provider/data_loader.py create mode 100644 data_provider/m4.py create mode 100644 data_provider/uea.py create mode 100644 experiments/exp_long_term_forecasting.py create mode 100644 experiments/exp_long_term_forecasting_partial.py create mode 100644 layers/Embed.py create mode 100644 layers/SelfAttention_Family.py create mode 100644 layers/Transformer_EncDec.py create mode 100644 model/Flashformer.py create mode 100644 model/Flowformer.py create mode 100644 model/Informer.py create 
mode 100644 model/Reformer.py create mode 100644 model/Transformer.py create mode 100644 model/iFlashformer.py create mode 100644 model/iFlowformer.py create mode 100644 model/iInformer.py create mode 100644 model/iReformer.py create mode 100644 model/iTransformer.py create mode 100644 run.py create mode 100644 scripts/boost_performance/ECL/iFlowformer.sh create mode 100644 scripts/boost_performance/ECL/iInformer.sh create mode 100644 scripts/boost_performance/ECL/iReformer.sh create mode 100644 scripts/boost_performance/ECL/iTransformer.sh create mode 100644 scripts/boost_performance/Traffic/iFlowformer.sh create mode 100644 scripts/boost_performance/Traffic/iInformer.sh create mode 100644 scripts/boost_performance/Traffic/iReformer.sh create mode 100644 scripts/boost_performance/Traffic/iTransformer.sh create mode 100644 scripts/boost_performance/Weather/iFlowformer.sh create mode 100644 scripts/boost_performance/Weather/iInformer.sh create mode 100644 scripts/boost_performance/Weather/iReformer.sh create mode 100644 scripts/efficient_attentions/ECL/iFlashTransformer.sh create mode 100644 scripts/efficient_attentions/Traffic/iFlashTransformer.sh create mode 100644 scripts/efficient_attentions/Weather/iFlashTransformer.sh delete mode 100644 scripts/efficient_attentions/iFlashTransformer.sh create mode 100644 scripts/increasing_lookback/ECL/iFlowformer.sh create mode 100644 scripts/increasing_lookback/ECL/iInformer.sh create mode 100644 scripts/increasing_lookback/ECL/iReformer.sh create mode 100644 scripts/increasing_lookback/ECL/iTransformer.sh create mode 100644 scripts/increasing_lookback/Traffic/iFlowformer.sh create mode 100644 scripts/increasing_lookback/Traffic/iInformer.sh create mode 100644 scripts/increasing_lookback/Traffic/iReformer.sh create mode 100644 scripts/multivariate_forecast/ECL/iTransformer.sh create mode 100644 scripts/multivariate_forecast/ETTh2/iTransformer.sh create mode 100644 scripts/multivariate_forecast/Pems/iTransformer.sh create mode 100644 scripts/multivariate_forecast/Solar/iTransformer.sh create mode 100644 scripts/multivariate_forecast/Weather/iTransformer.sh create mode 100644 scripts/variate_generalization/Electricity/iFlowformer.sh create mode 100644 scripts/variate_generalization/Electricity/iInformer.sh create mode 100644 scripts/variate_generalization/Electricity/iReformer.sh create mode 100644 scripts/variate_generalization/Solar/iFlowformer.sh create mode 100644 scripts/variate_generalization/Solar/iInformer.sh create mode 100644 scripts/variate_generalization/Solar/iReformer.sh create mode 100644 scripts/variate_generalization/Solar/iTransformer.sh create mode 100644 scripts/variate_generalization/Traffic/iFlowformer.sh create mode 100644 scripts/variate_generalization/Traffic/iInformer.sh create mode 100644 scripts/variate_generalization/Traffic/iReformer.sh create mode 100644 scripts/variate_generalization/Traffic/iTransformer.sh create mode 100644 utils/masking.py create mode 100644 utils/metrics.py create mode 100644 utils/timefeatures.py create mode 100644 utils/tools.py diff --git a/data_provider/data_factory.py b/data_provider/data_factory.py new file mode 100644 index 000000000..56820ca9b --- /dev/null +++ b/data_provider/data_factory.py @@ -0,0 +1,94 @@ +from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4, PSMSegLoader, \ + MSLSegLoader, SMAPSegLoader, SMDSegLoader, SWATSegLoader, UEAloader, Dataset_Solar, Dataset_PEMS +from data_provider.uea import collate_fn +from torch.utils.data 
import DataLoader + +data_dict = { + 'ETTh1': Dataset_ETT_hour, + 'ETTh2': Dataset_ETT_hour, + 'ETTm1': Dataset_ETT_minute, + 'ETTm2': Dataset_ETT_minute, + 'Solar': Dataset_Solar, + 'PEMS':Dataset_PEMS, + 'custom': Dataset_Custom, + 'm4': Dataset_M4, + 'PSM': PSMSegLoader, + 'MSL': MSLSegLoader, + 'SMAP': SMAPSegLoader, + 'SMD': SMDSegLoader, + 'SWAT': SWATSegLoader, + 'UEA': UEAloader +} + + +def data_provider(args, flag): + Data = data_dict[args.data] + timeenc = 0 if args.embed != 'timeF' else 1 + + if flag == 'test': + shuffle_flag = False + drop_last = True + if args.task_name == 'anomaly_detection' or args.task_name == 'classification': + batch_size = args.batch_size + else: + batch_size = 1 # bsz=1 for evaluation + freq = args.freq + else: + shuffle_flag = True + drop_last = True + batch_size = args.batch_size # bsz for train and valid + freq = args.freq + + if args.task_name == 'anomaly_detection': + drop_last = False + data_set = Data( + root_path=args.root_path, + win_size=args.seq_len, + flag=flag, + ) + print(flag, len(data_set)) + data_loader = DataLoader( + data_set, + batch_size=batch_size, + shuffle=shuffle_flag, + num_workers=args.num_workers, + drop_last=drop_last) + return data_set, data_loader + elif args.task_name == 'classification': + drop_last = False + data_set = Data( + root_path=args.root_path, + flag=flag, + ) + + data_loader = DataLoader( + data_set, + batch_size=batch_size, + shuffle=shuffle_flag, + num_workers=args.num_workers, + drop_last=drop_last, + collate_fn=lambda x: collate_fn(x, max_len=args.seq_len) + ) + return data_set, data_loader + else: + if args.data == 'm4': + drop_last = False + data_set = Data( + root_path=args.root_path, + data_path=args.data_path, + flag=flag, + size=[args.seq_len, args.label_len, args.pred_len], + features=args.features, + target=args.target, + timeenc=timeenc, + freq=freq, + seasonal_patterns=args.seasonal_patterns + ) + print(flag, len(data_set)) + data_loader = DataLoader( + data_set, + batch_size=batch_size, + shuffle=shuffle_flag, + num_workers=args.num_workers, + drop_last=drop_last) + return data_set, data_loader diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py new file mode 100644 index 000000000..4dbe726a6 --- /dev/null +++ b/data_provider/data_loader.py @@ -0,0 +1,856 @@ +import os +import numpy as np +import pandas as pd +import glob +import re +import torch +from torch.utils.data import Dataset, DataLoader +from sklearn.preprocessing import StandardScaler +from utils.timefeatures import time_features +from data_provider.m4 import M4Dataset, M4Meta +from data_provider.uea import subsample, interpolate_missing, Normalizer +from sktime.datasets import load_from_tsfile_to_dataframe +import warnings + +warnings.filterwarnings('ignore') + + +class Dataset_ETT_hour(Dataset): + def __init__(self, root_path, flag='train', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + # info + if size == None: + self.seq_len = 24 * 4 * 4 + self.label_len = 24 * 4 + self.pred_len = 24 * 4 + else: + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + self.data_path = data_path + 
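
For reference, a minimal sketch of how the data_provider factory defined in data_provider/data_factory.py above is typically driven. Only the field names are taken from the factory code; the SimpleNamespace wrapper and the concrete values are illustrative assumptions, not part of this patch.

    from types import SimpleNamespace
    from data_provider.data_factory import data_provider

    # Hypothetical arguments; the field names mirror those read inside data_provider above.
    args = SimpleNamespace(
        task_name='long_term_forecast', data='ETTh1', embed='timeF', freq='h',
        root_path='./dataset/ETT-small/', data_path='ETTh1.csv',
        features='M', target='OT', seq_len=96, label_len=48, pred_len=96,
        batch_size=32, num_workers=0, seasonal_patterns=None)

    train_set, train_loader = data_provider(args, flag='train')  # shuffled, batch_size=32
    test_set, test_loader = data_provider(args, flag='test')     # batch_size forced to 1 for forecasting
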
self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + df_raw = pd.read_csv(os.path.join(self.root_path, + self.data_path)) + + border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len] + border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24] + border1 = border1s[self.set_type] + border2 = border2s[self.set_type] + + if self.features == 'M' or self.features == 'MS': + cols_data = df_raw.columns[1:] + df_data = df_raw[cols_data] + elif self.features == 'S': + df_data = df_raw[[self.target]] + + if self.scale: + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data.values) + data = self.scaler.transform(df_data.values) + else: + data = df_data.values + + df_stamp = df_raw[['date']][border1:border2] + df_stamp['date'] = pd.to_datetime(df_stamp.date) + if self.timeenc == 0: + df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) + df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) + df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) + df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) + data_stamp = df_stamp.drop(['date'], 1).values + elif self.timeenc == 1: + data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) + data_stamp = data_stamp.transpose(1, 0) + + self.data_x = data[border1:border2] + self.data_y = data[border1:border2] + self.data_stamp = data_stamp + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = self.data_stamp[s_begin:s_end] + seq_y_mark = self.data_stamp[r_begin:r_end] + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + +class Dataset_ETT_minute(Dataset): + def __init__(self, root_path, flag='train', size=None, + features='S', data_path='ETTm1.csv', + target='OT', scale=True, timeenc=0, freq='t', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + # info + if size == None: + self.seq_len = 24 * 4 * 4 + self.label_len = 24 * 4 + self.pred_len = 24 * 4 + else: + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + self.data_path = data_path + self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + df_raw = pd.read_csv(os.path.join(self.root_path, + self.data_path)) + + border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len] + border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4] + border1 = border1s[self.set_type] + border2 = border2s[self.set_type] + + if self.features == 'M' or self.features == 'MS': + cols_data = df_raw.columns[1:] + df_data = df_raw[cols_data] + elif self.features == 'S': + df_data = df_raw[[self.target]] + + if self.scale: + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data.values) + data = self.scaler.transform(df_data.values) + else: + 
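
To make the sliding-window indexing of Dataset_ETT_hour.__getitem__ above concrete, here is a self-contained sketch of the same arithmetic with illustrative window sizes (the class defaults above are seq_len=384, label_len=96, pred_len=96):

    import numpy as np

    seq_len, label_len, pred_len = 96, 48, 96      # illustrative sizes
    data = np.arange(1000).reshape(-1, 1)          # stand-in for the scaled series

    index = 10
    s_begin, s_end = index, index + seq_len                  # encoder window: rows 10..105
    r_begin, r_end = s_end - label_len, s_end + pred_len     # decoder window: rows 58..201

    seq_x = data[s_begin:s_end]    # shape (96, 1): model input
    seq_y = data[r_begin:r_end]    # shape (144, 1): 48 overlapping "label" steps + 96 steps to predict
    assert len(data) - seq_len - pred_len + 1 == 809   # the count returned by __len__ for this toy series
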
data = df_data.values + + df_stamp = df_raw[['date']][border1:border2] + df_stamp['date'] = pd.to_datetime(df_stamp.date) + if self.timeenc == 0: + df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) + df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) + df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) + df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) + df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute, 1) + df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15) + data_stamp = df_stamp.drop(['date'], 1).values + elif self.timeenc == 1: + data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) + data_stamp = data_stamp.transpose(1, 0) + + self.data_x = data[border1:border2] + self.data_y = data[border1:border2] + self.data_stamp = data_stamp + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = self.data_stamp[s_begin:s_end] + seq_y_mark = self.data_stamp[r_begin:r_end] + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + +class Dataset_Custom(Dataset): + def __init__(self, root_path, flag='train', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + # info + if size == None: + self.seq_len = 24 * 4 * 4 + self.label_len = 24 * 4 + self.pred_len = 24 * 4 + else: + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + self.data_path = data_path + self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + df_raw = pd.read_csv(os.path.join(self.root_path, + self.data_path)) + + ''' + df_raw.columns: ['date', ...(other features), target feature] + ''' + cols = list(df_raw.columns) + cols.remove(self.target) + cols.remove('date') + df_raw = df_raw[['date'] + cols + [self.target]] + num_train = int(len(df_raw) * 0.7) + num_test = int(len(df_raw) * 0.2) + num_vali = len(df_raw) - num_train - num_test + border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] + border2s = [num_train, num_train + num_vali, len(df_raw)] + border1 = border1s[self.set_type] + border2 = border2s[self.set_type] + + if self.features == 'M' or self.features == 'MS': + cols_data = df_raw.columns[1:] + df_data = df_raw[cols_data] + elif self.features == 'S': + df_data = df_raw[[self.target]] + + if self.scale: + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data.values) + data = self.scaler.transform(df_data.values) + else: + data = df_data.values + + df_stamp = df_raw[['date']][border1:border2] + df_stamp['date'] = pd.to_datetime(df_stamp.date) + if self.timeenc == 0: + df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) + df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) + df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 
+ df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) + data_stamp = df_stamp.drop(['date'], 1).values + elif self.timeenc == 1: + data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) + data_stamp = data_stamp.transpose(1, 0) + + self.data_x = data[border1:border2] + self.data_y = data[border1:border2] + self.data_stamp = data_stamp + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = self.data_stamp[s_begin:s_end] + seq_y_mark = self.data_stamp[r_begin:r_end] + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + +class Dataset_PEMS(Dataset): + def __init__(self, root_path, flag='train', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + # info + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + self.data_path = data_path + self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + data_file = os.path.join(self.root_path, self.data_path) + data = np.load(data_file, allow_pickle=True) + data = data['data'][:, :, 0] + + train_ratio = 0.6 + valid_ratio = 0.2 + train_data = data[:int(train_ratio * len(data))] + valid_data = data[int(train_ratio * len(data)): int((train_ratio + valid_ratio) * len(data))] + test_data = data[int((train_ratio + valid_ratio) * len(data)):] + total_data = [train_data, valid_data, test_data] + data = total_data[self.set_type] + + if self.scale: + self.scaler.fit(train_data) + data = self.scaler.transform(data) + + df = pd.DataFrame(data) + df = df.fillna(method='ffill', limit=len(df)).fillna(method='bfill', limit=len(df)).values + + self.data_x = df + self.data_y = df + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = torch.zeros((seq_x.shape[0], 1)) + seq_y_mark = torch.zeros((seq_x.shape[0], 1)) + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + +class Dataset_Solar(Dataset): + def __init__(self, root_path, flag='train', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + # info + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + 
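
The ratio-based split used by Dataset_Custom above (and, with different ratios, by Dataset_PEMS and Dataset_Solar) is easiest to see with concrete numbers; the series length below is hypothetical:

    n, seq_len = 10000, 96                        # hypothetical series length and lookback
    num_train = int(n * 0.7)                      # 7000
    num_test = int(n * 0.2)                       # 2000
    num_vali = n - num_train - num_test           # 1000

    border1s = [0, num_train - seq_len, n - num_test - seq_len]   # [0, 6904, 7904]
    border2s = [num_train, num_train + num_vali, n]               # [7000, 8000, 10000]
    # val/test start seq_len rows early so their first window is fully observed,
    # while the scaler is fit only on the training slice border1s[0]:border2s[0].
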
self.data_path = data_path + self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + df_raw = [] + with open(os.path.join(self.root_path, self.data_path), "r", encoding='utf-8') as f: + for line in f.readlines(): + line = line.strip('\n').split(',') + data_line = np.stack([float(i) for i in line]) + df_raw.append(data_line) + df_raw = np.stack(df_raw, 0) + df_raw = pd.DataFrame(df_raw) + + num_train = int(len(df_raw) * 0.7) + num_test = int(len(df_raw) * 0.2) + num_valid = int(len(df_raw) * 0.1) + border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] + border2s = [num_train, num_train + num_valid, len(df_raw)] + border1 = border1s[self.set_type] + border2 = border2s[self.set_type] + + df_data = df_raw.values + + if self.scale: + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data) + data = self.scaler.transform(df_data) + else: + data = df_data + + self.data_x = data[border1:border2] + self.data_y = data[border1:border2] + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = torch.zeros((seq_x.shape[0], 1)) + seq_y_mark = torch.zeros((seq_x.shape[0], 1)) + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + +class Dataset_M4(Dataset): + def __init__(self, root_path, flag='pred', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=False, inverse=False, timeenc=0, freq='15min', + seasonal_patterns='Yearly'): + # size [seq_len, label_len, pred_len] + # init + self.features = features + self.target = target + self.scale = scale + self.inverse = inverse + self.timeenc = timeenc + self.root_path = root_path + + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + + self.seasonal_patterns = seasonal_patterns + self.history_size = M4Meta.history_size[seasonal_patterns] + self.window_sampling_limit = int(self.history_size * self.pred_len) + self.flag = flag + + self.__read_data__() + + def __read_data__(self): + # M4Dataset.initialize() + if self.flag == 'train': + dataset = M4Dataset.load(training=True, dataset_file=self.root_path) + else: + dataset = M4Dataset.load(training=False, dataset_file=self.root_path) + training_values = np.array( + [v[~np.isnan(v)] for v in + dataset.values[dataset.groups == self.seasonal_patterns]]) # split different frequencies + self.ids = np.array([i for i in dataset.ids[dataset.groups == self.seasonal_patterns]]) + self.timeseries = [ts for ts in training_values] + + def __getitem__(self, index): + insample = np.zeros((self.seq_len, 1)) + insample_mask = np.zeros((self.seq_len, 1)) + outsample = np.zeros((self.pred_len + self.label_len, 1)) + outsample_mask = np.zeros((self.pred_len + self.label_len, 1)) # m4 dataset + + sampled_timeseries = self.timeseries[index] + cut_point = np.random.randint(low=max(1, len(sampled_timeseries) - self.window_sampling_limit), + high=len(sampled_timeseries), + size=1)[0] + + insample_window = sampled_timeseries[max(0, cut_point - self.seq_len):cut_point] + insample[-len(insample_window):, 0] = insample_window + insample_mask[-len(insample_window):, 0] = 1.0 + outsample_window = sampled_timeseries[ + cut_point - 
self.label_len:min(len(sampled_timeseries), cut_point + self.pred_len)] + outsample[:len(outsample_window), 0] = outsample_window + outsample_mask[:len(outsample_window), 0] = 1.0 + return insample, outsample, insample_mask, outsample_mask + + def __len__(self): + return len(self.timeseries) + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + def last_insample_window(self): + """ + The last window of insample size of all timeseries. + This function does not support batching and does not reshuffle timeseries. + + :return: Last insample window of all timeseries. Shape "timeseries, insample size" + """ + insample = np.zeros((len(self.timeseries), self.seq_len)) + insample_mask = np.zeros((len(self.timeseries), self.seq_len)) + for i, ts in enumerate(self.timeseries): + ts_last_window = ts[-self.seq_len:] + insample[i, -len(ts):] = ts_last_window + insample_mask[i, -len(ts):] = 1.0 + return insample, insample_mask + + +class PSMSegLoader(Dataset): + def __init__(self, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = pd.read_csv(os.path.join(root_path, 'train.csv')) + data = data.values[:, 1:] + data = np.nan_to_num(data) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = pd.read_csv(os.path.join(root_path, 'test.csv')) + test_data = test_data.values[:, 1:] + test_data = np.nan_to_num(test_data) + self.test = self.scaler.transform(test_data) + self.train = data + self.val = self.test + self.test_labels = pd.read_csv(os.path.join(root_path, 'test_label.csv')).values[:, 1:] + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class MSLSegLoader(Dataset): + def __init__(self, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = np.load(os.path.join(root_path, "MSL_train.npy")) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = np.load(os.path.join(root_path, "MSL_test.npy")) + self.test = self.scaler.transform(test_data) + self.train = data + self.val = self.test + self.test_labels = np.load(os.path.join(root_path, "MSL_test_label.npy")) + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // 
self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class SMAPSegLoader(Dataset): + def __init__(self, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = np.load(os.path.join(root_path, "SMAP_train.npy")) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = np.load(os.path.join(root_path, "SMAP_test.npy")) + self.test = self.scaler.transform(test_data) + self.train = data + self.val = self.test + self.test_labels = np.load(os.path.join(root_path, "SMAP_test_label.npy")) + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class SMDSegLoader(Dataset): + def __init__(self, root_path, win_size, step=100, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = np.load(os.path.join(root_path, "SMD_train.npy")) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = np.load(os.path.join(root_path, "SMD_test.npy")) + self.test = self.scaler.transform(test_data) + self.train = data + data_len = len(self.train) + self.val = self.train[(int)(data_len * 0.8):] + self.test_labels = np.load(os.path.join(root_path, "SMD_test_label.npy")) + + def __len__(self): + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif 
(self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class SWATSegLoader(Dataset): + def __init__(self, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + + train_data = pd.read_csv(os.path.join(root_path, 'swat_train2.csv')) + test_data = pd.read_csv(os.path.join(root_path, 'swat2.csv')) + labels = test_data.values[:, -1:] + train_data = train_data.values[:, :-1] + test_data = test_data.values[:, :-1] + + self.scaler.fit(train_data) + train_data = self.scaler.transform(train_data) + test_data = self.scaler.transform(test_data) + self.train = train_data + self.test = test_data + self.val = test_data + self.test_labels = labels + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + """ + Number of images in the object dataset. + """ + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class UEAloader(Dataset): + """ + Dataset class for datasets included in: + Time Series Classification Archive (www.timeseriesclassification.com) + Argument: + limit_size: float in (0, 1) for debug + Attributes: + all_df: (num_samples * seq_len, num_columns) dataframe indexed by integer indices, with multiple rows corresponding to the same index (sample). + Each row is a time step; Each column contains either metadata (e.g. timestamp) or a feature. 
+ feature_df: (num_samples * seq_len, feat_dim) dataframe; contains the subset of columns of `all_df` which correspond to selected features + feature_names: names of columns contained in `feature_df` (same as feature_df.columns) + all_IDs: (num_samples,) series of IDs contained in `all_df`/`feature_df` (same as all_df.index.unique() ) + labels_df: (num_samples, num_labels) pd.DataFrame of label(s) for each sample + max_seq_len: maximum sequence (time series) length. If None, script argument `max_seq_len` will be used. + (Moreover, script argument overrides this attribute) + """ + + def __init__(self, root_path, file_list=None, limit_size=None, flag=None): + self.root_path = root_path + self.all_df, self.labels_df = self.load_all(root_path, file_list=file_list, flag=flag) + self.all_IDs = self.all_df.index.unique() # all sample IDs (integer indices 0 ... num_samples-1) + + if limit_size is not None: + if limit_size > 1: + limit_size = int(limit_size) + else: # interpret as proportion if in (0, 1] + limit_size = int(limit_size * len(self.all_IDs)) + self.all_IDs = self.all_IDs[:limit_size] + self.all_df = self.all_df.loc[self.all_IDs] + + # use all features + self.feature_names = self.all_df.columns + self.feature_df = self.all_df + + # pre_process + normalizer = Normalizer() + self.feature_df = normalizer.normalize(self.feature_df) + print(len(self.all_IDs)) + + def load_all(self, root_path, file_list=None, flag=None): + """ + Loads datasets from csv files contained in `root_path` into a dataframe, optionally choosing from `pattern` + Args: + root_path: directory containing all individual .csv files + file_list: optionally, provide a list of file paths within `root_path` to consider. + Otherwise, entire `root_path` contents will be used. + Returns: + all_df: a single (possibly concatenated) dataframe with all data corresponding to specified files + labels_df: dataframe containing label(s) for each sample + """ + # Select paths for training and evaluation + if file_list is None: + data_paths = glob.glob(os.path.join(root_path, '*')) # list of all paths + else: + data_paths = [os.path.join(root_path, p) for p in file_list] + if len(data_paths) == 0: + raise Exception('No files found using: {}'.format(os.path.join(root_path, '*'))) + if flag is not None: + data_paths = list(filter(lambda x: re.search(flag, x), data_paths)) + input_paths = [p for p in data_paths if os.path.isfile(p) and p.endswith('.ts')] + if len(input_paths) == 0: + raise Exception("No .ts files found using pattern: '{}'".format(pattern)) + + all_df, labels_df = self.load_single(input_paths[0]) # a single file contains dataset + + return all_df, labels_df + + def load_single(self, filepath): + df, labels = load_from_tsfile_to_dataframe(filepath, return_separate_X_and_y=True, + replace_missing_vals_with='NaN') + labels = pd.Series(labels, dtype="category") + self.class_names = labels.cat.categories + labels_df = pd.DataFrame(labels.cat.codes, + dtype=np.int8) # int8-32 gives an error when using nn.CrossEntropyLoss + + lengths = df.applymap( + lambda x: len(x)).values # (num_samples, num_dimensions) array containing the length of each series + + horiz_diffs = np.abs(lengths - np.expand_dims(lengths[:, 0], -1)) + + if np.sum(horiz_diffs) > 0: # if any row (sample) has varying length across dimensions + df = df.applymap(subsample) + + lengths = df.applymap(lambda x: len(x)).values + vert_diffs = np.abs(lengths - np.expand_dims(lengths[0, :], 0)) + if np.sum(vert_diffs) > 0: # if any column (dimension) has varying length across 
samples + self.max_seq_len = int(np.max(lengths[:, 0])) + else: + self.max_seq_len = lengths[0, 0] + + # First create a (seq_len, feat_dim) dataframe for each sample, indexed by a single integer ("ID" of the sample) + # Then concatenate into a (num_samples * seq_len, feat_dim) dataframe, with multiple rows corresponding to the + # sample index (i.e. the same scheme as all datasets in this project) + + df = pd.concat((pd.DataFrame({col: df.loc[row, col] for col in df.columns}).reset_index(drop=True).set_index( + pd.Series(lengths[row, 0] * [row])) for row in range(df.shape[0])), axis=0) + + # Replace NaN values + grp = df.groupby(by=df.index) + df = grp.transform(interpolate_missing) + + return df, labels_df + + def instance_norm(self, case): + if self.root_path.count('EthanolConcentration') > 0: # special process for numerical stability + mean = case.mean(0, keepdim=True) + case = case - mean + stdev = torch.sqrt(torch.var(case, dim=1, keepdim=True, unbiased=False) + 1e-5) + case /= stdev + return case + else: + return case + + def __getitem__(self, ind): + return self.instance_norm(torch.from_numpy(self.feature_df.loc[self.all_IDs[ind]].values)), \ + torch.from_numpy(self.labels_df.loc[self.all_IDs[ind]].values) + + def __len__(self): + return len(self.all_IDs) diff --git a/data_provider/m4.py b/data_provider/m4.py new file mode 100644 index 000000000..8a2e3f1a0 --- /dev/null +++ b/data_provider/m4.py @@ -0,0 +1,138 @@ +# This source code is provided for the purposes of scientific reproducibility +# under the following limited license from Element AI Inc. The code is an +# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis +# expansion analysis for interpretable time series forecasting, +# https://arxiv.org/abs/1905.10437). The copyright to the source code is +# licensed under the Creative Commons - Attribution-NonCommercial 4.0 +# International license (CC BY-NC 4.0): +# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether +# for the benefit of third parties or internally in production) requires an +# explicit license. The subject-matter of the N-BEATS model and associated +# materials are the property of Element AI Inc. and may be subject to patent +# protection. No license to patents is granted hereunder (whether express or +# implied). Copyright © 2020 Element AI Inc. All rights reserved. + +""" +M4 Dataset +""" +import logging +import os +from collections import OrderedDict +from dataclasses import dataclass +from glob import glob + +import numpy as np +import pandas as pd +import patoolib +from tqdm import tqdm +import logging +import os +import pathlib +import sys +from urllib import request + + +def url_file_name(url: str) -> str: + """ + Extract file name from url. + + :param url: URL to extract file name from. + :return: File name. + """ + return url.split('/')[-1] if len(url) > 0 else '' + + +def download(url: str, file_path: str) -> None: + """ + Download a file to the given path. + + :param url: URL to download + :param file_path: Where to download the content. 
+ """ + + def progress(count, block_size, total_size): + progress_pct = float(count * block_size) / float(total_size) * 100.0 + sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(url, file_path, progress_pct)) + sys.stdout.flush() + + if not os.path.isfile(file_path): + opener = request.build_opener() + opener.addheaders = [('User-agent', 'Mozilla/5.0')] + request.install_opener(opener) + pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True) + f, _ = request.urlretrieve(url, file_path, progress) + sys.stdout.write('\n') + sys.stdout.flush() + file_info = os.stat(f) + logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.') + else: + file_info = os.stat(file_path) + logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.') + + +@dataclass() +class M4Dataset: + ids: np.ndarray + groups: np.ndarray + frequencies: np.ndarray + horizons: np.ndarray + values: np.ndarray + + @staticmethod + def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset': + """ + Load cached dataset. + + :param training: Load training part if_inverted training is True, test part otherwise. + """ + info_file = os.path.join(dataset_file, 'M4-info.csv') + train_cache_file = os.path.join(dataset_file, 'training.npz') + test_cache_file = os.path.join(dataset_file, 'test.npz') + m4_info = pd.read_csv(info_file) + return M4Dataset(ids=m4_info.M4id.values, + groups=m4_info.SP.values, + frequencies=m4_info.Frequency.values, + horizons=m4_info.Horizon.values, + values=np.load( + train_cache_file if training else test_cache_file, + allow_pickle=True)) + + +@dataclass() +class M4Meta: + seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly'] + horizons = [6, 8, 18, 13, 14, 48] + frequencies = [1, 4, 12, 1, 1, 24] + horizons_map = { + 'Yearly': 6, + 'Quarterly': 8, + 'Monthly': 18, + 'Weekly': 13, + 'Daily': 14, + 'Hourly': 48 + } # different predict length + frequency_map = { + 'Yearly': 1, + 'Quarterly': 4, + 'Monthly': 12, + 'Weekly': 1, + 'Daily': 1, + 'Hourly': 24 + } + history_size = { + 'Yearly': 1.5, + 'Quarterly': 1.5, + 'Monthly': 1.5, + 'Weekly': 10, + 'Daily': 10, + 'Hourly': 10 + } # from interpretable.gin + + +def load_m4_info() -> pd.DataFrame: + """ + Load M4Info file. + + :return: Pandas DataFrame of M4Info. + """ + return pd.read_csv(INFO_FILE_PATH) diff --git a/data_provider/uea.py b/data_provider/uea.py new file mode 100644 index 000000000..f0dd0ab9e --- /dev/null +++ b/data_provider/uea.py @@ -0,0 +1,125 @@ +import os +import numpy as np +import pandas as pd +import torch + + +def collate_fn(data, max_len=None): + """Build mini-batch tensors from a list of (X, mask) tuples. Mask input. Create + Args: + data: len(batch_size) list of tuples (X, y). + - X: torch tensor of shape (seq_length, feat_dim); variable seq_length. + - y: torch tensor of shape (num_labels,) : class indices or numerical targets + (for classification or regression, respectively). num_labels > 1 for multi-task models + max_len: global fixed sequence length. Used for architectures requiring fixed length input, + where the batch length cannot vary dynamically. 
Longer sequences are clipped, shorter are padded with 0s + Returns: + X: (batch_size, padded_length, feat_dim) torch tensor of masked features (input) + targets: (batch_size, padded_length, feat_dim) torch tensor of unmasked features (output) + target_masks: (batch_size, padded_length, feat_dim) boolean torch tensor + 0 indicates masked values to be predicted, 1 indicates unaffected/"active" feature values + padding_masks: (batch_size, padded_length) boolean tensor, 1 means keep vector at this position, 0 means padding + """ + + batch_size = len(data) + features, labels = zip(*data) + + # Stack and pad features and masks (convert 2D to 3D tensors, i.e. add batch dimension) + lengths = [X.shape[0] for X in features] # original sequence length for each time series + if max_len is None: + max_len = max(lengths) + + X = torch.zeros(batch_size, max_len, features[0].shape[-1]) # (batch_size, padded_length, feat_dim) + for i in range(batch_size): + end = min(lengths[i], max_len) + X[i, :end, :] = features[i][:end, :] + + targets = torch.stack(labels, dim=0) # (batch_size, num_labels) + + padding_masks = padding_mask(torch.tensor(lengths, dtype=torch.int16), + max_len=max_len) # (batch_size, padded_length) boolean tensor, "1" means keep + + return X, targets, padding_masks + + +def padding_mask(lengths, max_len=None): + """ + Used to mask padded positions: creates a (batch_size, max_len) boolean mask from a tensor of sequence lengths, + where 1 means keep element at this position (time step) + """ + batch_size = lengths.numel() + max_len = max_len or lengths.max_val() # trick works because of overloading of 'or' operator for non-boolean types + return (torch.arange(0, max_len, device=lengths.device) + .type_as(lengths) + .repeat(batch_size, 1) + .lt(lengths.unsqueeze(1))) + + +class Normalizer(object): + """ + Normalizes dataframe across ALL contained rows (time steps). Different from per-sample normalization. + """ + + def __init__(self, norm_type='standardization', mean=None, std=None, min_val=None, max_val=None): + """ + Args: + norm_type: choose from: + "standardization", "minmax": normalizes dataframe across ALL contained rows (time steps) + "per_sample_std", "per_sample_minmax": normalizes each sample separately (i.e. 
across only its own rows) + mean, std, min_val, max_val: optional (num_feat,) Series of pre-computed values + """ + + self.norm_type = norm_type + self.mean = mean + self.std = std + self.min_val = min_val + self.max_val = max_val + + def normalize(self, df): + """ + Args: + df: input dataframe + Returns: + df: normalized dataframe + """ + if self.norm_type == "standardization": + if self.mean is None: + self.mean = df.mean() + self.std = df.std() + return (df - self.mean) / (self.std + np.finfo(float).eps) + + elif self.norm_type == "minmax": + if self.max_val is None: + self.max_val = df.max() + self.min_val = df.min() + return (df - self.min_val) / (self.max_val - self.min_val + np.finfo(float).eps) + + elif self.norm_type == "per_sample_std": + grouped = df.groupby(by=df.index) + return (df - grouped.transform('mean')) / grouped.transform('std') + + elif self.norm_type == "per_sample_minmax": + grouped = df.groupby(by=df.index) + min_vals = grouped.transform('min') + return (df - min_vals) / (grouped.transform('max') - min_vals + np.finfo(float).eps) + + else: + raise (NameError(f'Normalize method "{self.norm_type}" not implemented')) + + +def interpolate_missing(y): + """ + Replaces NaN values in pd.Series `y` using linear interpolation + """ + if y.isna().any(): + y = y.interpolate(method='linear', limit_direction='both') + return y + + +def subsample(y, limit=256, factor=2): + """ + If a given Series is longer than `limit`, returns subsampled sequence by the specified integer factor + """ + if len(y) > limit: + return y[::factor].reset_index(drop=True) + return y diff --git a/experiments/exp_basic.py b/experiments/exp_basic.py index e69de29bb..935d65ef6 100644 --- a/experiments/exp_basic.py +++ b/experiments/exp_basic.py @@ -0,0 +1,50 @@ +import os +import torch +from model import Transformer, Informer, Reformer, Flowformer, Flashformer, \ + iTransformer, iInformer, iReformer, iFlowformer, iFlashformer + + +class Exp_Basic(object): + def __init__(self, args): + self.args = args + self.model_dict = { + 'Transformer': Transformer, + 'Informer': Informer, + 'Reformer': Reformer, + 'Flowformer': Flowformer, + 'Flashformer': Flashformer, + 'iTransformer': iTransformer, + 'iInformer': iInformer, + 'iReformer': iReformer, + 'iFlowformer': iFlowformer, + 'iFlashformer': iFlashformer, + } + self.device = self._acquire_device() + self.model = self._build_model().to(self.device) + + def _build_model(self): + raise NotImplementedError + return None + + def _acquire_device(self): + if self.args.use_gpu: + os.environ["CUDA_VISIBLE_DEVICES"] = str( + self.args.gpu) if not self.args.use_multi_gpu else self.args.devices + device = torch.device('cuda:{}'.format(self.args.gpu)) + print('Use GPU: cuda:{}'.format(self.args.gpu)) + else: + device = torch.device('cpu') + print('Use CPU') + return device + + def _get_data(self): + pass + + def vali(self): + pass + + def train(self): + pass + + def test(self): + pass diff --git a/experiments/exp_long_term_forecasting.py b/experiments/exp_long_term_forecasting.py new file mode 100644 index 000000000..667e1f37b --- /dev/null +++ b/experiments/exp_long_term_forecasting.py @@ -0,0 +1,275 @@ +from data_provider.data_factory import data_provider +from experiments.exp_basic import Exp_Basic +from utils.tools import EarlyStopping, adjust_learning_rate, visual +from utils.metrics import metric +import torch +import torch.nn as nn +from torch import optim +import os +import time +import warnings +import numpy as np + +warnings.filterwarnings('ignore') + + +class 
Exp_Long_Term_Forecast(Exp_Basic): + def __init__(self, args): + super(Exp_Long_Term_Forecast, self).__init__(args) + + def _build_model(self): + model = self.model_dict[self.args.model].Model(self.args).float() + + if self.args.use_multi_gpu and self.args.use_gpu: + model = nn.DataParallel(model, device_ids=self.args.device_ids) + return model + + def _get_data(self, flag): + data_set, data_loader = data_provider(self.args, flag) + return data_set, data_loader + + def _select_optimizer(self): + model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) + return model_optim + + def _select_criterion(self): + criterion = nn.MSELoss() + return criterion + + def vali(self, vali_data, vali_loader, criterion): + total_loss = [] + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): + batch_x = batch_x.float().to(self.device) + batch_y = batch_y.float() + + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + else: + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + + pred = outputs.detach().cpu() + true = batch_y.detach().cpu() + + loss = criterion(pred, true) + + total_loss.append(loss) + total_loss = np.average(total_loss) + self.model.train() + return total_loss + + def train(self, setting): + train_data, train_loader = self._get_data(flag='train') + vali_data, vali_loader = self._get_data(flag='val') + test_data, test_loader = self._get_data(flag='test') + + path = os.path.join(self.args.checkpoints, setting) + if not os.path.exists(path): + os.makedirs(path) + + time_now = time.time() + + train_steps = len(train_loader) + early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) + + model_optim = self._select_optimizer() + criterion = self._select_criterion() + + if self.args.use_amp: + scaler = torch.cuda.amp.GradScaler() + + for epoch in range(self.args.train_epochs): + iter_count = 0 + train_loss = [] + + self.model.train() + epoch_time = time.time() + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): + iter_count += 1 + model_optim.zero_grad() + batch_x = batch_x.float().to(self.device) + + batch_y = batch_y.float().to(self.device) + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) 
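
The decoder-input construction used in vali and train above keeps label_len observed steps and masks the pred_len forecast horizon with zeros; a shape sketch with illustrative sizes:

    import torch

    B, label_len, pred_len, N = 4, 48, 96, 7                  # illustrative sizes
    batch_y = torch.randn(B, label_len + pred_len, N)         # ground truth: history overlap + horizon

    dec_inp = torch.zeros_like(batch_y[:, -pred_len:, :])                # horizon part zeroed out
    dec_inp = torch.cat([batch_y[:, :label_len, :], dec_inp], dim=1)     # (B, label_len + pred_len, N)
    assert dec_inp.shape == (4, 144, 7)
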
+ + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + loss = criterion(outputs, batch_y) + train_loss.append(loss.item()) + else: + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + loss = criterion(outputs, batch_y) + train_loss.append(loss.item()) + + if (i + 1) % 100 == 0: + print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) + speed = (time.time() - time_now) / iter_count + left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) + print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) + iter_count = 0 + time_now = time.time() + + if self.args.use_amp: + scaler.scale(loss).backward() + scaler.step(model_optim) + scaler.update() + else: + loss.backward() + model_optim.step() + + print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) + train_loss = np.average(train_loss) + vali_loss = self.vali(vali_data, vali_loader, criterion) + test_loss = self.vali(test_data, test_loader, criterion) + + print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( + epoch + 1, train_steps, train_loss, vali_loss, test_loss)) + early_stopping(vali_loss, self.model, path) + if early_stopping.early_stop: + print("Early stopping") + break + + adjust_learning_rate(model_optim, epoch + 1, self.args) + + # get_cka(self.args, setting, self.model, train_loader, self.device, epoch) + + best_model_path = path + '/' + 'checkpoint.pth' + self.model.load_state_dict(torch.load(best_model_path)) + + return self.model + + def test(self, setting, test=0): + test_data, test_loader = self._get_data(flag='test') + if test: + print('loading model') + self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) + + preds = [] + trues = [] + folder_path = './test_results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): + batch_x = batch_x.float().to(self.device) + batch_y = batch_y.float().to(self.device) + + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + else: + if self.args.output_attention: + 
outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + outputs = outputs.detach().cpu().numpy() + batch_y = batch_y.detach().cpu().numpy() + if test_data.scale and self.args.inverse: + outputs = test_data.inverse_transform(outputs) + batch_y = test_data.inverse_transform(batch_y) + + pred = outputs + true = batch_y + + preds.append(pred) + trues.append(true) + if i % 20 == 0: + input = batch_x.detach().cpu().numpy() + gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0) + pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0) + visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) + + preds = np.array(preds) + trues = np.array(trues) + print('test shape:', preds.shape, trues.shape) + preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) + trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1]) + print('test shape:', preds.shape, trues.shape) + + # result save + folder_path = './results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + mae, mse, rmse, mape, mspe = metric(preds, trues) + print('mse:{}, mae:{}'.format(mse, mae)) + f = open("result_long_term_forecast.txt", 'a') + f.write(setting + " \n") + f.write('mse:{}, mae:{}'.format(mse, mae)) + f.write('\n') + f.write('\n') + f.close() + + np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe])) + np.save(folder_path + 'pred.npy', preds) + np.save(folder_path + 'true.npy', trues) + + return diff --git a/experiments/exp_long_term_forecasting_partial.py b/experiments/exp_long_term_forecasting_partial.py new file mode 100644 index 000000000..1080c2960 --- /dev/null +++ b/experiments/exp_long_term_forecasting_partial.py @@ -0,0 +1,320 @@ +from data_provider.data_factory import data_provider +from experiments.exp_basic import Exp_Basic +from utils.tools import EarlyStopping, adjust_learning_rate, visual +from utils.metrics import metric +import torch +import torch.nn as nn +from torch import optim +import os +import time +import warnings +import pdb +import numpy as np +import random + +warnings.filterwarnings('ignore') + + +class Exp_Long_Term_Forecast_Partial(Exp_Basic): + def __init__(self, args): + super(Exp_Long_Term_Forecast_Partial, self).__init__(args) + + def _build_model(self): + model = self.model_dict[self.args.model].Model(self.args).float() + + if self.args.use_multi_gpu and self.args.use_gpu: + model = nn.DataParallel(model, device_ids=self.args.device_ids) + return model + + def _get_data(self, flag): + data_set, data_loader = data_provider(self.args, flag) + return data_set, data_loader + + def _select_optimizer(self): + model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) + return model_optim + + def _select_criterion(self): + criterion = nn.MSELoss() + return criterion + + def vali(self, vali_data, vali_loader, criterion, partial_train=False): + total_loss = [] + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): + batch_x = batch_x.float().to(self.device) + batch_y = batch_y.float() + + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = 
batch_y_mark.float().to(self.device) + + if partial_train: # 使用部分变量训练 + batch_x = batch_x[:,:,-self.args.enc_in:] + batch_y = batch_y[:,:,-self.args.enc_in:] + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + else: + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + elif self.args.channel_independent: + B, Tx, N = batch_x.shape + _, Ty, _ =dec_inp.shape + if batch_x_mark==None: + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark,\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark).reshape(B,N,-1).permute(0,2,1) + else: + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark.repeat(N,1,1),\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark.repeat(N,1,1))\ + .reshape(B,N,-1).permute(0,2,1) + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + + pred = outputs.detach().cpu() + true = batch_y.detach().cpu() + + loss = criterion(pred, true) + + total_loss.append(loss) + total_loss = np.average(total_loss) + self.model.train() + return total_loss + + def train(self, setting): + train_data, train_loader = self._get_data(flag='train') + vali_data, vali_loader = self._get_data(flag='val') + test_data, test_loader = self._get_data(flag='test') + + path = os.path.join(self.args.checkpoints, setting) + if not os.path.exists(path): + os.makedirs(path) + + time_now = time.time() + + train_steps = len(train_loader) + early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) + + model_optim = self._select_optimizer() + criterion = self._select_criterion() + + if self.args.use_amp: + scaler = torch.cuda.amp.GradScaler() + + for epoch in range(self.args.train_epochs): + iter_count = 0 + train_loss = [] + + self.model.train() + epoch_time = time.time() + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): + iter_count += 1 + model_optim.zero_grad() + batch_x = batch_x.float().to(self.device) + + batch_y = batch_y.float().to(self.device) + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + batch_x = batch_x[:,:,-self.args.enc_in:] + batch_y = batch_y[:,:,-self.args.enc_in:] + if self.args.random_train: # 使用随机的部分变量训练 + _,_,N = batch_x.shape + index = np.stack(random.sample(range(N),N))[-self.args.enc_in:] + batch_x = batch_x[:,:,index] + batch_y = batch_y[:,:,index] + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, 
batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + loss = criterion(outputs, batch_y) + train_loss.append(loss.item()) + else: + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + elif self.args.channel_independent: + B, Tx, N = batch_x.shape + _, Ty, _ =dec_inp.shape + if batch_x_mark==None: + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark,\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark).reshape(B,N,-1).permute(0,2,1) + else: + a = batch_x.permute(0,2,1) + b = batch_x.permute(0,2,1).reshape(B*N,Tx,1) + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark.repeat(N,1,1),\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark.repeat(N,1,1))\ + .reshape(B,N,-1).permute(0,2,1) + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + loss = criterion(outputs, batch_y) + train_loss.append(loss.item()) + + if (i + 1) % 100 == 0: + print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) + speed = (time.time() - time_now) / iter_count + left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) + print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) + iter_count = 0 + time_now = time.time() + + if self.args.use_amp: + scaler.scale(loss).backward() + scaler.step(model_optim) + scaler.update() + else: + loss.backward() + model_optim.step() + + print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) + train_loss = np.average(train_loss) + vali_loss = self.vali(vali_data, vali_loader, criterion, partial_train=True) + test_loss = self.vali(test_data, test_loader, criterion, partial_train=False) + + print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( + epoch + 1, train_steps, train_loss, vali_loss, test_loss)) + early_stopping(vali_loss, self.model, path) + if early_stopping.early_stop: + print("Early stopping") + break + + adjust_learning_rate(model_optim, epoch + 1, self.args) + + best_model_path = path + '/' + 'checkpoint.pth' + self.model.load_state_dict(torch.load(best_model_path)) + + return self.model + + def test(self, setting, test=0): + test_data, test_loader = self._get_data(flag='test') + if test: + print('loading model') + self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) + + preds = [] + trues = [] + folder_path = './test_results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): + batch_x = batch_x.float().to(self.device) + batch_y = batch_y.float().to(self.device) + + if 'PEMS' in self.args.data or 'Solar' in self.args.data: + batch_x_mark = None + batch_y_mark = None + else: + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + # 
encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + else: + if self.args.output_attention: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] + elif self.args.channel_independent: + B, Tx, N = batch_x.shape + _, Ty, _ =dec_inp.shape + if batch_x_mark==None: + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark,\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark).reshape(B,N,-1).permute(0,2,1) + else: + outputs = self.model(batch_x.permute(0,2,1).reshape(B*N,Tx,1), batch_x_mark.repeat(N,1,1),\ + dec_inp.permute(0,2,1).reshape(B*N,Ty,1), batch_y_mark.repeat(N,1,1))\ + .reshape(B,N,-1).permute(0,2,1) + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + outputs = outputs.detach().cpu().numpy() + batch_y = batch_y.detach().cpu().numpy() + if test_data.scale and self.args.inverse: + outputs = test_data.inverse_transform(outputs) + batch_y = test_data.inverse_transform(batch_y) + + pred = outputs + true = batch_y + + preds.append(pred) + trues.append(true) + if i % 20 == 0: + # if i == 4440: + input = batch_x.detach().cpu().numpy() + gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0) + pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0) + visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) + # pdb.set_trace() + + preds = np.array(preds) + trues = np.array(trues) + print('test shape:', preds.shape, trues.shape) + preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) + trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1]) + print('test shape:', preds.shape, trues.shape) + + # result save + folder_path = './results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + mae, mse, rmse, mape, mspe = metric(preds, trues) + print('mse:{}, mae:{}'.format(mse, mae)) + f = open("result_long_term_forecast.txt", 'a') + f.write(setting + " \n") + f.write('mse:{}, mae:{}'.format(mse, mae)) + f.write('\n') + f.write('\n') + f.close() + + np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe])) + np.save(folder_path + 'pred.npy', preds) + np.save(folder_path + 'true.npy', trues) + + return diff --git a/layers/Embed.py b/layers/Embed.py new file mode 100644 index 000000000..977e25568 --- /dev/null +++ b/layers/Embed.py @@ -0,0 +1,190 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.utils import weight_norm +import math + + +class PositionalEmbedding(nn.Module): + def __init__(self, d_model, max_len=5000): + super(PositionalEmbedding, self).__init__() + # Compute the positional encodings once in log space. 
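+ # sinusoidal table: pe[pos, 2i] = sin(pos * 10000^(-2i/d_model)), pe[pos, 2i+1] = cos(pos * 10000^(-2i/d_model))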
+ pe = torch.zeros(max_len, d_model).float() + pe.require_grad = False + + position = torch.arange(0, max_len).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() + * -(math.log(10000.0) / d_model)).exp() + + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + + pe = pe.unsqueeze(0) + self.register_buffer('pe', pe) + + def forward(self, x): + return self.pe[:, :x.size(1)] + + +class TokenEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(TokenEmbedding, self).__init__() + padding = 1 if torch.__version__ >= '1.5.0' else 2 + self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, + kernel_size=3, padding=padding, padding_mode='circular', bias=False) + for m in self.modules(): + if isinstance(m, nn.Conv1d): + nn.init.kaiming_normal_( + m.weight, mode='fan_in', nonlinearity='leaky_relu') + + def forward(self, x): + x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) + return x + + +class FixedEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(FixedEmbedding, self).__init__() + + w = torch.zeros(c_in, d_model).float() + w.require_grad = False + + position = torch.arange(0, c_in).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() + * -(math.log(10000.0) / d_model)).exp() + + w[:, 0::2] = torch.sin(position * div_term) + w[:, 1::2] = torch.cos(position * div_term) + + self.emb = nn.Embedding(c_in, d_model) + self.emb.weight = nn.Parameter(w, requires_grad=False) + + def forward(self, x): + return self.emb(x).detach() + + +class TemporalEmbedding(nn.Module): + def __init__(self, d_model, embed_type='fixed', freq='h'): + super(TemporalEmbedding, self).__init__() + + minute_size = 4 + hour_size = 24 + weekday_size = 7 + day_size = 32 + month_size = 13 + + Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding + if freq == 't': + self.minute_embed = Embed(minute_size, d_model) + self.hour_embed = Embed(hour_size, d_model) + self.weekday_embed = Embed(weekday_size, d_model) + self.day_embed = Embed(day_size, d_model) + self.month_embed = Embed(month_size, d_model) + + def forward(self, x): + x = x.long() + minute_x = self.minute_embed(x[:, :, 4]) if hasattr( + self, 'minute_embed') else 0. 
+ hour_x = self.hour_embed(x[:, :, 3]) + weekday_x = self.weekday_embed(x[:, :, 2]) + day_x = self.day_embed(x[:, :, 1]) + month_x = self.month_embed(x[:, :, 0]) + + return hour_x + weekday_x + day_x + month_x + minute_x + + +class TimeFeatureEmbedding(nn.Module): + def __init__(self, d_model, embed_type='timeF', freq='h'): + super(TimeFeatureEmbedding, self).__init__() + + freq_map = {'h': 4, 't': 5, 's': 6, + 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} + d_inp = freq_map[freq] + self.embed = nn.Linear(d_inp, d_model, bias=False) + + def forward(self, x): + return self.embed(x) + + +class DataEmbedding(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, + freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( + d_model=d_model, embed_type=embed_type, freq=freq) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + if x_mark is None: + x = self.value_embedding(x) + self.position_embedding(x) + else: + x = self.value_embedding( + x) + self.temporal_embedding(x_mark) + self.position_embedding(x) + return self.dropout(x) + + +class DataEmbedding_inverted(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding_inverted, self).__init__() + self.value_embedding = nn.Linear(c_in, d_model) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + x = x.permute(0, 2, 1) + # x: [Batch Variate Time] + if x_mark is None: + x = self.value_embedding(x) + else: + x = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1)) + # x: [Batch Variate d_model] + return self.dropout(x) + + +class DataEmbedding_wo_pos(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding_wo_pos, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, + freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( + d_model=d_model, embed_type=embed_type, freq=freq) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + if x_mark is None: + x = self.value_embedding(x) + else: + x = self.value_embedding(x) + self.temporal_embedding(x_mark) + return self.dropout(x) + + +class PatchEmbedding(nn.Module): + def __init__(self, d_model, patch_len, stride, padding, dropout): + super(PatchEmbedding, self).__init__() + # Patching + self.patch_len = patch_len + self.stride = stride + self.padding_patch_layer = nn.ReplicationPad1d((0, padding)) + + # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space + self.value_embedding = nn.Linear(patch_len, d_model, bias=False) + + # Positional embedding + self.position_embedding = PositionalEmbedding(d_model) + + # Residual dropout + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + # do patching + n_vars = x.shape[1] + x = self.padding_patch_layer(x) + x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) + x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) + # Input encoding + x = self.value_embedding(x) + self.position_embedding(x) + return self.dropout(x), n_vars 
diff --git a/layers/SelfAttention_Family.py b/layers/SelfAttention_Family.py new file mode 100644 index 000000000..f3d2d16d6 --- /dev/null +++ b/layers/SelfAttention_Family.py @@ -0,0 +1,420 @@ +import torch +import torch.nn as nn +import numpy as np +from math import sqrt +from utils.masking import TriangularCausalMask, ProbMask +from reformer_pytorch import LSHSelfAttention +from einops import rearrange, repeat + +class FlowAttention(nn.Module): + def __init__(self, attention_dropout=0.1): + super(FlowAttention, self).__init__() + self.dropout = nn.Dropout(attention_dropout) + + def kernel_method(self, x): + return torch.sigmoid(x) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + queries = queries.transpose(1, 2) + keys = keys.transpose(1, 2) + values = values.transpose(1, 2) + # kernel + queries = self.kernel_method(queries) + keys = self.kernel_method(keys) + # incoming and outgoing + normalizer_row = 1.0 / (torch.einsum("nhld,nhd->nhl", queries + 1e-6, keys.sum(dim=2) + 1e-6)) + normalizer_col = 1.0 / (torch.einsum("nhsd,nhd->nhs", keys + 1e-6, queries.sum(dim=2) + 1e-6)) + # reweighting + normalizer_row_refine = ( + torch.einsum("nhld,nhd->nhl", queries + 1e-6, (keys * normalizer_col[:, :, :, None]).sum(dim=2) + 1e-6)) + normalizer_col_refine = ( + torch.einsum("nhsd,nhd->nhs", keys + 1e-6, (queries * normalizer_row[:, :, :, None]).sum(dim=2) + 1e-6)) + # competition and allocation + normalizer_row_refine = torch.sigmoid( + normalizer_row_refine * (float(queries.shape[2]) / float(keys.shape[2]))) + normalizer_col_refine = torch.softmax(normalizer_col_refine, dim=-1) * keys.shape[2] # B h L vis + # multiply + kv = keys.transpose(-2, -1) @ (values * normalizer_col_refine[:, :, :, None]) + x = (((queries @ kv) * normalizer_row[:, :, :, None]) * normalizer_row_refine[:, :, :, None]).transpose(1, 2).contiguous() + return x, None + + +class DSAttention(nn.Module): + '''De-stationary Attention''' + + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(DSAttention, self).__init__() + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, H, E = queries.shape + _, S, _, D = values.shape + scale = self.scale or 1. 
/ sqrt(E) + + tau = 1.0 if tau is None else tau.unsqueeze( + 1).unsqueeze(1) # B x 1 x 1 x 1 + delta = 0.0 if delta is None else delta.unsqueeze( + 1).unsqueeze(1) # B x 1 x 1 x S + + # De-stationary Attention, rescaling pre-softmax score with learned de-stationary factors + scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta + + if self.mask_flag: + if attn_mask is None: + attn_mask = TriangularCausalMask(B, L, device=queries.device) + + scores.masked_fill_(attn_mask.mask, -np.inf) + + A = self.dropout(torch.softmax(scale * scores, dim=-1)) + V = torch.einsum("bhls,bshd->blhd", A, values) + + if self.output_attention: + return (V.contiguous(), A) + else: + return (V.contiguous(), None) + + + +class FlashAttention(nn.Module): + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(FlashAttention, self).__init__() + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def flash_attention_forward(self, Q, K, V, mask=None): + BLOCK_SIZE = 32 + NEG_INF = -1e10 # -infinity + EPSILON = 1e-10 + # mask = torch.randint(0, 2, (128, 8)).to(device='cuda') + O = torch.zeros_like(Q, requires_grad=True) + l = torch.zeros(Q.shape[:-1])[..., None] + m = torch.ones(Q.shape[:-1])[..., None] * NEG_INF + + O = O.to(device='cuda') + l = l.to(device='cuda') + m = m.to(device='cuda') + + Q_BLOCK_SIZE = min(BLOCK_SIZE, Q.shape[-1]) + KV_BLOCK_SIZE = BLOCK_SIZE + + Q_BLOCKS = torch.split(Q, Q_BLOCK_SIZE, dim=2) + K_BLOCKS = torch.split(K, KV_BLOCK_SIZE, dim=2) + V_BLOCKS = torch.split(V, KV_BLOCK_SIZE, dim=2) + if mask is not None: + mask_BLOCKS = list(torch.split(mask, KV_BLOCK_SIZE, dim=1)) + + Tr = len(Q_BLOCKS) + Tc = len(K_BLOCKS) + + O_BLOCKS = list(torch.split(O, Q_BLOCK_SIZE, dim=2)) + l_BLOCKS = list(torch.split(l, Q_BLOCK_SIZE, dim=2)) + m_BLOCKS = list(torch.split(m, Q_BLOCK_SIZE, dim=2)) + + for j in range(Tc): + Kj = K_BLOCKS[j] + Vj = V_BLOCKS[j] + if mask is not None: + maskj = mask_BLOCKS[j] + + for i in range(Tr): + Qi = Q_BLOCKS[i] + Oi = O_BLOCKS[i] + li = l_BLOCKS[i] + mi = m_BLOCKS[i] + + scale = 1 / np.sqrt(Q.shape[-1]) + Qi_scaled = Qi * scale + + S_ij = torch.einsum('... i d, ... j d -> ... i j', Qi_scaled, Kj) + if mask is not None: + # Masking + maskj_temp = rearrange(maskj, 'b j -> b 1 1 j') + S_ij = torch.where(maskj_temp > 0, S_ij, NEG_INF) + + m_block_ij, _ = torch.max(S_ij, dim=-1, keepdims=True) + P_ij = torch.exp(S_ij - m_block_ij) + if mask is not None: + # Masking + P_ij = torch.where(maskj_temp > 0, P_ij, 0.) + + l_block_ij = torch.sum(P_ij, dim=-1, keepdims=True) + EPSILON + + P_ij_Vj = torch.einsum('... i j, ... j d -> ... 
i d', P_ij, Vj) + + mi_new = torch.maximum(m_block_ij, mi) + li_new = torch.exp(mi - mi_new) * li + torch.exp(m_block_ij - mi_new) * l_block_ij + + O_BLOCKS[i] = (li / li_new) * torch.exp(mi - mi_new) * Oi + ( + torch.exp(m_block_ij - mi_new) / li_new) * P_ij_Vj + l_BLOCKS[i] = li_new + m_BLOCKS[i] = mi_new + + O = torch.cat(O_BLOCKS, dim=2) + l = torch.cat(l_BLOCKS, dim=2) + m = torch.cat(m_BLOCKS, dim=2) + return O, l, m + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + res = self.flash_attention_forward(queries.permute(0, 2, 1, 3), keys.permute(0, 2, 1, 3), values.permute(0, 2, 1, 3), attn_mask)[0] + return res.permute(0, 2, 1, 3).contiguous(), None + +class FullAttention(nn.Module): + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(FullAttention, self).__init__() + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, H, E = queries.shape + _, S, _, D = values.shape + scale = self.scale or 1. / sqrt(E) + + scores = torch.einsum("blhe,bshe->bhls", queries, keys) + + if self.mask_flag: + if attn_mask is None: + attn_mask = TriangularCausalMask(B, L, device=queries.device) + + scores.masked_fill_(attn_mask.mask, -np.inf) + + A = self.dropout(torch.softmax(scale * scores, dim=-1)) + V = torch.einsum("bhls,bshd->blhd", A, values) + + if self.output_attention: + return (V.contiguous(), A) + else: + return (V.contiguous(), None) + + +class ProbAttention(nn.Module): + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(ProbAttention, self).__init__() + self.factor = factor + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) + # Q [B, H, L, D] + B, H, L_K, E = K.shape + _, _, L_Q, _ = Q.shape + + # calculate the sampled Q_K + K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) + # real U = U_part(factor*ln(L_k))*L_q + index_sample = torch.randint(L_K, (L_Q, sample_k)) + K_sample = K_expand[:, :, torch.arange( + L_Q).unsqueeze(1), index_sample, :] + Q_K_sample = torch.matmul( + Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() + + # find the Top_k query with sparisty measurement + M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) + M_top = M.topk(n_top, sorted=False)[1] + + # use the reduced Q to calculate Q_K + Q_reduce = Q[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + M_top, :] # factor*ln(L_q) + Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k + + return Q_K, M_top + + def _get_initial_context(self, V, L_Q): + B, H, L_V, D = V.shape + if not self.mask_flag: + # V_sum = V.sum(dim=-2) + V_sum = V.mean(dim=-2) + contex = V_sum.unsqueeze(-2).expand(B, H, + L_Q, V_sum.shape[-1]).clone() + else: # use mask + # requires that L_Q == L_V, i.e. 
for self-attention only + assert (L_Q == L_V) + contex = V.cumsum(dim=-2) + return contex + + def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): + B, H, L_V, D = V.shape + + if self.mask_flag: + attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) + scores.masked_fill_(attn_mask.mask, -np.inf) + + attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) + + context_in[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + index, :] = torch.matmul(attn, V).type_as(context_in) + if self.output_attention: + attns = (torch.ones([B, H, L_V, L_V]) / + L_V).type_as(attn).to(attn.device) + attns[torch.arange(B)[:, None, None], torch.arange(H)[ + None, :, None], index, :] = attn + return (context_in, attns) + else: + return (context_in, None) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L_Q, H, D = queries.shape + _, L_K, _, _ = keys.shape + + queries = queries.transpose(2, 1) + keys = keys.transpose(2, 1) + values = values.transpose(2, 1) + + U_part = self.factor * \ + np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) + u = self.factor * \ + np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) + + U_part = U_part if U_part < L_K else L_K + u = u if u < L_Q else L_Q + + scores_top, index = self._prob_QK( + queries, keys, sample_k=U_part, n_top=u) + + # add scale factor + scale = self.scale or 1. / sqrt(D) + if scale is not None: + scores_top = scores_top * scale + # get the context + context = self._get_initial_context(values, L_Q) + # update the context with selected top_k queries + context, attn = self._update_context( + context, values, scores_top, index, L_Q, attn_mask) + + return context.contiguous(), attn + + +class AttentionLayer(nn.Module): + def __init__(self, attention, d_model, n_heads, d_keys=None, + d_values=None): + super(AttentionLayer, self).__init__() + + d_keys = d_keys or (d_model // n_heads) + d_values = d_values or (d_model // n_heads) + + self.inner_attention = attention + self.query_projection = nn.Linear(d_model, d_keys * n_heads) + self.key_projection = nn.Linear(d_model, d_keys * n_heads) + self.value_projection = nn.Linear(d_model, d_values * n_heads) + self.out_projection = nn.Linear(d_values * n_heads, d_model) + self.n_heads = n_heads + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, _ = queries.shape + _, S, _ = keys.shape + H = self.n_heads + + queries = self.query_projection(queries).view(B, L, H, -1) + keys = self.key_projection(keys).view(B, S, H, -1) + values = self.value_projection(values).view(B, S, H, -1) + + out, attn = self.inner_attention( + queries, + keys, + values, + attn_mask, + tau=tau, + delta=delta + ) + out = out.view(B, L, -1) + + return self.out_projection(out), attn + + +class ReformerLayer(nn.Module): + def __init__(self, attention, d_model, n_heads, d_keys=None, + d_values=None, causal=False, bucket_size=4, n_hashes=4): + super().__init__() + self.bucket_size = bucket_size + self.attn = LSHSelfAttention( + dim=d_model, + heads=n_heads, + bucket_size=bucket_size, + n_hashes=n_hashes, + causal=causal + ) + + def fit_length(self, queries): + # inside reformer: assert N % (bucket_size * 2) == 0 + B, N, C = queries.shape + if N % (self.bucket_size * 2) == 0: + return queries + else: + # fill the time series + fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2)) + return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1) + + def forward(self, queries, keys, values, attn_mask, 
tau, delta): + # in Reformer: defalut queries=keys + B, N, C = queries.shape + queries = self.attn(self.fit_length(queries))[:, :N, :] + return queries, None + + +class TwoStageAttentionLayer(nn.Module): + ''' + The Two Stage Attention (TSA) Layer + input/output shape: [batch_size, Data_dim(D), Seg_num(L), d_model] + ''' + + def __init__(self, configs, + seg_num, factor, d_model, n_heads, d_ff=None, dropout=0.1): + super(TwoStageAttentionLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.time_attention = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), d_model, n_heads) + self.dim_sender = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), d_model, n_heads) + self.dim_receiver = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), d_model, n_heads) + self.router = nn.Parameter(torch.randn(seg_num, factor, d_model)) + + self.dropout = nn.Dropout(dropout) + + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.norm4 = nn.LayerNorm(d_model) + + self.MLP1 = nn.Sequential(nn.Linear(d_model, d_ff), + nn.GELU(), + nn.Linear(d_ff, d_model)) + self.MLP2 = nn.Sequential(nn.Linear(d_model, d_ff), + nn.GELU(), + nn.Linear(d_ff, d_model)) + + def forward(self, x, attn_mask=None, tau=None, delta=None): + # Cross Time Stage: Directly apply MSA to each dimension + batch = x.shape[0] + time_in = rearrange(x, 'b ts_d seg_num d_model -> (b ts_d) seg_num d_model') + time_enc, attn = self.time_attention( + time_in, time_in, time_in, attn_mask=None, tau=None, delta=None + ) + dim_in = time_in + self.dropout(time_enc) + dim_in = self.norm1(dim_in) + dim_in = dim_in + self.dropout(self.MLP1(dim_in)) + dim_in = self.norm2(dim_in) + + # Cross Dimension Stage: use a small set of learnable vectors to aggregate and distribute messages to build the D-to-D connection + dim_send = rearrange(dim_in, '(b ts_d) seg_num d_model -> (b seg_num) ts_d d_model', b=batch) + batch_router = repeat(self.router, 'seg_num factor d_model -> (repeat seg_num) factor d_model', repeat=batch) + dim_buffer, attn = self.dim_sender(batch_router, dim_send, dim_send, attn_mask=None, tau=None, delta=None) + dim_receive, attn = self.dim_receiver(dim_send, dim_buffer, dim_buffer, attn_mask=None, tau=None, delta=None) + dim_enc = dim_send + self.dropout(dim_receive) + dim_enc = self.norm3(dim_enc) + dim_enc = dim_enc + self.dropout(self.MLP2(dim_enc)) + dim_enc = self.norm4(dim_enc) + + final_out = rearrange(dim_enc, '(b seg_num) ts_d d_model -> b ts_d seg_num d_model', b=batch) + + return final_out \ No newline at end of file diff --git a/layers/Transformer_EncDec.py b/layers/Transformer_EncDec.py new file mode 100644 index 000000000..dabf4c2a5 --- /dev/null +++ b/layers/Transformer_EncDec.py @@ -0,0 +1,135 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ConvLayer(nn.Module): + def __init__(self, c_in): + super(ConvLayer, self).__init__() + self.downConv = nn.Conv1d(in_channels=c_in, + out_channels=c_in, + kernel_size=3, + padding=2, + padding_mode='circular') + self.norm = nn.BatchNorm1d(c_in) + self.activation = nn.ELU() + self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) + + def forward(self, x): + x = self.downConv(x.permute(0, 2, 1)) + x = self.norm(x) + x = self.activation(x) + x = 
self.maxPool(x) + x = x.transpose(1, 2) + return x + + +class EncoderLayer(nn.Module): + def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): + super(EncoderLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.attention = attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, attn_mask=None, tau=None, delta=None): + new_x, attn = self.attention( + x, x, x, + attn_mask=attn_mask, + tau=tau, delta=delta + ) + x = x + self.dropout(new_x) + + y = x = self.norm1(x) + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + + return self.norm2(x + y), attn + + +class Encoder(nn.Module): + def __init__(self, attn_layers, conv_layers=None, norm_layer=None): + super(Encoder, self).__init__() + self.attn_layers = nn.ModuleList(attn_layers) + self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None + self.norm = norm_layer + + def forward(self, x, attn_mask=None, tau=None, delta=None): + # x [B, L, D] + attns = [] + if self.conv_layers is not None: + for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)): + delta = delta if i == 0 else None + x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) + x = conv_layer(x) + attns.append(attn) + x, attn = self.attn_layers[-1](x, tau=tau, delta=None) + attns.append(attn) + else: + for attn_layer in self.attn_layers: + x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) + attns.append(attn) + + if self.norm is not None: + x = self.norm(x) + + return x, attns + + +class DecoderLayer(nn.Module): + def __init__(self, self_attention, cross_attention, d_model, d_ff=None, + dropout=0.1, activation="relu"): + super(DecoderLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.self_attention = self_attention + self.cross_attention = cross_attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): + x = x + self.dropout(self.self_attention( + x, x, x, + attn_mask=x_mask, + tau=tau, delta=None + )[0]) + x = self.norm1(x) + + x = x + self.dropout(self.cross_attention( + x, cross, cross, + attn_mask=cross_mask, + tau=tau, delta=delta + )[0]) + + y = x = self.norm2(x) + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + + return self.norm3(x + y) + + +class Decoder(nn.Module): + def __init__(self, layers, norm_layer=None, projection=None): + super(Decoder, self).__init__() + self.layers = nn.ModuleList(layers) + self.norm = norm_layer + self.projection = projection + + def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): + for layer in self.layers: + x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta) + + if self.norm is not None: + x = self.norm(x) + + if 
self.projection is not None: + x = self.projection(x) + return x diff --git a/model/Flashformer.py b/model/Flashformer.py new file mode 100644 index 000000000..45d8aca96 --- /dev/null +++ b/model/Flashformer.py @@ -0,0 +1,136 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer +from layers.SelfAttention_Family import FlashAttention, AttentionLayer, FullAttention +from layers.Embed import DataEmbedding +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FlashAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer( + FullAttention(True, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.dec_embedding(x_dec, x_mark_dec) + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) + return dec_out + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = 
self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/Flowformer.py b/model/Flowformer.py new file mode 100644 index 000000000..f624c0acd --- /dev/null +++ b/model/Flowformer.py @@ -0,0 +1,145 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer +from layers.SelfAttention_Family import FullAttention, AttentionLayer, FlowAttention +from layers.Embed import DataEmbedding +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + + if configs.channel_independent: + self.enc_in = 1 + self.dec_in = 1 + self.c_out = 1 + else: + self.enc_in = configs.enc_in + self.dec_in = configs.dec_in + self.c_out = configs.c_out + + # Embedding + self.enc_embedding = DataEmbedding(self.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FlowAttention(attention_dropout=configs.dropout), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.dec_embedding = DataEmbedding(self.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer( + FullAttention(True, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + 
output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.dec_embedding(x_dec, x_mark_dec) + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) + return dec_out + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/Informer.py b/model/Informer.py new file mode 100644 index 000000000..7e632b2aa --- /dev/null +++ b/model/Informer.py @@ -0,0 +1,166 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer +from layers.SelfAttention_Family import ProbAttention, AttentionLayer +from layers.Embed import DataEmbedding + + +class Model(nn.Module): + 
""" + Informer with Propspare attention in O(LlogL) complexity + Paper link: https://ojs.aaai.org/index.php/AAAI/article/view/17325/17132 + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.label_len = configs.label_len + + if configs.channel_independent: + self.enc_in = 1 + self.dec_in = 1 + self.c_out = 1 + else: + self.enc_in = configs.enc_in + self.dec_in = configs.dec_in + self.c_out = configs.c_out + + # Embedding + self.enc_embedding = DataEmbedding(self.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.dec_embedding = DataEmbedding(self.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + ProbAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + [ + ConvLayer( + configs.d_model + ) for l in range(configs.e_layers - 1) + ] if configs.distil and ('forecast' in configs.task_name) else None, + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer( + ProbAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + enc_out = self.enc_embedding(x_enc, x_mark_enc) + dec_out = self.dec_embedding(x_dec, x_mark_dec) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) + + return dec_out # [B, L, D] + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + enc_out = self.enc_embedding(x_enc, x_mark_enc) + + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() # B x 1 x E + x_enc = x_enc / std_enc + + enc_out = self.enc_embedding(x_enc, x_mark_enc) + dec_out = self.dec_embedding(x_dec, x_mark_dec) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.decoder(dec_out, enc_out, x_mask=None, 
cross_mask=None) + + dec_out = dec_out * std_enc + mean_enc + return dec_out # [B, L, D] + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # enc + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # final + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # final + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast': + dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'short_term_forecast': + dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/Reformer.py b/model/Reformer.py new file mode 100644 index 000000000..d11bffeb0 --- /dev/null +++ b/model/Reformer.py @@ -0,0 +1,163 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import ReformerLayer +from layers.Embed import DataEmbedding + + +class Model(nn.Module): + """ + Reformer with O(LlogL) complexity + Paper link: https://openreview.net/forum?id=rkgNKkHtvB + """ + + def __init__(self, configs, bucket_size=4, n_hashes=4): + """ + bucket_size: int, + n_hashes: int, + """ + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.seq_len = configs.seq_len + + if configs.channel_independent: + self.enc_in = 1 + self.dec_in = 1 + self.c_out = 1 + else: + self.enc_in = configs.enc_in + self.dec_in = configs.dec_in + self.c_out = configs.c_out + + + self.enc_embedding = DataEmbedding(self.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + ReformerLayer(None, configs.d_model, configs.n_heads, + bucket_size=bucket_size, n_hashes=n_hashes), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear( + configs.d_model * configs.seq_len, configs.num_class) + else: + self.projection = nn.Linear( + 
configs.d_model, configs.c_out, bias=True) + + def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # add placeholder + x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1) + if x_mark_enc is not None: + x_mark_enc = torch.cat( + [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1) + + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + enc_out, attns = self.encoder(enc_out, attn_mask=None) + dec_out = self.projection(enc_out) + + return dec_out # [B, L, D] + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # add placeholder + x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1) + if x_mark_enc is not None: + x_mark_enc = torch.cat( + [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1) + + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # add placeholder + x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1) + if x_mark_enc is not None: + x_mark_enc = torch.cat( + [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1) + + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() # B x 1 x E + x_enc = x_enc / std_enc + + # add placeholder + x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1) + if x_mark_enc is not None: + x_mark_enc = torch.cat( + [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1) + + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + enc_out, attns = self.encoder(enc_out, attn_mask=None) + dec_out = self.projection(enc_out) + + dec_out = dec_out * std_enc + mean_enc + return dec_out # [B, L, D] + + def imputation(self, x_enc, x_mark_enc): + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + + enc_out, attns = self.encoder(enc_out) + enc_out = self.projection(enc_out) + + return enc_out # [B, L, D] + + def anomaly_detection(self, x_enc): + enc_out = self.enc_embedding(x_enc, None) # [B,T,C] + + enc_out, attns = self.encoder(enc_out) + enc_out = self.projection(enc_out) + + return enc_out # [B, L, D] + + def classification(self, x_enc, x_mark_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out) + + # Output + # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.act(enc_out) + output = self.dropout(output) + # zero-out padding embeddings + output = output * x_mark_enc.unsqueeze(-1) + # (batch_size, seq_length * d_model) + output = output.reshape(output.shape[0], -1) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast': + dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'short_term_forecast': + dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return 
dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/Transformer.py b/model/Transformer.py new file mode 100644 index 000000000..de336f1c8 --- /dev/null +++ b/model/Transformer.py @@ -0,0 +1,146 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer +from layers.SelfAttention_Family import FullAttention, AttentionLayer +from layers.Embed import DataEmbedding +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + + if configs.channel_independent: + self.enc_in = 1 + self.dec_in = 1 + self.c_out = 1 + else: + self.enc_in = configs.enc_in + self.dec_in = configs.dec_in + self.c_out = configs.c_out + + # Embedding + self.enc_embedding = DataEmbedding(self.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.dec_embedding = DataEmbedding(self.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer( + FullAttention(True, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.dec_embedding(x_dec, x_mark_dec) + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) + return dec_out + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = 
self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/iFlashformer.py b/model/iFlashformer.py new file mode 100644 index 000000000..2acb4a449 --- /dev/null +++ b/model/iFlashformer.py @@ -0,0 +1,186 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import FlashAttention, AttentionLayer +from layers.Embed import DataEmbedding_inverted +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FlashAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = 
nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + if self.class_strategy == 'average': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include 
non-linearity + output = self.dropout(output) + output = torch.mean(output, dim=1) # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + elif self.class_strategy == 'cls_token': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out = torch.cat((self.cls_token.expand(enc_out.shape[0], -1, -1), enc_out), dim=1) # (batch_size, N + 1, d_model) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output[:, 0, :] # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + else: # projection + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/iFlowformer.py b/model/iFlowformer.py new file mode 100644 index 000000000..6abf60ff4 --- /dev/null +++ b/model/iFlowformer.py @@ -0,0 +1,185 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import FlowAttention, AttentionLayer +from layers.Embed import DataEmbedding_inverted +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FlowAttention(attention_dropout=configs.dropout), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = 
nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + if self.class_strategy == 'average': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include 
non-linearity + output = self.dropout(output) + output = torch.mean(output, dim=1) # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + elif self.class_strategy == 'cls_token': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out = torch.cat((self.cls_token.expand(enc_out.shape[0], -1, -1), enc_out), dim=1) # (batch_size, N + 1, d_model) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output[:, 0, :] # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + else: # projection + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/iInformer.py b/model/iInformer.py new file mode 100644 index 000000000..c4824fd8a --- /dev/null +++ b/model/iInformer.py @@ -0,0 +1,186 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import ProbAttention, AttentionLayer +from layers.Embed import DataEmbedding_inverted +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + ProbAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if 
self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + if self.class_strategy == 'average': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the 
output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = torch.mean(output, dim=1) # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + elif self.class_strategy == 'cls_token': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out = torch.cat((self.cls_token.expand(enc_out.shape[0], -1, -1), enc_out), dim=1) # (batch_size, N + 1, d_model) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output[:, 0, :] # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + else: # projection + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/iReformer.py b/model/iReformer.py new file mode 100644 index 000000000..e7e7ef86e --- /dev/null +++ b/model/iReformer.py @@ -0,0 +1,185 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import ReformerLayer +from layers.Embed import DataEmbedding_inverted +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + ReformerLayer(None, configs.d_model, configs.n_heads, + bucket_size=4, n_hashes=4), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'anomaly_detection': + 
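# As with the forecasting and imputation heads above, this anomaly-detection head maps +
# each variate token of width d_model back onto the time axis; a rough shape sketch +
# (B = batch, N = variates; purely illustrative): +
#   enc_out [B, N, d_model] --projection--> [B, N, seq_len] --permute(0, 2, 1)--> [B, seq_len, N] +
# The later [:, :, :N] slice keeps only the N variate tokens in case covariate (time-mark) +
# tokens were appended by the inverted embedding. +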
self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + + def encoder_top(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + return enc_out + + def encoder_bottom(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _,_,N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc.permute(0,2,1), x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + return enc_out + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + if self.class_strategy == 'average': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings 
don't include non-linearity + output = self.dropout(output) + output = torch.mean(output, dim=1) # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + elif self.class_strategy == 'cls_token': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out = torch.cat((self.cls_token.expand(enc_out.shape[0], -1, -1), enc_out), dim=1) # (batch_size, N + 1, d_model) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output[:, 0, :] # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + else: # projection + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/model/iTransformer.py b/model/iTransformer.py new file mode 100644 index 000000000..c17e8498c --- /dev/null +++ b/model/iTransformer.py @@ -0,0 +1,165 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import FullAttention, AttentionLayer +from layers.Embed import DataEmbedding_inverted +import numpy as np + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/abs/2310.06625 + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.output_attention = configs.output_attention + # Embedding + self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.class_strategy = configs.class_strategy + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=configs.output_attention), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 
'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + if self.class_strategy == 'average': + self.projection = nn.Linear(configs.d_model, configs.num_class) + elif self.class_strategy == 'cls_token': + self.cls_token = nn.Parameter(torch.zeros(1, 1, configs.d_model)) + self.projection = nn.Linear(configs.d_model, configs.num_class) + nn.init.normal_(self.cls_token, std=1e-6) + else: + self.projection = nn.Linear(configs.d_model * configs.enc_in, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + if self.class_strategy == 'average': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = torch.mean(output, dim=1) # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + elif self.class_strategy == 'cls_token': + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out = torch.cat((self.cls_token.expand(enc_out.shape[0], -1, -1), enc_out), dim=1) # (batch_size, N + 1, d_model) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # 
the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output[:, 0, :] # (batch_size, d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + else: # projection + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/run.py b/run.py new file mode 100644 index 000000000..420f0bd32 --- /dev/null +++ b/run.py @@ -0,0 +1,180 @@ +import argparse +import torch +from experiments.exp_long_term_forecasting import Exp_Long_Term_Forecast +from experiments.exp_long_term_forecasting_partial import Exp_Long_Term_Forecast_Partial +import random +import numpy as np + +if __name__ == '__main__': + fix_seed = 2021 + random.seed(fix_seed) + torch.manual_seed(fix_seed) + np.random.seed(fix_seed) + + parser = argparse.ArgumentParser(description='TimesNet') + + # basic config + parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast', + help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]') + parser.add_argument('--is_training', type=int, required=True, default=1, help='status') + parser.add_argument('--model_id', type=str, required=True, default='test', help='model id') + parser.add_argument('--model', type=str, required=True, default='Autoformer', + help='model name, options: [Autoformer, Transformer, TimesNet]') + + # data loader + parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type') + parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root path of the data file') + parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file') + parser.add_argument('--features', type=str, default='M', + help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate') + parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task') + parser.add_argument('--freq', type=str, default='h', + help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h') + parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints') + + # forecasting task + parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') + 
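# With the defaults below, each training window is typically split as (illustrative sketch): +
#   encoder input  : x[t : t + seq_len]                                   (96 steps) +
#   decoder input  : x[t + seq_len - label_len : t + seq_len + pred_len]  (48 known + 96 placeholder steps) +
#   forecast target: x[t + seq_len : t + seq_len + pred_len]              (96 steps) +
# The inverted, encoder-only models in this patch do not consume the decoder primer, so +
# label_len mainly matters for the encoder-decoder baselines (e.g. Transformer, Informer). +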
parser.add_argument('--label_len', type=int, default=48, help='start token length') + parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length') + parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4') + + # imputation task + parser.add_argument('--mask_rate', type=float, default=0.25, help='mask ratio') + + # anomaly detection task + parser.add_argument('--anomaly_ratio', type=float, default=0.25, help='prior anomaly ratio (%)') + + # model define + parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock') + parser.add_argument('--num_kernels', type=int, default=6, help='for Inception') + parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') + parser.add_argument('--dec_in', type=int, default=7, help='decoder input size') + parser.add_argument('--c_out', type=int, default=7, help='output size') + parser.add_argument('--d_model', type=int, default=512, help='dimension of model') + parser.add_argument('--n_heads', type=int, default=8, help='num of heads') + parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers') + parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers') + parser.add_argument('--d_ff', type=int, default=2048, help='dimension of feed-forward network') + parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average') + parser.add_argument('--factor', type=int, default=1, help='attn factor') + parser.add_argument('--distil', action='store_false', + help='whether to use distilling in encoder, using this argument means not using distilling', + default=True) + parser.add_argument('--dropout', type=float, default=0.1, help='dropout') + parser.add_argument('--embed', type=str, default='timeF', + help='time features encoding, options:[timeF, fixed, learned]') + parser.add_argument('--activation', type=str, default='gelu', help='activation') + parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder') + + # optimization + parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers') + parser.add_argument('--itr', type=int, default=1, help='number of experiment repetitions') + parser.add_argument('--train_epochs', type=int, default=10, help='train epochs') + parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data') + parser.add_argument('--patience', type=int, default=3, help='early stopping patience') + parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate') + parser.add_argument('--des', type=str, default='test', help='exp description') + parser.add_argument('--loss', type=str, default='MSE', help='loss function') + parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate') + parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False) + + # GPU + parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu') + parser.add_argument('--gpu', type=int, default=0, help='gpu') + parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False) + parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multiple gpus') + + # de-stationary projector params + parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128], + help='hidden layer dimensions of projector (List)') +
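# nargs='+' reads space-separated integers, e.g. --p_hidden_dims 128 128 (illustrative; +
# equivalent to the default two-layer projector). +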
parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector') + + # iTransformer + parser.add_argument('--exp_name', type=str, required=True, default='None', + help='task name, options:[partial_train, zero_shot]') + + # parser.add_argument('--partial_train', type=bool, default=False, help='partial_train') + parser.add_argument('--random_train', type=bool, default=False, help='random_train') + parser.add_argument('--channel_independent', type=bool, default=False, help='channel_independent') + parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False) + parser.add_argument('--class_strategy', type=str, default='projection', help='projection/average/cls_token') + parser.add_argument('--target_root_path', type=str, default='./data/ETT/', help='root path of the data file') + parser.add_argument('--target_data_path', type=str, default='ETTh1.csv', help='data file') + + + args = parser.parse_args() + args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False + + if args.use_gpu and args.use_multi_gpu: + args.devices = args.devices.replace(' ', '') + device_ids = args.devices.split(',') + args.device_ids = [int(id_) for id_ in device_ids] + args.gpu = args.device_ids[0] + + print('Args in experiment:') + print(args) + + if args.task_name == 'long_term_forecast': + if args.exp_name == 'partial_train': + Exp = Exp_Long_Term_Forecast_Partial + else: + Exp = Exp_Long_Term_Forecast + else: + Exp = Exp_Long_Term_Forecast + + if args.is_training: + for ii in range(args.itr): + # setting record of experiments + setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( + args.task_name, + args.model_id, + args.model, + args.data, + args.features, + args.seq_len, + args.label_len, + args.pred_len, + args.d_model, + args.n_heads, + args.e_layers, + args.d_layers, + args.d_ff, + args.factor, + args.embed, + args.distil, + args.des, + args.class_strategy, ii) + + exp = Exp(args) # set experiments + print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting)) + exp.train(setting) + + print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) + exp.test(setting) + torch.cuda.empty_cache() + else: + ii = 0 + setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( + args.task_name, + args.model_id, + args.model, + args.data, + args.features, + args.seq_len, + args.label_len, + args.pred_len, + args.d_model, + args.n_heads, + args.e_layers, + args.d_layers, + args.d_ff, + args.factor, + args.embed, + args.distil, + args.des, + args.class_strategy, ii) + + exp = Exp(args) # set experiments + print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) + exp.test(setting, test=1) + torch.cuda.empty_cache() diff --git a/scripts/boost_performance/ECL/iFlowformer.sh b/scripts/boost_performance/ECL/iFlowformer.sh new file mode 100644 index 000000000..4eac8a143 --- /dev/null +++ b/scripts/boost_performance/ECL/iFlowformer.sh @@ -0,0 +1,174 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ 
+ --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/ECL/iInformer.sh b/scripts/boost_performance/ECL/iInformer.sh new file mode 100644 index 000000000..c693c73b5 --- /dev/null +++ b/scripts/boost_performance/ECL/iInformer.sh @@ -0,0 +1,174 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 
1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/ECL/iReformer.sh b/scripts/boost_performance/ECL/iReformer.sh new file mode 100644 index 000000000..c693c73b5 --- /dev/null +++ b/scripts/boost_performance/ECL/iReformer.sh @@ -0,0 +1,174 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ 
+ --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/ECL/iTransformer.sh b/scripts/boost_performance/ECL/iTransformer.sh new file mode 100644 index 000000000..4c3618dc2 --- /dev/null +++ b/scripts/boost_performance/ECL/iTransformer.sh @@ -0,0 +1,174 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path 
electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/Traffic/iFlowformer.sh b/scripts/boost_performance/Traffic/iFlowformer.sh new file mode 100644 index 000000000..d2dd3d2be --- /dev/null +++ b/scripts/boost_performance/Traffic/iFlowformer.sh @@ -0,0 +1,182 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Flowformer + +python -u run.py \ + 
--task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/boost_performance/Traffic/iInformer.sh 
b/scripts/boost_performance/Traffic/iInformer.sh new file mode 100644 index 000000000..3263a97b4 --- /dev/null +++ b/scripts/boost_performance/Traffic/iInformer.sh @@ -0,0 +1,182 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Informer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +
+model_name=iInformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/boost_performance/Traffic/iReformer.sh b/scripts/boost_performance/Traffic/iReformer.sh new file mode 100644 index 000000000..c0b889344 --- /dev/null +++ b/scripts/boost_performance/Traffic/iReformer.sh @@ -0,0 +1,182 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Reformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +
+model_name=iReformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast
\ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/boost_performance/Traffic/iTransformer.sh b/scripts/boost_performance/Traffic/iTransformer.sh new file mode 100644 index 000000000..782456565 --- /dev/null +++ b/scripts/boost_performance/Traffic/iTransformer.sh @@ -0,0 +1,182 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + 
--seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/boost_performance/Weather/iFlowformer.sh b/scripts/boost_performance/Weather/iFlowformer.sh new file mode 100644 index 000000000..c490ae7b7 --- /dev/null +++ b/scripts/boost_performance/Weather/iFlowformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Flowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' 
\ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/Weather/iInformer.sh b/scripts/boost_performance/Weather/iInformer.sh new file mode 100644 index 000000000..52062b726 --- /dev/null +++ b/scripts/boost_performance/Weather/iInformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Informer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+model_name=iInformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/Weather/iReformer.sh b/scripts/boost_performance/Weather/iReformer.sh new file mode 100644 index 000000000..efc0fdddd --- /dev/null +++ b/scripts/boost_performance/Weather/iReformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Reformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 +
+model_name=iReformer +
+python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ +
--enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/boost_performance/Weather/iTransformer.sh b/scripts/boost_performance/Weather/iTransformer.sh index e69de29bb..e63372c97 100644 --- a/scripts/boost_performance/Weather/iTransformer.sh +++ b/scripts/boost_performance/Weather/iTransformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +model_name=iTransformer + +python -u run.py \ + --task_name 
long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/efficient_attentions/ECL/iFlashTransformer.sh b/scripts/efficient_attentions/ECL/iFlashTransformer.sh new file mode 100644 index 000000000..f0f8ef5b6 --- /dev/null +++ b/scripts/efficient_attentions/ECL/iFlashTransformer.sh @@ -0,0 +1,173 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + 
--pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +model_name=iFlashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 diff --git a/scripts/efficient_attentions/Traffic/iFlashTransformer.sh b/scripts/efficient_attentions/Traffic/iFlashTransformer.sh new file mode 100644 index 000000000..1752e6804 --- /dev/null +++ b/scripts/efficient_attentions/Traffic/iFlashTransformer.sh @@ -0,0 +1,181 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Flashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + 
--data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +model_name=iFlashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 diff --git a/scripts/efficient_attentions/Weather/iFlashTransformer.sh b/scripts/efficient_attentions/Weather/iFlashTransformer.sh new file mode 100644 index 000000000..410832341 --- /dev/null +++ b/scripts/efficient_attentions/Weather/iFlashTransformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + 
--e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +model_name=iFlashformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/efficient_attentions/iFlashTransformer.sh b/scripts/efficient_attentions/iFlashTransformer.sh deleted file mode 100644 index e69de29bb..000000000 diff --git a/scripts/increasing_lookback/ECL/iFlowformer.sh b/scripts/increasing_lookback/ECL/iFlowformer.sh new file mode 100644 index 000000000..33cf730df --- /dev/null +++ b/scripts/increasing_lookback/ECL/iFlowformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ 
+ --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ diff --git a/scripts/increasing_lookback/ECL/iInformer.sh b/scripts/increasing_lookback/ECL/iInformer.sh new file mode 100644 index 000000000..f60581620 --- /dev/null +++ b/scripts/increasing_lookback/ECL/iInformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iInformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + 
--root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ diff --git a/scripts/increasing_lookback/ECL/iReformer.sh b/scripts/increasing_lookback/ECL/iReformer.sh new file mode 100644 index 000000000..0a2423750 --- /dev/null +++ b/scripts/increasing_lookback/ECL/iReformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + 
--d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ diff --git a/scripts/increasing_lookback/ECL/iTransformer.sh b/scripts/increasing_lookback/ECL/iTransformer.sh new file mode 100644 index 000000000..7b074bc1e --- /dev/null +++ b/scripts/increasing_lookback/ECL/iTransformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ diff --git a/scripts/increasing_lookback/Traffic/iFlowformer.sh b/scripts/increasing_lookback/Traffic/iFlowformer.sh new file mode 100644 index 000000000..65ea6ba4e --- /dev/null +++ b/scripts/increasing_lookback/Traffic/iFlowformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + 
--d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ diff --git a/scripts/increasing_lookback/Traffic/iInformer.sh b/scripts/increasing_lookback/Traffic/iInformer.sh new file mode 100644 index 000000000..246526849 --- /dev/null +++ b/scripts/increasing_lookback/Traffic/iInformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iInformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 
48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ diff --git a/scripts/increasing_lookback/Traffic/iReformer.sh b/scripts/increasing_lookback/Traffic/iReformer.sh new file mode 100644 index 000000000..9b0681a0c --- /dev/null +++ b/scripts/increasing_lookback/Traffic/iReformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + 
--data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ diff --git a/scripts/increasing_lookback/Traffic/iTransformer.sh b/scripts/increasing_lookback/Traffic/iTransformer.sh index e69de29bb..c32fc8568 100644 --- a/scripts/increasing_lookback/Traffic/iTransformer.sh +++ b/scripts/increasing_lookback/Traffic/iTransformer.sh @@ -0,0 +1,128 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ diff --git a/scripts/multivariate_forecast/ECL/iTransformer.sh b/scripts/multivariate_forecast/ECL/iTransformer.sh new file mode 100644 index 000000000..0192f6d53 --- /dev/null +++ b/scripts/multivariate_forecast/ECL/iTransformer.sh @@ -0,0 +1,105 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path 
./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.0005\ + --itr 1 \ No newline at end of file diff --git a/scripts/multivariate_forecast/ETTh2/iTransformer.sh b/scripts/multivariate_forecast/ETTh2/iTransformer.sh new file mode 100644 index 000000000..40c5ecda8 --- /dev/null +++ b/scripts/multivariate_forecast/ETTh2/iTransformer.sh @@ -0,0 +1,95 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 128\ + --d_ff 128\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 128\ + --d_ff 128\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 128\ + --d_ff 128\ + --itr 1 \ + +python -u run.py \ + 
--task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 128\ + --d_ff 128\ + --itr 1 \ No newline at end of file diff --git a/scripts/multivariate_forecast/Pems/iTransformer.sh b/scripts/multivariate_forecast/Pems/iTransformer.sh new file mode 100644 index 000000000..32ec1cbd9 --- /dev/null +++ b/scripts/multivariate_forecast/Pems/iTransformer.sh @@ -0,0 +1,101 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/PEMS/ \ + --data_path PEMS03.npz \ + --model_id PEMS03_96_12 \ + --model $model_name \ + --data PEMS \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 12 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 358 \ + --dec_in 358 \ + --c_out 358 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.001\ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/PEMS/ \ + --data_path PEMS03.npz \ + --model_id PEMS03_96_24 \ + --model $model_name \ + --data PEMS \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 24 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 358 \ + --dec_in 358 \ + --c_out 358 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.001\ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/PEMS/ \ + --data_path PEMS03.npz \ + --model_id PEMS03_96_48 \ + --model $model_name \ + --data PEMS \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 48 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 358 \ + --dec_in 358 \ + --c_out 358 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.001\ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/PEMS/ \ + --data_path PEMS03.npz \ + --model_id PEMS03_96_96 \ + --model $model_name \ + --data PEMS \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 358 \ + --dec_in 358 \ + --c_out 358 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.001\ + --itr 1 \ No newline at end of file diff --git a/scripts/multivariate_forecast/Solar/iTransformer.sh b/scripts/multivariate_forecast/Solar/iTransformer.sh new file mode 100644 index 000000000..dad19e0e6 --- /dev/null +++ b/scripts/multivariate_forecast/Solar/iTransformer.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 137 \ + --dec_in 137 \ + --c_out 137 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.0005\ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_192 \ + --model $model_name \ + 
--data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 137 \ + --dec_in 137 \ + --c_out 137 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.0005\ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_336 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 137 \ + --dec_in 137 \ + --c_out 137 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.0005\ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_720 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 137 \ + --dec_in 137 \ + --c_out 137 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --learning_rate 0.0005\ + --itr 1 diff --git a/scripts/multivariate_forecast/Traffic/iTransformer.sh b/scripts/multivariate_forecast/Traffic/iTransformer.sh index e69de29bb..c8a476269 100644 --- a/scripts/multivariate_forecast/Traffic/iTransformer.sh +++ b/scripts/multivariate_forecast/Traffic/iTransformer.sh @@ -0,0 +1,103 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --batch_size 16\ + --learning_rate 0.001\ + --itr 1 \ No newline at end of file diff --git 
a/scripts/multivariate_forecast/Weather/iTransformer.sh b/scripts/multivariate_forecast/Weather/iTransformer.sh new file mode 100644 index 000000000..f2e3792a5 --- /dev/null +++ b/scripts/multivariate_forecast/Weather/iTransformer.sh @@ -0,0 +1,98 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Electricity/iFlowformer.sh b/scripts/variate_generalization/Electricity/iFlowformer.sh new file mode 100644 index 000000000..259d7cde6 --- /dev/null +++ b/scripts/variate_generalization/Electricity/iFlowformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 diff --git a/scripts/variate_generalization/Electricity/iInformer.sh b/scripts/variate_generalization/Electricity/iInformer.sh new file mode 100644 
index 000000000..3a105dbc9 --- /dev/null +++ b/scripts/variate_generalization/Electricity/iInformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Informer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iInformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Electricity/iReformer.sh b/scripts/variate_generalization/Electricity/iReformer.sh new file mode 100644 index 000000000..64494f9f8 --- /dev/null +++ b/scripts/variate_generalization/Electricity/iReformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Electricity/iTransformer.sh b/scripts/variate_generalization/Electricity/iTransformer.sh index e69de29bb..6a81dd6d4 100644 --- a/scripts/variate_generalization/Electricity/iTransformer.sh +++ b/scripts/variate_generalization/Electricity/iTransformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=3 + +#model_name=Transformer +# +#python -u run.py \ +# --task_name long_term_forecast \ +# --is_training 1 \ +# --root_path ./dataset/electricity/ \ +# --data_path electricity.csv \ +# --model_id ECL_96_96 \ +# --model $model_name \ +# --data custom \ +# --features M \ +# --seq_len 96 \ +# --label_len 48 \ +# --pred_len 96 \ +# --e_layers 2 \ +# --d_layers 1 \ +# --factor 3 \ +# --enc_in 64 \ +# --dec_in 64 \ +# --c_out 64 \ +# --des 'Exp' \ +# --channel_independent true \ +# --exp_name partial_train \ +# --batch_size 8 \ +# --d_model 32 \ +# --d_ff 64 \ +# --itr 1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path 
./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 64 \ + --dec_in 64 \ + --c_out 64 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Solar/iFlowformer.sh b/scripts/variate_generalization/Solar/iFlowformer.sh new file mode 100644 index 000000000..cd026533c --- /dev/null +++ b/scripts/variate_generalization/Solar/iFlowformer.sh @@ -0,0 +1,55 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --d_model 32 \ + --d_ff 64 \ + --learning_rate 0.0005 \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --itr 1 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --learning_rate 0.0005 \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Solar/iInformer.sh b/scripts/variate_generalization/Solar/iInformer.sh new file mode 100644 index 000000000..968d47a0c --- /dev/null +++ b/scripts/variate_generalization/Solar/iInformer.sh @@ -0,0 +1,55 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Informer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --d_model 32 \ + --d_ff 64 \ + --learning_rate 0.0005 \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --itr 1 + +model_name=iInformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --learning_rate 0.0005 \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Solar/iReformer.sh b/scripts/variate_generalization/Solar/iReformer.sh new file mode 100644 index 000000000..4660cd2ff --- /dev/null +++ b/scripts/variate_generalization/Solar/iReformer.sh @@ -0,0 +1,55 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name 
\ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --d_model 32 \ + --d_ff 64 \ + --learning_rate 0.0005 \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --itr 1 + +model_name=iReformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --learning_rate 0.0005 \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Solar/iTransformer.sh b/scripts/variate_generalization/Solar/iTransformer.sh new file mode 100644 index 000000000..c19304d94 --- /dev/null +++ b/scripts/variate_generalization/Solar/iTransformer.sh @@ -0,0 +1,55 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --d_model 32 \ + --d_ff 64 \ + --learning_rate 0.0005 \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --itr 1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path /data/Solar/ \ + --data_path solar_AL.txt \ + --model_id solar_96_96 \ + --model $model_name \ + --data Solar \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 27 \ + --dec_in 27 \ + --c_out 27 \ + --des 'Exp' \ + --learning_rate 0.0005 \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Traffic/iFlowformer.sh b/scripts/variate_generalization/Traffic/iFlowformer.sh new file mode 100644 index 000000000..0c4580f33 --- /dev/null +++ b/scripts/variate_generalization/Traffic/iFlowformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Flowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 4 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iFlowformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 diff --git 
a/scripts/variate_generalization/Traffic/iInformer.sh b/scripts/variate_generalization/Traffic/iInformer.sh new file mode 100644 index 000000000..1c03c1350 --- /dev/null +++ b/scripts/variate_generalization/Traffic/iInformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Informer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 4 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iInformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 diff --git a/scripts/variate_generalization/Traffic/iReformer.sh b/scripts/variate_generalization/Traffic/iReformer.sh new file mode 100644 index 000000000..811e78599 --- /dev/null +++ b/scripts/variate_generalization/Traffic/iReformer.sh @@ -0,0 +1,54 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 4 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iReformer + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 \ No newline at end of file diff --git a/scripts/variate_generalization/Traffic/iTransformer.sh b/scripts/variate_generalization/Traffic/iTransformer.sh new file mode 100644 index 000000000..04b6b9137 --- /dev/null +++ b/scripts/variate_generalization/Traffic/iTransformer.sh @@ -0,0 +1,53 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --channel_independent true \ + --exp_name partial_train \ + --batch_size 8 \ + --d_model 32 \ + --d_ff 64 \ + --itr 1 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path 
./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 172 \ + --dec_in 172 \ + --c_out 172 \ + --des 'Exp' \ + --exp_name partial_train \ + --itr 1 diff --git a/utils/masking.py b/utils/masking.py new file mode 100644 index 000000000..a19cbf63b --- /dev/null +++ b/utils/masking.py @@ -0,0 +1,26 @@ +import torch + + +class TriangularCausalMask(): + def __init__(self, B, L, device="cpu"): + mask_shape = [B, 1, L, L] + with torch.no_grad(): + self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) + + @property + def mask(self): + return self._mask + + +class ProbMask(): + def __init__(self, B, H, L, index, scores, device="cpu"): + _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) + _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) + indicator = _mask_ex[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + index, :].to(device) + self._mask = indicator.view(scores.shape).to(device) + + @property + def mask(self): + return self._mask diff --git a/utils/metrics.py b/utils/metrics.py new file mode 100644 index 000000000..b4f5a76b8 --- /dev/null +++ b/utils/metrics.py @@ -0,0 +1,41 @@ +import numpy as np + + +def RSE(pred, true): + return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2)) + + +def CORR(pred, true): + u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0) + d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0)) + return (u / d).mean(-1) + + +def MAE(pred, true): + return np.mean(np.abs(pred - true)) + + +def MSE(pred, true): + return np.mean((pred - true) ** 2) + + +def RMSE(pred, true): + return np.sqrt(MSE(pred, true)) + + +def MAPE(pred, true): + return np.mean(np.abs((pred - true) / true)) + + +def MSPE(pred, true): + return np.mean(np.square((pred - true) / true)) + + +def metric(pred, true): + mae = MAE(pred, true) + mse = MSE(pred, true) + rmse = RMSE(pred, true) + mape = MAPE(pred, true) + mspe = MSPE(pred, true) + + return mae, mse, rmse, mape, mspe diff --git a/utils/timefeatures.py b/utils/timefeatures.py new file mode 100644 index 000000000..7c1297293 --- /dev/null +++ b/utils/timefeatures.py @@ -0,0 +1,148 @@ +# From: gluonts/src/gluonts/time_feature/_base.py +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. 
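[Editor's note, not part of the patch] For context, a minimal sketch of how the masking and metric helpers defined just above (utils/masking.py and utils/metrics.py) are typically consumed; the tensor shapes, the toy arrays, and the softmax step are illustrative assumptions, not code from this commit.

import numpy as np
import torch

from utils.masking import TriangularCausalMask
from utils.metrics import metric

# Build a causal mask for a toy batch of attention scores (shapes are illustrative).
B, H, L = 2, 4, 96
scores = torch.randn(B, H, L, L)
causal = TriangularCausalMask(B, L)                      # bool mask of shape [B, 1, L, L]
scores = scores.masked_fill(causal.mask, float('-inf'))  # hide attention to future positions
attn = torch.softmax(scores, dim=-1)

# Score a toy forecast with the metric helpers (random placeholders, not real results).
pred = np.random.randn(32, 96, 7)
true = np.random.randn(32, 96, 7)
mae, mse, rmse, mape, mspe = metric(pred, true)
print(f'MAE={mae:.4f}, MSE={mse:.4f}')
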
+ +from typing import List + +import numpy as np +import pandas as pd +from pandas.tseries import offsets +from pandas.tseries.frequencies import to_offset + + +class TimeFeature: + def __init__(self): + pass + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + pass + + def __repr__(self): + return self.__class__.__name__ + "()" + + +class SecondOfMinute(TimeFeature): + """Minute of hour encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.second / 59.0 - 0.5 + + +class MinuteOfHour(TimeFeature): + """Minute of hour encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.minute / 59.0 - 0.5 + + +class HourOfDay(TimeFeature): + """Hour of day encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.hour / 23.0 - 0.5 + + +class DayOfWeek(TimeFeature): + """Hour of day encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.dayofweek / 6.0 - 0.5 + + +class DayOfMonth(TimeFeature): + """Day of month encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.day - 1) / 30.0 - 0.5 + + +class DayOfYear(TimeFeature): + """Day of year encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.dayofyear - 1) / 365.0 - 0.5 + + +class MonthOfYear(TimeFeature): + """Month of year encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.month - 1) / 11.0 - 0.5 + + +class WeekOfYear(TimeFeature): + """Week of year encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.isocalendar().week - 1) / 52.0 - 0.5 + + +def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: + """ + Returns a list of time features that will be appropriate for the given frequency string. + Parameters + ---------- + freq_str + Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. 
+ """ + + features_by_offsets = { + offsets.YearEnd: [], + offsets.QuarterEnd: [MonthOfYear], + offsets.MonthEnd: [MonthOfYear], + offsets.Week: [DayOfMonth, WeekOfYear], + offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], + offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], + offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], + offsets.Minute: [ + MinuteOfHour, + HourOfDay, + DayOfWeek, + DayOfMonth, + DayOfYear, + ], + offsets.Second: [ + SecondOfMinute, + MinuteOfHour, + HourOfDay, + DayOfWeek, + DayOfMonth, + DayOfYear, + ], + } + + offset = to_offset(freq_str) + + for offset_type, feature_classes in features_by_offsets.items(): + if isinstance(offset, offset_type): + return [cls() for cls in feature_classes] + + supported_freq_msg = f""" + Unsupported frequency {freq_str} + The following frequencies are supported: + Y - yearly + alias: A + M - monthly + W - weekly + D - daily + B - business days + H - hourly + T - minutely + alias: min + S - secondly + """ + raise RuntimeError(supported_freq_msg) + + +def time_features(dates, freq='h'): + return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]) diff --git a/utils/tools.py b/utils/tools.py new file mode 100644 index 000000000..a357cc2c4 --- /dev/null +++ b/utils/tools.py @@ -0,0 +1,115 @@ +import os + +import numpy as np +import torch +import matplotlib.pyplot as plt +import pandas as pd + +plt.switch_backend('agg') + + +def adjust_learning_rate(optimizer, epoch, args): + # lr = args.learning_rate * (0.2 ** (epoch // 2)) + if args.lradj == 'type1': + lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))} + elif args.lradj == 'type2': + lr_adjust = { + 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, + 10: 5e-7, 15: 1e-7, 20: 5e-8 + } + if epoch in lr_adjust.keys(): + lr = lr_adjust[epoch] + for param_group in optimizer.param_groups: + param_group['lr'] = lr + print('Updating learning rate to {}'.format(lr)) + + +class EarlyStopping: + def __init__(self, patience=7, verbose=False, delta=0): + self.patience = patience + self.verbose = verbose + self.counter = 0 + self.best_score = None + self.early_stop = False + self.val_loss_min = np.Inf + self.delta = delta + + def __call__(self, val_loss, model, path): + score = -val_loss + if self.best_score is None: + self.best_score = score + self.save_checkpoint(val_loss, model, path) + elif score < self.best_score + self.delta: + self.counter += 1 + print(f'EarlyStopping counter: {self.counter} out of {self.patience}') + if self.counter >= self.patience: + self.early_stop = True + else: + self.best_score = score + self.save_checkpoint(val_loss, model, path) + self.counter = 0 + + def save_checkpoint(self, val_loss, model, path): + if self.verbose: + print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). 
Saving model ...') + torch.save(model.state_dict(), path + '/' + 'checkpoint.pth') + self.val_loss_min = val_loss + + +class dotdict(dict): + """dot.notation access to dictionary attributes""" + __getattr__ = dict.get + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ + + +class StandardScaler(): + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def transform(self, data): + return (data - self.mean) / self.std + + def inverse_transform(self, data): + return (data * self.std) + self.mean + + +def visual(true, preds=None, name='./pic/test.pdf'): + """ + Results visualization + """ + plt.figure() + plt.plot(true, label='GroundTruth', linewidth=2) + if preds is not None: + plt.plot(preds, label='Prediction', linewidth=2) + plt.legend() + plt.savefig(name, bbox_inches='tight') + + +def adjustment(gt, pred): + anomaly_state = False + for i in range(len(gt)): + if gt[i] == 1 and pred[i] == 1 and not anomaly_state: + anomaly_state = True + for j in range(i, 0, -1): + if gt[j] == 0: + break + else: + if pred[j] == 0: + pred[j] = 1 + for j in range(i, len(gt)): + if gt[j] == 0: + break + else: + if pred[j] == 0: + pred[j] = 1 + elif gt[i] == 0: + anomaly_state = False + if anomaly_state: + pred[i] = 1 + return gt, pred + + +def cal_accuracy(y_pred, y_true): + return np.mean(y_pred == y_true) \ No newline at end of file
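[Editor's note, not part of the patch] A minimal sketch of how EarlyStopping and adjust_learning_rate from utils/tools.py are usually wired into a training loop; the SimpleNamespace args, the toy model, and the placeholder validation loss are assumptions standing in for what run.py and the experiment classes construct.

import os
from types import SimpleNamespace

import torch

from utils.tools import EarlyStopping, adjust_learning_rate

# Hypothetical stand-ins for what run.py builds from its CLI arguments.
args = SimpleNamespace(learning_rate=1e-3, lradj='type1', train_epochs=10, patience=3)
model = torch.nn.Linear(96, 96)
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

path = './checkpoints/demo'
os.makedirs(path, exist_ok=True)
early_stopping = EarlyStopping(patience=args.patience, verbose=True)

for epoch in range(1, args.train_epochs + 1):
    # ... one training epoch would run here; use a placeholder validation loss ...
    vali_loss = 1.0 / epoch
    early_stopping(vali_loss, model, path)   # writes checkpoint.pth whenever the loss improves
    if early_stopping.early_stop:
        print('Early stopping')
        break
    adjust_learning_rate(optimizer, epoch + 1, args)  # 'type1' halves the learning rate each epoch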