Commit

refactor: change setting and testing

james397520 committed Oct 17, 2023
1 parent 10864d7 commit be44d25
Showing 3 changed files with 45 additions and 12 deletions.
32 changes: 29 additions & 3 deletions dataloader.py
@@ -6,7 +6,7 @@
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import Dataset, DataLoader
import pickle

import numpy as np


# Z-Score Normalization Function
@@ -31,6 +31,10 @@ def min_max_normalize(df, columns, save_scaler_path=None):

    return df

# Define the sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
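
As a quick aside, the commented-out .apply(sigmoid) call in HousePriceTrainDataset.__init__ below suggests this helper is meant to squash the normalized target into (0, 1). A minimal sketch with invented values; note that Series.apply returns a new Series, so the result has to be assigned back:

```python
import numpy as np
import pandas as pd

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Invented z-score-normalized target values, for illustration only.
df = pd.DataFrame({'單價': [-1.2, 0.0, 2.5]})
df['單價'] = df['單價'].apply(sigmoid)  # assign back; apply() does not modify in place
print(df['單價'].tolist())  # every value now lies strictly in (0, 1)
```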

# Denormalization Function for Z-Score
def z_score_denormalize(df, columns, scaler_path):
    with open(scaler_path, 'rb') as f:
Expand All @@ -47,12 +51,29 @@ def min_max_denormalize(df, columns, scaler_path):
    df[columns] = scaler.inverse_transform(df[columns])
    return df

# One-hot encoding helper
def one_hot_encode(df):
    # Columns that need one-hot encoding
    columns_to_encode = ['地區', '使用分區', '主要用途', '主要建材', '建物型態']

    # Use pandas get_dummies to perform the one-hot encoding
    df_encoded = pd.get_dummies(df, columns=columns_to_encode)

    return df_encoded

# Call this function when loading and preprocessing the data
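
A minimal usage sketch; the rows are invented and only the column names come from the code above:

```python
import pandas as pd

# Invented toy rows for illustration.
df = pd.DataFrame({
    '地區': ['台北市大安區', '新北市板橋區'],
    '使用分區': ['住', '商'],
    '主要用途': ['住家用', '辦公室'],
    '主要建材': ['鋼筋混凝土', '鋼骨'],
    '建物型態': ['住宅大樓', '套房'],
    '土地面積': [35.2, 18.7],
})
encoded = pd.get_dummies(df, columns=['地區', '使用分區', '主要用途', '主要建材', '建物型態'])
print(encoded.columns.tolist())  # '土地面積' plus one 0/1 indicator column per category value
```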




# Custom Dataset Class with Normalization Option
class HousePriceTrainDataset(Dataset):
    def __init__(self, dataframe, target_column, normalize_columns=None):
        self.dataframe = dataframe.copy()  # Creating a copy to avoid modifying the original dataframe
        # Merge the '縣市' and '鄉鎮市區' columns into a single '地區' column
        self.dataframe['地區'] = self.dataframe['縣市'] + self.dataframe['鄉鎮市區']
        feature_list = []
        # Applying the specified normalization methods to the specified columns
        if normalize_columns:
@@ -61,9 +82,14 @@ def __init__(self, dataframe, target_column, normalize_columns=None):
                    self.dataframe = z_score_normalize(self.dataframe, [column], save_scaler_path="pkl/" + column + "_z_score_normalize_data.pkl")
                elif method == 'min-max':
                    self.dataframe = min_max_normalize(self.dataframe, [column], save_scaler_path="pkl/" + column + "_min_max_normalize_data.pkl")
                    feature_list.append(column)

                feature_list.append(column)
        self.dataframe = one_hot_encode(self.dataframe)
        self.dataframe = min_max_normalize(self.dataframe, [target_column], save_scaler_path="pkl/" + target_column + "_min_max_normalize_data.pkl")
        # self.dataframe = z_score_normalize(self.dataframe, [target_column], save_scaler_path="pkl/" + target_column + "_z_score_normalize_data.pkl")

        # self.dataframe[target_column].apply(sigmoid)
        print(self.dataframe[feature_list].head())
        self.features = self.dataframe[feature_list].values
        self.target = self.dataframe[target_column].values

@@ -122,7 +148,7 @@ def __getitem__(self, idx):

# Create the normalized dataset
normalized_dataset = HousePriceTrainDataset(data, target_column, normalize_columns)

# Access a sample from the normalized dataset
sample = normalized_dataset[0]  # the first sample after normalization
print(sample)
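
From here the dataset plugs into a standard DataLoader. A short sketch, assuming __getitem__ returns a dict with 'features' and 'target' keys, which is what the training loop in train.py expects:

```python
from torch.utils.data import DataLoader

loader = DataLoader(normalized_dataset, batch_size=128, shuffle=True)
batch = next(iter(loader))
print(batch['features'].shape, batch['target'].shape)
```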
8 changes: 5 additions & 3 deletions inference.py
@@ -40,11 +40,11 @@ def inference():
    data_loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False)

    # Load Model
    model_path = 'model.pth'  # Update with the path of your trained model file
    model_path = 'model_cnn.pth'  # Update with the path of your trained model file
    input_dim = len(normalize_columns.keys())
    # model = HousePriceModel(input_dim)
    model = HousePriceModel(input_dim)
    # model = TransformerRegressor(input_dim, 4, 6)
    model = HousePriceModel_CNN(input_dim)
    # model = HousePriceModel_CNN(input_dim)
    if gpu:
        model = model.cuda()
    else:
@@ -66,6 +66,8 @@ def inference():
ids = [f"PU-{i}" for i in range(1, len(predictions) + 1)] # Adjust ID format as needed
predicted_prices_df = pd.DataFrame({"ID": ids, "predicted_price": predictions})
predicted_prices_df = min_max_denormalize(predicted_prices_df, ["predicted_price"],scaler_path="pkl/單價_min_max_normalize_data.pkl")
# predicted_prices_df = z_score_denormalize(predicted_prices_df, ["predicted_price"],scaler_path="pkl/單價_z_score_normalize_data.pkl")

# Save to CSV
output_csv_path = 'predicted_prices.csv' # Update with the desired output path
predicted_prices_df.to_csv(output_csv_path, index=False)
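
For reference, a sketch of what min_max_denormalize does under the hood: unpickle the MinMaxScaler saved at training time and invert the scaling. The predictions below are invented; the scaler path is the same one used above:

```python
import pickle
import pandas as pd

df = pd.DataFrame({'predicted_price': [0.12, 0.57, 0.93]})  # invented model outputs in [0, 1]
with open('pkl/單價_min_max_normalize_data.pkl', 'rb') as f:
    scaler = pickle.load(f)
df[['predicted_price']] = scaler.inverse_transform(df[['predicted_price']])
print(df)  # prices back on the original scale
```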
17 changes: 11 additions & 6 deletions train.py
@@ -11,12 +11,17 @@ def main():

    # Hyperparameters
    batch_size = 128
    learning_rate = 0.001
    learning_rate = 1e-3
    epochs = 100

    data = pd.read_csv('data/training_data.csv')
    # Specify the columns to normalize and the normalization method for each
    normalize_columns = {
        '地區': 'one-hot-encoding',
        '使用分區': 'one-hot-encoding',
        '主要用途': 'one-hot-encoding',
        '主要建材': 'one-hot-encoding',
        '建物型態': 'one-hot-encoding',
        '土地面積': 'min-max',
        '移轉層次': 'min-max',
        '總樓層數': 'min-max',
@@ -45,8 +50,8 @@
    print(input_dim)
    # Initialize model
    # model = HousePriceModel(input_dim)
    # model = TransformerRegressor(input_dim, 4, 6)
    model = HousePriceModel_CNN(input_dim)
    model = TransformerRegressor(input_dim, 4, 6)
    # model = HousePriceModel_CNN(input_dim)

    if gpu:
        model = model.cuda()
@@ -60,8 +65,8 @@ def main():
    # Training loop
    for epoch in range(epochs):
        for batch in train_loader:
            # print(batch['features'].shape)
            # print(batch['target'].shape)
            print(batch['features'].shape)
            print(batch['target'].shape)
            if gpu:
                data = batch['features'].cuda()
                targets = batch['target'].cuda()
@@ -80,7 +85,7 @@ def main():
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

    # Save the trained model
    torch.save(model.state_dict(), 'model.pth')
    torch.save(model.state_dict(), 'model_cnn.pth')
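
One caveat when reloading this checkpoint: train.py now instantiates TransformerRegressor but still writes to model_cnn.pth, so the loading side must construct the same architecture the weights came from. A minimal loading sketch, assuming the input_dim used at training time:

```python
import torch

# The class must match the trained weights; per this commit that is
# TransformerRegressor(input_dim, 4, 6), despite the '_cnn' in the filename.
model = TransformerRegressor(input_dim, 4, 6)
model.load_state_dict(torch.load('model_cnn.pth'))
model.eval()  # switch to inference mode before predicting
```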


