Commit

refactor: change setting and testing

james397520 committed Oct 17, 2023
1 parent 10864d7 commit be44d25
Showing 3 changed files with 45 additions and 12 deletions.
32 changes: 29 additions & 3 deletions dataloader.py
@@ -6,7 +6,7 @@
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import Dataset, DataLoader
import pickle

import numpy as np


# Z-Score Normalization Function
@@ -31,6 +31,10 @@ def min_max_normalize(df, columns, save_scaler_path=None):

    return df

# Define the sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
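
As a quick aside, the commented-out .apply(sigmoid) call in HousePriceTrainDataset.__init__ below suggests this helper is meant to squash the normalized target into (0, 1). A minimal sketch with invented values; note that Series.apply returns a new Series, so the result has to be assigned back:

```python
import numpy as np
import pandas as pd

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Invented z-score-normalized target values, for illustration only.
df = pd.DataFrame({'單價': [-1.2, 0.0, 2.5]})
df['單價'] = df['單價'].apply(sigmoid)  # assign back; apply() does not modify in place
print(df['單價'].tolist())  # every value now lies strictly in (0, 1)
```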

# Denormalization Function for Z-Score
def z_score_denormalize(df, columns, scaler_path):
    with open(scaler_path, 'rb') as f:
Expand All @@ -47,12 +51,29 @@ def min_max_denormalize(df, columns, scaler_path):
    df[columns] = scaler.inverse_transform(df[columns])
    return df

# One-hot encoding helper
def one_hot_encode(df):
    # Columns that need one-hot encoding
    columns_to_encode = ['地區', '使用分區', '主要用途', '主要建材', '建物型態']

    # Use pandas get_dummies to perform the one-hot encoding
    df_encoded = pd.get_dummies(df, columns=columns_to_encode)

    return df_encoded

# Call this function when loading and preprocessing the data
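
A minimal usage sketch; the rows are invented and only the column names come from the code above:

```python
import pandas as pd

# Invented toy rows for illustration.
df = pd.DataFrame({
    '地區': ['台北市大安區', '新北市板橋區'],
    '使用分區': ['住', '商'],
    '主要用途': ['住家用', '辦公室'],
    '主要建材': ['鋼筋混凝土', '鋼骨'],
    '建物型態': ['住宅大樓', '套房'],
    '土地面積': [35.2, 18.7],
})
encoded = pd.get_dummies(df, columns=['地區', '使用分區', '主要用途', '主要建材', '建物型態'])
print(encoded.columns.tolist())  # '土地面積' plus one 0/1 indicator column per category value
```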




# Custom Dataset Class with Normalization Option
class HousePriceTrainDataset(Dataset):
    def __init__(self, dataframe, target_column, normalize_columns=None):
        self.dataframe = dataframe.copy()  # Creating a copy to avoid modifying the original dataframe
        # Merge the '縣市' and '鄉鎮市區' columns into a single '地區' column
        self.dataframe['地區'] = self.dataframe['縣市'] + self.dataframe['鄉鎮市區']
        feature_list = []
        # Applying the specified normalization methods to the specified columns
        if normalize_columns:
@@ -61,9 +82,14 @@ def __init__(self, dataframe, target_column, normalize_columns=None):
                    self.dataframe = z_score_normalize(self.dataframe, [column], save_scaler_path="pkl/" + column + "_z_score_normalize_data.pkl")
                elif method == 'min-max':
                    self.dataframe = min_max_normalize(self.dataframe, [column], save_scaler_path="pkl/" + column + "_min_max_normalize_data.pkl")
                    feature_list.append(column)

                feature_list.append(column)
        self.dataframe = one_hot_encode(self.dataframe)
        self.dataframe = min_max_normalize(self.dataframe, [target_column], save_scaler_path="pkl/" + target_column + "_min_max_normalize_data.pkl")
        # self.dataframe = z_score_normalize(self.dataframe, [target_column], save_scaler_path="pkl/" + target_column + "_z_score_normalize_data.pkl")

        # self.dataframe[target_column].apply(sigmoid)
        print(self.dataframe[feature_list].head())
        self.features = self.dataframe[feature_list].values
        self.target = self.dataframe[target_column].values

@@ -122,7 +148,7 @@ def __getitem__(self, idx):

# Create the normalized dataset
normalized_dataset = HousePriceTrainDataset(data, target_column, normalize_columns)

# Access a sample from the normalized dataset
sample = normalized_dataset[0]  # the first sample after normalization
print(sample)
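
From here the dataset plugs into a standard DataLoader. A short sketch, assuming __getitem__ returns a dict with 'features' and 'target' keys, which is what the training loop in train.py expects:

```python
from torch.utils.data import DataLoader

loader = DataLoader(normalized_dataset, batch_size=128, shuffle=True)
batch = next(iter(loader))
print(batch['features'].shape, batch['target'].shape)
```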
8 changes: 5 additions & 3 deletions inference.py
@@ -40,11 +40,11 @@ def inference():
    data_loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False)

    # Load Model
    model_path = 'model.pth'  # Update with the path of your trained model file
    model_path = 'model_cnn.pth'  # Update with the path of your trained model file
    input_dim = len(normalize_columns.keys())
    # model = HousePriceModel(input_dim)
    model = HousePriceModel(input_dim)
    # model = TransformerRegressor(input_dim, 4, 6)
    model = HousePriceModel_CNN(input_dim)
    # model = HousePriceModel_CNN(input_dim)
    if gpu:
        model = model.cuda()
    else:
@@ -66,6 +66,8 @@ def inference():
ids = [f"PU-{i}" for i in range(1, len(predictions) + 1)] # Adjust ID format as needed
predicted_prices_df = pd.DataFrame({"ID": ids, "predicted_price": predictions})
predicted_prices_df = min_max_denormalize(predicted_prices_df, ["predicted_price"],scaler_path="pkl/單價_min_max_normalize_data.pkl")
# predicted_prices_df = z_score_denormalize(predicted_prices_df, ["predicted_price"],scaler_path="pkl/單價_z_score_normalize_data.pkl")

# Save to CSV
output_csv_path = 'predicted_prices.csv' # Update with the desired output path
predicted_prices_df.to_csv(output_csv_path, index=False)
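
For reference, a sketch of what min_max_denormalize does under the hood: unpickle the MinMaxScaler saved at training time and invert the scaling. The predictions below are invented; the scaler path is the same one used above:

```python
import pickle
import pandas as pd

df = pd.DataFrame({'predicted_price': [0.12, 0.57, 0.93]})  # invented model outputs in [0, 1]
with open('pkl/單價_min_max_normalize_data.pkl', 'rb') as f:
    scaler = pickle.load(f)
df[['predicted_price']] = scaler.inverse_transform(df[['predicted_price']])
print(df)  # prices back on the original scale
```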
17 changes: 11 additions & 6 deletions train.py
@@ -11,12 +11,17 @@ def main():

    # Hyperparameters
    batch_size = 128
    learning_rate = 0.001
    learning_rate = 1e-3
    epochs = 100

    data = pd.read_csv('data/training_data.csv')
    # Specify the columns to normalize and the normalization method for each
    normalize_columns = {
        '地區': 'one-hot-encoding',
        '使用分區': 'one-hot-encoding',
        '主要用途': 'one-hot-encoding',
        '主要建材': 'one-hot-encoding',
        '建物型態': 'one-hot-encoding',
        '土地面積': 'min-max',
        '移轉層次': 'min-max',
        '總樓層數': 'min-max',
@@ -45,8 +50,8 @@
    print(input_dim)
    # Initialize model
    # model = HousePriceModel(input_dim)
    # model = TransformerRegressor(input_dim, 4, 6)
    model = HousePriceModel_CNN(input_dim)
    model = TransformerRegressor(input_dim, 4, 6)
    # model = HousePriceModel_CNN(input_dim)

    if gpu:
        model = model.cuda()
@@ -60,8 +65,8 @@ def main():
    # Training loop
    for epoch in range(epochs):
        for batch in train_loader:
            # print(batch['features'].shape)
            # print(batch['target'].shape)
            print(batch['features'].shape)
            print(batch['target'].shape)
            if gpu:
                data = batch['features'].cuda()
                targets = batch['target'].cuda()
@@ -80,7 +85,7 @@ def main():
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

    # Save the trained model
    torch.save(model.state_dict(), 'model.pth')
    torch.save(model.state_dict(), 'model_cnn.pth')
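
One caveat when reloading this checkpoint: train.py now instantiates TransformerRegressor but still writes to model_cnn.pth, so the loading side must construct the same architecture the weights came from. A minimal loading sketch, assuming the input_dim used at training time:

```python
import torch

# The class must match the trained weights; per this commit that is
# TransformerRegressor(input_dim, 4, 6), despite the '_cnn' in the filename.
model = TransformerRegressor(input_dim, 4, 6)
model.load_state_dict(torch.load('model_cnn.pth'))
model.eval()  # switch to inference mode before predicting
```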


