Skip to content

Commit 0b10ff3

Browse files
committed
minor edit
1 parent c0511cd commit 0b10ff3

File tree

3 files changed

+44
-14
lines changed

3 files changed

+44
-14
lines changed

preprocessing.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
1+
from os import remove
12
import pandas as pd
23
import os
34
from typing import List, Dict
45
from PIL import Image
56
import seaborn as sns
67
import matplotlib.pyplot as plt
8+
import numpy as np
9+
from torch import float32
710

811
data_path = "./data/"
912

@@ -30,15 +33,42 @@ def resize_images(images: List[str]):
3033
new_image.save(f"{data_path}processed_images/{i}")
3134

3235

36+
def remove_outliers(df: pd.DataFrame, col: str, lower: float = 0.02, upper: float = 0.98) -> pd.DataFrame:
    """Return the rows of *df* whose *col* value lies within a quantile band.

    Args:
        df: Frame to filter; it is not modified.
        col: Name of the numeric column used to decide which rows to keep.
        lower: Lower quantile cut-off, in ``[0, 1]``.
        upper: Upper quantile cut-off, in ``[lower, 1]``.

    Returns:
        A new frame holding only the rows with
        ``quantile(lower) <= df[col] <= quantile(upper)``. Rows whose value
        is NaN are dropped because they compare false against both bounds.

    Raises:
        ValueError: If the cut-offs are not ordered within ``[0, 1]``.
    """
    if not 0.0 <= lower <= upper <= 1.0:
        raise ValueError(
            f"quantile bounds must satisfy 0 <= lower <= upper <= 1, got {lower} and {upper}"
        )

    values = df[col]
    q_low = values.quantile(lower)
    q_hi = values.quantile(upper)

    # Bounds are inclusive on purpose: with strict comparisons every row that
    # sits exactly on a quantile is discarded, which wipes out whole value
    # groups in discrete columns (e.g. all 1-bed listings) and can even
    # return an empty frame when most values are identical.
    return df[values.between(q_low, q_hi)]
42+
43+
3344
# Load the raw listing data and the names of the image files on disk.
# NOTE(review): read_pickle executes arbitrary code from the file; only use
# it on pickles produced by this project.
images = os.listdir(f"{data_path}images")
df = pd.read_pickle(f"{data_path}ny_dataframe.pkl")

# Presumably keeps only the rows that have a matching image (helper is
# defined earlier in this file); then cap the working set at 800 rows.
# The explicit copy makes the column assignments below operate on an
# independent frame instead of a slice, avoiding pandas' chained-assignment
# warning.
df = select_rows_with_images(images, df)
df = df.iloc[0:800].copy()

# Cast every column used downstream to float.
for column in (
    "unformattedPrice",
    "latLong_latitude",
    "latLong_longitude",
    "beds",
    "baths",
    "area",
):
    df[column] = df[column].astype(float)

# Inspect the price distribution before filtering.
print(df.describe())
ax = sns.boxplot(x=df["unformattedPrice"])
plt.show()
print(df.dtypes)

# Trim extreme rows one column at a time; each pass works on the frame left
# by the previous one, so the order of the columns matters.
for column in ("unformattedPrice", "beds", "baths", "area"):
    df = remove_outliers(df, column)

# Inspect the price distribution again after filtering, then persist.
print(df.describe())
ax = sns.boxplot(x=df["unformattedPrice"])
plt.show()
df.to_pickle(f"{data_path}df.pkl")

# resize_images(images)

pytorch_lightning_regression.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def conv_block(input_size, output_size):
6161

6262

6363
class LitClassifier(pl.LightningModule):
64-
def __init__(self, lr):
64+
def __init__(self, lr=1e-3):
6565
super().__init__()
6666
self.lr = lr
6767
self.conv1 = conv_block(3, 16)
@@ -93,7 +93,7 @@ def forward(self, x):
9393
return self.ln3(x)
9494

9595
def train_dataloader(self):
    """Return the training ``DataLoader`` in batches of 32.

    Reads the module-level ``image_data`` dataset, which this file creates
    inside its ``__main__`` block.
    """
    loader = DataLoader(image_data, batch_size=32)
    return loader
9797

9898
def training_step(self, batch, batch_nb):
9999
x, y = batch
@@ -102,32 +102,32 @@ def training_step(self, batch, batch_nb):
102102
# print(self(x))
103103
# print(y)
104104
# print(torch.flatten(self(x)))
105-
loss = torch.nn.functional.l1_loss(torch.flatten(self(x)), y)
105+
criterion = torch.nn.L1Loss()
106+
y_pred = torch.flatten(self(x))
107+
y_pred = y_pred.double()
108+
# loss = torch.sqrt(criterion(y_pred, y))
109+
loss = criterion(y_pred, y)
106110

107111
tensorboard_logs = {"train_loss": loss}
108112
return {"loss": loss, "log": tensorboard_logs}
109113

110114
def configure_optimizers(self):
    """Build the Adam optimizer over all model parameters, using ``self.lr``."""
    optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
    return optimizer
112116

113117

114118
if __name__ == "__main__":
    # Dataset built from df.pkl and the processed image directory.
    # It is bound at module level because LitClassifier.train_dataloader
    # reads the global name ``image_data``.
    image_data = ImageDataset(pickle_file=f"{data_path}df.pkl", image_dir=f"{data_path}processed_images/")

    model = LitClassifier()
    # mlflow_logger = pl_loggers.MLFlowLogger("logs/")
    trainer = pl.Trainer(gpus=1)

    # Run the learning-rate range test and show the loss-vs-lr curve.
    lr_finder = trainer.lr_find(model)
    print(lr_finder.results)
    lr_finder.plot(suggest=True, show=True)

    new_lr = lr_finder.suggestion()
    print(new_lr)
    if new_lr is not None:
        # configure_optimizers() reads ``self.lr``, so the attribute itself
        # must be updated; writing only to ``model.hparams`` leaves the
        # optimizer on the constructor default.
        model.lr = new_lr
        model.hparams.lr = new_lr  # 1e-2

    trainer.fit(model)

pytorch_regression_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def train(model, device, train_loader):
121121
use_cuda = torch.cuda.is_available()
122122
device = torch.device("cuda:0" if use_cuda else "cpu")
123123
image_data = ImageDataset(pickle_file=f"{data_path}df.pkl", image_dir=f"{data_path}processed_images/")
124-
params = {"batch_size": 4, "shuffle": True, "num_workers": 4}
124+
params = {"batch_size": 64, "shuffle": True, "num_workers": 4}
125125
max_epochs = 200
126126

127127
train_loader = DataLoader(image_data, **params)

0 commit comments

Comments
 (0)