1
+ import pandas as pd
2
+ import numpy as np
3
+ from PIL import Image
4
+
1
5
import torch
2
6
import torch .nn as nn
3
- import torch .nn .functional as F
4
7
from torch .utils .data import Dataset , DataLoader , random_split
5
8
from torchvision import transforms
6
- from PIL import Image
7
- import os
8
- import pandas as pd
9
- import numpy as np
10
- from tqdm import tqdm
11
- from datetime import datetime
12
9
13
10
import pytorch_lightning as pl
14
- from pytorch_lightning import loggers as pl_loggers
15
- import matplotlib .pyplot as plt
11
+ from pytorch_lightning .logging import TensorBoardLogger
12
+ from pytorch_lightning .callbacks .early_stopping import EarlyStopping
13
+
16
14
17
15
data_path = "./data/"
18
16
@@ -23,11 +21,8 @@ class ImageDataset(Dataset):
23
21
def __init__ (self , pickle_file , image_dir ):
24
22
self .image_dir = image_dir
25
23
self .pickle_file = pickle_file
26
-
27
24
self .tabular = pd .read_pickle (pickle_file )
28
25
29
- print (self .tabular )
30
-
31
26
def __len__ (self ):
32
27
return len (self .tabular )
33
28
@@ -37,19 +32,19 @@ def __getitem__(self, idx):
37
32
38
33
tabular = self .tabular .iloc [idx , 0 :]
39
34
40
- y = tabular ["unformattedPrice " ]
35
+ y = tabular ["price " ]
41
36
42
37
image = Image .open (f"{ self .image_dir } /{ tabular ['zpid' ]} .png" )
43
38
image = np .array (image )
44
39
image = image [..., :3 ]
45
40
46
41
image = transforms .functional .to_tensor (image )
47
42
48
- tabular = tabular [["latLong_latitude " , "latLong_longitude " , "beds" , "baths" , "area" ]]
43
+ tabular = tabular [["latitude " , "longitude " , "beds" , "baths" , "area" ]]
49
44
tabular = tabular .tolist ()
50
45
tabular = torch .FloatTensor (tabular )
51
46
52
- return image , y
47
+ return image , tabular , y
53
48
54
49
55
50
def conv_block (input_size , output_size ):
@@ -61,73 +56,133 @@ def conv_block(input_size, output_size):
61
56
62
57
63
58
class LitClassifier(pl.LightningModule):
    """Two-branch regression network over an image and tabular features.

    The image branch runs three ``conv_block`` stages followed by a small
    fully connected head that ends in 5 features. The tabular branch maps
    5 input features through three linear layers, also ending in 5
    features. Both outputs are concatenated (10 features) and projected to
    a single value, which is trained against the target with an L1 loss.

    Args:
        lr: Learning rate handed to the Adam optimizer.
        num_workers: Worker process count used by every ``DataLoader``.
        batch_size: Batch size used by every ``DataLoader``.
    """

    def __init__(
        self, lr: float = 1e-3, num_workers: int = 4, batch_size: int = 32,
    ):
        super().__init__()
        self.lr = lr
        self.num_workers = num_workers
        self.batch_size = batch_size

        # Image branch: convolutional feature extractor.
        self.conv1 = conv_block(3, 16)
        self.conv2 = conv_block(16, 32)
        self.conv3 = conv_block(32, 64)

        # NOTE(review): the flattened size assumes conv3 emits 64 x 26 x 26
        # feature maps - confirm against conv_block and the input image size.
        self.ln1 = nn.Linear(64 * 26 * 26, 16)
        self.relu = nn.ReLU()
        self.batchnorm = nn.BatchNorm1d(16)
        # Element-wise dropout: the activation here is 2-D (batch, 16).
        # Dropout2d is meant for 3-D/4-D inputs and is deprecated for 2-D
        # ones; nn.Dropout is the documented equivalent replacement.
        self.dropout = nn.Dropout(0.5)
        self.ln2 = nn.Linear(16, 5)

        # Tabular branch: 5 -> 10 -> 10 -> 5.
        self.ln4 = nn.Linear(5, 10)
        self.ln5 = nn.Linear(10, 10)
        self.ln6 = nn.Linear(10, 5)

        # Fusion head over the concatenated 5 + 5 features.
        self.ln7 = nn.Linear(10, 1)

    def forward(self, img, tab):
        """Return one prediction per sample, shaped ``(batch, 1)``.

        Args:
            img: Image batch fed to the convolutional branch.
            tab: Tabular batch with 5 features per sample.
        """
        img = self.conv1(img)
        img = self.conv2(img)
        img = self.conv3(img)
        img = img.reshape(img.shape[0], -1)
        img = self.ln1(img)
        img = self.relu(img)
        img = self.batchnorm(img)
        img = self.dropout(img)
        img = self.ln2(img)
        img = self.relu(img)

        tab = self.ln4(tab)
        tab = self.relu(tab)
        tab = self.ln5(tab)
        tab = self.relu(tab)
        tab = self.ln6(tab)
        tab = self.relu(tab)

        # Both halves are already ReLU outputs (non-negative), so no further
        # activation is needed on the concatenation.
        x = torch.cat((img, tab), dim=1)
        return self.ln7(x)

    def _l1_loss(self, batch):
        """Compute the mean absolute error for one (image, tabular, y) batch."""
        image, tabular, y = batch
        y_pred = torch.flatten(self(image, tabular)).double()
        # Align the target dtype with the prediction so the loss does not
        # depend on how the target column happened to be stored.
        y = y.to(y_pred.dtype)
        return nn.L1Loss()(y_pred, y)

    def training_step(self, batch, batch_idx):
        """Return the training loss and log it as ``train_loss``."""
        loss = self._l1_loss(batch)
        tensorboard_logs = {"train_loss": loss}
        return {"loss": loss, "log": tensorboard_logs}

    def validation_step(self, batch, batch_idx):
        """Return the validation loss for one batch."""
        return {"val_loss": self._l1_loss(batch)}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses and log the mean."""
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        tensorboard_logs = {"val_loss": avg_loss}
        return {"val_loss": avg_loss, "log": tensorboard_logs}

    def test_step(self, batch, batch_idx):
        """Return the test loss for one batch."""
        return {"test_loss": self._l1_loss(batch)}

    def test_epoch_end(self, outputs):
        """Average the per-batch test losses; log and display the mean."""
        avg_loss = torch.stack([x["test_loss"] for x in outputs]).mean()
        logs = {"test_loss": avg_loss}
        return {"test_loss": avg_loss, "log": logs, "progress_bar": logs}

    def setup(self, stage):
        """Load the dataset and split it 80/10/10 into train/val/test.

        The split is made only once per module instance. Re-running the
        unseeded ``random_split`` for a later stage would draw a different
        partition, so the test subset could contain training samples.
        """
        if hasattr(self, "train_set"):
            return

        image_data = ImageDataset(
            pickle_file=f"{data_path}df.pkl",
            image_dir=f"{data_path}processed_images/",
        )

        total = len(image_data)
        train_size = int(0.80 * total)
        val_size = (total - train_size) // 2
        # The test split takes whatever is left so the three sizes always
        # sum to the dataset length, which random_split requires.
        test_size = total - train_size - val_size

        self.train_set, self.val_set, self.test_set = random_split(
            image_data, (train_size, val_size, test_size)
        )

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters using ``self.lr``."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    def train_dataloader(self):
        """Return the training loader, reshuffled every epoch."""
        return DataLoader(
            self.train_set,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Return the validation loader."""
        return DataLoader(
            self.val_set,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )

    def test_dataloader(self):
        """Return the test loader."""
        return DataLoader(
            self.test_set,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )
172
+
117
173
118
174
if __name__ == "__main__":
    # Script entry point: tune the learning rate, train with early stopping
    # on the validation loss, then evaluate on the held-out test split.
    logger = TensorBoardLogger("lightning_logs", name="multi_input")
    early_stop_callback = EarlyStopping(
        monitor="val_loss", min_delta=5000, patience=7, verbose=False, mode="min"
    )

    model = LitClassifier()
    trainer = pl.Trainer(gpus=1, logger=logger, early_stop_callback=early_stop_callback)

    # Run the learning-rate range test and show the suggested value.
    lr_finder = trainer.lr_find(model)
    lr_finder.plot(suggest=True, show=True)
    new_lr = lr_finder.suggestion()
    print(new_lr)
    # configure_optimizers() reads `self.lr`, so the suggestion has to be
    # written to that attribute to take effect during training.
    model.lr = new_lr

    trainer.fit(model)
    trainer.test(model)
0 commit comments