fix(model): 修复模型错误

Yidadaa · Jun 10, 2019 · d375991 · d375991
1 parent b93857a
commit d375991
Show file tree

Hide file tree

Showing 7 changed files with 35 additions and 11 deletions.
diff --git a/.train.py.swp b/.train.py.swp
diff --git a/README.md b/README.md
@@ -1,2 +1,23 @@
 # Pytorch-Video-Classification
 Make video classification on UCF101 using CNN and RNN based on Pytorch framework.
+
+# Environments
+```bash
+# 1. torch >= 1.0
+conda create -n lstm-cnn
+source activate lstm-cnn # or `conda activate lstm-cnn`
+# GPU version
+conda install pytorch torchvision cudatoolkit=9.0 -c pytorch
+# CPU version
+conda install pytorch-cpu torchvision-cpu -c pytorch
+
+# 2. dependencies
+pip install pandas scikit-learn tqdm opencv-python
+
+# 3. prepare datasets
+cp -r path/to/your/UCF ./data
+cd ./data && python make_train_test.py
+
+# 4. train your network
+python train.py
+```
diff --git a/data/make_train_test.py b/data/make_train_test.py
@@ -43,7 +43,7 @@ def split(src_dir=default_src_dir, output_dir=default_src_dir, size=default_test
                 os.mkdir(class_dir)
 
         # 遍历每个视频，将每个视频的图像帧提取出来
-        for i in tqdm(range(len(videos)), desc='[%d/%d]%s' % (class_index, num_classes, classname)):
+        for i in tqdm(range(len(videos)), desc='[%d/%d]%s' % (class_index + 1, num_classes, classname)):
             video_path = os.path.join(src_dir, classname, videos[i])
             video_fd = cv2.VideoCapture(video_path)
 

diff --git a/dataloader.py b/dataloader.py
@@ -2,7 +2,6 @@
 from torch.utils import data
 from torchvision import transforms
 from tqdm import tqdm
-import cv2
 from PIL import Image
 import config
 
@@ -87,4 +86,4 @@ def _label_category(self, label):
         '''
         if label not in self.labels:
             raise RuntimeError('不存在的label！')
-        return self.labels.index(label)
+        return self.labels.index(label)
diff --git a/model.py b/model.py
@@ -9,7 +9,7 @@ def __init__(self):
         使用pytorch提供的预训练模型作为encoder
         '''
         super(CNNEncoder, self).__init__()
-        
+
         # 使用resnet预训练模型来提取特征，去掉最后一层分类器
         pretrained_cnn = models.resnet152(pretrained=True)
         cnn_layers = list(pretrained_cnn.children())[:-1]
@@ -36,7 +36,7 @@ def _build_fc(self, in_features, out_features, with_bn=True):
 
     def forward(self, x_3d):
         '''
-        输入的是T帧图像，shape = (t, h, w, 3)
+        输入的是T帧图像，shape = (batch_size, t, h, w, 3)
         '''
         cnn_embedding_out = []
         for t in range(x_3d.size(1)):
@@ -45,21 +45,20 @@ def forward(self, x_3d):
             # -- 因为我们使用的预训练模型，防止后续的层训练时反向传播而影响前面的层
             with torch.no_grad():
                 x = self.cnn(x_3d[:, t, :, :, :])
-                x = torch.flatten(x)
+                x = torch.flatten(x, start_dim=1)
 
-            print(x.shape)
             # 处理fc层
             x = self.fc(x)
 
             cnn_embedding_out.append(x)
 
         cnn_embedding_out = torch.stack(cnn_embedding_out, dim=0).transpose(0, 1)
-        
+
         return cnn_embedding_out
 
 class RNNDecoder(nn.Module):
     def __init__(self, cnn_out_dim=256, rnn_hidden_layers=3, rnn_hidden_nodes=256,
-            num_classes=10, drop_prob=0.3):
+            num_classes=2, drop_prob=0.3):
         super(RNNDecoder, self).__init__()
 
         self.rnn_input_features = cnn_out_dim
@@ -88,6 +87,7 @@ def __init__(self, cnn_out_dim=256, rnn_hidden_layers=3, rnn_hidden_nodes=256,
     def forward(self, x_rnn):
         self.lstm.flatten_parameters()
         rnn_out, (h_n, h_c) = self.lstm(x_rnn, None)
+        # 注意，前面定义lstm时，batch_first=True保证了以下结构：
         # rnn_out shape: (batch, timestep, output_size)
         # h_n and h_c shape: (n_layers, batch, hidden_size)
 

diff --git a/train.py b/train.py
@@ -18,7 +18,7 @@ def train_on_epocchs(train_loader:torch.utils.data.DataLoader, test_loader:torch
     # 实例化计算图模型
     cnn_encoder = CNNEncoder().to(device)
     rnn_decoder = RNNDecoder().to(device)
-    
+
     # 多GPU训练
     device_count = torch.cuda.device_count()
     if device_count > 1:
@@ -76,6 +76,7 @@ def train(model:[nn.Module], dataloader:torch.utils.data.DataLoader, optimizer:t
         optimizer.zero_grad()
         # 执行前向传播
         y_ = rnn_decoder(cnn_encoder(X))
+        print('一个周期完成', y_.shape, y.shape)
         # 计算loss
         loss = F.cross_entropy(y_, y)
         # 反向传播梯度
@@ -132,4 +133,4 @@ def validation(model:[nn.Module], test_loader:torch.utils.data.DataLoader, optim
     test_data = pandas.read_csv('./data/test.csv')
     train_loader = DataLoader(Dataset(train_data.to_numpy()), **config.train_dataset_params)
     test_loader = DataLoader(Dataset(test_data.to_numpy()), **config.train_dataset_params)
-    train_on_epocchs(train_loader, test_loader)
+    train_on_epocchs(train_loader, test_loader)
diff --git a/train.sh b/train.sh
@@ -0,0 +1,3 @@
+
+
+CUDA_VISIBLE_DEVICES=0 python train.py