Skip to content

Commit

Permalink
fix(model): 修复模型错误
Browse files Browse the repository at this point in the history
  • Loading branch information
Yidadaa committed Jun 10, 2019
1 parent b93857a commit d375991
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 11 deletions.
Binary file added .train.py.swp
Binary file not shown.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,23 @@
# Pytorch-Video-Classification
Video classification on UCF101 using a CNN and an RNN, built on the PyTorch framework.

# Environments
```bash
# 1. torch >= 1.0
conda create -n lstm-cnn
source activate lstm-cnn # or `conda activate lstm-cnn`
# GPU version
conda install pytorch torchvision cudatoolkit=9.0 -c pytorch
# CPU version
conda install pytorch-cpu torchvision-cpu -c pytorch

# 2. dependencies
pip install pandas scikit-learn tqdm opencv-python

# 3. prepare datasets
cp -r path/to/your/UCF ./data
cd ./data && python make_train_test.py

# 4. train your network
python train.py
```
2 changes: 1 addition & 1 deletion data/make_train_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def split(src_dir=default_src_dir, output_dir=default_src_dir, size=default_test
os.mkdir(class_dir)

# 遍历每个视频,将每个视频的图像帧提取出来
for i in tqdm(range(len(videos)), desc='[%d/%d]%s' % (class_index, num_classes, classname)):
for i in tqdm(range(len(videos)), desc='[%d/%d]%s' % (class_index + 1, num_classes, classname)):
video_path = os.path.join(src_dir, classname, videos[i])
video_fd = cv2.VideoCapture(video_path)

Expand Down
3 changes: 1 addition & 2 deletions dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from torch.utils import data
from torchvision import transforms
from tqdm import tqdm
import cv2
from PIL import Image
import config

Expand Down Expand Up @@ -87,4 +86,4 @@ def _label_category(self, label):
'''
if label not in self.labels:
raise RuntimeError('不存在的label!')
return self.labels.index(label)
return self.labels.index(label)
12 changes: 6 additions & 6 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def __init__(self):
使用pytorch提供的预训练模型作为encoder
'''
super(CNNEncoder, self).__init__()

# 使用resnet预训练模型来提取特征,去掉最后一层分类器
pretrained_cnn = models.resnet152(pretrained=True)
cnn_layers = list(pretrained_cnn.children())[:-1]
Expand All @@ -36,7 +36,7 @@ def _build_fc(self, in_features, out_features, with_bn=True):

def forward(self, x_3d):
'''
输入的是T帧图像,shape = (t, h, w, 3)
输入的是T帧图像,shape = (batch_size, t, h, w, 3)
'''
cnn_embedding_out = []
for t in range(x_3d.size(1)):
Expand All @@ -45,21 +45,20 @@ def forward(self, x_3d):
# -- 因为我们使用的预训练模型,防止后续的层训练时反向传播而影响前面的层
with torch.no_grad():
x = self.cnn(x_3d[:, t, :, :, :])
x = torch.flatten(x)
x = torch.flatten(x, start_dim=1)

print(x.shape)
# 处理fc层
x = self.fc(x)

cnn_embedding_out.append(x)

cnn_embedding_out = torch.stack(cnn_embedding_out, dim=0).transpose(0, 1)

return cnn_embedding_out

class RNNDecoder(nn.Module):
def __init__(self, cnn_out_dim=256, rnn_hidden_layers=3, rnn_hidden_nodes=256,
num_classes=10, drop_prob=0.3):
num_classes=2, drop_prob=0.3):
super(RNNDecoder, self).__init__()

self.rnn_input_features = cnn_out_dim
Expand Down Expand Up @@ -88,6 +87,7 @@ def __init__(self, cnn_out_dim=256, rnn_hidden_layers=3, rnn_hidden_nodes=256,
def forward(self, x_rnn):
self.lstm.flatten_parameters()
rnn_out, (h_n, h_c) = self.lstm(x_rnn, None)
# 注意,前面定义lstm时,batch_first=True保证了以下结构:
# rnn_out shape: (batch, timestep, output_size)
# h_n and h_c shape: (n_layers, batch, hidden_size)

Expand Down
5 changes: 3 additions & 2 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def train_on_epocchs(train_loader:torch.utils.data.DataLoader, test_loader:torch
# 实例化计算图模型
cnn_encoder = CNNEncoder().to(device)
rnn_decoder = RNNDecoder().to(device)

# 多GPU训练
device_count = torch.cuda.device_count()
if device_count > 1:
Expand Down Expand Up @@ -76,6 +76,7 @@ def train(model:[nn.Module], dataloader:torch.utils.data.DataLoader, optimizer:t
optimizer.zero_grad()
# 执行前向传播
y_ = rnn_decoder(cnn_encoder(X))
print('一个周期完成', y_.shape, y.shape)
# 计算loss
loss = F.cross_entropy(y_, y)
# 反向传播梯度
Expand Down Expand Up @@ -132,4 +133,4 @@ def validation(model:[nn.Module], test_loader:torch.utils.data.DataLoader, optim
test_data = pandas.read_csv('./data/test.csv')
train_loader = DataLoader(Dataset(train_data.to_numpy()), **config.train_dataset_params)
test_loader = DataLoader(Dataset(test_data.to_numpy()), **config.train_dataset_params)
train_on_epocchs(train_loader, test_loader)
train_on_epocchs(train_loader, test_loader)
3 changes: 3 additions & 0 deletions train.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@


CUDA_VISIBLE_DEVICES=0 python train.py

0 comments on commit d375991

Please sign in to comment.