Skip to content

Commit

Permalink
Merge pull request #140 from HydrogenSulfate/TSM_ucf101_ft
Browse files Browse the repository at this point in the history
TSM finetune on UCF101 dataset
  • Loading branch information
huangjun12 authored May 17, 2021
2 parents d8b9bd4 + fe89c8c commit 884ee02
Show file tree
Hide file tree
Showing 9 changed files with 290 additions and 67 deletions.
114 changes: 114 additions & 0 deletions configs/recognition/tsm/tsm_k400_frames.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
MODEL:  # Model definition
  # Mandatory. Network type; maps to 'paddlevideo/modeling/framework/'.
  framework: "Recognizer2D"
  # Mandatory. Backbone type; maps to 'paddlevideo/modeling/backbones/'.
  backbone:
    name: "ResNetTSM"  # Mandatory. Name of the backbone.
    pretrained: "data/ResNet50_pretrain.pdparams"  # Optional. Pretrained model path.
    num_seg: 8
    depth: 50  # Optional. Depth of the backbone architecture.
  head:
    name: "TSMHead"  # Mandatory. Head type; maps to 'paddlevideo/modeling/heads'.
    num_classes: 400  # Optional. Number of classes to be classified.
    in_channels: 2048  # Input channels of the extracted feature.
    drop_ratio: 0.5  # Dropout ratio.
    std: 0.001  # Std value used in parameter initialization.


DATASET:  # Dataset definition
  batch_size: 16  # Mandatory. Batch size.
  num_workers: 4  # Mandatory. Number of subprocesses on each GPU.
  train:
    # Mandatory. Dataset type; maps to 'paddlevideo/loader/dataset'.
    format: "FrameDataset"
    data_prefix: ""  # Mandatory. Train data root path.
    file_path: "data/k400_frames/train.list"  # Mandatory. Train data index file path.
    suffix: 'img_{:05}.jpg'
  valid:
    # Mandatory. Dataset type; maps to 'paddlevideo/loader/dataset'.
    format: "FrameDataset"
    data_prefix: ""  # Mandatory. Validation data root path.
    file_path: "data/k400_frames/val.list"  # Mandatory. Validation data index file path.
    suffix: 'img_{:05}.jpg'
  test:
    # Mandatory. Dataset type; maps to 'paddlevideo/loader/dataset'.
    format: "FrameDataset"
    data_prefix: ""  # Mandatory. Test data root path.
    # Mandatory. Test data index file path (same list file as 'valid').
    file_path: "data/k400_frames/val.list"
    suffix: 'img_{:05}.jpg'


PIPELINE:  # Data pipeline definition
  # Mandatory. Pipeline applied to the training data; maps to
  # 'paddlevideo/loader/pipelines/'.
  train:
    decode:
      name: "FrameDecoder"
    sample:
      name: "Sampler_TSM"
      num_seg: 8
      seg_len: 1
      valid_mode: false
    # Mandatory. Image transform operators, applied in the listed order.
    transform:
      - MultiScaleCrop_TSM:
          target_size: 224
      # The next two operators are configured without arguments.
      - RandomFlip:
      - Image2Array:
      - Normalization:
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]

  # Mandatory. Pipeline applied to the validation data; maps to
  # 'paddlevideo/loader/pipelines/'.
  valid:
    decode:
      name: "FrameDecoder"
    sample:
      name: "Sampler_TSM"
      num_seg: 8
      seg_len: 1
      valid_mode: true
    transform:
      - Scale_PV:
          short_size: 256
      - CenterCrop:
          target_size: 224
      - Image2Array:
      - Normalization:
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]

  # Pipeline applied to the test data; same settings as 'valid'.
  test:
    decode:
      name: "FrameDecoder"
    sample:
      name: "Sampler_TSM"
      num_seg: 8
      seg_len: 1
      valid_mode: true
    transform:
      - Scale_PV:
          short_size: 256
      - CenterCrop:
          target_size: 224
      - Image2Array:
      - Normalization:
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]


OPTIMIZER:  # Optimizer definition
  # Mandatory. Optimizer type; maps to 'paddlevideo/solver/'.
  name: 'Momentum'
  momentum: 0.9
  # Mandatory. Learning-rate scheduler type; maps to 'paddlevideo/solver/'.
  learning_rate:
    name: 'PiecewiseDecay'
    boundaries: [20, 40]
    values: [0.02, 0.002, 0.0002]  # 8 cards * 16 batch size
  weight_decay:
    name: 'L2'
    value: 0.0001
  grad_clip:
    name: 'ClipGradByGlobalNorm'
    value: 20.0


METRIC:  # Evaluation metric definition
  name: 'CenterCropMetric'


# Run-level settings (top-level scalars).
model_name: 'TSM'
epochs: 50  # Mandatory. Total number of epochs.
save_interval: 10
log_interval: 20  # Optional. Logger interval (default: 10).
log_level: 'INFO'  # Optional. Logger level (default: "INFO").
122 changes: 122 additions & 0 deletions configs/recognition/tsm/tsm_k400_frames_nhwc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
MODEL:  # Model definition
  # Mandatory. Network type; maps to 'paddlevideo/modeling/framework/'.
  framework: "Recognizer2D"
  # Mandatory. Backbone type; maps to 'paddlevideo/modeling/backbones/'.
  backbone:
    name: "ResNetTSM"  # Mandatory. Name of the backbone.
    pretrained: "data/ResNet50_pretrain.pdparams"  # Optional. Pretrained model path.
    num_seg: 8
    depth: 50  # Optional. Depth of the backbone architecture.
    data_format: "NHWC"
  head:
    name: "TSMHead"  # Mandatory. Head type; maps to 'paddlevideo/modeling/heads'.
    num_classes: 400  # Optional. Number of classes to be classified.
    in_channels: 2048  # Input channels of the extracted feature.
    drop_ratio: 0.5  # Dropout ratio.
    std: 0.001  # Std value used in parameter initialization.
    data_format: "NHWC"


DATASET:  # Dataset definition
  batch_size: 16  # Mandatory. Batch size.
  num_workers: 4  # Mandatory. Number of subprocesses on each GPU.
  train:
    # Mandatory. Dataset type; maps to 'paddlevideo/loader/dataset'.
    format: "FrameDataset"
    data_prefix: ""  # Mandatory. Train data root path.
    file_path: "data/k400_frames/train.list"  # Mandatory. Train data index file path.
    suffix: 'img_{:05}.jpg'
  valid:
    # Mandatory. Dataset type; maps to 'paddlevideo/loader/dataset'.
    format: "FrameDataset"
    data_prefix: ""  # Mandatory. Validation data root path.
    file_path: "data/k400_frames/val.list"  # Mandatory. Validation data index file path.
    suffix: 'img_{:05}.jpg'
  test:
    # Mandatory. Dataset type; maps to 'paddlevideo/loader/dataset'.
    format: "FrameDataset"
    data_prefix: ""  # Mandatory. Test data root path.
    # Mandatory. Test data index file path (same list file as 'valid').
    file_path: "data/k400_frames/val.list"
    suffix: 'img_{:05}.jpg'


PIPELINE:  # Data pipeline definition
  # Mandatory. Pipeline applied to the training data; maps to
  # 'paddlevideo/loader/pipelines/'.
  train:
    decode:
      name: "FrameDecoder"
    sample:
      name: "Sampler_TSM"
      num_seg: 8
      seg_len: 1
      valid_mode: false
    # Mandatory. Image transform operators, applied in the listed order.
    transform:
      - MultiScaleCrop_TSM:
          target_size: 224
      - RandomFlip:  # configured without arguments
      - Image2Array:
          # NOTE(review): presumably keeps the channel-last layout required by
          # data_format "NHWC" -- confirm against Image2Array.
          transpose: false
      - Normalization:
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          # NOTE(review): presumably the broadcast shape of mean/std for
          # channel-last input -- confirm against Normalization.
          tensor_shape: [1, 1, 3]

  # Mandatory. Pipeline applied to the validation data; maps to
  # 'paddlevideo/loader/pipelines/'.
  valid:
    decode:
      name: "FrameDecoder"
    sample:
      name: "Sampler_TSM"
      num_seg: 8
      seg_len: 1
      valid_mode: true
    transform:
      - Scale_PV:
          short_size: 256
      - CenterCrop:
          target_size: 224
      - Image2Array:
          transpose: false
      - Normalization:
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          tensor_shape: [1, 1, 3]

  # Pipeline applied to the test data; same settings as 'valid'.
  test:
    decode:
      name: "FrameDecoder"
    sample:
      name: "Sampler_TSM"
      num_seg: 8
      seg_len: 1
      valid_mode: true
    transform:
      - Scale_PV:
          short_size: 256
      - CenterCrop:
          target_size: 224
      - Image2Array:
          transpose: false
      - Normalization:
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          tensor_shape: [1, 1, 3]


OPTIMIZER:  # Optimizer definition
  # Mandatory. Optimizer type; maps to 'paddlevideo/solver/'.
  name: 'Momentum'
  momentum: 0.9
  # Mandatory. Learning-rate scheduler type; maps to 'paddlevideo/solver/'.
  learning_rate:
    name: 'PiecewiseDecay'
    boundaries: [20, 40]
    values: [0.02, 0.002, 0.0002]  # 8 cards * 16 batch size
  weight_decay:
    name: 'L2'
    value: 0.0001
  grad_clip:
    name: 'ClipGradByGlobalNorm'
    value: 20.0


METRIC:  # Evaluation metric definition
  name: 'CenterCropMetric'


# Run-level settings (top-level scalars).
model_name: 'TSM'
epochs: 50  # Mandatory. Total number of epochs.
save_interval: 10
log_interval: 20  # Optional. Logger interval (default: 10).
log_level: 'INFO'  # Optional. Logger level (default: "INFO").
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,17 @@ MODEL: #MODEL field
framework: "Recognizer2D" #Mandatory, indicate the type of network, associate to the 'paddlevideo/modeling/framework/' .
backbone: #Mandatory, indicate the type of backbone, associate to the 'paddlevideo/modeling/backbones/' .
name: "ResNetTSM" #Mandatory, The name of backbone.
pretrained: "data/ResNet50_pretrain.pdparams" #Optional, pretrained model path.
pretrained: "data/TSM_k400.pdparams" #Optional, pretrained model path.
num_seg: 8
depth: 50 #Optional, the depth of backbone architecture.
bn_wd: False
head:
name: "TSMHead" #Mandatory, indicate the type of head, associate to the 'paddlevideo/modeling/heads'
num_classes: 101 #Optional, the number of classes to be classified.
in_channels: 2048 #input channel of the extracted feature.
drop_ratio: 0.5 #the ratio of dropout
# ls_eps: 0.1 # label smoothing epsilon
drop_ratio: 0.8 #the ratio of dropout
std: 0.001 #std value in params initialization


DATASET: #DATASET field
batch_size: 16 #Mandatory, batch size
num_workers: 4 #Mandatory, the number of subprocesses on each GPU.
Expand Down Expand Up @@ -88,29 +87,25 @@ PIPELINE: #PIPELINE field
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]

#MIX:
# name: "Mixup"
# alpha: 0.2

OPTIMIZER: #OPTIMIZER field
name: 'Momentum' #Mandatory, the type of optimizer, associate to the 'paddlevideo/solver/'
momentum: 0.9
learning_rate: #Mandatory, the type of learning rate scheduler, associate to the 'paddlevideo/solver/'
name: 'PiecewiseDecay'
boundaries: [20, 40]
values: [0.02, 0.002, 0.0002] #4 cards * 16 batch size
weight_decay:
name: 'L2'
value: 1e-4
boundaries: [10, 20]
values: [0.001, 0.0001, 0.00001] #4 cards * 16 batch size
grad_clip:
name: 'ClipGradByGlobalNorm'
value: 20.0


METRIC:
name: 'CenterCropMetric'


model_name: "TSM"
log_interval: 20 #Optional, the interval of logger, default:10
save_interval: 10
epochs: 50 #Mandatory, total epoch
epochs: 25 #Mandatory, total epoch
log_level: "INFO" #Optional, the logger level. default: "INFO"
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@ MODEL: #MODEL field
framework: "Recognizer2D" #Mandatory, indicate the type of network, associate to the 'paddlevideo/modeling/framework/' .
backbone: #Mandatory, indicate the type of backbone, associate to the 'paddlevideo/modeling/backbones/' .
name: "ResNetTSM" #Mandatory, The name of backbone.
pretrained: "data/ResNet50_pretrain.pdparams" #Optional, pretrained model path.
pretrained: "data/TSM_k400.pdparams" #Optional, pretrained model path.
num_seg: 8
depth: 50 #Optional, the depth of backbone architecture.
data_format: "NHWC"
head:
name: "TSMHead" #Mandatory, indicate the type of head, associate to the 'paddlevideo/modeling/heads'
num_classes: 101 #Optional, the number of classes to be classified.
in_channels: 2048 #input channel of the extracted feature.
drop_ratio: 0.5 #the ratio of dropout
std: 0.01 #std value in params initialization
drop_ratio: 0.8 #the ratio of dropout
std: 0.001 #std value in params initialization
data_format: "NHWC"


Expand Down Expand Up @@ -40,16 +40,12 @@ PIPELINE: #PIPELINE field
decode:
name: "FrameDecoder"
sample:
name: "Sampler"
name: "Sampler_TSM"
num_seg: 8
seg_len: 1
valid_mode: False
transform: #Mandatory, image transform operator.
- Scale:
short_size: 256
- MultiScaleCrop:
target_size: 256
- RandomCrop:
- MultiScaleCrop_TSM:
target_size: 224
- RandomFlip:
- Image2Array:
Expand All @@ -63,13 +59,12 @@ PIPELINE: #PIPELINE field
decode:
name: "FrameDecoder"
sample:
name: "Sampler"
valid_mode: True
name: "Sampler_TSM"
num_seg: 8
seg_len: 1
valid_mode: True
transform:
- Scale:
- Scale_PV:
short_size: 256
- CenterCrop:
target_size: 224
Expand All @@ -84,13 +79,12 @@ PIPELINE: #PIPELINE field
decode:
name: "FrameDecoder"
sample:
name: "Sampler"
valid_mode: True
name: "Sampler_TSM"
num_seg: 8
seg_len: 1
valid_mode: True
transform:
- Scale:
- Scale_PV:
short_size: 256
- CenterCrop:
target_size: 224
Expand All @@ -107,17 +101,19 @@ OPTIMIZER: #OPTIMIZER field
momentum: 0.9
learning_rate: #Mandatory, the type of learning rate scheduler, associate to the 'paddlevideo/solver/'
name: 'PiecewiseDecay'
boundaries: [40, 60]
values: [0.01, 0.001, 0.0001] #4 cards * 16 batch size
weight_decay:
name: 'L2'
value: 1e-4
boundaries: [10, 20]
values: [0.001, 0.0001, 0.00001] #4 cards * 16 batch size
grad_clip:
name: 'ClipGradByGlobalNorm'
value: 20.0


METRIC:
name: 'CenterCropMetric'


model_name: "TSM"
log_interval: 20 #Optional, the interval of logger, default:10
save_interval: 10
epochs: 80 #Mandatory, total epoch
epochs: 25 #Mandatory, total epoch
log_level: "INFO" #Optional, the logger level. default: "INFO"
Loading

0 comments on commit 884ee02

Please sign in to comment.