-
Notifications
You must be signed in to change notification settings - Fork 382
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #140 from HydrogenSulfate/TSM_ucf101_ft
TSM finetune on UCF101 dataset
- Loading branch information
Showing
9 changed files
with
290 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
MODEL: #MODEL field | ||
framework: "Recognizer2D" #Mandatory, indicate the type of network, associate to the 'paddlevideo/modeling/framework/' . | ||
backbone: #Mandatory, indicate the type of backbone, associate to the 'paddlevideo/modeling/backbones/' . | ||
name: "ResNetTSM" #Mandatory, The name of backbone. | ||
pretrained: "data/ResNet50_pretrain.pdparams" #Optional, pretrained model path. | ||
num_seg: 8 | ||
depth: 50 #Optional, the depth of backbone architecture. | ||
head: | ||
name: "TSMHead" #Mandatory, indicate the type of head, associate to the 'paddlevideo/modeling/heads' | ||
num_classes: 400 #Optional, the number of classes to be classified. | ||
in_channels: 2048 #input channel of the extracted feature. | ||
drop_ratio: 0.5 #the ratio of dropout | ||
std: 0.001 #std value in params initialization | ||
|
||
|
||
DATASET: #DATASET field | ||
batch_size: 16 #Mandatory, bacth size | ||
num_workers: 4 #Mandatory, XXX the number of subprocess on each GPU. | ||
train: | ||
format: "FrameDataset" #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset' | ||
data_prefix: "" #Mandatory, train data root path | ||
file_path: "data/k400_frames/train.list" #Mandatory, train data index file path | ||
suffix: 'img_{:05}.jpg' | ||
valid: | ||
format: "FrameDataset" #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset' | ||
data_prefix: "" #Mandatory, valid data root path | ||
file_path: "data/k400_frames/val.list" #Mandatory, valid data index file path | ||
suffix: 'img_{:05}.jpg' | ||
test: | ||
format: "FrameDataset" #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset' | ||
data_prefix: "" #Mandatory, valid data root path | ||
file_path: "data/k400_frames/val.list" #Mandatory, valid data index file path | ||
suffix: 'img_{:05}.jpg' | ||
|
||
|
||
PIPELINE: #PIPELINE field | ||
train: #Mandotary, indicate the pipeline to deal with the training data, associate to the 'paddlevideo/loader/pipelines/' | ||
decode: | ||
name: "FrameDecoder" | ||
sample: | ||
name: "Sampler_TSM" | ||
num_seg: 8 | ||
seg_len: 1 | ||
valid_mode: False | ||
transform: #Mandotary, image transform operator. | ||
- MultiScaleCrop_TSM: | ||
target_size: 224 | ||
- RandomFlip: | ||
- Image2Array: | ||
- Normalization: | ||
mean: [0.485, 0.456, 0.406] | ||
std: [0.229, 0.224, 0.225] | ||
|
||
valid: #Mandatory, indicate the pipeline to deal with the validing data. associate to the 'paddlevideo/loader/pipelines/' | ||
decode: | ||
name: "FrameDecoder" | ||
sample: | ||
name: "Sampler_TSM" | ||
num_seg: 8 | ||
seg_len: 1 | ||
valid_mode: True | ||
transform: | ||
- Scale_PV: | ||
short_size: 256 | ||
- CenterCrop: | ||
target_size: 224 | ||
- Image2Array: | ||
- Normalization: | ||
mean: [0.485, 0.456, 0.406] | ||
std: [0.229, 0.224, 0.225] | ||
|
||
test: | ||
decode: | ||
name: "FrameDecoder" | ||
sample: | ||
name: "Sampler_TSM" | ||
num_seg: 8 | ||
seg_len: 1 | ||
valid_mode: True | ||
transform: | ||
- Scale_PV: | ||
short_size: 256 | ||
- CenterCrop: | ||
target_size: 224 | ||
- Image2Array: | ||
- Normalization: | ||
mean: [0.485, 0.456, 0.406] | ||
std: [0.229, 0.224, 0.225] | ||
|
||
|
||
OPTIMIZER: #OPTIMIZER field | ||
name: 'Momentum' #Mandatory, the type of optimizer, associate to the 'paddlevideo/solver/' | ||
momentum: 0.9 | ||
learning_rate: #Mandatory, the type of learning rate scheduler, associate to the 'paddlevideo/solver/' | ||
name: 'PiecewiseDecay' | ||
boundaries: [20, 40] | ||
values: [0.02, 0.002, 0.0002] #8 cards * 16 batch size | ||
weight_decay: | ||
name: 'L2' | ||
value: 0.0001 | ||
grad_clip: | ||
name: 'ClipGradByGlobalNorm' | ||
value: 20.0 | ||
|
||
|
||
METRIC: | ||
name: 'CenterCropMetric' | ||
|
||
|
||
model_name: "TSM" | ||
log_interval: 20 #Optional, the interal of logger, default:10 | ||
save_interval: 10 | ||
epochs: 50 #Mandatory, total epoch | ||
log_level: "INFO" #Optional, the logger level. default: "INFO" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
MODEL: #MODEL field | ||
framework: "Recognizer2D" #Mandatory, indicate the type of network, associate to the 'paddlevideo/modeling/framework/' . | ||
backbone: #Mandatory, indicate the type of backbone, associate to the 'paddlevideo/modeling/backbones/' . | ||
name: "ResNetTSM" #Mandatory, The name of backbone. | ||
pretrained: "data/ResNet50_pretrain.pdparams" #Optional, pretrained model path. | ||
num_seg: 8 | ||
depth: 50 #Optional, the depth of backbone architecture. | ||
data_format: "NHWC" | ||
head: | ||
name: "TSMHead" #Mandatory, indicate the type of head, associate to the 'paddlevideo/modeling/heads' | ||
num_classes: 400 #Optional, the number of classes to be classified. | ||
in_channels: 2048 #input channel of the extracted feature. | ||
drop_ratio: 0.5 #the ratio of dropout | ||
std: 0.001 #std value in params initialization | ||
data_format: "NHWC" | ||
|
||
|
||
DATASET: #DATASET field | ||
batch_size: 16 #Mandatory, bacth size | ||
num_workers: 4 #Mandatory, XXX the number of subprocess on each GPU. | ||
train: | ||
format: "FrameDataset" #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset' | ||
data_prefix: "" #Mandatory, train data root path | ||
file_path: "data/k400_frames/train.list" #Mandatory, train data index file path | ||
suffix: 'img_{:05}.jpg' | ||
valid: | ||
format: "FrameDataset" #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset' | ||
data_prefix: "" #Mandatory, valid data root path | ||
file_path: "data/k400_frames/val.list" #Mandatory, valid data index file path | ||
suffix: 'img_{:05}.jpg' | ||
test: | ||
format: "FrameDataset" #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset' | ||
data_prefix: "" #Mandatory, valid data root path | ||
file_path: "data/k400_frames/val.list" #Mandatory, valid data index file path | ||
suffix: 'img_{:05}.jpg' | ||
|
||
|
||
PIPELINE: #PIPELINE field | ||
train: #Mandotary, indicate the pipeline to deal with the training data, associate to the 'paddlevideo/loader/pipelines/' | ||
decode: | ||
name: "FrameDecoder" | ||
sample: | ||
name: "Sampler_TSM" | ||
num_seg: 8 | ||
seg_len: 1 | ||
valid_mode: False | ||
transform: #Mandotary, image transform operator. | ||
- MultiScaleCrop_TSM: | ||
target_size: 224 | ||
- RandomFlip: | ||
- Image2Array: | ||
transpose: False | ||
- Normalization: | ||
mean: [0.485, 0.456, 0.406] | ||
std: [0.229, 0.224, 0.225] | ||
tensor_shape: [1,1,3] | ||
|
||
valid: #Mandatory, indicate the pipeline to deal with the validing data. associate to the 'paddlevideo/loader/pipelines/' | ||
decode: | ||
name: "FrameDecoder" | ||
sample: | ||
name: "Sampler_TSM" | ||
num_seg: 8 | ||
seg_len: 1 | ||
valid_mode: True | ||
transform: | ||
- Scale_PV: | ||
short_size: 256 | ||
- CenterCrop: | ||
target_size: 224 | ||
- Image2Array: | ||
transpose: False | ||
- Normalization: | ||
mean: [0.485, 0.456, 0.406] | ||
std: [0.229, 0.224, 0.225] | ||
tensor_shape: [1,1,3] | ||
|
||
test: | ||
decode: | ||
name: "FrameDecoder" | ||
sample: | ||
name: "Sampler_TSM" | ||
num_seg: 8 | ||
seg_len: 1 | ||
valid_mode: True | ||
transform: | ||
- Scale_PV: | ||
short_size: 256 | ||
- CenterCrop: | ||
target_size: 224 | ||
- Image2Array: | ||
transpose: False | ||
- Normalization: | ||
mean: [0.485, 0.456, 0.406] | ||
std: [0.229, 0.224, 0.225] | ||
tensor_shape: [1,1,3] | ||
|
||
|
||
OPTIMIZER: #OPTIMIZER field | ||
name: 'Momentum' #Mandatory, the type of optimizer, associate to the 'paddlevideo/solver/' | ||
momentum: 0.9 | ||
learning_rate: #Mandatory, the type of learning rate scheduler, associate to the 'paddlevideo/solver/' | ||
name: 'PiecewiseDecay' | ||
boundaries: [20, 40] | ||
values: [0.02, 0.002, 0.0002] #8 cards * 16 batch size | ||
weight_decay: | ||
name: 'L2' | ||
value: 0.0001 | ||
grad_clip: | ||
name: 'ClipGradByGlobalNorm' | ||
value: 20.0 | ||
|
||
|
||
METRIC: | ||
name: 'CenterCropMetric' | ||
|
||
|
||
model_name: "TSM" | ||
log_interval: 20 #Optional, the interal of logger, default:10 | ||
save_interval: 10 | ||
epochs: 50 #Mandatory, total epoch | ||
log_level: "INFO" #Optional, the logger level. default: "INFO" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.