-
Notifications
You must be signed in to change notification settings - Fork 382
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
dbedc5f
commit 220cd07
Showing
15 changed files
with
1,094 additions
and
40 deletions.
There are no files selected for viewing
143 changes: 143 additions & 0 deletions
143
configs/recognition/timesformer/timesformer_k400_videos.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
MODEL: # MODEL field
  framework: "Recognizer2D" # Mandatory, type of network, maps to 'paddlevideo/modeling/framework/'.
  backbone: # Mandatory, type of backbone, maps to 'paddlevideo/modeling/backbones/'.
    name: "VisionTransformer" # Mandatory, the name of the backbone.
    pretrained: "data/ViT_base_patch16_224_pretrained.pdparams" # Optional, pretrained model path.
    img_size: 224
    patch_size: 16
    in_channels: 3
    embed_dim: 768
    depth: 12
    num_heads: 12
    mlp_ratio: 4
    qkv_bias: true
    # Written as 1.0e-6 (not 1e-6): YAML 1.1 parsers such as PyYAML resolve
    # a bare 1e-6 as the string "1e-6", not a float.
    epsilon: 1.0e-6
    seg_num: 8
    attention_type: 'divided_space_time'
  head:
    name: "TimeSformerHead" # Mandatory, type of head, maps to 'paddlevideo/modeling/heads'.
    num_classes: 400 # Optional, the number of classes to be classified.
    in_channels: 768 # Input channel of the extracted feature.
    std: 0.01 # Std value used in params initialization.

DATASET: # DATASET field
  batch_size: 1 # Mandatory, batch size
  num_workers: 4 # Mandatory, the number of subprocesses on each GPU.
  # NOTE(review): the file_path values below are user-specific absolute paths —
  # TODO replace with paths relative to the repository (e.g. data/k400/train.list).
  train:
    format: "VideoDataset" # Mandatory, type of dataset, maps to 'paddlevideo/loader/dataset'.
    file_path: "/workspace/huangjun12/PaddleProject/PaddleVideo/ppTSM_ACC/Distill/Stage3/E2.r101.Dense/data/k400/train.list" # Mandatory, train data index file path.
  valid:
    format: "VideoDataset" # Mandatory, type of dataset, maps to 'paddlevideo/loader/dataset'.
    file_path: "/workspace/huangjun12/PaddleProject/PaddleVideo/ppTSM_ACC/Distill/Stage3/E2.r101.Dense/data/k400/val.list" # Mandatory, valid data index file path.
  test:
    format: "VideoDataset" # Mandatory, type of dataset, maps to 'paddlevideo/loader/dataset'.
    file_path: "/workspace/huangjun12/PaddleProject/PaddleVideo/ppTSM_ACC/Distill/Stage3/E2.r101.Dense/data/k400/val.list" # Mandatory, test data index file path.

PIPELINE: # PIPELINE field
  train: # Mandatory, pipeline for the training data, maps to 'paddlevideo/loader/pipelines/'.
    decode:
      name: "VideoDecoder"
      backend: 'pyav'
      mode: 'train'
      num_seg: 8
    sample:
      name: "Sampler"
      num_seg: 8
      seg_len: 1
      valid_mode: false
      linspace_sample: true
    transform: # Mandatory, image transform operators.
      - JitterScale:
          min_size: 256
          max_size: 320
      - RandomCrop:
          target_size: 224
      - RandomFlip:
      - Image2Array:
          data_format: 'cthw'
      - Normalization:
          mean: [0.45, 0.45, 0.45]
          std: [0.225, 0.225, 0.225]
          tensor_shape: [3, 1, 1, 1]

  valid: # Mandatory, pipeline for the validation data, maps to 'paddlevideo/loader/pipelines/'.
    decode:
      name: "VideoDecoder"
      backend: 'pyav'
      mode: 'valid'
      num_seg: 8
    sample:
      name: "Sampler"
      num_seg: 8
      seg_len: 1
      # NOTE(review): valid_mode is false and the transforms below are
      # training-style augmentations (JitterScale/RandomCrop/RandomFlip) even
      # though this is the validation pipeline — confirm this is intentional.
      valid_mode: false
      linspace_sample: true
    transform:
      - JitterScale:
          min_size: 256
          max_size: 320
      - RandomCrop:
          target_size: 224
      - RandomFlip:
      - Image2Array:
          data_format: 'cthw'
      - Normalization:
          mean: [0.45, 0.45, 0.45]
          std: [0.225, 0.225, 0.225]
          tensor_shape: [3, 1, 1, 1]

  test: # Pipeline for the test data.
    decode:
      name: "VideoDecoder"
      backend: 'pyav'
      mode: 'test'
      num_seg: 8
    sample:
      name: "Sampler"
      num_seg: 8
      seg_len: 1
      valid_mode: true
      linspace_sample: true
    transform:
      - JitterScale:
          min_size: 224
          max_size: 224
      - UniformCrop:
          target_size: 224
      - Image2Array:
          data_format: 'cthw'
      - Normalization:
          mean: [0.45, 0.45, 0.45]
          std: [0.225, 0.225, 0.225]
          tensor_shape: [3, 1, 1, 1]

OPTIMIZER: # OPTIMIZER field
  name: 'Momentum' # Mandatory, type of optimizer, maps to 'paddlevideo/solver/'.
  momentum: 0.9
  learning_rate: # Mandatory, type of learning rate scheduler, maps to 'paddlevideo/solver/'.
    learning_rate: 0.005 # 8 cards * 4 batch size
    name: 'MultiStepDecay'
    milestones: [11, 14]
    gamma: 0.1
  weight_decay:
    name: 'L2'
    value: 0.0001
  use_nesterov: true

METRIC:
  name: 'UniformCropMetric'

GRADIENT_ACCUMULATION:
  global_batch_size: 64 # Sum of batches to be calculated across all GPUs.
  num_gpus: 8 # Number of GPUs.

# INFERENCE:
#   name: 'ppTSM_Inference_helper'
#   num_seg: 8
#   target_size: 224

model_name: "TimeSformer"
log_interval: 20 # Optional, the interval of the logger, default: 10.
save_interval: 3
epochs: 15 # Mandatory, total epochs.
log_level: "INFO" # Optional, the logger level, default: "INFO".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.