File tree Expand file tree Collapse file tree 4 files changed +143
-0
lines changed Expand file tree Collapse file tree 4 files changed +143
-0
lines changed Original file line number Diff line number Diff line change @@ -237,6 +237,14 @@ def create_parser():
237
237
'Choice: O0 - all FP32, O1 - only cast ops in white-list to FP16, '
238
238
'O2 - cast all ops except for blacklist to FP16, '
239
239
'O3 - cast all ops to FP16. (default="O0").' )
240
+ group .add_argument ('--amp_cast_list' , type = str , default = None ,
241
+ help = 'At the cell level, customize the black-list or white-list to cast cells to '
242
+ 'FP16 based on the value of argument "amp_level". If None, use the built-in '
243
+ 'black-list and white-list. (default=None) '
244
+ 'If amp_level="O0" or "O3", this argument has no effect. '
245
+ 'If amp_level="O1", cast all cells in the white-list to FP16. '
246
+ 'If amp_level="O2", cast all cells except for the black-list to FP16. '
247
+ 'Example: "[nn.Conv1d, nn.Conv2d]" or "[nn.BatchNorm1d, nn.BatchNorm2d]".' )
240
248
group .add_argument ('--loss_scale_type' , type = str , default = 'fixed' ,
241
249
choices = ['fixed' , 'dynamic' , 'auto' ],
242
250
help = 'The type of loss scale (default="fixed")' )
Original file line number Diff line number Diff line change @@ -22,6 +22,8 @@ Our reproduced model performance on ImageNet-1K is reported as follows.
22
22
| Model | Context | Top-1 (%) | Top-5 (%) | Params (M) | Recipe | Weight |
23
23
| -----------------| -----------| -------| ------------| ------------------------------------------------------------------------------------------| ----------------------------------------------------------------------------------| ----------------------------------------------------------------------------------|
24
24
| volo_d1 | D910x8-G | 82.59 | 95.99 | 27 | [ yaml] ( https://github.com/mindspore-lab/mindcv/blob/main/configs/volo/volo_d1_ascend.yaml ) | [ weights] ( https://download.mindspore.cn/toolkits/mindcv/volo/volo_d1-c7efada9.ckpt ) |
25
+ | volo_d2 | D910x8-G | 82.95 | 96.13 | 59 | [ yaml] ( https://github.com/mindspore-lab/mindcv/blob/main/configs/volo/volo_d2_ascend.yaml ) | [ weights] ( https://download.mindspore.cn/toolkits/mindcv/volo/volo_d2-0910a460.ckpt ) |
26
+ | volo_d3 | D910x8-G | 83.38 | 96.28 | 87 | [ yaml] ( https://github.com/mindspore-lab/mindcv/blob/main/configs/volo/volo_d3_ascend.yaml ) | [ weights] ( https://download.mindspore.cn/toolkits/mindcv/volo/volo_d3-25916c36.ckpt ) |
25
27
26
28
</div >
27
29
Original file line number Diff line number Diff line change
1
+ # system
2
+ mode : 0
3
+ distribute : True
4
+ num_parallel_workers : 8
5
+ val_while_train : True
6
+ val_interval : 1
7
+
8
+ # dataset
9
+ dataset : ' imagenet'
10
+ data_dir : ' /path/to/imagenet'
11
+ shuffle : True
12
+ dataset_download : False
13
+ batch_size : 128
14
+ drop_remainder : True
15
+
16
+ # augmentation
17
+ image_resize : 224
18
+ scale : [0.08, 1.0]
19
+ ratio : [0.75, 1.33]
20
+ hflip : 0.5
21
+ vflip : 0.
22
+ interpolation : ' bicubic'
23
+ auto_augment : ' randaug-m9-mstd0.5-inc1'
24
+ re_prob : 0.25
25
+ mixup : 0.2
26
+ cutmix : 1.0
27
+ cutmix_prob : 1.0
28
+ crop_pct : 0.96
29
+ color_jitter : [0.4, 0.4, 0.4]
30
+
31
+ # model
32
+ model : ' volo_d2'
33
+ num_classes : 1000
34
+ pretrained : False
35
+ ckpt_path : ' '
36
+ keep_checkpoint_max : 10
37
+ ckpt_save_dir : ' ./ckpt/'
38
+ ckpt_save_policy : ' top_k'
39
+ drop_path_rate : 0.2
40
+ dataset_sink_mode : True
41
+ amp_level : ' O3'
42
+ ema : True
43
+ ema_decay : 0.9995
44
+
45
+ # loss
46
+ loss : ' CE'
47
+ label_smoothing : 0.1
48
+
49
+ # lr scheduler
50
+ scheduler : ' warmup_cosine_decay'
51
+ lr : 0.0007
52
+ min_lr : 0.00001
53
+ epoch_size : 300
54
+ warmup_epochs : 10
55
+ decay_epochs : 290
56
+ decay_rate : 0.1
57
+
58
+ # optimizer
59
+ opt : ' adamw'
60
+ weight_decay : 0.05
61
+ momentum : 0.9
62
+ filter_bias_and_bn : True
63
+ loss_scale_type : ' dynamic'
64
+ loss_scale : 2048
65
+ use_nesterov : False
66
+ drop_overflow_update : True
Original file line number Diff line number Diff line change
1
+ # system
2
+ mode : 0
3
+ distribute : True
4
+ num_parallel_workers : 8
5
+ val_while_train : True
6
+ val_interval : 1
7
+
8
+ # dataset
9
+ dataset : ' imagenet'
10
+ data_dir : ' /path/to/imagenet'
11
+ shuffle : True
12
+ dataset_download : False
13
+ batch_size : 64
14
+ drop_remainder : True
15
+
16
+ # augmentation
17
+ image_resize : 224
18
+ scale : [0.08, 1.0]
19
+ ratio : [0.75, 1.33]
20
+ hflip : 0.5
21
+ vflip : 0.
22
+ interpolation : ' bicubic'
23
+ auto_augment : ' randaug-m9-mstd0.5-inc1'
24
+ re_prob : 0.25
25
+ mixup : 0.2
26
+ cutmix : 1.0
27
+ cutmix_prob : 1.0
28
+ crop_pct : 0.96
29
+ color_jitter : [0.4, 0.4, 0.4]
30
+
31
+ # model
32
+ model : ' volo_d3'
33
+ num_classes : 1000
34
+ pretrained : False
35
+ ckpt_path : ' '
36
+ keep_checkpoint_max : 10
37
+ ckpt_save_dir : ' ./ckpt'
38
+ ckpt_save_policy : ' top_k'
39
+ drop_path_rate : 0.5
40
+ dataset_sink_mode : True
41
+ amp_level : ' O3'
42
+ ema : True
43
+ ema_decay : 0.9995
44
+ gradient_accumulation_steps : 2
45
+
46
+ # loss
47
+ loss : ' CE'
48
+ label_smoothing : 0.1
49
+
50
+ # lr scheduler
51
+ scheduler : ' warmup_cosine_decay'
52
+ lr : 0.0005
53
+ min_lr : 0.00001
54
+ epoch_size : 300
55
+ warmup_epochs : 10
56
+ decay_epochs : 290
57
+ decay_rate : 0.1
58
+
59
+ # optimizer
60
+ opt : ' adamw'
61
+ weight_decay : 0.05
62
+ momentum : 0.9
63
+ filter_bias_and_bn : True
64
+ loss_scale_type : ' dynamic'
65
+ loss_scale : 1024
66
+ use_nesterov : False
67
+ drop_overflow_update : True
You can’t perform that action at this time.
0 commit comments