diff --git a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.024e-7.yaml b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.024e-7.yaml index a14014c..39938de 100644 --- a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.024e-7.yaml +++ b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.024e-7.yaml @@ -100,7 +100,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -126,26 +126,39 @@ train: T_max: 300000 scheduling_step: 1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'batchmean' - weight: 1.024e-7 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'batchmean' + weight: 1.024e-7 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.28e-8.yaml b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.28e-8.yaml index afe502f..89a4741 100644 --- a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.28e-8.yaml +++ b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.28e-8.yaml @@ -100,7 +100,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -126,26 +126,39 @@ train: T_max: 300000 scheduling_step: 1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'batchmean' - weight: 1.28e-8 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'batchmean' + weight: 1.28e-8 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta2.048e-7.yaml b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta2.048e-7.yaml index 36d21fc..a332f0c 100644 --- a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta2.048e-7.yaml +++ b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta2.048e-7.yaml @@ -100,7 +100,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -126,26 +126,39 @@ train: T_max: 300000 scheduling_step: 1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'batchmean' - weight: 2.048e-7 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'batchmean' + weight: 2.048e-7 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta3.2768e-6.yaml b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta3.2768e-6.yaml index 944d58e..12fbde0 100644 --- a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta3.2768e-6.yaml +++ b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta3.2768e-6.yaml @@ -100,7 +100,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -126,26 +126,39 @@ train: T_max: 300000 scheduling_step: 1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'batchmean' - weight: 3.2768e-6 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'batchmean' + weight: 3.2768e-6 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta8.192e-7.yaml b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta8.192e-7.yaml index 4ecf7bc..cc637dd 100644 --- a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta8.192e-7.yaml +++ b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta8.192e-7.yaml @@ -100,7 +100,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -126,26 +126,39 @@ train: T_max: 300000 scheduling_step: 1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'batchmean' - weight: 8.192e-7 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'batchmean' + weight: 8.192e-7 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml index d7dfe3e..e6fd026 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml @@ -145,77 +145,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.16 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.16 stage2: num_epochs: 10 train_data_loader: @@ -264,21 +271,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml index fda739d..c15de5a 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml @@ -145,77 +145,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.32 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.32 stage2: num_epochs: 10 train_data_loader: @@ -264,16 +271,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml index 9472975..f7aafea 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml @@ -145,77 +145,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.64 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.64 stage2: num_epochs: 10 train_data_loader: @@ -264,16 +271,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml index ca904ea..dd74593 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml @@ -145,77 +145,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 1.28 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 1.28 stage2: num_epochs: 10 train_data_loader: @@ -264,16 +271,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml index e686cba..c53d7f1 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml @@ -145,77 +145,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 10.24 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 10.24 stage2: num_epochs: 20 train_data_loader: @@ -264,16 +271,21 @@ train: milestones: [10] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml index b66d9cb..f3f4902 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml @@ -145,77 +145,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 15.36 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 15.36 stage2: num_epochs: 20 train_data_loader: @@ -264,21 +271,29 @@ train: milestones: [10] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml index d4f2ba7..db8d196 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml @@ -145,77 +145,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 2.56 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 2.56 stage2: num_epochs: 10 train_data_loader: @@ -264,21 +271,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml index b2acd8c..c53af15 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml @@ -145,77 +145,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 20.48 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 20.48 stage2: num_epochs: 20 train_data_loader: @@ -264,16 +271,21 @@ train: milestones: [10] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml index 4d7a04c..c3ebc01 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml @@ -145,77 +145,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 5.12 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 5.12 stage2: num_epochs: 10 train_data_loader: @@ -264,16 +271,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml index 0f107f4..782e3e7 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml @@ -146,84 +146,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.16 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.16 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.16 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.16 stage2: num_epochs: 10 train_data_loader: @@ -272,16 +279,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml index aa15b43..df45331 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml @@ -146,84 +146,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.32 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.32 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.32 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.32 stage2: num_epochs: 10 train_data_loader: @@ -272,16 +279,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml index caac55a..2bd2d94 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml @@ -146,84 +146,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.64 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.64 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.64 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.64 stage2: num_epochs: 10 train_data_loader: @@ -272,21 +279,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml index ce28f84..0b9c9cb 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml @@ -146,84 +146,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 1.28 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 1.28 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 1.28 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 1.28 stage2: num_epochs: 10 train_data_loader: @@ -272,16 +279,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml index 8ac5948..1279381 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml @@ -146,84 +146,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 10.24 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 10.24 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 10.24 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 10.24 stage2: num_epochs: 20 train_data_loader: @@ -272,21 +279,29 @@ train: milestones: [10] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml index 62720c0..856a729 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml @@ -146,84 +146,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 15.36 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 15.36 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 15.36 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 15.36 stage2: num_epochs: 20 train_data_loader: @@ -272,21 +279,29 @@ train: milestones: [10] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml index 388b162..77ec4a2 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml @@ -146,84 +146,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 2.56 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 2.56 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 2.56 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 2.56 stage2: num_epochs: 10 train_data_loader: @@ -272,21 +279,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml index b33c044..db306c5 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml @@ -146,84 +146,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 20.48 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 20.48 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 20.48 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 20.48 stage2: num_epochs: 20 train_data_loader: @@ -272,21 +279,29 @@ train: milestones: [10] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml index 719dffd..fd57b9b 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml @@ -146,84 +146,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - stages0: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.0' - io: 'output' - weight: 1.0 - stages1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.1' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.1' - io: 'output' - weight: 1.0 - stages2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.2' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.2' - io: 'output' - weight: 1.0 - stages3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'patch_embed_pruned_stages.3' - io: 'output' - target: - is_from_teacher: True - module_path: 'patch_embed.backbone.stages.3' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 5.12 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 5.12 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + stages0: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.0' + io: 'output' + weight: 1.0 + stages1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.1' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.1' + io: 'output' + weight: 1.0 + stages2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.2' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.2' + io: 'output' + weight: 1.0 + stages3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'patch_embed_pruned_stages.3' + io: 'output' + target: + is_from_teacher: True + module_path: 'patch_embed.backbone.stages.3' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 5.12 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 5.12 stage2: num_epochs: 10 train_data_loader: @@ -272,21 +279,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.08_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.08_from_regnety6.4gf.yaml index b714098..4b4dcb6 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.08_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.08_from_regnety6.4gf.yaml @@ -144,77 +144,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.08 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.08 stage2: num_epochs: 10 train_data_loader: @@ -263,21 +270,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.16_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.16_from_regnety6.4gf.yaml index 51d5199..1aeed6b 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.16_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.16_from_regnety6.4gf.yaml @@ -144,77 +144,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.16 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.16 stage2: num_epochs: 10 train_data_loader: @@ -263,16 +270,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.32_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.32_from_regnety6.4gf.yaml index 4cb4a2e..44297e0 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.32_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.32_from_regnety6.4gf.yaml @@ -144,77 +144,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.32 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.32 stage2: num_epochs: 10 train_data_loader: @@ -263,16 +270,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.64_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.64_from_regnety6.4gf.yaml index a07c57c..ace6bcd 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.64_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.64_from_regnety6.4gf.yaml @@ -144,77 +144,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.64 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.64 stage2: num_epochs: 10 train_data_loader: @@ -263,21 +270,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta1.28_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta1.28_from_regnety6.4gf.yaml index 3ab05e2..ff11793 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta1.28_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta1.28_from_regnety6.4gf.yaml @@ -144,77 +144,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 1.28 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 1.28 stage2: num_epochs: 10 train_data_loader: @@ -263,21 +270,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta2.56_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta2.56_from_regnety6.4gf.yaml index 7f06536..067618d 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta2.56_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta2.56_from_regnety6.4gf.yaml @@ -144,77 +144,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 2.56 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 2.56 stage2: num_epochs: 10 train_data_loader: @@ -263,16 +270,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta5.12_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta5.12_from_regnety6.4gf.yaml index e26b73d..b73b683 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta5.12_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta5.12_from_regnety6.4gf.yaml @@ -144,77 +144,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 5.12 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 5.12 stage2: num_epochs: 10 train_data_loader: @@ -263,16 +270,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta7.68_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta7.68_from_regnety6.4gf.yaml index e7bfcfc..3c0eb53 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta7.68_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta7.68_from_regnety6.4gf.yaml @@ -144,77 +144,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 7.68 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 7.68 stage2: num_epochs: 10 train_data_loader: @@ -263,21 +270,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.08_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.08_from_regnety6.4gf.yaml index 64b4e49..053c1c8 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.08_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.08_from_regnety6.4gf.yaml @@ -145,84 +145,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.08 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.08 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.08 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.08 stage2: num_epochs: 10 train_data_loader: @@ -271,21 +278,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.16_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.16_from_regnety6.4gf.yaml index 565648b..683f6b3 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.16_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.16_from_regnety6.4gf.yaml @@ -145,84 +145,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.16 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.16 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.16 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.16 stage2: num_epochs: 10 train_data_loader: @@ -271,21 +278,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.32_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.32_from_regnety6.4gf.yaml index 94f7ec6..66b8498 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.32_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.32_from_regnety6.4gf.yaml @@ -145,84 +145,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.32 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.32 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.32 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.32 stage2: num_epochs: 10 train_data_loader: @@ -271,16 +278,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.64_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.64_from_regnety6.4gf.yaml index cdfb1d2..f4df3fd 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.64_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.64_from_regnety6.4gf.yaml @@ -145,84 +145,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.64 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.64 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.64 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.64 stage2: num_epochs: 10 train_data_loader: @@ -271,16 +278,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta1.28_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta1.28_from_regnety6.4gf.yaml index 5eb2130..0a1eb23 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta1.28_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta1.28_from_regnety6.4gf.yaml @@ -145,84 +145,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 1.28 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 1.28 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 1.28 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 1.28 stage2: num_epochs: 10 train_data_loader: @@ -271,16 +278,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta2.56_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta2.56_from_regnety6.4gf.yaml index 709b41c..6ca78dc 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta2.56_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta2.56_from_regnety6.4gf.yaml @@ -145,84 +145,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 2.56 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 2.56 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 2.56 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 2.56 stage2: num_epochs: 10 train_data_loader: @@ -271,21 +278,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta4.48_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta4.48_from_regnety6.4gf.yaml index 961634e..83d661d 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta4.48_from_regnety6.4gf.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta4.48_from_regnety6.4gf.yaml @@ -145,84 +145,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - s1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 's1' - io: 'output' - weight: 1.0 - s2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's2' - io: 'output' - target: - is_from_teacher: True - module_path: 's2' - io: 'output' - weight: 1.0 - s3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's3' - io: 'output' - target: - is_from_teacher: True - module_path: 's3' - io: 'output' - weight: 1.0 - s4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 's4' - io: 'output' - target: - is_from_teacher: True - module_path: 's4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 4.48 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 4.48 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + s1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 's1' + io: 'output' + weight: 1.0 + s2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's2' + io: 'output' + target: + is_from_teacher: True + module_path: 's2' + io: 'output' + weight: 1.0 + s3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's3' + io: 'output' + target: + is_from_teacher: True + module_path: 's3' + io: 'output' + weight: 1.0 + s4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 's4' + io: 'output' + target: + is_from_teacher: True + module_path: 's4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 4.48 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 4.48 stage2: num_epochs: 10 train_data_loader: @@ -271,21 +278,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.08_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.08_from_resnet101.yaml index 74fb45f..4e77188 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.08_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.08_from_resnet101.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.08 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.08 stage2: num_epochs: 10 train_data_loader: @@ -255,16 +262,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.16_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.16_from_resnet101.yaml index 7fc1fd0..3519185 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.16_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.16_from_resnet101.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.16 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.16 stage2: num_epochs: 10 train_data_loader: @@ -255,16 +262,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.32_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.32_from_resnet101.yaml index fe67a55..19051df 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.32_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.32_from_resnet101.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.32 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.32 stage2: num_epochs: 10 train_data_loader: @@ -255,16 +262,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.64_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.64_from_resnet101.yaml index e735a61..5ed2cb5 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.64_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.64_from_resnet101.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.64 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.64 stage2: num_epochs: 10 train_data_loader: @@ -255,16 +262,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta1.28_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta1.28_from_resnet101.yaml index d069037..c782508 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta1.28_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta1.28_from_resnet101.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 1.28 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 1.28 stage2: num_epochs: 10 train_data_loader: @@ -255,16 +262,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta2.56_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta2.56_from_resnet101.yaml index ddbbae3..8bd32e5 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta2.56_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta2.56_from_resnet101.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 2.56 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 2.56 stage2: num_epochs: 10 train_data_loader: @@ -255,16 +262,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta5.12_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta5.12_from_resnet101.yaml index ec5e9ef..f5587e9 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta5.12_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta5.12_from_resnet101.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 5.12 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 5.12 stage2: num_epochs: 10 train_data_loader: @@ -255,16 +262,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.08_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.08_from_resnet101.yaml index e612866..adc6243 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.08_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.08_from_resnet101.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.08 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.08 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.08 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.08 stage2: num_epochs: 10 train_data_loader: @@ -263,21 +270,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.16_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.16_from_resnet101.yaml index f02fb9d..5a7a432 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.16_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.16_from_resnet101.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.16 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.16 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.16 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.16 stage2: num_epochs: 10 train_data_loader: @@ -263,16 +270,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.32_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.32_from_resnet101.yaml index d06f5ba..c209460 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.32_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.32_from_resnet101.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.32 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.32 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.32 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.32 stage2: num_epochs: 10 train_data_loader: @@ -263,16 +270,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.64_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.64_from_resnet101.yaml index 51406f2..313d10b 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.64_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.64_from_resnet101.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.64 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.64 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.64 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.64 stage2: num_epochs: 10 train_data_loader: @@ -263,16 +270,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta1.28_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta1.28_from_resnet101.yaml index 07f9ae4..e50200e 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta1.28_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta1.28_from_resnet101.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 1.28 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 1.28 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 1.28 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 1.28 stage2: num_epochs: 10 train_data_loader: @@ -263,21 +270,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta2.56_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta2.56_from_resnet101.yaml index 341f1de..2f5e40a 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta2.56_from_resnet101.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta2.56_from_resnet101.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 2.56 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 2.56 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 2.56 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 2.56 stage2: num_epochs: 10 train_data_loader: @@ -263,16 +270,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.08_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.08_from_resnet50.yaml index 25d33bd..788d751 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.08_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.08_from_resnet50.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.08 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.08 stage2: num_epochs: 10 train_data_loader: @@ -255,21 +262,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.16_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.16_from_resnet50.yaml index 1b547d2..f40cd3f 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.16_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.16_from_resnet50.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.16 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.16 stage2: num_epochs: 10 train_data_loader: @@ -255,21 +262,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.32_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.32_from_resnet50.yaml index 52f16f1..f0ee6d1 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.32_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.32_from_resnet50.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.32 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.32 stage2: num_epochs: 10 train_data_loader: @@ -255,16 +262,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.64_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.64_from_resnet50.yaml index f149b78..b191cc3 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.64_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.64_from_resnet50.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.64 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.64 stage2: num_epochs: 10 train_data_loader: @@ -255,21 +262,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta1.28_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta1.28_from_resnet50.yaml index 828bc41..92fafef 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta1.28_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta1.28_from_resnet50.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 1.28 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 1.28 stage2: num_epochs: 10 train_data_loader: @@ -255,21 +262,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta2.56_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta2.56_from_resnet50.yaml index 228249c..42e645a 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta2.56_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta2.56_from_resnet50.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 2.56 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 2.56 stage2: num_epochs: 10 train_data_loader: @@ -255,16 +262,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta5.12_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta5.12_from_resnet50.yaml index af6718a..b232b9e 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta5.12_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta5.12_from_resnet50.yaml @@ -136,77 +136,84 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 5.12 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 5.12 stage2: num_epochs: 10 train_data_loader: @@ -255,16 +262,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.08_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.08_from_resnet50.yaml index c0279a5..9090ef1 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.08_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.08_from_resnet50.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.1 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.08 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.1 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.08 stage2: num_epochs: 10 train_data_loader: @@ -263,16 +270,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.16_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.16_from_resnet50.yaml index 80dc14a..5fb9989 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.16_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.16_from_resnet50.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.16 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.16 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.16 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.16 stage2: num_epochs: 10 train_data_loader: @@ -263,21 +270,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.32_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.32_from_resnet50.yaml index 36c665c..6efa264 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.32_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.32_from_resnet50.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.32 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.32 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.32 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.32 stage2: num_epochs: 10 train_data_loader: @@ -263,21 +270,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.64_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.64_from_resnet50.yaml index b5c7fde..096f2ae 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.64_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.64_from_resnet50.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 0.64 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 0.64 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 0.64 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 0.64 stage2: num_epochs: 10 train_data_loader: @@ -263,16 +270,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta1.28_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta1.28_from_resnet50.yaml index a358428..fab873f 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta1.28_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta1.28_from_resnet50.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 1.28 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 1.28 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 1.28 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 1.28 stage2: num_epochs: 10 train_data_loader: @@ -263,21 +270,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta2.56_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta2.56_from_resnet50.yaml index aa453dd..6cc45bb 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta2.56_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta2.56_from_resnet50.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 2.56 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 2.56 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 2.56 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 2.56 stage2: num_epochs: 10 train_data_loader: @@ -263,21 +270,29 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta3.84_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta3.84_from_resnet50.yaml index 8d46154..2babac0 100644 --- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta3.84_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta3.84_from_resnet50.yaml @@ -137,84 +137,91 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 - bpp_entropy: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.entropy_bottleneck' - reduction: 'sum' - weight: 3.84 - bpp_gaussian: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'bottleneck_layer.gaussian_conditional' - reduction: 'sum' - weight: 3.84 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 + bpp_entropy: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.entropy_bottleneck' + reduction: 'sum' + weight: 3.84 + bpp_gaussian: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'bottleneck_layer.gaussian_conditional' + reduction: 'sum' + weight: 3.84 stage2: num_epochs: 10 train_data_loader: @@ -263,16 +270,21 @@ train: milestones: [5] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'KDLoss' - kwargs: - temperature: 1.0 - alpha: 0.5 - reduction: 'batchmean' - weight: 1.0 - sub_terms: + key: 'WeightedSumLoss' + kwargs: + sub_terms: + kd: + criterion: + key: 'KDLoss' + kwargs: + student_module_path: '.' + student_module_io: 'output' + teacher_module_path: '.' + teacher_module_io: 'output' + temperature: 1.0 + alpha: 0.5 + reduction: 'batchmean' + weight: 1.0 test: test_data_loader: diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-1.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-1.yaml index 3fe27e3..ef48b81 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-1.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-1.yaml @@ -104,7 +104,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-1 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-1 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-10.yaml index b17e2dc..afc5733 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-10.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-10.yaml @@ -104,7 +104,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-10 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-10 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-15.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-15.yaml index 275de9b..068467b 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-15.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-15.yaml @@ -104,7 +104,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-15 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-15 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-2.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-2.yaml index 89f275b..b52c7b3 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-2.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-2.yaml @@ -104,7 +104,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-2 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-2 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-4.yaml index b394edb..4cbce6c 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-4.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-4.yaml @@ -104,7 +104,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-4 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-4 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta6.5536e-6.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta6.5536e-6.yaml index 17ef1bf..5a0a2a9 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta6.5536e-6.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta6.5536e-6.yaml @@ -104,7 +104,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 6.5536e-6 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 6.5536e-6 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta8.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta8.0e-10.yaml index 0cce6af..01b1874 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta8.0e-10.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta8.0e-10.yaml @@ -104,7 +104,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 8.0e-10 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 8.0e-10 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.024e-7.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.024e-7.yaml index 0df8d3d..1425a98 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.024e-7.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.024e-7.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.024e-7 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.024e-7 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-15.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-15.yaml index be48937..315b6e6 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-15.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-15.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-15 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-15 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-4.yaml index 2ce9ccf..fbe1e4a 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-4.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-4.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-4 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-4 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-5.yaml index 48fa97b..b07169e 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-5.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-5.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-5 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-5 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.28e-8.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.28e-8.yaml index e0b74bf..816b1b6 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.28e-8.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.28e-8.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.28e-8 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.28e-8 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta5.0e-11.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta5.0e-11.yaml index c9db1a7..3cb7eef 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta5.0e-11.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta5.0e-11.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 5.0e-11 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 5.0e-11 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta6.5536e-6.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta6.5536e-6.yaml index bc77e0b..ec32347 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta6.5536e-6.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta6.5536e-6.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 6.5536e-6 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 6.5536e-6 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.0e-10.yaml index 65446a3..f661af1 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.0e-10.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.0e-10.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 8.0e-10 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 8.0e-10 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.192e-7.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.192e-7.yaml index 9542c43..7be71ec 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.192e-7.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.192e-7.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 8.192e-7 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 8.192e-7 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-10.yaml index 05cbd34..84d0d91 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-10.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-10.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-10 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-10 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-15.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-15.yaml index bc69cda..fcaecee 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-15.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-15.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-15 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-15 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-4.yaml index 57bdcfd..a6c3dae 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-4.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-4.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-4 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-4 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-5.yaml index 03e0c93..d6ffded 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-5.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-5.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-5 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-5 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-6.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-6.yaml index 51f2519..9e9a3c9 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-6.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-6.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-6 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-6 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-8.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-8.yaml index 4798e7e..c44b70b 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-8.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-8.yaml @@ -101,7 +101,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -127,26 +127,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-8 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-8 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-10.yaml index f72c2b0..325aa05 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-10.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-10.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-10 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-10 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-15.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-15.yaml index 915a45a..1e3e575 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-15.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-15.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-15 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-15 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-4.yaml index 8875f63..24e0c3b 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-4.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-4.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-4 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-4 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-5.yaml index 9a16bdc..4232c1f 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-5.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-5.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-5 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-5 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-6.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-6.yaml index f3e1bef..9811443 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-6.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-6.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-6 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-6 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-8.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-8.yaml index c70d03b..6183ae9 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-8.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-8.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-8 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-8 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta5.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta5.0e-5.yaml index 872a0f0..76961c0 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta5.0e-5.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta5.0e-5.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 5.0e-5 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 5.0e-5 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-10.yaml index ee7a50b..d8dfdb7 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-10.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-10.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-10 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-10 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-15.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-15.yaml index 91a9c0b..f892c72 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-15.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-15.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-15 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-15 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-3.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-3.yaml index 897552d..1c65001 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-3.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-3.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-3 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-3 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-4.yaml index 25762b4..5def9dc 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-4.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-4.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-4 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-4 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-5.yaml index b77c39f..b53d87b 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-5.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-5.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-5 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-5 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-6.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-6.yaml index 18f357e..7aa914d 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-6.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-6.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-6 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-6 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-8.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-8.yaml index e82bddf..39260bd 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-8.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-8.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 1.0e-8 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 1.0e-8 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-4.yaml index eff5f41..7649077 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-4.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-4.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 5.0e-4 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 5.0e-4 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-5.yaml index 5f6ecf9..ca24934 100644 --- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-5.yaml +++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-5.yaml @@ -103,7 +103,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -129,26 +129,39 @@ train: milestones: [5, 8] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - criterion: - key: 'CrossEntropyLoss' - kwargs: - reduction: 'mean' - weight: 1.0 - sub_terms: - bpp: - criterion: - key: 'BppLoss' - kwargs: - entropy_module_path: 'entropy_bottleneck' - reduction: 'batchmean' - weight: 5.0e-5 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + ce: + criterion: + key: 'CrossEntropyLoss' + kwargs: + reduction: 'mean' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: '.' + io: 'output' + target: + uses_label: True + weight: 1.0 + bpp: + criterion: + key: 'BppLoss' + kwargs: + entropy_module_path: 'entropy_bottleneck' + reduction: 'batchmean' + weight: 5.0e-5 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq12ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq12ch_from_resnet50.yaml index 58b5956..0545a1d 100644 --- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq12ch_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq12ch_from_resnet50.yaml @@ -124,7 +124,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -158,75 +158,85 @@ train: milestones: [5, 10, 15] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq1ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq1ch_from_resnet50.yaml index 3499aaa..4216722 100644 --- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq1ch_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq1ch_from_resnet50.yaml @@ -124,7 +124,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -158,75 +158,85 @@ train: milestones: [5, 10, 15] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq2ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq2ch_from_resnet50.yaml index 4a6103a..5e298d3 100644 --- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq2ch_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq2ch_from_resnet50.yaml @@ -124,7 +124,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -158,75 +158,85 @@ train: milestones: [5, 10, 15] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq3ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq3ch_from_resnet50.yaml index daf37b1..46dbe3a 100644 --- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq3ch_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq3ch_from_resnet50.yaml @@ -124,7 +124,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -158,75 +158,85 @@ train: milestones: [5, 10, 15] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq6ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq6ch_from_resnet50.yaml index 8b76cdb..b05bdfc 100644 --- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq6ch_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq6ch_from_resnet50.yaml @@ -124,7 +124,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -158,75 +158,85 @@ train: milestones: [5, 10, 15] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16 diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq9ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq9ch_from_resnet50.yaml index d3bcb5c..66ea19d 100644 --- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq9ch_from_resnet50.yaml +++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq9ch_from_resnet50.yaml @@ -124,7 +124,7 @@ train: cache_output: val_data_loader: dataset_id: *imagenet_val - sampler: &val_sampler + sampler: class_or_func: !import_get key: 'torch.utils.data.SequentialSampler' kwargs: @@ -158,75 +158,85 @@ train: milestones: [5, 10, 15] gamma: 0.1 criterion: - key: 'GeneralizedCustomLoss' - org_term: - weight: 0.0 - sub_terms: - layer1: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'bottleneck_layer' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer1' - io: 'output' - weight: 1.0 - layer2: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer2' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer2' - io: 'output' - weight: 1.0 - layer3: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer3' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer3' - io: 'output' - weight: 1.0 - layer4: - criterion: - key: 'MSELoss' - kwargs: - reduction: 'sum' - kwargs: - input: - is_from_teacher: False - module_path: 'layer4' - io: 'output' - target: - is_from_teacher: True - module_path: 'layer4' - io: 'output' - weight: 1.0 + key: 'WeightedSumLoss' + kwargs: + sub_terms: + layer1: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'bottleneck_layer' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer1' + io: 'output' + weight: 1.0 + layer2: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer2' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer2' + io: 'output' + weight: 1.0 + layer3: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer3' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer3' + io: 'output' + weight: 1.0 + layer4: + criterion: + key: 'MSELoss' + kwargs: + reduction: 'sum' + criterion_wrapper: + key: 'SimpleLossWrapper' + kwargs: + input: + is_from_teacher: False + module_path: 'layer4' + io: 'output' + target: + is_from_teacher: True + module_path: 'layer4' + io: 'output' + weight: 1.0 test: test_data_loader: dataset_id: *imagenet_val - sampler: *val_sampler + sampler: + class_or_func: !import_get + key: 'torch.utils.data.SequentialSampler' + kwargs: kwargs: batch_size: 1 num_workers: 16