diff --git a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.024e-7.yaml b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.024e-7.yaml
index a14014c..39938de 100644
--- a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.024e-7.yaml
+++ b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.024e-7.yaml
@@ -100,7 +100,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -126,26 +126,39 @@ train:
       T_max: 300000
     scheduling_step: 1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.024e-7
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.024e-7
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.28e-8.yaml b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.28e-8.yaml
index afe502f..89a4741 100644
--- a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.28e-8.yaml
+++ b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta1.28e-8.yaml
@@ -100,7 +100,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -126,26 +126,39 @@ train:
       T_max: 300000
     scheduling_step: 1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.28e-8
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.28e-8
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta2.048e-7.yaml b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta2.048e-7.yaml
index 36d21fc..a332f0c 100644
--- a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta2.048e-7.yaml
+++ b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta2.048e-7.yaml
@@ -100,7 +100,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -126,26 +126,39 @@ train:
       T_max: 300000
     scheduling_step: 1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 2.048e-7
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 2.048e-7
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta3.2768e-6.yaml b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta3.2768e-6.yaml
index 944d58e..12fbde0 100644
--- a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta3.2768e-6.yaml
+++ b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta3.2768e-6.yaml
@@ -100,7 +100,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -126,26 +126,39 @@ train:
       T_max: 300000
     scheduling_step: 1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 3.2768e-6
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 3.2768e-6
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta8.192e-7.yaml b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta8.192e-7.yaml
index 4ecf7bc..cc637dd 100644
--- a/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta8.192e-7.yaml
+++ b/configs/ilsvrc2012/supervised_compression/end-to-end/splitable_resnet50-fp-beta8.192e-7.yaml
@@ -100,7 +100,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -126,26 +126,39 @@ train:
       T_max: 300000
     scheduling_step: 1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 8.192e-7
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 8.192e-7
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml
index d7dfe3e..e6fd026 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml
@@ -145,77 +145,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.16
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.16
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -264,21 +271,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml
index fda739d..c15de5a 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml
@@ -145,77 +145,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.32
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.32
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -264,16 +271,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml
index 9472975..f7aafea 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml
@@ -145,77 +145,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.64
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.64
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -264,16 +271,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml
index ca904ea..dd74593 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml
@@ -145,77 +145,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 1.28
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 1.28
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -264,16 +271,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml
index e686cba..c53d7f1 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml
@@ -145,77 +145,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 10.24
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 10.24
   stage2:
     num_epochs: 20
     train_data_loader:
@@ -264,16 +271,21 @@ train:
         milestones: [10]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml
index b66d9cb..f3f4902 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml
@@ -145,77 +145,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 15.36
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 15.36
   stage2:
     num_epochs: 20
     train_data_loader:
@@ -264,21 +271,29 @@ train:
         milestones: [10]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml
index d4f2ba7..db8d196 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml
@@ -145,77 +145,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 2.56
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 2.56
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -264,21 +271,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml
index b2acd8c..c53af15 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml
@@ -145,77 +145,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 20.48
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 20.48
   stage2:
     num_epochs: 20
     train_data_loader:
@@ -264,16 +271,21 @@ train:
         milestones: [10]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml
index 4d7a04c..c3ebc01 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-fp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml
@@ -145,77 +145,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 5.12
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 5.12
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -264,16 +271,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml
index 0f107f4..782e3e7 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.16_from_hybrid_vit_small_r26_s32_224.yaml
@@ -146,84 +146,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.16
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.16
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.16
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.16
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -272,16 +279,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml
index aa15b43..df45331 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.32_from_hybrid_vit_small_r26_s32_224.yaml
@@ -146,84 +146,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.32
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.32
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.32
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.32
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -272,16 +279,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml
index caac55a..2bd2d94 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta0.64_from_hybrid_vit_small_r26_s32_224.yaml
@@ -146,84 +146,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.64
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.64
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.64
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.64
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -272,21 +279,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml
index ce28f84..0b9c9cb 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta1.28_from_hybrid_vit_small_r26_s32_224.yaml
@@ -146,84 +146,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 1.28
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 1.28
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          stages0:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 1.28
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 1.28
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -272,16 +279,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'  # NOTE(review): presumably sums the weighted sub_terms below - confirm
+      kwargs:
+        sub_terms:
+          kd:  # sole sub-term of this criterion
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'  # NOTE(review): '.' presumably denotes the model's own output - confirm
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml
index 8ac5948..1279381 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta10.24_from_hybrid_vit_small_r26_s32_224.yaml
@@ -146,84 +146,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 10.24
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 10.24
+      key: 'WeightedSumLoss'  # NOTE(review): presumably sums the weighted sub_terms below - confirm
+      kwargs:
+        sub_terms:
+          stages0:  # student 'bottleneck_layer' output vs. teacher 'patch_embed.backbone.stages.0' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:  # names the module outputs used as the criterion's input and target
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:  # student 'patch_embed_pruned_stages.1' output vs. teacher 'patch_embed.backbone.stages.1' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:  # student 'patch_embed_pruned_stages.2' output vs. teacher 'patch_embed.backbone.stages.2' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:  # student 'patch_embed_pruned_stages.3' output vs. teacher 'patch_embed.backbone.stages.3' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:  # BppLoss attached to 'bottleneck_layer.entropy_bottleneck'
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 10.24  # same value as the beta in this config's filename (beta10.24)
+          bpp_gaussian:  # BppLoss attached to 'bottleneck_layer.gaussian_conditional'
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 10.24  # kept equal to the bpp_entropy weight
   stage2:
     num_epochs: 20
     train_data_loader:
@@ -272,21 +279,29 @@ train:
         milestones: [10]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'  # NOTE(review): presumably sums the weighted sub_terms below - confirm
+      kwargs:
+        sub_terms:
+          kd:  # sole sub-term of this criterion
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'  # NOTE(review): '.' presumably denotes the model's own output - confirm
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:  # explicit sampler definition (no YAML alias)
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:  # left empty (null): no sampler arguments are set
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml
index 62720c0..856a729 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta15.36_from_hybrid_vit_small_r26_s32_224.yaml
@@ -146,84 +146,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 15.36
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 15.36
+      key: 'WeightedSumLoss'  # NOTE(review): presumably sums the weighted sub_terms below - confirm
+      kwargs:
+        sub_terms:
+          stages0:  # student 'bottleneck_layer' output vs. teacher 'patch_embed.backbone.stages.0' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:  # names the module outputs used as the criterion's input and target
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:  # student 'patch_embed_pruned_stages.1' output vs. teacher 'patch_embed.backbone.stages.1' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:  # student 'patch_embed_pruned_stages.2' output vs. teacher 'patch_embed.backbone.stages.2' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:  # student 'patch_embed_pruned_stages.3' output vs. teacher 'patch_embed.backbone.stages.3' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:  # BppLoss attached to 'bottleneck_layer.entropy_bottleneck'
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 15.36  # same value as the beta in this config's filename (beta15.36)
+          bpp_gaussian:  # BppLoss attached to 'bottleneck_layer.gaussian_conditional'
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 15.36  # kept equal to the bpp_entropy weight
   stage2:
     num_epochs: 20
     train_data_loader:
@@ -272,21 +279,29 @@ train:
         milestones: [10]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'  # NOTE(review): presumably sums the weighted sub_terms below - confirm
+      kwargs:
+        sub_terms:
+          kd:  # sole sub-term of this criterion
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'  # NOTE(review): '.' presumably denotes the model's own output - confirm
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:  # explicit sampler definition (no YAML alias)
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:  # left empty (null): no sampler arguments are set
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml
index 388b162..77ec4a2 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta2.56_from_hybrid_vit_small_r26_s32_224.yaml
@@ -146,84 +146,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 2.56
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 2.56
+      key: 'WeightedSumLoss'  # NOTE(review): presumably sums the weighted sub_terms below - confirm
+      kwargs:
+        sub_terms:
+          stages0:  # student 'bottleneck_layer' output vs. teacher 'patch_embed.backbone.stages.0' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:  # names the module outputs used as the criterion's input and target
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:  # student 'patch_embed_pruned_stages.1' output vs. teacher 'patch_embed.backbone.stages.1' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:  # student 'patch_embed_pruned_stages.2' output vs. teacher 'patch_embed.backbone.stages.2' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:  # student 'patch_embed_pruned_stages.3' output vs. teacher 'patch_embed.backbone.stages.3' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:  # BppLoss attached to 'bottleneck_layer.entropy_bottleneck'
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 2.56  # same value as the beta in this config's filename (beta2.56)
+          bpp_gaussian:  # BppLoss attached to 'bottleneck_layer.gaussian_conditional'
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 2.56  # kept equal to the bpp_entropy weight
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -272,21 +279,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'  # NOTE(review): presumably sums the weighted sub_terms below - confirm
+      kwargs:
+        sub_terms:
+          kd:  # sole sub-term of this criterion
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'  # NOTE(review): '.' presumably denotes the model's own output - confirm
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:  # explicit sampler definition (no YAML alias)
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:  # left empty (null): no sampler arguments are set
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml
index b33c044..db306c5 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta20.48_from_hybrid_vit_small_r26_s32_224.yaml
@@ -146,84 +146,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 20.48
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 20.48
+      key: 'WeightedSumLoss'  # NOTE(review): presumably sums the weighted sub_terms below - confirm
+      kwargs:
+        sub_terms:
+          stages0:  # student 'bottleneck_layer' output vs. teacher 'patch_embed.backbone.stages.0' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:  # names the module outputs used as the criterion's input and target
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:  # student 'patch_embed_pruned_stages.1' output vs. teacher 'patch_embed.backbone.stages.1' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:  # student 'patch_embed_pruned_stages.2' output vs. teacher 'patch_embed.backbone.stages.2' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:  # student 'patch_embed_pruned_stages.3' output vs. teacher 'patch_embed.backbone.stages.3' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:  # BppLoss attached to 'bottleneck_layer.entropy_bottleneck'
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 20.48  # same value as the beta in this config's filename (beta20.48)
+          bpp_gaussian:  # BppLoss attached to 'bottleneck_layer.gaussian_conditional'
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 20.48  # kept equal to the bpp_entropy weight
   stage2:
     num_epochs: 20
     train_data_loader:
@@ -272,21 +279,29 @@ train:
         milestones: [10]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'  # NOTE(review): presumably sums the weighted sub_terms below - confirm
+      kwargs:
+        sub_terms:
+          kd:  # sole sub-term of this criterion
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'  # NOTE(review): '.' presumably denotes the model's own output - confirm
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:  # explicit sampler definition (no YAML alias)
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:  # left empty (null): no sampler arguments are set
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml
index 719dffd..fd57b9b 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_hybrid_vit_small_r26_s32_224-mshp-beta5.12_from_hybrid_vit_small_r26_s32_224.yaml
@@ -146,84 +146,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        stages0:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.0'
-              io: 'output'
-          weight: 1.0
-        stages1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.1'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.1'
-              io: 'output'
-          weight: 1.0
-        stages2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.2'
-              io: 'output'
-          weight: 1.0
-        stages3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'patch_embed_pruned_stages.3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'patch_embed.backbone.stages.3'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 5.12
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 5.12
+      key: 'WeightedSumLoss'  # terms are declared under kwargs.sub_terms, each with its own weight
+      kwargs:
+        sub_terms:
+          stages0:  # 'bottleneck_layer' output vs. teacher 'patch_embed.backbone.stages.0' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:  # NOTE(review): presumably feeds input/target below to the criterion; TODO confirm
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False  # input side is not taken from the teacher
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True  # target side is taken from the teacher
+                  module_path: 'patch_embed.backbone.stages.0'
+                  io: 'output'
+            weight: 1.0
+          stages1:  # same pattern as stages0, for stage index 1
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.1'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.1'
+                  io: 'output'
+            weight: 1.0
+          stages2:  # same pattern as stages0, for stage index 2
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.2'
+                  io: 'output'
+            weight: 1.0
+          stages3:  # same pattern as stages0, for stage index 3
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'patch_embed_pruned_stages.3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'patch_embed.backbone.stages.3'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:  # no criterion_wrapper; the module path is passed to BppLoss directly
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 5.12  # same value as the beta5.12 in this config's file name
+          bpp_gaussian:  # second BppLoss term, on the gaussian_conditional module
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 5.12  # same value as the beta5.12 in this config's file name
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -272,21 +279,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'  # `kd` is the only term listed under sub_terms
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'  # NOTE(review): '.' presumably selects the model's own output; TODO confirm
+                student_module_io: 'output'
+                teacher_module_path: '.'  # same path/io pair as the student side
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'  # yields dataset indices in order
+      kwargs:  # no arguments given; a bare key loads as null
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.08_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.08_from_regnety6.4gf.yaml
index b714098..4b4dcb6 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.08_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.08_from_regnety6.4gf.yaml
@@ -144,77 +144,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.08
+      key: 'WeightedSumLoss'  # terms are declared under kwargs.sub_terms, each with its own weight
+      kwargs:
+        sub_terms:
+          s1:  # 'bottleneck_layer' output vs. teacher 's1' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:  # NOTE(review): presumably feeds input/target below to the criterion; TODO confirm
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False  # input side is not taken from the teacher
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True  # target side is taken from the teacher
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:  # 's2' output on both sides (non-teacher input, teacher target)
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:  # 's3' output on both sides
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:  # 's4' output on both sides
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp:  # no criterion_wrapper; the module path is passed to BppLoss directly
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.08  # same value as the beta0.08 in this config's file name
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,21 +270,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'  # `kd` is the only term listed under sub_terms
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'  # NOTE(review): '.' presumably selects the model's own output; TODO confirm
+                student_module_io: 'output'
+                teacher_module_path: '.'  # same path/io pair as the student side
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'  # yields dataset indices in order
+      kwargs:  # no arguments given; a bare key loads as null
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.16_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.16_from_regnety6.4gf.yaml
index 51d5199..1aeed6b 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.16_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.16_from_regnety6.4gf.yaml
@@ -144,77 +144,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.16
+      key: 'WeightedSumLoss'  # terms are declared under kwargs.sub_terms, each with its own weight
+      kwargs:
+        sub_terms:
+          s1:  # 'bottleneck_layer' output vs. teacher 's1' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:  # NOTE(review): presumably feeds input/target below to the criterion; TODO confirm
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False  # input side is not taken from the teacher
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True  # target side is taken from the teacher
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:  # 's2' output on both sides (non-teacher input, teacher target)
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:  # 's3' output on both sides
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:  # 's4' output on both sides
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp:  # no criterion_wrapper; the module path is passed to BppLoss directly
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.16  # same value as the beta0.16 in this config's file name
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,16 +270,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'  # `kd` is the only term listed under sub_terms
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'  # NOTE(review): '.' presumably selects the model's own output; TODO confirm
+                student_module_io: 'output'
+                teacher_module_path: '.'  # same path/io pair as the student side
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.32_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.32_from_regnety6.4gf.yaml
index 4cb4a2e..44297e0 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.32_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.32_from_regnety6.4gf.yaml
@@ -144,77 +144,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.32
+      key: 'WeightedSumLoss'  # terms are declared under kwargs.sub_terms, each with its own weight
+      kwargs:
+        sub_terms:
+          s1:  # 'bottleneck_layer' output vs. teacher 's1' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:  # NOTE(review): presumably feeds input/target below to the criterion; TODO confirm
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False  # input side is not taken from the teacher
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True  # target side is taken from the teacher
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:  # 's2' output on both sides (non-teacher input, teacher target)
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:  # 's3' output on both sides
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:  # 's4' output on both sides
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp:  # no criterion_wrapper; the module path is passed to BppLoss directly
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.32  # same value as the beta0.32 in this config's file name
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,16 +270,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'  # `kd` is the only term listed under sub_terms
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'  # NOTE(review): '.' presumably selects the model's own output; TODO confirm
+                student_module_io: 'output'
+                teacher_module_path: '.'  # same path/io pair as the student side
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.64_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.64_from_regnety6.4gf.yaml
index a07c57c..ace6bcd 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.64_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta0.64_from_regnety6.4gf.yaml
@@ -144,77 +144,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.64
+      key: 'WeightedSumLoss'  # terms are declared under kwargs.sub_terms, each with its own weight
+      kwargs:
+        sub_terms:
+          s1:  # 'bottleneck_layer' output vs. teacher 's1' output
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:  # NOTE(review): presumably feeds input/target below to the criterion; TODO confirm
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False  # input side is not taken from the teacher
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True  # target side is taken from the teacher
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:  # 's2' output on both sides (non-teacher input, teacher target)
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:  # 's3' output on both sides
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:  # 's4' output on both sides
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp:  # no criterion_wrapper; the module path is passed to BppLoss directly
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.64  # same value as the beta0.64 in this config's file name
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,21 +270,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'  # `kd` is the only term listed under sub_terms
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'  # NOTE(review): '.' presumably selects the model's own output; TODO confirm
+                student_module_io: 'output'
+                teacher_module_path: '.'  # same path/io pair as the student side
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'  # yields dataset indices in order
+      kwargs:  # no arguments given; a bare key loads as null
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta1.28_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta1.28_from_regnety6.4gf.yaml
index 3ab05e2..ff11793 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta1.28_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta1.28_from_regnety6.4gf.yaml
@@ -144,77 +144,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 1.28
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          s1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 1.28
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,21 +270,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta2.56_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta2.56_from_regnety6.4gf.yaml
index 7f06536..067618d 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta2.56_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta2.56_from_regnety6.4gf.yaml
@@ -144,77 +144,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 2.56
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          s1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 2.56
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,16 +270,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta5.12_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta5.12_from_regnety6.4gf.yaml
index e26b73d..b73b683 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta5.12_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta5.12_from_regnety6.4gf.yaml
@@ -144,77 +144,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 5.12
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          s1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 5.12
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,16 +270,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta7.68_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta7.68_from_regnety6.4gf.yaml
index e7bfcfc..3c0eb53 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta7.68_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-fp-beta7.68_from_regnety6.4gf.yaml
@@ -144,77 +144,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 7.68
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          s1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 7.68
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,21 +270,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.08_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.08_from_regnety6.4gf.yaml
index 64b4e49..053c1c8 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.08_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.08_from_regnety6.4gf.yaml
@@ -145,84 +145,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.08
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.08
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          s1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.08
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.08
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -271,21 +278,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.16_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.16_from_regnety6.4gf.yaml
index 565648b..683f6b3 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.16_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.16_from_regnety6.4gf.yaml
@@ -145,84 +145,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.16
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.16
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          s1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.16
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.16
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -271,21 +278,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.32_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.32_from_regnety6.4gf.yaml
index 94f7ec6..66b8498 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.32_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.32_from_regnety6.4gf.yaml
@@ -145,84 +145,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.32
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.32
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          s1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.32
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.32
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -271,16 +278,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.64_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.64_from_regnety6.4gf.yaml
index cdfb1d2..f4df3fd 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.64_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta0.64_from_regnety6.4gf.yaml
@@ -145,84 +145,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.64
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.64
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          s1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.64
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.64
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -271,16 +278,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta1.28_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta1.28_from_regnety6.4gf.yaml
index 5eb2130..0a1eb23 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta1.28_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta1.28_from_regnety6.4gf.yaml
@@ -145,84 +145,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 1.28
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 1.28
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          s1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 1.28
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 1.28
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -271,16 +278,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta2.56_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta2.56_from_regnety6.4gf.yaml
index 709b41c..6ca78dc 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta2.56_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta2.56_from_regnety6.4gf.yaml
@@ -145,84 +145,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 2.56
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 2.56
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          s1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 2.56
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 2.56
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -271,21 +278,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta4.48_from_regnety6.4gf.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta4.48_from_regnety6.4gf.yaml
index 961634e..83d661d 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta4.48_from_regnety6.4gf.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_regnety6.4gf-mshp-beta4.48_from_regnety6.4gf.yaml
@@ -145,84 +145,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        s1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's1'
-              io: 'output'
-          weight: 1.0
-        s2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's2'
-              io: 'output'
-          weight: 1.0
-        s3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's3'
-              io: 'output'
-          weight: 1.0
-        s4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 's4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 's4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 4.48
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 4.48
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          s1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's1'
+                  io: 'output'
+            weight: 1.0
+          s2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's2'
+                  io: 'output'
+            weight: 1.0
+          s3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's3'
+                  io: 'output'
+            weight: 1.0
+          s4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 's4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 's4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 4.48
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 4.48
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -271,21 +278,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.08_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.08_from_resnet101.yaml
index 74fb45f..4e77188 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.08_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.08_from_resnet101.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.08
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.08
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,16 +262,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.16_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.16_from_resnet101.yaml
index 7fc1fd0..3519185 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.16_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.16_from_resnet101.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.16
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.16
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,16 +262,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.32_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.32_from_resnet101.yaml
index fe67a55..19051df 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.32_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.32_from_resnet101.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.32
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.32
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,16 +262,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.64_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.64_from_resnet101.yaml
index e735a61..5ed2cb5 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.64_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta0.64_from_resnet101.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.64
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.64
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,16 +262,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta1.28_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta1.28_from_resnet101.yaml
index d069037..c782508 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta1.28_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta1.28_from_resnet101.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 1.28
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 1.28
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,16 +262,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta2.56_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta2.56_from_resnet101.yaml
index ddbbae3..8bd32e5 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta2.56_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta2.56_from_resnet101.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 2.56
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 2.56
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,16 +262,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta5.12_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta5.12_from_resnet101.yaml
index ec5e9ef..f5587e9 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta5.12_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-fp-beta5.12_from_resnet101.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 5.12
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 5.12
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,16 +262,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.08_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.08_from_resnet101.yaml
index e612866..adc6243 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.08_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.08_from_resnet101.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.08
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.08
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.08
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.08
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,21 +270,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.16_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.16_from_resnet101.yaml
index f02fb9d..5a7a432 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.16_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.16_from_resnet101.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.16
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.16
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.16
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.16
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,16 +270,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.32_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.32_from_resnet101.yaml
index d06f5ba..c209460 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.32_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.32_from_resnet101.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.32
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.32
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.32
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.32
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,16 +270,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.64_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.64_from_resnet101.yaml
index 51406f2..313d10b 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.64_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta0.64_from_resnet101.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.64
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.64
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.64
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.64
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,16 +270,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta1.28_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta1.28_from_resnet101.yaml
index 07f9ae4..e50200e 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta1.28_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta1.28_from_resnet101.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 1.28
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 1.28
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 1.28
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 1.28
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,21 +270,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta2.56_from_resnet101.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta2.56_from_resnet101.yaml
index 341f1de..2f5e40a 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta2.56_from_resnet101.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet101-mshp-beta2.56_from_resnet101.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 2.56
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 2.56
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 2.56
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 2.56
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,16 +270,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.08_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.08_from_resnet50.yaml
index 25d33bd..788d751 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.08_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.08_from_resnet50.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.08
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.08
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,21 +262,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.16_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.16_from_resnet50.yaml
index 1b547d2..f40cd3f 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.16_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.16_from_resnet50.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.16
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.16
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,21 +262,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.32_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.32_from_resnet50.yaml
index 52f16f1..f0ee6d1 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.32_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.32_from_resnet50.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.32
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.32
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,16 +262,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.64_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.64_from_resnet50.yaml
index f149b78..b191cc3 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.64_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta0.64_from_resnet50.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.64
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.64
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,21 +262,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta1.28_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta1.28_from_resnet50.yaml
index 828bc41..92fafef 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta1.28_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta1.28_from_resnet50.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 1.28
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 1.28
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,21 +262,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta2.56_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta2.56_from_resnet50.yaml
index 228249c..42e645a 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta2.56_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta2.56_from_resnet50.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 2.56
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 2.56
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,16 +262,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta5.12_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta5.12_from_resnet50.yaml
index af6718a..b232b9e 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta5.12_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-fp-beta5.12_from_resnet50.yaml
@@ -136,77 +136,84 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 5.12
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 5.12
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -255,16 +262,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.08_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.08_from_resnet50.yaml
index c0279a5..9090ef1 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.08_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.08_from_resnet50.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.1
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.08
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.1
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.08
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,16 +270,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.16_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.16_from_resnet50.yaml
index 80dc14a..5fb9989 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.16_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.16_from_resnet50.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.16
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.16
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.16
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.16
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,21 +270,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.32_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.32_from_resnet50.yaml
index 36c665c..6efa264 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.32_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.32_from_resnet50.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.32
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.32
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.32
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.32
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,21 +270,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.64_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.64_from_resnet50.yaml
index b5c7fde..096f2ae 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.64_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta0.64_from_resnet50.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 0.64
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 0.64
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 0.64
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 0.64
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,16 +270,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta1.28_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta1.28_from_resnet50.yaml
index a358428..fab873f 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta1.28_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta1.28_from_resnet50.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 1.28
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 1.28
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 1.28
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 1.28
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,21 +270,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta2.56_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta2.56_from_resnet50.yaml
index aa453dd..6cc45bb 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta2.56_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta2.56_from_resnet50.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 2.56
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 2.56
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 2.56
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 2.56
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,21 +270,29 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta3.84_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta3.84_from_resnet50.yaml
index 8d46154..2babac0 100644
--- a/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta3.84_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/entropic_student/splitable_resnet50-mshp-beta3.84_from_resnet50.yaml
@@ -137,84 +137,91 @@ train:
         milestones: [5, 8]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        weight: 0.0
-      sub_terms:
-        layer1:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'bottleneck_layer'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer1'
-              io: 'output'
-          weight: 1.0
-        layer2:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer2'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer2'
-              io: 'output'
-          weight: 1.0
-        layer3:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer3'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer3'
-              io: 'output'
-          weight: 1.0
-        layer4:
-          criterion:
-            key: 'MSELoss'
-            kwargs:
-              reduction: 'sum'
-          kwargs:
-            input:
-              is_from_teacher: False
-              module_path: 'layer4'
-              io: 'output'
-            target:
-              is_from_teacher: True
-              module_path: 'layer4'
-              io: 'output'
-          weight: 1.0
-        bpp_entropy:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
-              reduction: 'sum'
-          weight: 3.84
-        bpp_gaussian:
-          criterion:
-            key: 'BppLoss'
-            kwargs:
-              entropy_module_path: 'bottleneck_layer.gaussian_conditional'
-              reduction: 'sum'
-          weight: 3.84
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          layer1:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'bottleneck_layer'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer1'
+                  io: 'output'
+            weight: 1.0
+          layer2:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer2'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer2'
+                  io: 'output'
+            weight: 1.0
+          layer3:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer3'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer3'
+                  io: 'output'
+            weight: 1.0
+          layer4:
+            criterion:
+              key: 'MSELoss'
+              kwargs:
+                reduction: 'sum'
+            criterion_wrapper:
+              key: 'SimpleLossWrapper'
+              kwargs:
+                input:
+                  is_from_teacher: False
+                  module_path: 'layer4'
+                  io: 'output'
+                target:
+                  is_from_teacher: True
+                  module_path: 'layer4'
+                  io: 'output'
+            weight: 1.0
+          bpp_entropy:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.entropy_bottleneck'
+                reduction: 'sum'
+            weight: 3.84
+          bpp_gaussian:
+            criterion:
+              key: 'BppLoss'
+              kwargs:
+                entropy_module_path: 'bottleneck_layer.gaussian_conditional'
+                reduction: 'sum'
+            weight: 3.84
   stage2:
     num_epochs: 10
     train_data_loader:
@@ -263,16 +270,21 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      key: 'GeneralizedCustomLoss'
-      org_term:
-        criterion:
-          key: 'KDLoss'
-          kwargs:
-            temperature: 1.0
-            alpha: 0.5
-            reduction: 'batchmean'
-        weight: 1.0
-      sub_terms:
+      key: 'WeightedSumLoss'
+      kwargs:
+        sub_terms:
+          kd:
+            criterion:
+              key: 'KDLoss'
+              kwargs:
+                student_module_path: '.'
+                student_module_io: 'output'
+                teacher_module_path: '.'
+                teacher_module_io: 'output'
+                temperature: 1.0
+                alpha: 0.5
+                reduction: 'batchmean'
+            weight: 1.0
 
 test:
   test_data_loader:
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-1.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-1.yaml
index 3fe27e3..ef48b81 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-1.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-1.yaml
@@ -104,7 +104,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-1
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-1
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-10.yaml
index b17e2dc..afc5733 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-10.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-10.yaml
@@ -104,7 +104,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-10
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-10
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-15.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-15.yaml
index 275de9b..068467b 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-15.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-15.yaml
@@ -104,7 +104,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-15
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-15
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-2.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-2.yaml
index 89f275b..b52c7b3 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-2.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-2.yaml
@@ -104,7 +104,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-2
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-2
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-4.yaml
index b394edb..4cbce6c 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-4.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta1.0e-4.yaml
@@ -104,7 +104,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-4
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-4
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta6.5536e-6.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta6.5536e-6.yaml
index 17ef1bf..5a0a2a9 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta6.5536e-6.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta6.5536e-6.yaml
@@ -104,7 +104,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 6.5536e-6
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 6.5536e-6
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta8.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta8.0e-10.yaml
index 0cce6af..01b1874 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta8.0e-10.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_avgpool-beta8.0e-10.yaml
@@ -104,7 +104,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 8.0e-10
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 8.0e-10
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.024e-7.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.024e-7.yaml
index 0df8d3d..1425a98 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.024e-7.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.024e-7.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.024e-7
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.024e-7
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-15.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-15.yaml
index be48937..315b6e6 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-15.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-15.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-15
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-15
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-4.yaml
index 2ce9ccf..fbe1e4a 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-4.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-4.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-4
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-4
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-5.yaml
index 48fa97b..b07169e 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-5.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.0e-5.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-5
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-5
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.28e-8.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.28e-8.yaml
index e0b74bf..816b1b6 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.28e-8.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta1.28e-8.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.28e-8
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.28e-8
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta5.0e-11.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta5.0e-11.yaml
index c9db1a7..3cb7eef 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta5.0e-11.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta5.0e-11.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 5.0e-11
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 5.0e-11
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta6.5536e-6.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta6.5536e-6.yaml
index bc77e0b..ec32347 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta6.5536e-6.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta6.5536e-6.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 6.5536e-6
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 6.5536e-6
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.0e-10.yaml
index 65446a3..f661af1 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.0e-10.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.0e-10.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 8.0e-10
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 8.0e-10
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.192e-7.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.192e-7.yaml
index 9542c43..7be71ec 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.192e-7.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer1-beta8.192e-7.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 8.192e-7
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 8.192e-7
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-10.yaml
index 05cbd34..84d0d91 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-10.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-10.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-10
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-10
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-15.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-15.yaml
index bc69cda..fcaecee 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-15.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-15.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-15
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-15
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-4.yaml
index 57bdcfd..a6c3dae 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-4.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-4.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-4
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-4
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-5.yaml
index 03e0c93..d6ffded 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-5.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-5.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-5
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-5
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-6.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-6.yaml
index 51f2519..9e9a3c9 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-6.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-6.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-6
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-6
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-8.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-8.yaml
index 4798e7e..c44b70b 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-8.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer2-beta1.0e-8.yaml
@@ -101,7 +101,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -127,26 +127,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-8
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-8
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-10.yaml
index f72c2b0..325aa05 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-10.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-10.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-10
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-10
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-15.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-15.yaml
index 915a45a..1e3e575 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-15.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-15.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-15
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-15
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-4.yaml
index 8875f63..24e0c3b 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-4.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-4.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-4
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-4
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-5.yaml
index 9a16bdc..4232c1f 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-5.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-5.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-5
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-5
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-6.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-6.yaml
index f3e1bef..9811443 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-6.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-6.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-6
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-6
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-8.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-8.yaml
index c70d03b..6183ae9 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-8.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta1.0e-8.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-8
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-8
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta5.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta5.0e-5.yaml
index 872a0f0..76961c0 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta5.0e-5.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer3-beta5.0e-5.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 5.0e-5
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 5.0e-5
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-10.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-10.yaml
index ee7a50b..d8dfdb7 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-10.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-10.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-10
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-10
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-15.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-15.yaml
index 91a9c0b..f892c72 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-15.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-15.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-15
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-15
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-3.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-3.yaml
index 897552d..1c65001 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-3.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-3.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-3
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-3
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-4.yaml
index 25762b4..5def9dc 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-4.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-4.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-4
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-4
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-5.yaml
index b77c39f..b53d87b 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-5.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-5.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-5
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-5
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-6.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-6.yaml
index 18f357e..7aa914d 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-6.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-6.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-6
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-6
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-8.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-8.yaml
index e82bddf..39260bd 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-8.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta1.0e-8.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 1.0e-8
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 1.0e-8
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-4.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-4.yaml
index eff5f41..7649077 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-4.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-4.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 5.0e-4
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 5.0e-4
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-5.yaml b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-5.yaml
index 5f6ecf9..ca24934 100644
--- a/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-5.yaml
+++ b/configs/ilsvrc2012/supervised_compression/fine-tuning/resnet50-eb_after_layer4-beta5.0e-5.yaml
@@ -103,7 +103,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -129,26 +129,39 @@ train:
       milestones: [5, 8]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      criterion:
-        key: 'CrossEntropyLoss'
-        kwargs:
-          reduction: 'mean'
-      weight: 1.0
-    sub_terms:
-      bpp:
-        criterion:
-          key: 'BppLoss'
-          kwargs:
-            entropy_module_path: 'entropy_bottleneck'
-            reduction: 'batchmean'
-        weight: 5.0e-5
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        ce:
+          criterion:
+            key: 'CrossEntropyLoss'
+            kwargs:
+              reduction: 'mean'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: '.'
+                io: 'output'
+              target:
+                uses_label: True
+          weight: 1.0
+        bpp:
+          criterion:
+            key: 'BppLoss'
+            kwargs:
+              entropy_module_path: 'entropy_bottleneck'
+              reduction: 'batchmean'
+          weight: 5.0e-5
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq12ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq12ch_from_resnet50.yaml
index 58b5956..0545a1d 100644
--- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq12ch_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq12ch_from_resnet50.yaml
@@ -124,7 +124,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -158,75 +158,85 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      weight: 0.0
-    sub_terms:
-      layer1:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'bottleneck_layer'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer1'
-            io: 'output'
-        weight: 1.0
-      layer2:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer2'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer2'
-            io: 'output'
-        weight: 1.0
-      layer3:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer3'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer3'
-            io: 'output'
-        weight: 1.0
-      layer4:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer4'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer4'
-            io: 'output'
-        weight: 1.0
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        layer1:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'bottleneck_layer'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer1'
+                io: 'output'
+          weight: 1.0
+        layer2:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer2'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer2'
+                io: 'output'
+          weight: 1.0
+        layer3:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer3'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer3'
+                io: 'output'
+          weight: 1.0
+        layer4:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer4'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer4'
+                io: 'output'
+          weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq1ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq1ch_from_resnet50.yaml
index 3499aaa..4216722 100644
--- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq1ch_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq1ch_from_resnet50.yaml
@@ -124,7 +124,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -158,75 +158,85 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      weight: 0.0
-    sub_terms:
-      layer1:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'bottleneck_layer'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer1'
-            io: 'output'
-        weight: 1.0
-      layer2:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer2'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer2'
-            io: 'output'
-        weight: 1.0
-      layer3:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer3'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer3'
-            io: 'output'
-        weight: 1.0
-      layer4:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer4'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer4'
-            io: 'output'
-        weight: 1.0
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        layer1:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'bottleneck_layer'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer1'
+                io: 'output'
+          weight: 1.0
+        layer2:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer2'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer2'
+                io: 'output'
+          weight: 1.0
+        layer3:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer3'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer3'
+                io: 'output'
+          weight: 1.0
+        layer4:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer4'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer4'
+                io: 'output'
+          weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq2ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq2ch_from_resnet50.yaml
index 4a6103a..5e298d3 100644
--- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq2ch_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq2ch_from_resnet50.yaml
@@ -124,7 +124,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -158,75 +158,85 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      weight: 0.0
-    sub_terms:
-      layer1:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'bottleneck_layer'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer1'
-            io: 'output'
-        weight: 1.0
-      layer2:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer2'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer2'
-            io: 'output'
-        weight: 1.0
-      layer3:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer3'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer3'
-            io: 'output'
-        weight: 1.0
-      layer4:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer4'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer4'
-            io: 'output'
-        weight: 1.0
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        layer1:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'bottleneck_layer'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer1'
+                io: 'output'
+          weight: 1.0
+        layer2:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer2'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer2'
+                io: 'output'
+          weight: 1.0
+        layer3:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer3'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer3'
+                io: 'output'
+          weight: 1.0
+        layer4:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer4'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer4'
+                io: 'output'
+          weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq3ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq3ch_from_resnet50.yaml
index daf37b1..46dbe3a 100644
--- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq3ch_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq3ch_from_resnet50.yaml
@@ -124,7 +124,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -158,75 +158,85 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      weight: 0.0
-    sub_terms:
-      layer1:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'bottleneck_layer'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer1'
-            io: 'output'
-        weight: 1.0
-      layer2:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer2'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer2'
-            io: 'output'
-        weight: 1.0
-      layer3:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer3'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer3'
-            io: 'output'
-        weight: 1.0
-      layer4:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer4'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer4'
-            io: 'output'
-        weight: 1.0
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        layer1:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'bottleneck_layer'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer1'
+                io: 'output'
+          weight: 1.0
+        layer2:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer2'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer2'
+                io: 'output'
+          weight: 1.0
+        layer3:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer3'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer3'
+                io: 'output'
+          weight: 1.0
+        layer4:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer4'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer4'
+                io: 'output'
+          weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq6ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq6ch_from_resnet50.yaml
index 8b76cdb..b05bdfc 100644
--- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq6ch_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq6ch_from_resnet50.yaml
@@ -124,7 +124,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -158,75 +158,85 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      weight: 0.0
-    sub_terms:
-      layer1:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'bottleneck_layer'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer1'
-            io: 'output'
-        weight: 1.0
-      layer2:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer2'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer2'
-            io: 'output'
-        weight: 1.0
-      layer3:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer3'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer3'
-            io: 'output'
-        weight: 1.0
-      layer4:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer4'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer4'
-            io: 'output'
-        weight: 1.0
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        layer1:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'bottleneck_layer'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer1'
+                io: 'output'
+          weight: 1.0
+        layer2:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer2'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer2'
+                io: 'output'
+          weight: 1.0
+        layer3:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer3'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer3'
+                io: 'output'
+          weight: 1.0
+        layer4:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer4'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer4'
+                io: 'output'
+          weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16
diff --git a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq9ch_from_resnet50.yaml b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq9ch_from_resnet50.yaml
index d3bcb5c..66ea19d 100644
--- a/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq9ch_from_resnet50.yaml
+++ b/configs/ilsvrc2012/supervised_compression/ghnd-bq/resnet50-bq9ch_from_resnet50.yaml
@@ -124,7 +124,7 @@ train:
     cache_output:
   val_data_loader:
     dataset_id: *imagenet_val
-    sampler: &val_sampler
+    sampler:
       class_or_func: !import_get
         key: 'torch.utils.data.SequentialSampler'
       kwargs:
@@ -158,75 +158,85 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    key: 'GeneralizedCustomLoss'
-    org_term:
-      weight: 0.0
-    sub_terms:
-      layer1:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'bottleneck_layer'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer1'
-            io: 'output'
-        weight: 1.0
-      layer2:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer2'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer2'
-            io: 'output'
-        weight: 1.0
-      layer3:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer3'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer3'
-            io: 'output'
-        weight: 1.0
-      layer4:
-        criterion:
-          key: 'MSELoss'
-          kwargs:
-            reduction: 'sum'
-        kwargs:
-          input:
-            is_from_teacher: False
-            module_path: 'layer4'
-            io: 'output'
-          target:
-            is_from_teacher: True
-            module_path: 'layer4'
-            io: 'output'
-        weight: 1.0
+    key: 'WeightedSumLoss'
+    kwargs:
+      sub_terms:
+        layer1:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'bottleneck_layer'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer1'
+                io: 'output'
+          weight: 1.0
+        layer2:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer2'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer2'
+                io: 'output'
+          weight: 1.0
+        layer3:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer3'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer3'
+                io: 'output'
+          weight: 1.0
+        layer4:
+          criterion:
+            key: 'MSELoss'
+            kwargs:
+              reduction: 'sum'
+          criterion_wrapper:
+            key: 'SimpleLossWrapper'
+            kwargs:
+              input:
+                is_from_teacher: False
+                module_path: 'layer4'
+                io: 'output'
+              target:
+                is_from_teacher: True
+                module_path: 'layer4'
+                io: 'output'
+          weight: 1.0
 
 test:
   test_data_loader:
     dataset_id: *imagenet_val
-    sampler: *val_sampler
+    sampler:
+      class_or_func: !import_get
+        key: 'torch.utils.data.SequentialSampler'
+      kwargs:
     kwargs:
       batch_size: 1
       num_workers: 16