
[Quant] Add FX support in quantization examples #5797

Closed
wants to merge 8 commits
Update on "[Quant] Add FX support in quantization examples"
Summary: Previously, the quantization examples used only eager
mode quantization. This commit adds support for FX graph mode
quantization as well.
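
For orientation, here is a minimal sketch (not code from this PR) of the two post-training quantization workflows the new `--quantization-workflow-type` flag selects between. It uses the public `torch.ao.quantization` APIs as of the PyTorch releases current when this PR was written; later releases also require an `example_inputs` argument to `prepare_fx`.

```
import torch
from torch.ao.quantization import get_default_qconfig, prepare, convert
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx

def ptq_eager(model, data_loader):
    # Eager mode: the model must be a hand-written quantizable variant
    # (QuantStub/DeQuantStub inserted, fusion hook provided), as in
    # torchvision.models.quantization.
    model.eval()
    model.qconfig = get_default_qconfig("fbgemm")
    model.fuse_model()  # hook exposed by torchvision's quantizable models
    prepare(model, inplace=True)
    with torch.inference_mode():
        for images, _ in data_loader:  # calibration pass fills the observers
            model(images)
    convert(model, inplace=True)
    return model

def ptq_fx(model, data_loader):
    # FX graph mode: works on the plain float model; tracing inserts
    # observers and fuses modules automatically.
    model.eval()
    qconfig_dict = {"": get_default_qconfig("fbgemm")}
    model = prepare_fx(model, qconfig_dict)
    with torch.inference_mode():
        for images, _ in data_loader:
            model(images)
    return convert_fx(model)
```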

Test Plan:

```
# ==================== PTQ ====================
# MODEL is one of googlenet, inception_v3, resnet18, resnet50, resnext101_32x8d,
# shufflenet_v2_x0_5, shufflenet_v2_x1_0

# eager
python train_quantization.py --device="cpu" --post-training-quantize --backend="fbgemm"\
  --model="$MODEL" --weights="IMAGENET1K_V1" --quantization-workflow-type="eager_mode_quantization"

# fx
python train_quantization.py --device="cpu" --post-training-quantize --backend="fbgemm"\
  --model="$MODEL" --weights="IMAGENET1K_V1" --quantization-workflow-type="eager_mode_quantization"

# ==================== QAT ====================
# mobilenet_v2 eager
python train_quantization.py --device="cuda" --backend="qnnpack" --model="mobilenet_v2"\
  --epochs=10 --workers=64 --weights="IMAGENET1K_V1" --lr=0.0001 --weight-decay=0.0001\
  --quantization-workflow-type="eager_mode_quantization"

# mobilenet_v2 fx
python train_quantization.py --device="cuda" --backend="qnnpack" --model="mobilenet_v2"\
  --epochs=10 --workers=64 --weights="IMAGENET1K_V1" --lr=0.0001 --weight-decay=0.0001\
  --quantization-workflow-type="fx_graph_mode_quantization"

# mobilenet_v3_large eager
python train_quantization.py --device="cuda" --backend="qnnpack" --model="mobilenet_v3_large"\
  --epochs=10 --workers=64 --weights="IMAGENET1K_V1" --lr=0.001 --weight-decay=0.00001\
  --quantization-workflow-type="eager_mode_quantization"

# mobilenet_v3_large fx
python train_quantization.py --device="cuda" --backend="qnnpack" --model="mobilenet_v3_large"\
  --epochs=10 --workers=64 --weights="IMAGENET1K_V1" --lr=0.001 --weight-decay=0.00001\
  --quantization-workflow-type="fx_graph_mode_quantization"
```

Reviewers: jerryzh168, vkuzo

Subscribers: jerryzh168, vkuzo

[ghstack-poisoned]
andrewor14 committed Apr 14, 2022
commit a84092abfb223933bfd6ddac77885296d0a1d176
11 changes: 6 additions & 5 deletions references/classification/train_quantization.py
```
@@ -36,7 +36,9 @@ def main(args):
             "Unknown workflow type '%s', please choose from: %s"
             % (args.quantization_workflow_type, str(tuple([t.lower() for t in QuantizationWorkflowType.__members__])))
         )
-    use_fx_graph_mode_quantization = quantization_workflow_type == QuantizationWorkflowType.FX_GRAPH_MODE_QUANTIZATION
+    use_fx_graph_mode_quantization = (
+        QuantizationWorkflowType[quantization_workflow_type] == QuantizationWorkflowType.FX_GRAPH_MODE_QUANTIZATION
+    )

     # Set backend engine to ensure that quantized model runs on the correct kernels
     if args.backend not in torch.backends.quantized.supported_engines:
```
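
The first hunk fixes the workflow-type check. Assuming `quantization_workflow_type` holds the upper-cased member name, as the validation against `QuantizationWorkflowType.__members__` above suggests, a bare string never compares equal to an enum member, so the old expression was always False; indexing the Enum class by member name performs the conversion. A self-contained illustration (the enum body here is a hypothetical stand-in; only the lookup pattern is from the diff):

```
from enum import Enum, auto

class QuantizationWorkflowType(Enum):  # hypothetical stand-in for the PR's enum
    EAGER_MODE_QUANTIZATION = auto()
    FX_GRAPH_MODE_QUANTIZATION = auto()

name = "FX_GRAPH_MODE_QUANTIZATION"

# A raw string never equals an enum member, so this is always False:
print(name == QuantizationWorkflowType.FX_GRAPH_MODE_QUANTIZATION)  # False

# Indexing the Enum class by member name converts string -> member:
print(QuantizationWorkflowType[name] == QuantizationWorkflowType.FX_GRAPH_MODE_QUANTIZATION)  # True
```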
```
@@ -61,12 +63,11 @@ def main(args):
     )

     print("Creating model", args.model)
+    # when training quantized models, we always start from a pre-trained fp32 reference model
     if use_fx_graph_mode_quantization:
-        model_namespace = torchvision.models
+        model = torchvision.models.__dict__[args.model](weights=args.weights)
     else:
-        model_namespace = torchvision.models.quantization
-    # when training quantized models, we always start from a pre-trained fp32 reference model
-    model = model_namespace.__dict__[args.model](weights=args.weights, quantize=args.test_only)
+        model = torchvision.models.quantization.__dict__[args.model](weights=args.weights, quantize=args.test_only)
     model.to(device)

     if not (args.test_only or args.post_training_quantize):
```
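
The second hunk reflects the main practical difference between the two workflows: eager mode needs the hand-written quantizable model definitions under `torchvision.models.quantization` (with stubs and fusion hooks baked in), while FX graph mode starts from the ordinary float models under `torchvision.models`. A small illustration using `resnet18`, one of the models in the test plan:

```
import torchvision

# Eager mode path: quantizable variant with QuantStub/DeQuantStub and a
# fuse_model() hook built in; quantize=True would instead return an
# already quantized model.
eager_model = torchvision.models.quantization.resnet18(quantize=False)

# FX graph mode path: the plain float model suffices, because tracing
# inserts the quantization machinery automatically.
fx_model = torchvision.models.resnet18()
```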