2 files changed, +3 −2 lines changed

````diff
@@ -10,7 +10,7 @@
 
 import torch
 
-from .utils import _lm_eval_available, _MultiInput
+from quantization.utils import _lm_eval_available, _MultiInput
 
 if _lm_eval_available:
     try:  # lm_eval version 0.4
````
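The hunk above repoints the helper import from the package-relative `.utils` to `quantization.utils`; the README hunk below updates the documented import path to match, with `InputRecorder` and `TransformerEvalWrapper` now imported from `torchao._eval` rather than `torchao.quantization.GPTQ`.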
````diff
@@ -69,7 +69,8 @@ Note: The quantization error incurred by applying int4 quantization to your mode
 ## A16W4 WeightOnly Quantization with GPTQ
 
 ```python
-from torchao.quantization.GPTQ import Int4WeightOnlyGPTQQuantizer, InputRecorder, TransformerEvalWrapper
+from torchao._eval import InputRecorder, TransformerEvalWrapper
+from torchao.quantization.GPTQ import Int4WeightOnlyGPTQQuantizer
 precision = torch.bfloat16
 device = "cuda"
 checkpoint_file_name = "../gpt-fast/checkpoints/meta-llama/Llama-2-7b-chat-hf/model.pth"
````
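Since the README hunk only shows the import change, here is a minimal sketch of how the two imports are used together downstream, following the calibrate-then-quantize flow this README documents. The `InputRecorder` arguments, the `record_inputs`/`get_inputs` chain, and the `Int4WeightOnlyGPTQQuantizer` and `TransformerEvalWrapper` signatures are assumptions based on this era of the torchao GPTQ README, not a verified API reference; `model` and `tokenizer` are assumed to be loaded as in the elided part of the snippet.

```python
# A minimal sketch, not a verified example: the signatures below are
# assumptions based on this era of the torchao GPTQ README.
import torch

from torchao._eval import InputRecorder, TransformerEvalWrapper
from torchao.quantization.GPTQ import Int4WeightOnlyGPTQQuantizer

precision = torch.bfloat16
device = "cuda"

# GPTQ hyperparameters and calibration settings (hypothetical values,
# mirroring the README's style).
blocksize = 128
percdamp = 0.01
groupsize = 128
calibration_tasks = ["wikitext"]  # lm_eval task(s) used for calibration
calibration_limit = 1
calibration_seq_length = 100
pad_calibration_inputs = False

# `model` and `tokenizer` are assumed loaded from checkpoint_file_name as in
# the elided part of the README snippet (gpt-fast Transformer + tokenizer).
inputs = InputRecorder(
    tokenizer,
    calibration_seq_length,
    pad_calibration_inputs,
    model.config.vocab_size,
).record_inputs(
    calibration_tasks,
    calibration_limit,
).get_inputs()

# Run GPTQ over the recorded calibration inputs to produce an int4
# weight-only quantized model.
quantizer = Int4WeightOnlyGPTQQuantizer(blocksize, percdamp, groupsize)
model = quantizer.quantize(model, inputs).to(device=device)

# Optionally score the quantized model on the same lm_eval tasks
# (wrapper arguments are likewise an assumption).
TransformerEvalWrapper(
    model,
    tokenizer,
    model.config.block_size,
    device=device,
).run_eval(calibration_tasks, calibration_limit)
```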