2 files changed, +3 −2 lines changed

````diff
@@ -10,7 +10,7 @@
 
 import torch
 
-from .utils import _lm_eval_available, _MultiInput
+from quantization.utils import _lm_eval_available, _MultiInput
 
 if _lm_eval_available:
     try:  # lm_eval version 0.4
````
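The hunk above repoints the helper import from the package-relative `.utils` to `quantization.utils`; the README hunk below updates the documented import path to match, with `InputRecorder` and `TransformerEvalWrapper` now imported from `torchao._eval` rather than `torchao.quantization.GPTQ`.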
````diff
@@ -69,7 +69,8 @@ Note: The quantization error incurred by applying int4 quantization to your mode
 ## A16W4 WeightOnly Quantization with GPTQ
 
 ```python
-from torchao.quantization.GPTQ import Int4WeightOnlyGPTQQuantizer, InputRecorder, TransformerEvalWrapper
+from torchao._eval import InputRecorder, TransformerEvalWrapper
+from torchao.quantization.GPTQ import Int4WeightOnlyGPTQQuantizer
 precision = torch.bfloat16
 device = "cuda"
 checkpoint_file_name = "../gpt-fast/checkpoints/meta-llama/Llama-2-7b-chat-hf/model.pth"
````
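Since the README hunk only shows the import change, here is a minimal sketch of how the two imports are used together downstream, following the calibrate-then-quantize flow this README documents. The `InputRecorder` arguments, the `record_inputs`/`get_inputs` chain, and the `Int4WeightOnlyGPTQQuantizer` and `TransformerEvalWrapper` signatures are assumptions based on this era of the torchao GPTQ README, not a verified API reference; `model` and `tokenizer` are assumed to be loaded as in the elided part of the snippet.

```python
# A minimal sketch, not a verified example: the signatures below are
# assumptions based on this era of the torchao GPTQ README.
import torch

from torchao._eval import InputRecorder, TransformerEvalWrapper
from torchao.quantization.GPTQ import Int4WeightOnlyGPTQQuantizer

precision = torch.bfloat16
device = "cuda"

# GPTQ hyperparameters and calibration settings (hypothetical values,
# mirroring the README's style).
blocksize = 128
percdamp = 0.01
groupsize = 128
calibration_tasks = ["wikitext"]  # lm_eval task(s) used for calibration
calibration_limit = 1
calibration_seq_length = 100
pad_calibration_inputs = False

# `model` and `tokenizer` are assumed loaded from checkpoint_file_name as in
# the elided part of the README snippet (gpt-fast Transformer + tokenizer).
inputs = InputRecorder(
    tokenizer,
    calibration_seq_length,
    pad_calibration_inputs,
    model.config.vocab_size,
).record_inputs(
    calibration_tasks,
    calibration_limit,
).get_inputs()

# Run GPTQ over the recorded calibration inputs to produce an int4
# weight-only quantized model.
quantizer = Int4WeightOnlyGPTQQuantizer(blocksize, percdamp, groupsize)
model = quantizer.quantize(model, inputs).to(device=device)

# Optionally score the quantized model on the same lm_eval tasks
# (wrapper arguments are likewise an assumption).
TransformerEvalWrapper(
    model,
    tokenizer,
    model.config.block_size,
    device=device,
).run_eval(calibration_tasks, calibration_limit)
```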