Commit 61e0d16

Fix export error with trust_remote_code (#43)
* Fix export error with trust_remote_code

* Remove legacy checks

alessandropalla authored Jun 5, 2024
1 parent c26443e
Showing 4 changed files with 8 additions and 13 deletions.
examples/phi-3.py (3 changes: 1 addition & 2 deletions)

```diff
@@ -4,7 +4,7 @@
 #
 
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TextStreamer
+from transformers import AutoTokenizer, pipeline, TextStreamer
 import intel_npu_acceleration_library as npu_lib
 import warnings
@@ -13,7 +13,6 @@
 model = npu_lib.NPUModelForCausalLM.from_pretrained(
     "microsoft/Phi-3-mini-4k-instruct",
     torch_dtype="auto",
-    trust_remote_code=True,
     dtype=npu_lib.int4,
 )
```
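
Since transformers gained native Phi-3 support, the example no longer needs trust_remote_code. Below is a minimal sketch of how the updated example script is expected to run end to end; the from_pretrained call comes from the diff, while the tokenizer, streamer, and pipeline wiring are assumptions:

```python
# Hypothetical usage sketch; only the from_pretrained call is taken from
# the diff above, the rest of the wiring is assumed.
from transformers import AutoTokenizer, TextStreamer, pipeline
import intel_npu_acceleration_library as npu_lib

model = npu_lib.NPUModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    torch_dtype="auto",
    dtype=npu_lib.int4,  # quantize weights to int4 for the NPU
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
streamer = TextStreamer(tokenizer, skip_prompt=True)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
pipe("What is an NPU?", max_new_tokens=128, streamer=streamer)
```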
intel_npu_acceleration_library/backend/qlinear.py (7 changes: 1 addition & 6 deletions)

```diff
@@ -29,16 +29,11 @@ def __init__(
             device (str): Target device, default to "NPU".
             dtype (np.dtype): weights datatype. Defaults to np.int8.
-        Raises:
-            RuntimeError: Quantized matmul requires input_channel to be a multiple of 8
         """
         super().__init__(profile, device)
         self.inC, self.outC = inC, outC
         self.batch = batch
-        if inC % 8 != 0:
-            raise RuntimeError(
-                "Quantized matmul requires input_channel to be a multiple of 8"
-            )
 
         input = self.parameter((self.batch, self.inC))
         out = self.linear(input, outC, inC, bias=False, wt_dtype=dtype)
         self.compile(out)
```
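
With the legacy check gone, the constructor no longer rejects input channels that are not a multiple of 8. A sketch under assumptions: the class is named QLinear and keeps the (inC, outC, batch, profile, device, dtype) parameters implied by the docstring above:

```python
# Sketch under assumptions: class name QLinear and the parameter order
# (inC, outC, batch, profile, device, dtype) implied by the docstring.
import numpy as np
from intel_npu_acceleration_library.backend.qlinear import QLinear

# inC = 300 is not a multiple of 8; before this commit the constructor
# raised "Quantized matmul requires input_channel to be a multiple of 8".
mm = QLinear(inC=300, outC=256, batch=16, dtype=np.int8)
```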
intel_npu_acceleration_library/modelling.py (5 changes: 5 additions & 0 deletions)

```diff
@@ -82,6 +82,7 @@ def from_pretrained(
         Raises:
             RuntimeError: Invalid class
+            AttributeError: Cannot export model with trust_remote_code=True
 
         Returns:
             torch.nn.Module: compiled mode
@@ -103,6 +104,10 @@
             )
             model = npu_lib.compile(model, dtype, training)
             if export:
+                if kwargs.get("trust_remote_code", False):
+                    raise AttributeError(
+                        "Cannot export model with trust_remote_code=True. Please set trust_remote_code=False or export=False"
+                    )
                 print(f"Exporting model {model_name_or_path} to {model_dir_path}")
                 os.makedirs(model_dir_path, exist_ok=True)
                 torch.save(model, model_path)
```
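
The new guard rejects exporting models loaded with trust_remote_code=True, presumably because export serializes the whole model with torch.save and pickled classes defined by remote code cannot be resolved on reload. A behavior sketch grounded in the diff above; treating export as a from_pretrained keyword is an assumption:

```python
# Behavior sketch of the new guard; the message text comes from the diff,
# the exact keyword handling of from_pretrained is assumed.
import intel_npu_acceleration_library as npu_lib

try:
    npu_lib.NPUModelForCausalLM.from_pretrained(
        "microsoft/Phi-3-mini-4k-instruct",
        trust_remote_code=True,
        export=True,  # export path calls torch.save on the model
    )
except AttributeError as err:
    print(err)
    # Cannot export model with trust_remote_code=True.
    # Please set trust_remote_code=False or export=False
```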
intel_npu_acceleration_library/nn/linear.py (6 changes: 1 addition & 5 deletions)

```diff
@@ -80,7 +80,7 @@ def fromTensor(
             dtype (torch.dtype): the desired datatype
 
         Raises:
-            RuntimeError: Quantized Linear requires input_channel to be a multiple of 8
             RuntimeError: dtype not supported
 
         Returns:
             Union[Linear, QuantizedLinear]: A NPU linear layer
@@ -95,10 +95,6 @@
             weights_quant = compress_to_i4(weights_quant)
             return QuantizedLinear(weights_quant, scale, bias)
         elif dtype == torch.int8:
-            if weight.shape[-1] % 8 != 0:
-                raise RuntimeError(
-                    "Quantized Linear requires input_channel to be a multiple of 8"
-                )
             weights_quant, scale = quantize_tensor(weight)
             return QuantizedLinear(weights_quant, scale, bias)
         else:
```
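
After this change the int8 branch, like the int4 branch above it, no longer requires the input channel to be a multiple of 8. A sketch assuming the fromTensor(weight, bias, dtype) static method on Linear suggested by the hunk header and docstring:

```python
# Sketch under assumptions: Linear.fromTensor(weight, bias, dtype) as the
# docstring in the diff suggests; shapes chosen to hit the removed check.
import torch
from intel_npu_acceleration_library.nn.linear import Linear

weight = torch.rand(256, 300)  # input_channel = 300, not a multiple of 8
layer = Linear.fromTensor(weight, None, torch.int8)
# Previously: RuntimeError("Quantized Linear requires input_channel to be
# a multiple of 8"); now it quantizes and returns a QuantizedLinear.
```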
