Commit
[fix] Gptq dependency (deepjavalibrary#1137)
KexinFeng authored Oct 3, 2023
Co-authored-by: KexinFeng <fenkexin@amazon.com>
1 parent f4917a6 commit e7afbe4
Showing 5 changed files with 21 additions and 16 deletions.
17 changes: 8 additions & 9 deletions
@@ -44,15 +44,14 @@ def __init__(self, model_id_or_path, device, properties, **kwargs):
             properties.get("max_rolling_batch_prefill_tokens"))
         tensor_parallel_degree = int(
             properties.get("tensor_parallel_degree", None))
-        args = EngineArgs(
-            model=model_id_or_path,
-            tensor_parallel_size=tensor_parallel_degree,
-            dtype=DTYPE_MAPPER[self.dtype],
-            seed=0,
-            max_num_batched_tokens=max_batched_prefill_tokens,
-            trust_remote_code=kwargs.get("trust_remote_code", False),
-            quantization=properties.get("quantize", None)
-        )
+        args = EngineArgs(model=model_id_or_path,
+                          tensor_parallel_size=tensor_parallel_degree,
+                          dtype=DTYPE_MAPPER[self.dtype],
+                          seed=0,
+                          max_num_batched_tokens=max_batched_prefill_tokens,
+                          trust_remote_code=kwargs.get("trust_remote_code",
+                                                       False),
+                          quantization=properties.get("quantize", None))
         self.engine = LLMEngine.from_engine_args(args)
         self.request_cache = OrderedDict()

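For context, a minimal sketch of how an engine built from these EngineArgs is typically driven; it follows vLLM's LLMEngine API as it stood around this release, and the model id, request id, and prompt below are illustrative stand-ins, not values from this commit.

from vllm import EngineArgs, LLMEngine, SamplingParams

# Illustrative configuration; the handler above fills these from properties.
args = EngineArgs(model="bigscience/bloom-560m",
                  tensor_parallel_size=1,
                  dtype="float16",
                  seed=0,
                  quantization=None)
engine = LLMEngine.from_engine_args(args)

# Queue one request, then step the engine until it finishes.
engine.add_request("req-0", "Deep learning is", SamplingParams(max_tokens=32))
while engine.has_unfinished_requests():
    for request_output in engine.step():  # one scheduling/decoding iteration
        if request_output.finished:
            print(request_output.outputs[0].text)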
10 changes: 6 additions & 4 deletions engines/python/setup/djl_python/tests/test_scheduler_bloom.py
@@ -15,7 +15,8 @@ class TestSchedulerBloom(unittest.TestCase):
     def test_lm_block(self):
         model_id = "bigscience/bloom-560m"
         model = AutoModelForCausalLM.from_pretrained(
-            model_id, device_map="auto" if global_device == "cuda" else "cpu")
+            model_id,
+            device_map="auto" if global_device.type == "cuda" else "cpu")

         device = model.device
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -57,7 +58,8 @@ def test_lm_block(self):
     def test_contrastive_scheduler(self):
         model_id = "bigscience/bloom-560m"
         model = BloomForCausalLM.from_pretrained(
-            model_id, device_map="auto" if global_device == "cuda" else "cpu")
+            model_id,
+            device_map="auto" if global_device.type == "cuda" else "cpu")
         device = model.device
         tokenizer = AutoTokenizer.from_pretrained(model_id,
                                                   padding_side='left')
@@ -135,7 +137,7 @@ def test_greedy_scheduler_llama(self):
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
             trust_remote_code=True,
-            device_map="auto" if global_device == "cuda" else "cpu")
+            device_map="auto" if global_device.type == "cuda" else "cpu")
         device = model.device

         lm_block = HuggingfaceBlock(model)
@@ -200,7 +202,7 @@ def test_greedy_scheduler_llama2_gptq(self):
             model_name,
             trust_remote_code=False,
             revision="main",
-            device_map="auto" if global_device == "cuda" else "cpu")
+            device_map="auto" if global_device.type == "cuda" else "cpu")
         device = model.device

         lm_block = HuggingfaceBlock(model)
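Every hunk above makes the same fix: global_device is a torch.device object, and comparing the object itself to the string "cuda" is unreliable, so these tests could silently take the CPU branch on a GPU host. A two-line sketch of the distinction, assuming only that global_device is a torch.device:

import torch

dev = torch.device("cuda:0")
print(dev == "cuda")       # device-to-string comparison; often False even on GPU
print(dev.type == "cuda")  # .type is the plain string "cuda", so this is True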
1 change: 0 additions & 1 deletion engines/python/setup/djl_python/transformers_neuronx.py
@@ -70,7 +70,6 @@ def __init__(self) -> None:
         self.rolling_batch = None
         self.load_in_8bit = False

-
     def init_load_path(self, model_type):
         path = os.environ.get("SERVING_DOWNLOAD_DIR")
         folder = f"inf2_{model_type}_{self.amp}"
5 changes: 4 additions & 1 deletion serving/docker/deepspeed.Dockerfile
@@ -29,6 +29,8 @@ ARG transformers_version=4.33.2
 ARG accelerate_version=0.23.0
 ARG diffusers_version=0.16.0
 ARG bitsandbytes_version=0.41.1
+ARG optimum_version=1.13.2
+ARG auto_gptq_version=0.4.2

 EXPOSE 8080
@@ -69,7 +71,8 @@ RUN apt-get update && \
     ${deepspeed_wheel} ${flash_attn_wheel} ${dropout_layer_norm_wheel} ${rotary_emb_wheel} ${flash_attn_2_wheel} \
     ${vllm_wheel} ${lmi_dist_wheel} ${seq_scheduler_wheel} ${peft_wheel} protobuf==${protobuf_version} \
     transformers==${transformers_version} \
-    mpi4py sentencepiece einops accelerate==${accelerate_version} bitsandbytes==${bitsandbytes_version}\
+    mpi4py sentencepiece einops accelerate==${accelerate_version} bitsandbytes==${bitsandbytes_version} \
+    optimum==${optimum_version} auto-gptq==${auto_gptq_version} \
     diffusers[torch]==${diffusers_version} opencv-contrib-python-headless safetensors scipy && \
     scripts/install_aitemplate.sh && \
     scripts/patch_oss_dlc.sh python && \
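The new optimum and auto-gptq pins are the actual GPTQ dependency fix: transformers 4.33 loads GPTQ-quantized checkpoints through optimum, which calls into auto-gptq, and the load fails with an import error if either package is missing. A minimal sketch of a load that exercises these packages; the model id is a hypothetical GPTQ checkpoint, not one named by this commit:

from transformers import AutoModelForCausalLM

# Hypothetical GPTQ checkpoint, used only for illustration. With optimum and
# auto-gptq installed, transformers detects the quantization_config in the
# checkpoint and builds the quantized layers via optimum/auto-gptq.
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GPTQ",
    revision="main",
    device_map="auto")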
4 changes: 3 additions & 1 deletion serving/docker/fastertransformer.Dockerfile
@@ -25,6 +25,8 @@ ARG protobuf_version=3.20.3
 ARG transformers_version=4.33.2
 ARG accelerate_version=0.23.0
 ARG bitsandbytes_version=0.41.1
+ARG optimum_version=1.13.2
+ARG auto_gptq_version=0.4.2

 EXPOSE 8080
@@ -65,7 +67,7 @@ RUN apt-get update && apt-get install -y wget git libnuma-dev zlib1g-dev rapidjs
     cd ../../ && rm -rf ompi && \
     scripts/install_python.sh ${python_version} && \
     pip3 install ${torch_wheel} ${ft_wheel} ${tb_wheel} ${peft_wheel} ${seq_scheduler_wheel} safetensors protobuf==${protobuf_version} && \
-    pip3 install transformers==${transformers_version} accelerate==${accelerate_version} bitsandbytes==${bitsandbytes_version} \
+    pip3 install transformers==${transformers_version} accelerate==${accelerate_version} bitsandbytes==${bitsandbytes_version} optimum==${optimum_version} auto-gptq==${auto_gptq_version} \
     scipy einops && \
     pip3 install cmake sentencepiece bfloat16 tiktoken && \
     pip3 cache purge && \
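A small sanity check, not part of the commit, that can be run inside either built image to confirm the new pins resolved to the expected versions:

from importlib.metadata import version

for pkg, expected in (("optimum", "1.13.2"), ("auto-gptq", "0.4.2")):
    installed = version(pkg)
    assert installed == expected, f"{pkg}: expected {expected}, got {installed}"
    print(f"{pkg}=={installed}")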
