test_li_transformer.py
# Reference: initializing a LLaMA-7B-style model from a config
# (unused below, so kept commented out):
# from transformers import LlamaModel, LlamaConfig
# configuration = LlamaConfig()
# model = LlamaModel(configuration)
# configuration = model.config  # access the model configuration
# GGUF weights: https://huggingface.co/LiteLLMs/Meta-Llama-3-8B-GGUF
import os

import dotenv
import torch
from transformers import BitsAndBytesConfig, pipeline

# Load HF_TOKEN and other secrets from .env
dotenv.load_dotenv(dotenv_path=".env", override=True)

# Gated models require requesting access first:
# https://huggingface.co/meta-llama/Meta-Llama-3-8B
# https://huggingface.co/mistralai/Mistral-7B-v0.1
# model_id = "meta-llama/Meta-Llama-3-8B"
# Quantize to 4-bit to save GPU memory
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # store weights in 4-bit
    bnb_4bit_compute_dtype=torch.float16,  # run compute in fp16
    bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
)
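# A lighter alternative (a sketch, assuming bitsandbytes is installed):
# 8-bit quantization uses roughly twice the memory of 4-bit but can be
# faster per token since there is less dequantization work.
# quantization_config = BitsAndBytesConfig(load_in_8bit=True)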
# Gated-model variant (requires HF_TOKEN and the model_id above):
# pipe = pipeline(
#     "text-generation",
#     model=model_id,
#     model_kwargs={"torch_dtype": torch.bfloat16},
#     device_map="auto",
#     token=os.environ.get("HF_TOKEN"),
# )
model_name = "HuggingFaceH4/zephyr-7b-beta"
tokenizer_name = "HuggingFaceH4/zephyr-7b-beta"
pipe = pipeline(
"text-generation",
model=model_name,
tokenizer=tokenizer_name,
model_kwargs={"quantization_config": quantization_config},
)
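# Note: 4-bit bitsandbytes quantization needs a CUDA GPU. If the model does
# not fit on one device, pipeline() also accepts device_map="auto" to spread
# it across the available devices (a sketch of the same call with that option):
# pipe = pipeline(
#     "text-generation",
#     model=model_name,
#     tokenizer=tokenizer_name,
#     model_kwargs={"quantization_config": quantization_config},
#     device_map="auto",
# )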
# pipe = pipeline("text-generation", model="mistralai/Mistral-7B-v0.1")
print(pipe)  # inspect the loaded pipeline and its model/tokenizer
output = pipe("Hey how are you doing today?")
print(output)
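# The pipeline returns a list of dicts with a "generated_text" key. A minimal
# sketch of controlling length and sampling (standard text-generation kwargs;
# the values here are illustrative, not tuned):
# output = pipe(
#     "Hey how are you doing today?",
#     max_new_tokens=64,
#     do_sample=True,
#     temperature=0.7,
# )
# print(output[0]["generated_text"])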