move llava notebook to openvino genai #2445

Merged
merged 7 commits on Oct 21, 2024
Changes from 1 commit
use genai for model inference
eaidova committed Oct 15, 2024
commit 67b994a1d58184e230d70ea7a298004d5586c70f
136 changes: 79 additions & 57 deletions notebooks/llava-multimodal-chatbot/gradio_helper.py
@@ -5,10 +5,11 @@
 
 from PIL import Image
 from typing import Callable
 import gradio as gr
 import numpy as np
 import requests
-from threading import Thread
+from threading import Event, Thread
 from transformers import TextIteratorStreamer
+from queue import Queue
 
 example_image_urls = [
     (
@@ -24,72 +25,91 @@
     Image.open(requests.get(url, stream=True).raw).save(file_name)
 
 
-def make_demo_llava(model, processor):
+def make_demo_llava(model):
+    import openvino_genai
+    import openvino as ov
+
+    def read_image(path: str) -> ov.Tensor:
+        '''
+
+        Args:
+            path: The path to the image.
+
+        Returns: the ov.Tensor containing the image.
+
+        '''
+        pic = Image.open(path).convert("RGB")
+        image_data = np.array(pic.getdata()).reshape(1, 3, pic.size[1], pic.size[0]).astype(np.byte)
+        return ov.Tensor(image_data)
+
+    class TextQueue:
+        def __init__(self) -> None:
+            self.text_queue = Queue()
+            self.stop_signal = None
+            self.stop_tokens = []
+
+        def __call__(self, text):
+            self.text_queue.put(text)
+
+        def __iter__(self):
+            return self
+
+        def __next__(self):
+            value = self.text_queue.get()
+            if value == self.stop_signal or value in self.stop_tokens:
+                raise StopIteration()
+            else:
+                return value
+
+        def reset(self):
+            self.text_queue = Queue()
+
+        def end(self):
+            self.text_queue.put(self.stop_signal)
+
+
     def bot_streaming(message, history):
         print(f"message is - {message}")
         print(f"history is - {history}")
 
+        if not history:
+            model.start_chat()
+        generation_config = openvino_genai.GenerationConfig()
+        generation_config.max_new_tokens = 128
         files = message["files"] if isinstance(message, dict) else message.files
         message_text = message["text"] if isinstance(message, dict) else message.text
 
         image = None
         if files:
             # message["files"][-1] is a Dict or just a string
             if isinstance(files[-1], dict):
                 image = files[-1]["path"]
             else:
                 image = files[-1] if isinstance(files[-1], (list, tuple)) else files[-1].path
         else:
             # if there's no image uploaded for this turn, look for images in the past turns
             # kept inside tuples, take the last one
             for hist in history:
                 if type(hist[0]) == tuple:
                     image = hist[0][0]
         try:
             if image is None:
                 # Handle the case where image is None
                 raise gr.Error("You need to upload an image for Llama-3.2-Vision to work. Close the error and try again with an Image.")
         except NameError:
             # Handle the case where 'image' is not defined at all
             raise gr.Error("You need to upload an image for Llama-3.2-Vision to work. Close the error and try again with an Image.")
 
-        conversation = []
-        flag = False
-        for user, assistant in history:
-            if assistant is None:
-                # pass
-                flag = True
-                conversation.extend([{"role": "user", "content": []}])
-                continue
-            if flag == True:
-                conversation[0]["content"] = [{"type": "text", "text": f"{user}"}]
-                conversation.append({"role": "assistant", "text": assistant})
-                flag = False
-                continue
-            conversation.extend([{"role": "user", "content": [{"type": "text", "text": user}]}, {"role": "assistant", "text": assistant}])
-
-        conversation.append({"role": "user", "content": [{"type": "text", "text": f"{message_text}"}, {"type": "image"}]})
-        prompt = processor.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
-        print(f"prompt is -\n{prompt}")
-        image = Image.open(image)
-        inputs = processor(text=prompt, images=image, return_tensors="pt")
-
-        streamer = TextIteratorStreamer(
-            processor,
-            **{
-                "skip_special_tokens": True,
-                "skip_prompt": True,
-                "clean_up_tokenization_spaces": False,
-            },
-        )
-        generation_kwargs = dict(
-            inputs,
-            streamer=streamer,
-            max_new_tokens=1024,
-            do_sample=False,
-            temperature=0.0,
-            eos_token_id=processor.tokenizer.eos_token_id,
-        )
-
-        thread = Thread(target=model.generate, kwargs=generation_kwargs)
-        thread.start()
+        if image is not None:
+            image = read_image(image)
+        streamer = TextQueue()
+        stream_complete = Event()
+
+        def generate_and_signal_complete():
+            """
+            generation function for single thread
+            """
+            streamer.reset()
+            generation_kwargs = {
+                "prompt": message_text,
+                "generation_config": generation_config,
+                "streamer": streamer
+            }
+            if image is not None:
+                generation_kwargs["image"] = image
+            model.generate(**generation_kwargs)
+            stream_complete.set()
+            streamer.end()
+
+        t1 = Thread(target=generate_and_signal_complete)
+        t1.start()
 
         buffer = ""
         for new_text in streamer:
@@ -104,6 +124,8 @@ def bot_streaming(message, history):
             {"text": "How to make this pastry?", "files": ["./baklava.png"]},
         ],
         stop_btn=None,
+        undo_btn=None,
+        retry_btn=None,
         multimodal=True,
     )
     return demo
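
After this commit, make_demo_llava takes a single openvino_genai.VLMPipeline instead of a transformers model/processor pair. A minimal usage sketch (not part of this diff; the model directory and device below are placeholder assumptions):

    # Minimal sketch; "llava-1.5-7b-ov" and "CPU" are placeholders, not values from this PR.
    import openvino_genai

    from gradio_helper import make_demo_llava

    # VLMPipeline loads an OpenVINO-converted LLaVA model and handles
    # tokenization, chat templating, and generation internally.
    model = openvino_genai.VLMPipeline("llava-1.5-7b-ov", "CPU")

    demo = make_demo_llava(model)
    demo.launch()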
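The TextQueue class added above doubles as the openvino_genai streamer callback: the pipeline calls it once per decoded chunk of text, and a truthy return value requests early stopping (TextQueue.__call__ returns None, so generation continues). A bare-bones illustration of that contract, assuming a loaded VLMPipeline named pipe and an ov.Tensor produced by read_image:

    # Illustrative streamer only; `pipe` and `image_tensor` are assumed to exist.
    def print_streamer(subword: str) -> bool:
        # Called by the pipeline with each newly decoded chunk of text.
        print(subword, end="", flush=True)
        # False (or None) means keep generating; True requests an early stop.
        return False

    # pipe.generate("Describe the image", image=image_tensor, streamer=print_streamer)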