-
This works with the OpenAI Python client and LM Studio:

```python
from openai import OpenAI

client = OpenAI(base_url='http://localhost:12345/v1', api_key='na')

# Use the following to list the available models:
# model_list = client.models.list()
# print(model_list)

chat_completion = client.chat.completions.create(
    model="C:\\AI LLMS\\gemma-3-4B-it-QAT-Q4_0.gguf",
    messages=[
        {
            "role": "user",
            "content": "Tell me something about large language models."
        }
    ],
    stream=True,
)

thinking_buf = ""
generation_buf = ""
in_think = False

for chunk in chat_completion:
    # print(chunk.choices[0].delta.content or "", end="")
    data = chunk.choices[0].delta.content or ""
    # Detect whether a think block starts in this chunk
    if "<think>" in data:
        in_think = True
        data = data.split("<think>", 1)[1]
        print("(🧠 Thinking started...) ", end="", flush=True)
    # Detect whether the think block ends in this chunk
    if "</think>" in data:
        head, data = data.split("</think>", 1)
        thinking_buf += head          # text before the tag is still reasoning
        print(head, end="", flush=True)
        in_think = False
        print(" (🧠 Thinking finished...) ", end="", flush=True)
    if in_think:
        thinking_buf += data
        print(data, end="", flush=True)
    else:
        generation_buf += data
        print(data, end="", flush=True)
```
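Note that splitting on `<think>`/`</think>` assumes each tag arrives intact within a single streamed chunk, which the server does not guarantee; a robust parser would buffer partial tags across chunk boundaries. Once the stream finishes, the two buffers hold the reasoning and the answer separately, e.g.:

```python
# Inspect the separated output after the stream has finished.
print("\n--- reasoning ---")
print(thinking_buf)
print("--- answer ---")
print(generation_buf)
```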
-
How do I get this from e.g. Qwen3, where prompting with /think adds a paragraph with reasoning?
How do I activate --jinja --chat-template-file "xxx"?
https://qwen.readthedocs.io/en/latest/
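For context, `--jinja` and `--chat-template-file` are llama.cpp `llama-server` options: `--jinja` enables Jinja chat-template processing, and `--chat-template-file` points it at a template file. A minimal sketch, with placeholder model and template paths:

```sh
llama-server -m Qwen3-4B-Q4_K_M.gguf --jinja --chat-template-file qwen3.jinja
```

Qwen3's /think and /no_think are soft switches placed directly in the prompt text, so with the client from the comment above the request could look like this (the model id is an assumption and depends on your server):

```python
# Hypothetical sketch: /think goes straight into the user message for Qwen3.
chat = client.chat.completions.create(
    model="qwen3",  # assumed model id; check client.models.list()
    messages=[{"role": "user", "content": "/think Why is the sky blue?"}],
    stream=True,
)
```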