
Commit 7b5251d

adding a non static predict
1 parent 1cce380 commit 7b5251d

File tree

5 files changed: +25 −21 lines


dockerfiles/transformers/llm_transformer_nvidia/src/.ipynb_checkpoints/model-checkpoint.py

Lines changed: 10 additions & 8 deletions
@@ -62,25 +62,27 @@ def predict(self, request: dict, headers: dict) -> dict:
         Call LLM with retrieved context and return the response.
         """
         data = request["instances"][0]
-        query = data["query"]
-        context = data["context"]
+        query = data.get("query")
+        context = data.get("context", "")

         # ✅ Ensure correct predictor URL
         predictor_url = f"http://{self.predictor_host}/v1/chat/completions"
         logger.info(f"Sending request to LLM predictor at {predictor_url}")

+        # 🔥 Allow dynamic values for LLM parameters
         llm_payload = {
-            "model": "meta/llama-2-7b-chat",
+            "model": data.get("model", "meta/llama-2-7b-chat"),  # 🔹 Default to llama-2-7b-chat but allow override
             "messages": [
-                {"role": "system", "content": "You are an AI assistant."},
+                {"role": "system", "content": data.get("system", "You are an AI assistant.")},  # 🔹 Allow custom system messages
                 {"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"}
             ],
-            "temperature": 0.5,
-            "top_p": 1,
-            "max_tokens": 256,
-            "stream": False
+            "temperature": data.get("temperature", 0.5),  # 🔹 Use request value or default
+            "top_p": data.get("top_p", 1),
+            "max_tokens": int(data.get("max_tokens", 256)),
+            "stream": data.get("stream", False)
         }

+        # 🔹 Send request to LLM
         llm_response = requests.post(predictor_url, json=llm_payload, verify=False)

         if llm_response.status_code == 200:

dockerfiles/transformers/llm_transformer_nvidia/src/model.py

Lines changed: 10 additions & 8 deletions
@@ -62,25 +62,27 @@ def predict(self, request: dict, headers: dict) -> dict:
         Call LLM with retrieved context and return the response.
         """
         data = request["instances"][0]
-        query = data["query"]
-        context = data["context"]
+        query = data.get("query")
+        context = data.get("context", "")

         # ✅ Ensure correct predictor URL
         predictor_url = f"http://{self.predictor_host}/v1/chat/completions"
         logger.info(f"Sending request to LLM predictor at {predictor_url}")

+        # 🔥 Allow dynamic values for LLM parameters
         llm_payload = {
-            "model": "meta/llama-2-7b-chat",
+            "model": data.get("model", "meta/llama-2-7b-chat"),  # 🔹 Default to llama-2-7b-chat but allow override
             "messages": [
-                {"role": "system", "content": "You are an AI assistant."},
+                {"role": "system", "content": data.get("system", "You are an AI assistant.")},  # 🔹 Allow custom system messages
                 {"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"}
             ],
-            "temperature": 0.5,
-            "top_p": 1,
-            "max_tokens": 256,
-            "stream": False
+            "temperature": data.get("temperature", 0.5),  # 🔹 Use request value or default
+            "top_p": data.get("top_p", 1),
+            "max_tokens": int(data.get("max_tokens", 256)),
+            "stream": data.get("stream", False)
         }

+        # 🔹 Send request to LLM
         llm_response = requests.post(predictor_url, json=llm_payload, verify=False)

         if llm_response.status_code == 200:
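With this change, predict() no longer hard-codes the generation parameters: model, system, temperature, top_p, max_tokens, and stream can each be supplied per request and fall back to the previous defaults. A minimal sketch of a request that exercises the new optional fields; the extra field names mirror the data.get(...) keys above, and the endpoint URL is taken from the notebook output further down in this commit, so treat both as assumptions about the deployed service:

import requests

# Hypothetical request body: "query"/"context" are read by the transformer,
# the remaining keys override the defaults now pulled via data.get(...) in predict().
payload = {
    "instances": [
        {
            "query": "What is Kubeflow used for?",
            "context": "Kubeflow is a Kubernetes-native platform for ML workflows.",
            "model": "meta/llama-2-7b-chat",           # optional override
            "system": "You are a concise AI assistant.",
            "temperature": 0.2,
            "top_p": 0.9,
            "max_tokens": 128,
            "stream": False,
        }
    ]
}

# Endpoint assumed from the notebook cell output shown below.
resp = requests.post(
    "http://llm-predictor.christensenc3526/v1/models/llm:predict",
    json=payload,
    timeout=600,
)
print(resp.status_code)
print(resp.json())

Any field omitted from the instance simply falls back to the same values the transformer used before this commit.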

manifests/Inference/GPU/.ipynb_checkpoints/llm_inference_nvidia-checkpoint.yml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ spec:
   transformer:
     timeout: 600
     containers:
-      - image: chasechristensen/transformer-nvidia-nim:0.7
+      - image: chasechristensen/transformer-nvidia-nim:0.8
         imagePullPolicy: Always
         resources:
           requests:
manifests/Inference/GPU/llm_inference_nvidia.yml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ spec:
   transformer:
     timeout: 600
     containers:
-      - image: chasechristensen/transformer-nvidia-nim:0.7
+      - image: chasechristensen/transformer-nvidia-nim:0.8
         imagePullPolicy: Always
         resources:
           requests:

notebooks/Tiledb_doc_prep.ipynb

Lines changed: 3 additions & 3 deletions
@@ -213,16 +213,16 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 229,
+    "execution_count": 230,
     "id": "f65b0d34-eeb4-44d3-be3e-98d033db8a58",
     "metadata": {},
     "outputs": [
      {
       "name": "stdout",
       "output_type": "stream",
       "text": [
-       "<Response [500]>\n",
-       "{\"error\":\"HTTPStatusError : {'detail': 'Not Found'}, '404 Not Found' for url 'http://llm-predictor.christensenc3526/v1/models/llm:predict'\"}\n"
+       "<Response [200]>\n",
+       "{\"predictions\":[\" Hello! As an AI assistant, I'm here to help you understand Kubeflow and its technical advantage.\\n\\nKubeflow is indeed for machine learning (ML), specifically for data professionals who want to streamline their ML workflows on Kubernetes. Kubeflow provides a unified platform for ML lifecycle management, enabling data engineers and researchers to deploy, manage, and monitor ML models more efficiently.\\n\\nKubeflow's objectives are centered around making ML more accessible and automating the ML workflow, which is why it's often referred to as \\\"MLOps\\\" (Machine Learning Operations). By providing a Kubernetes-based platform, Kubeflow enables data professionals to leverage the power of Kubernetes to manage their ML workflows, including data ingestion, model training, and deployment.\\n\\nKubeflow Pipelines are an essential component of the platform, allowing data professionals to define, execute, and monitor ML workflows in a repeatable and scalable manner. With Kubeflow Pipelines, data professionals can automate their ML workflows, making it easier to manage and deploy ML models across different environments.\\n\\nIn summary, Kub\"]}\n"
       ]
      }
     ],
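The re-run cell now returns a 200 with a KServe-style predictions list instead of the earlier 404. A minimal sketch of unpacking that response shape on the client side, assuming the body looks like the output above:

import json

# Hypothetical parsing of a response shaped like the cell output above.
raw = '{"predictions": [" Hello! As an AI assistant, I\'m here to help..."]}'
body = json.loads(raw)
answer = body["predictions"][0]   # first generated completion
print(answer.strip())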

0 commit comments
