
Commit 7b5251d

adding a non static predict
1 parent 1cce380 commit 7b5251d

File tree

5 files changed: +25 −21 lines


dockerfiles/transformers/llm_transformer_nvidia/src/.ipynb_checkpoints/model-checkpoint.py

Lines changed: 10 additions & 8 deletions
@@ -62,25 +62,27 @@ def predict(self, request: dict, headers: dict) -> dict:
         Call LLM with retrieved context and return the response.
         """
         data = request["instances"][0]
-        query = data["query"]
-        context = data["context"]
+        query = data.get("query")
+        context = data.get("context", "")

         # ✅ Ensure correct predictor URL
         predictor_url = f"http://{self.predictor_host}/v1/chat/completions"
         logger.info(f"Sending request to LLM predictor at {predictor_url}")

+        # 🔥 Allow dynamic values for LLM parameters
         llm_payload = {
-            "model": "meta/llama-2-7b-chat",
+            "model": data.get("model", "meta/llama-2-7b-chat"),  # 🔹 Default to llama-2-7b-chat but allow override
             "messages": [
-                {"role": "system", "content": "You are an AI assistant."},
+                {"role": "system", "content": data.get("system", "You are an AI assistant.")},  # 🔹 Allow custom system messages
                 {"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"}
             ],
-            "temperature": 0.5,
-            "top_p": 1,
-            "max_tokens": 256,
-            "stream": False
+            "temperature": data.get("temperature", 0.5),  # 🔹 Use request value or default
+            "top_p": data.get("top_p", 1),
+            "max_tokens": int(data.get("max_tokens", 256)),
+            "stream": data.get("stream", False)
         }

+        # 🔹 Send request to LLM
         llm_response = requests.post(predictor_url, json=llm_payload, verify=False)

         if llm_response.status_code == 200:

dockerfiles/transformers/llm_transformer_nvidia/src/model.py

Lines changed: 10 additions & 8 deletions
@@ -62,25 +62,27 @@ def predict(self, request: dict, headers: dict) -> dict:
         Call LLM with retrieved context and return the response.
         """
         data = request["instances"][0]
-        query = data["query"]
-        context = data["context"]
+        query = data.get("query")
+        context = data.get("context", "")

         # ✅ Ensure correct predictor URL
         predictor_url = f"http://{self.predictor_host}/v1/chat/completions"
         logger.info(f"Sending request to LLM predictor at {predictor_url}")

+        # 🔥 Allow dynamic values for LLM parameters
         llm_payload = {
-            "model": "meta/llama-2-7b-chat",
+            "model": data.get("model", "meta/llama-2-7b-chat"),  # 🔹 Default to llama-2-7b-chat but allow override
             "messages": [
-                {"role": "system", "content": "You are an AI assistant."},
+                {"role": "system", "content": data.get("system", "You are an AI assistant.")},  # 🔹 Allow custom system messages
                 {"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"}
             ],
-            "temperature": 0.5,
-            "top_p": 1,
-            "max_tokens": 256,
-            "stream": False
+            "temperature": data.get("temperature", 0.5),  # 🔹 Use request value or default
+            "top_p": data.get("top_p", 1),
+            "max_tokens": int(data.get("max_tokens", 256)),
+            "stream": data.get("stream", False)
         }

+        # 🔹 Send request to LLM
         llm_response = requests.post(predictor_url, json=llm_payload, verify=False)

         if llm_response.status_code == 200:
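With this change, predict() no longer hard-codes the generation parameters: model, system, temperature, top_p, max_tokens, and stream can each be supplied per request and fall back to the previous defaults. A minimal sketch of a request that exercises the new optional fields; the extra field names mirror the data.get(...) keys above, and the endpoint URL is taken from the notebook output further down in this commit, so treat both as assumptions about the deployed service:

import requests

# Hypothetical request body: "query"/"context" are read by the transformer,
# the remaining keys override the defaults now pulled via data.get(...) in predict().
payload = {
    "instances": [
        {
            "query": "What is Kubeflow used for?",
            "context": "Kubeflow is a Kubernetes-native platform for ML workflows.",
            "model": "meta/llama-2-7b-chat",           # optional override
            "system": "You are a concise AI assistant.",
            "temperature": 0.2,
            "top_p": 0.9,
            "max_tokens": 128,
            "stream": False,
        }
    ]
}

# Endpoint assumed from the notebook cell output shown below.
resp = requests.post(
    "http://llm-predictor.christensenc3526/v1/models/llm:predict",
    json=payload,
    timeout=600,
)
print(resp.status_code)
print(resp.json())

Any field omitted from the instance simply falls back to the same values the transformer used before this commit.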

manifests/Inference/GPU/.ipynb_checkpoints/llm_inference_nvidia-checkpoint.yml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ spec:
   transformer:
     timeout: 600
     containers:
-      - image: chasechristensen/transformer-nvidia-nim:0.7
+      - image: chasechristensen/transformer-nvidia-nim:0.8
         imagePullPolicy: Always
         resources:
           requests:
manifests/Inference/GPU/llm_inference_nvidia.yml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ spec:
   transformer:
     timeout: 600
     containers:
-      - image: chasechristensen/transformer-nvidia-nim:0.7
+      - image: chasechristensen/transformer-nvidia-nim:0.8
         imagePullPolicy: Always
         resources:
           requests:

notebooks/Tiledb_doc_prep.ipynb

Lines changed: 3 additions & 3 deletions
@@ -213,16 +213,16 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 229,
+    "execution_count": 230,
     "id": "f65b0d34-eeb4-44d3-be3e-98d033db8a58",
     "metadata": {},
     "outputs": [
      {
       "name": "stdout",
       "output_type": "stream",
       "text": [
-       "<Response [500]>\n",
-       "{\"error\":\"HTTPStatusError : {'detail': 'Not Found'}, '404 Not Found' for url 'http://llm-predictor.christensenc3526/v1/models/llm:predict'\"}\n"
+       "<Response [200]>\n",
+       "{\"predictions\":[\" Hello! As an AI assistant, I'm here to help you understand Kubeflow and its technical advantage.\\n\\nKubeflow is indeed for machine learning (ML), specifically for data professionals who want to streamline their ML workflows on Kubernetes. Kubeflow provides a unified platform for ML lifecycle management, enabling data engineers and researchers to deploy, manage, and monitor ML models more efficiently.\\n\\nKubeflow's objectives are centered around making ML more accessible and automating the ML workflow, which is why it's often referred to as \\\"MLOps\\\" (Machine Learning Operations). By providing a Kubernetes-based platform, Kubeflow enables data professionals to leverage the power of Kubernetes to manage their ML workflows, including data ingestion, model training, and deployment.\\n\\nKubeflow Pipelines are an essential component of the platform, allowing data professionals to define, execute, and monitor ML workflows in a repeatable and scalable manner. With Kubeflow Pipelines, data professionals can automate their ML workflows, making it easier to manage and deploy ML models across different environments.\\n\\nIn summary, Kub\"]}\n"
       ]
      }
     ],
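The re-run cell now returns a 200 with a KServe-style predictions list instead of the earlier 404. A minimal sketch of unpacking that response shape on the client side, assuming the body looks like the output above:

import json

# Hypothetical parsing of a response shaped like the cell output above.
raw = '{"predictions": [" Hello! As an AI assistant, I\'m here to help..."]}'
body = json.loads(raw)
answer = body["predictions"][0]   # first generated completion
print(answer.strip())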

0 commit comments
