Skip to content

Commit 8686a41

Browse files
Add accelerate workloads
Signed-off-by: kusumachalasani <kchalasa@redhat.com>
1 parent 76a6848 commit 8686a41

File tree

2 files changed

+17
-17
lines changed

2 files changed

+17
-17
lines changed

common/common_helper.sh

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -311,33 +311,33 @@ function benchmarks_install() {
311311

312312
if [ ${GPUS} -gt 0 ];then
313313
num_gpus=$((GPUS))
314-
# Commenting for now
315-
#if [ ${num_gpus} > 0 ]; then
316-
# echo "#######################################"
317-
# echo "Running HumanEval benchmark job in background"
318-
# echo
319-
# pushd AI-MLbenchmarks/human-eval >/dev/null
320-
# ./deploy.sh ${NAMESPACE}
321-
# check_err "ERROR: Human eval job failed to start, exiting"
322-
# popd >/dev/null
323-
# num_gpus=$((num_gpus - 1))
324-
#fi
314+
if [ ${num_gpus} -gt 0 ]; then
315+
echo "#######################################"
316+
echo "Running HumanEval benchmark job in background"
317+
echo
318+
pushd human-eval-benchmark/manifests >/dev/null
319+
sed -i 's/namespace: kruize-hackathon/namespace: "'"${NAMESPACE}"'"/' pvc.yaml
320+
sed -i 's/namespace: kruize-hackathon/namespace: "'"${NAMESPACE}"'"/' job.yaml
321+
oc apply -f pvc.yaml -n ${NAMESPACE}
322+
oc apply -f job.yaml -n ${NAMESPACE}
323+
check_err "ERROR: Human eval job failed to start, exiting"
324+
popd >/dev/null
325+
num_gpus=$((num_gpus - 1))
326+
fi
325327

326328
if [ ${num_gpus} -gt 0 ]; then
327329
echo "#######################################"
328330
echo "Running Training TTM benchmark job in background"
329-
echo
330331
pushd AI-MLbenchmarks/ttm >/dev/null
331332
echo ""
332-
#./run_ttm.sh ${NAMESPACE} >> ${LOG_FILE} &
333+
./run_ttm.sh ${NAMESPACE} >> ${LOG_FILE} &
333334
check_err "ERROR: Training ttm jobs failed to start, exiting"
334335
popd >/dev/null
335336
num_gpus=$((num_gpus - 1))
336337
fi
337338
if [ ${num_gpus} -gt 0 ]; then
338339
echo "#######################################"
339340
echo "Installing LLM-RAG benchmark into cluster"
340-
echo
341341
pushd AI-MLbenchmarks/llm-rag >/dev/null
342342
./deploy.sh ${NAMESPACE}
343343
check_err "ERROR: llm-rag benchmark failed to start, exiting"

monitoring/local_monitoring/create_human_eval_exp.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@
99
"kubernetes_objects": [
1010
{
1111
"type": "job",
12-
"name": "human-eval",
12+
"name": "human-eval-deployment-job",
1313
"namespace": "default",
1414
"containers": [
1515
{
16-
"container_image_name": "kruizehub/human-eval",
17-
"container_name": "human-eval"
16+
"container_image_name": "kruizehub/human-eval-deployment:latest",
17+
"container_name": "human-eval-benchmark"
1818
}
1919
]
2020
}

0 commit comments

Comments
 (0)