Skip to content

Commit 4c48c64

Browse files
authored
Check the Partition state in setup (#30)
1 parent 88100f3 commit 4c48c64

File tree

1 file changed

+28
-2
lines changed

1 file changed

+28
-2
lines changed

setup.sh

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22
# Copyright (c) 2022 Graphcore Ltd. All rights reserved.
33
# Script to be sourced on launch of the Gradient Notebook
4-
4+
# Status the script finishes with; starts at 0 and may be overwritten by a
# later check that wants to report a warning code.
EXIT_CODE=0
printf '%s\n' "Graphcore setup - Starting notebook setup"
# Captures whatever .gradient/available_ipus.py prints on stdout.
DETECTED_NUMBER_OF_IPUS="$(python .gradient/available_ipus.py)"
77
if [[ "$1" == "test" ]]; then
@@ -19,7 +19,31 @@ if [[ "${DETECTED_NUMBER_OF_IPUS}" == "0" ]]; then
1919
echo " https://docs.paperspace.com/contact-support/ "
2020
echo " referencing the Notebook ID: ${PAPERSPACE_METRIC_WORKLOAD_ID:-unknown}"
2121
echo "=============================================================================="
22-
exit -1
22+
exit 255
23+
fi
24+
# Check the state of the partition
#
# Runs `gc-info -l` (bounded to 5 seconds by `timeout`), captures stdout and
# stderr together, and reacts to what it printed:
#   * "Partition ... [active]"   -> check passed, setup carries on.
#   * "partition is not ACTIVE"  -> error banner, raw gc-info listing, exit 254.
#   * anything else              -> warnings only; EXIT_CODE is set to 253 and
#                                   the notebook is still allowed to start.

#######################################
# Classify the text printed by `gc-info -l`.
# Arguments: $1 - captured gc-info output (stdout and stderr combined)
# Outputs:   one word on stdout: "active", "inactive" or "unknown"
# Returns:   0 always
#######################################
_gc_partition_state() {
  local output=$1
  # The output is matched as a quoted string with bash regexes. Piping an
  # unquoted `echo ${GC_INFO_OUTPUT}` into grep let the shell glob-expand the
  # text first, so the literal token "[active]" could be replaced by a
  # one-letter file name (a, c, t, i, v or e) found in the working directory
  # and a healthy partition would no longer be recognised.
  # [[:space:]] is used instead of a literal space because the text is no
  # longer whitespace-collapsed: tokens may be separated by spaces, tabs or
  # newlines.
  local -r active_re='Partition.*[[:space:]]\[active\]'
  local -r inactive_re='partition[[:space:]]+is[[:space:]]+not[[:space:]]+ACTIVE'

  if [[ "${output}" =~ ${active_re} ]]; then
    echo "active"
  elif [[ "${output}" =~ ${inactive_re} ]]; then
    echo "inactive"
  else
    echo "unknown"
  fi
}

GC_INFO_OUTPUT=$(timeout 5 gc-info -l 2>&1)
case "$(_gc_partition_state "${GC_INFO_OUTPUT}")" in
  active)
    echo "Graphcore setup - Partition check - passed"
    ;;
  inactive)
    echo "=============================================================================="
    echo " IPU ERROR DETECTED"
    echo "=============================================================================="
    echo " IPU Partition is not active. This error indicates a problem with the "
    echo "hardware you are running on. Please contact Paperspace Support at "
    echo " https://docs.paperspace.com/contact-support/ "
    echo " referencing the Notebook ID: ${PAPERSPACE_METRIC_WORKLOAD_ID:-unknown}"
    echo "=============================================================================="
    # Show the raw listing so it can be quoted in the support request.
    gc-info -l
    exit 254
    ;;
  *)
    echo "[WARNING] IPU Partition in an unrecognised state - Notebook will start normally but"
    echo "[WARNING] you may encounter hardware related errors. Get in touch with Paperspace and/or"
    echo "[WARNING] Graphcore support if you encounter unexpected behaviours or errors."
    echo "[WARNING] Output was: ${GC_INFO_OUTPUT}"
    # Non-fatal: remembered here and used as the script's final exit status.
    EXIT_CODE=253
    ;;
esac
2448

2549
export NUM_AVAILABLE_IPU=${IPU_ARG}
@@ -53,3 +77,5 @@ echo "Graphcore setup - Starting Jupyter kernel"
5377
# Start JupyterLab in the foreground with the server flags below; once it
# returns, finish with whatever status is stored in EXIT_CODE.
jupyter_flags=(
  --allow-root
  --ip=0.0.0.0
  --no-browser
  --ServerApp.trust_xheaders=True
  --ServerApp.disable_check_xsrf=False
  --ServerApp.allow_remote_access=True
  "--ServerApp.allow_origin=*"
  --ServerApp.allow_credentials=True
)
jupyter lab "${jupyter_flags[@]}"

exit $EXIT_CODE

0 commit comments

Comments
 (0)