#! /bin/bash
# Copyright (c) 2022 Graphcore Ltd. All rights reserved.
# Script to be sourced on launch of the Gradient Notebook

# Status this script finally exits with. It stays 0 unless a later check
# replaces it with a warning code.
EXIT_CODE=0
echo " Graphcore setup - Starting notebook setup"
# Capture whatever the helper prints on stdout; the value is compared against
# "0" further down to detect a machine with no usable IPUs.
DETECTED_NUMBER_OF_IPUS=$(python .gradient/available_ipus.py)
7
7
if [[ " $1 " == " test" ]]; then
@@ -19,7 +19,31 @@ if [[ "${DETECTED_NUMBER_OF_IPUS}" == "0" ]]; then
19
19
echo " https://docs.paperspace.com/contact-support/ "
20
20
echo " referencing the Notebook ID: ${PAPERSPACE_METRIC_WORKLOAD_ID:- unknown} "
21
21
echo " =============================================================================="
22
- exit -1
22
+ exit 255
23
+ fi
24
# Check the state of the partition.
# gc-info gets at most 5 seconds; stderr is folded into the captured text so
# that error messages can be matched as well as the normal listing.
GC_INFO_OUTPUT=$(timeout 5 gc-info -l 2>&1)
# Branch on grep's exit status rather than on a padded string (which is never
# empty), and quote the captured text so "[active]" / "[0]" are not treated as
# globs and line breaks are preserved.
if grep -q 'Partition.*\[active\]' <<<"${GC_INFO_OUTPUT}"; then
    echo " Graphcore setup - Partition check - passed"
elif grep -q 'partition is not ACTIVE' <<<"${GC_INFO_OUTPUT}"; then
    echo " =============================================================================="
    echo " IPU ERROR DETECTED"
    echo " =============================================================================="
    echo " IPU Partition is not active. This error indicates a problem with the "
    echo " hardware you are running on. Please contact Paperspace Support at "
    echo " https://docs.paperspace.com/contact-support/ "
    echo " referencing the Notebook ID: ${PAPERSPACE_METRIC_WORKLOAD_ID:- unknown} "
    echo " =============================================================================="
    # Show the raw device listing to help support diagnose the failure.
    gc-info -l
    # 254: partition reported as not active; the notebook is not started.
    exit 254
else
    echo " [WARNING] IPU Partition in an unrecognised state - Notebook will start normally but"
    echo " [WARNING] you may encounter hardware related errors. Get in touch with Paperspace and/or"
    echo " [WARNING] Graphcore support if you encounter unexpected behaviours or errors."
    echo " [WARNING] Output was: ${GC_INFO_OUTPUT} "
    # 253: start-up continues, but the script will finish with this status.
    EXIT_CODE=253
fi

# Publish the IPU count to child processes. IPU_ARG is assigned earlier in the
# script, outside this section.
export NUM_AVAILABLE_IPU="${IPU_ARG}"
@@ -53,3 +77,5 @@ echo "Graphcore setup - Starting Jupyter kernel"
# Run JupyterLab in the foreground. The continuation lines must follow each
# other directly, otherwise the trailing options are not passed to jupyter.
# allow_origin is the bare wildcard '*'; a padded value would not be treated
# as "any origin".
jupyter lab --allow-root --ip=0.0.0.0 --no-browser --ServerApp.trust_xheaders=True \
    --ServerApp.disable_check_xsrf=False --ServerApp.allow_remote_access=True \
    --ServerApp.allow_origin='*' --ServerApp.allow_credentials=True

# Finish with the status recorded during setup (0, or a warning code), falling
# back to 0 if it was never set.
exit "${EXIT_CODE:-0}"
0 commit comments