-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Based on the kubeedge local-up script, which builds a local k8s cluster with kubeedge, our local-up script installs our package locally for simple development and as preparation for e2e tests. It does: 1. build the gm/lc/worker images. 2. download the kubeedge source code and run its local-up script. 3. prepare our k8s env. 4. configure gm and start it. 5. start lc. 6. add cleanup. For the cleanup, it needs to do our cleanups before the kubeedge cleanup, otherwise the lc cleanup (via kubectl delete) gets stuck and the lc container keeps running.
- Loading branch information
llhuii
committed
Jan 15, 2021
1 parent
5bf29f7
commit 4b2cb6c
Showing
2 changed files
with
374 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# build directories | ||
tmp | ||
_output | ||
*-tmp | ||
|
||
# dot files | ||
.* | ||
|
||
hack/local-up.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,365 @@ | ||
#!/bin/bash

# local-up.sh: bring up a local KubeEdge cluster plus the Neptune
# components (GM/LC) for development and pre-e2e testing.
# Set NO_CLEANUP=true to keep everything running after the script exits.

set -o errexit
set -o nounset
set -o pipefail

# Absolute path of the repository root (parent of this hack/ directory).
NEPTUNE_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd -P)"

cd "$NEPTUNE_ROOT"

# When true, cleanup() (also installed as the EXIT trap) does nothing.
NO_CLEANUP=${NO_CLEANUP:-false}

# Image coordinates for the locally built images; the localhost/ prefix
# indicates they are side-loaded into nodes, not pushed to a registry.
IMAGE_REPO=localhost/edgeai-neptune/neptune
IMAGE_TAG=localup

# local k8s cluster name for local-up-kubeedge.sh
CLUSTER_NAME=nt
MASTER_NODENAME=${CLUSTER_NAME}-control-plane
# NOTE(review): EDGE_NODENAME is not referenced below — presumably consumed
# by the sourced kubeedge scripts; confirm before removing.
EDGE_NODENAME=edge-node
NAMESPACE=nt

# KubeEdge git ref to clone, and the scratch directory used for its
# sources, the kubeconfig and the generated gm config.
KUBEEDGE_VERSION=master
TMP_DIR=local-up-tmp

# Ports GM and LC listen on (pods use hostNetwork, so these bind on nodes).
GM_BIND_PORT=9000
LC_BIND_PORT=9100
||
# Map `uname -m` machine names to the Go/container architecture names
# used by k8s/kubeedge image tags; unknown machines pass through as-is.
arch() {
  local arch
  arch=$(uname -m)
  case "$arch" in
    x86_64) arch=amd64;;
    aarch64) arch=arm64;;  # 64-bit ARM is "arm64" in Go arch naming
    *);;
  esac
  echo "$arch"
}
|
||
# Clone kubeedge at $KUBEEDGE_VERSION (overridable via $1) into ./kubeedge.
# No-op when the directory already exists, so re-runs reuse the checkout.
download_and_extract_kubeedge() {

  [ -d kubeedge ] && return
  local version=${1:-$KUBEEDGE_VERSION}

  # the master branch can't work with git clone --depth 1
  git clone -b $version https://github.com/kubeedge/kubeedge
  return

  # unreachable by design — kept as a note: the release archive can't be
  # used since local-up-kubeedge.sh depends on git tags
  # https://github.com/kubeedge/kubeedge/archive/${version}.tar.gz
}
|
||
# Print the pid of the given kubeedge binary (default: edgecore).
# A process counts only if our scratch dir ($TMP_DIR) appears in its
# command line, which distinguishes this local-up instance's daemons.
get_kubeedge_pid() {
  local wanted=${1:-edgecore}
  ps -e -o pid,comm,args |
    grep -F "$TMP_DIR" |
    awk -v bin="$wanted" '$2 == bin { print $1 }'
}
|
||
# Clone kubeedge and run its local-up script in a NEW SESSION, wait until
# the kind control-plane container and edgecore are both up, and register
# cleanup that kills the kubeedge session plus any leftover daemons.
# Sets globals: KUBEEDGE_ROOT_PID.
localup_kubeedge() {
  pushd $TMP_DIR
  download_and_extract_kubeedge
  # without setsid, hitting ctrl-c would terminate edgecore/cloudcore
  # before our cleanup is called.
  # but we need cloudcore/edgecore alive to clean our containers (mainly lc),
  # so start a new session to run local-up-kubeedge.sh
  setsid bash -c "
    cd kubeedge
    # no use ENABLE_DAEMON=true since it has not-fully-cleanup problem.
    TIMEOUT=90 CLUSTER_NAME=$CLUSTER_NAME ENABLE_DAEMON=false
    source hack/local-up-kubeedge.sh
  " &
  KUBEEDGE_ROOT_PID=$!

  # NOTE: single-quoted on purpose — $KUBEEDGE_ROOT_PID and $(...) expand
  # later, when cleanup() evals this string, not now.
  add_cleanup '
    echo "found kubeedge pid, kill it: $KUBEEDGE_ROOT_PID"
    for((i=0;i<60;i++)); do
      ((i%15==0)) && kill "$KUBEEDGE_ROOT_PID"
      kill -0 "$KUBEEDGE_ROOT_PID" || break
      echo "waiting for $KUBEEDGE_ROOT_PID exists"
      sleep 1
    done
    # sometimes cloudcore/edgecore cant be stopped(one kill command
    # local-up-kubeedge.sh is not enough),
    # so to ensure this cleanup we clean it manully.
    for bin in cloudcore edgecore; do
      pid=$(get_kubeedge_pid $bin)
      if [ -n "$pid" ]; then
        echo "found $bin: $pid, kill it"
        kill $pid
        kill $pid
      fi
    done
  '

  # wait ${MASTER_NODENAME} container to be ready
  while ! docker ps --filter=name=${MASTER_NODENAME} | grep -q ${MASTER_NODENAME}; do
    # kill -0 fails (and errexit aborts us) if the kubeedge local-up
    # process already exited
    kill -0 "$KUBEEDGE_ROOT_PID"
    sleep 3
  done

  # wait for edgecore to come up
  while [ -z "$(get_kubeedge_pid edgecore)" ]; do
    # errexit aborts us when the kubeedge local-up pid exited
    kill -0 "$KUBEEDGE_ROOT_PID"
    sleep 3
  done

  local parent=$$
  {
    # background healthcheck for the kubeedge local-up pid:
    # if it died, interrupt the main script, which triggers cleanup.
    while true; do
      if ! kill -0 "$KUBEEDGE_ROOT_PID"; then
        kill -INT $parent
        break
      fi
      sleep 1
    done
  }&
  popd

}
|
||
# Build the container image for each named component (e.g. gm, lc) via the
# Makefile, and record its full image reference in the per-component
# global ${COMPONENT}_IMAGE variable (GM_IMAGE, LC_IMAGE, ...).
build_component_image() {
  local bin
  for bin; do
    echo "building $bin image"
    make -C "${NEPTUNE_ROOT}" ${bin}image IMAGE_REPO=$IMAGE_REPO IMAGE_TAG=$IMAGE_TAG
    # printf -v instead of eval: same dynamic assignment, but the value is
    # never re-parsed by the shell.
    printf -v "${bin^^}_IMAGE" '%s' "${IMAGE_REPO}/${bin}:${IMAGE_TAG}"
  done
  # no clean up for images
}
|
||
# Build the base images workers run on (currently only TensorFlow 1.15)
# and publish the imageHub YAML fragment used by the GM config.
# Sets globals: WORKER_TF1_IMAGE, WORKER_IMAGE_HUB.
build_worker_base_images() {
  echo "building worker base images"
  # tensorflow 1.15 base image
  WORKER_TF1_IMAGE=$IMAGE_REPO/worker-tensorflow:1.15
  docker build -t $WORKER_TF1_IMAGE -f build/worker/base_images/tensorflow/tensorflow-1.15.Dockerfile .

  # one "'framework:version': image" mapping line per base image
  WORKER_IMAGE_HUB="'tensorflow:1.15': $WORKER_TF1_IMAGE"
  # add more base images here when needed
}
|
||
# Side-load the locally built GM image into the kind control-plane node's
# containerd (k8s.io namespace) so its pod can start without a registry.
# Only $GM_IMAGE is loaded here; LC is scheduled to the edge node instead
# (see start_lc) — presumably its image is pulled/available there via
# edgecore's runtime, verify against the kubeedge setup.
load_images_to_master() {
  local image
  for image in $GM_IMAGE; do
    docker save $image | docker exec -i $MASTER_NODENAME ctr --namespace k8s.io image import -
  done
}
|
||
# Point kubectl at the kind cluster and create neptune's k8s objects:
# CRDs, the working namespace and GM's RBAC; finally side-load images.
# Exports KUBECONFIG for every later kubectl/kind invocation.
prepare_k8s_env() {
  kind get kubeconfig --name $CLUSTER_NAME > $TMP_DIR/kubeconfig
  export KUBECONFIG=$(realpath $TMP_DIR/kubeconfig)
  # prepare our k8s environment
  # create these crds including dataset, model, joint-inference etc.
  kubectl apply -f build/crds/neptune/

  # gm, lc will be created in this namespace
  kubectl create namespace $NAMESPACE

  # create the cluster role for gm
  kubectl apply -f build/gm/rbac/neptune-roles.yaml
  kubectl create clusterrolebinding neptune-role-binding --clusterrole=neptune-role --serviceaccount=$NAMESPACE:default

  # double-quoted: $NAMESPACE expands now; runs (reversed) at cleanup time
  add_cleanup "
    kubectl delete clusterrolebinding neptune-role-binding
    kubectl delete -f build/gm/rbac/neptune-roles.yaml
    kubectl delete -f build/crds/neptune/
    kubectl delete namespace $NAMESPACE
  "
  load_images_to_master
}
|
||
# Render the GM config file, publish it as a configmap, and run GM as a
# pod pinned to the master node with host networking.
# Sets globals: GM_IP, GM_ADDRESS (consumed by start_lc).
start_gm() {
  # config gm and start as pod

  pushd $TMP_DIR

  local gm_node_name=${MASTER_NODENAME}
  local gm_pod_name=gm-pod

  # prepare gm config; WORKER_IMAGE_HUB comes from build_worker_base_images
  cat > gmconfig <<EOF
kubeConfig: ""
namespace: ""
imageHub:
  $WORKER_IMAGE_HUB
websocket:
  port: $GM_BIND_PORT
localController:
  server: http://localhost:$LC_BIND_PORT
EOF

  add_cleanup "kubectl delete cm config -n $NAMESPACE"

  # create configmaps: kubeconfig, gm config
  kubectl create -n $NAMESPACE configmap config --from-file=gmconfig

  add_cleanup "kubectl delete pod $gm_pod_name -n $NAMESPACE"

  # start gm as pod with specified node name
  # TODO: create a k8s service, but kubeedge can't support this.
  kubectl create -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: $gm_pod_name
  namespace: $NAMESPACE
spec:
  restartPolicy: OnFailure
  hostNetwork: true
  nodeName: $gm_node_name
  containers:
  - name: gm
    image: $GM_IMAGE
    command: ["neptune-gm", "--config", "/config/gmconfig", "-v2"]
    volumeMounts:
    - name: config
      mountPath: /config
  volumes:
  - name: config
    configMap:
      name: config
EOF

  # LC connects back to GM at this host-network address (node internal IP).
  GM_IP=$(kubectl get node $gm_node_name -o jsonpath='{ .status.addresses[?(@.type=="InternalIP")].address }')
  GM_ADDRESS=$GM_IP:$GM_BIND_PORT

  add_debug_info "see GM status: kubectl get pod -n $NAMESPACE $gm_pod_name"
  popd
}
|
||
# Run LC as a daemonset restricted to edge nodes, with host networking and
# the node's root filesystem mounted at /rootfs.
# Reads globals: GM_ADDRESS (set by start_gm), LC_IMAGE, LC_BIND_PORT.
start_lc() {
  local lc_ds_name=edge-lc

  add_cleanup "kubectl delete ds $lc_ds_name -n $NAMESPACE"

  # start lc as daemonset
  kubectl create -f- <<EOF
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    k8s-app: neptune-lc
  name: $lc_ds_name
  namespace: $NAMESPACE
spec:
  selector:
    matchLabels:
      k8s-app: $lc_ds_name
  template:
    metadata:
      labels:
        k8s-app: $lc_ds_name
    spec:
      nodeSelector:
        # only schedule to edge node
        node-role.kubernetes.io/edge: ""
      containers:
      - name: $lc_ds_name
        image: $LC_IMAGE
        env:
        - name: GM_ADDRESS
          value: $GM_ADDRESS
        - name: BIND_PORT
          value: "$LC_BIND_PORT"
        - name: NODENAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        - name: ROOTFS_MOUNT_DIR
          # the value of ROOTFS_MOUNT_DIR is same with the mount path of volume
          value: /rootfs
        volumeMounts:
        - name: localcontroller
          mountPath: /rootfs
      volumes:
      - name: localcontroller
        hostPath:
          path: /
      restartPolicy: Always
      hostNetwork: true
EOF
  add_debug_info "see LC status: kubectl get ds -n $NAMESPACE $lc_ds_name"

}
|
||
# Registered cleanup command strings; cleanup() evals them in reverse order.
declare -a CLEANUP_CMDS=()

# Append one or more shell command strings to the cleanup list.
add_cleanup() {
  local cmd
  for cmd in "$@"; do
    CLEANUP_CMDS+=("$cmd")
  done
}
|
||
# Eval all registered CLEANUP_CMDS in reverse registration order.
# Skipped entirely when NO_CLEANUP=true. Errexit is suspended so one
# failing cleanup command does not prevent the remaining ones.
cleanup() {
  if [[ "${NO_CLEANUP}" = true ]]; then
    echo "No clean up..."
    return
  fi

  set +o errexit

  echo "Cleaning up neptune..."

  local i
  for ((i = ${#CLEANUP_CMDS[@]} - 1; i >= 0; i--)); do
    echo "calling ${CLEANUP_CMDS[i]}:"
    eval "${CLEANUP_CMDS[i]}"
  done

  set -o errexit
}
|
||
# Liveness check for the main wait loop; always succeeds for now.
# TODO: actually verify gm/lc/kubeedge are still running.
check_healthy() {
  return 0
}
|
||
# Accumulated hint lines, printed once the cluster is up.
debug_infos=""

# Append the arguments as one line to the debug hints.
add_debug_info() {
  local msg="$*"
  debug_infos+="$msg
"
}
|
||
# Pre-flight checks before bringing anything up; no-op for now.
# TODO: verify required tools exist (docker, kind, kubectl, make, git).
check_prerequisites() {
  return 0
}
|
||
# ANSI escape sequences for colored status output.
NO_COLOR='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'

# Print the arguments in green; no trailing newline, escapes interpreted.
green_text() {
  printf '%b' "${GREEN}$*${NO_COLOR}"
}

# Print the arguments in red; no trailing newline, escapes interpreted.
red_text() {
  printf '%b' "${RED}$*${NO_COLOR}"
}
|
||
# Run cleanup on any exit path (including ctrl-c via errexit/INT):
# registered commands run in reverse registration order.
trap cleanup EXIT

# Initial call: CLEANUP_CMDS is still empty, so this only prints the
# banner (or the NO_CLEANUP notice).
cleanup

mkdir -p "$TMP_DIR"
add_cleanup 'rm -rf "$TMP_DIR"'

# 1. build the gm/lc images and the worker base images
build_component_image gm lc
build_worker_base_images

check_prerequisites

# 2. bring up kind + kubeedge (cloudcore/edgecore)
localup_kubeedge

# 3. create our CRDs/namespace/RBAC and side-load images
prepare_k8s_env

# 4/5. start gm, then lc (lc needs GM_ADDRESS from start_gm)
start_gm
start_lc

echo "Local Neptune cluster is $(green_text running).
Press $(red_text Ctrl-C) to shut it down:
$debug_infos
"

# Block until unhealthy (currently forever); cleanup runs via the EXIT trap.
while check_healthy; do sleep 5; done