From cb3935a771b116820a3c095a5a4dac3c148995b5 Mon Sep 17 00:00:00 2001 From: llhuii Date: Tue, 12 Jan 2021 09:42:51 +0800 Subject: [PATCH] Add local-up.sh Developers can run `hack/local-up.sh` to set up a local environment including: 1. a local k8s cluster with a master node. 2. a kubeedge node. 3. our gm/lc. Based on the kubeedge-local-up script which builds a local k8s cluster and kubeedge, our local-up script installs our package locally for simply developing and preparing for e2e tests. It does: 1. build the gm/lc/worker images. 2. download kubeedge source code and run its localup script. 3. prepare our k8s env. 4. config gm config and start gm. 5. start lc. 6. add cleanup. For cleanup, it needs to do our cleanups before kubeedge cleanup otherwise lc cleanup (via kubectl delete) is stuck and lc container is kept running. --- .dockerignore | 8 + hack/local-up.sh | 451 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 459 insertions(+) create mode 100644 .dockerignore create mode 100644 hack/local-up.sh diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..c944c37 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +# temp build directories +_output + +tmp +*-tmp + +# dot files +.* diff --git a/hack/local-up.sh b/hack/local-up.sh new file mode 100644 index 0000000..cca6231 --- /dev/null +++ b/hack/local-up.sh @@ -0,0 +1,451 @@ +#!/bin/bash + +# Developers can run `hack/local-up.sh` to set up a local environment: +# 1. a local k8s cluster with a master node. +# 2. a kubeedge node. +# 3. our gm/lc. +# Based on the kubeedge-local-up script which builds a local k8s cluster and kubeedge, +# our local-up script installs our package locally for +# simply developing and preparing for e2e tests. +# It does: +# 1. build the gm/lc/worker images. +# 2. download kubeedge source code and run its localup script. +# 3. prepare our k8s env. +# 4. config gm config and start gm. +# 5. start lc. +# 6. add cleanup. 
+ +# For cleanup, it needs to do our cleanups before kubeedge cleanup. +# Otherwise lc cleanup (via kubectl delete) is stuck and lc is kept running. + +set -o errexit +set -o nounset +set -o pipefail + +NEPTUNE_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd -P)" + +cd "$NEPTUNE_ROOT" + +NO_CLEANUP=${NO_CLEANUP:-false} + +IMAGE_REPO=localhost/edgeai-neptune/neptune +IMAGE_TAG=localup + +# local k8s cluster name for local-up-kubeedge.sh +CLUSTER_NAME=neptune +MASTER_NODENAME=${CLUSTER_NAME}-control-plane +EDGE_NODENAME=edge-node +NAMESPACE=neptune + +KUBEEDGE_VERSION=master +TMP_DIR="$(realpath local-up-tmp)" + +GM_BIND_PORT=9000 +LC_BIND_PORT=9100 + +arch() { + local arch=$(uname -m) + case "$arch" in + x86_64) arch=amd64;; + *);; + esac + echo "$arch" +} + +download_and_extract_kubeedge() { + + [ -d kubeedge ] && return + local version=${1:-$KUBEEDGE_VERSION} + + # master branch can't work with git clone --depth 1 + git clone -b $version https://github.com/kubeedge/kubeedge + return + + # the archive file can't work since local-up-kubeedge.sh depends on git tags + # https://github.com/kubeedge/kubeedge/archive/${version}.tar.gz +} + +get_kubeedge_pid() { + ps -e -o pid,comm,args | + grep -F "$TMP_DIR" | + # match executable name and print the pid + awk -v bin="${1:-edgecore}" 'NF=$2==bin' +} + +localup_kubeedge() { + pushd $TMP_DIR >/dev/null + download_and_extract_kubeedge + # without setsid when hits ctrl-c, edgecore/cloudcore will be terminated + # before cleanup is called. + # but we need cloudcore/edgecore alive to clean our container(mainly lc), + # so here we start a new session to run local-up-kubeedge.sh + setsid bash -c " + cd kubeedge + + # no use ENABLE_DAEMON=true since it has not-fully-cleanup problem. + TIMEOUT=90 CLUSTER_NAME=$CLUSTER_NAME ENABLE_DAEMON=false + source hack/local-up-kubeedge.sh + " & + KUBEEDGE_ROOT_PID=$! + add_cleanup ' + # for the case sometimes kube-proxy container in local machine + # not cleanup. 
+ kubectl delete ds -n kube-system kube-proxy + + echo "found kubeedge pid, kill it: $KUBEEDGE_ROOT_PID" + for((i=0;i<60;i++)); do + ((i%15==0)) && kill "$KUBEEDGE_ROOT_PID" + kill -0 "$KUBEEDGE_ROOT_PID" || break + echo "waiting for $KUBEEDGE_ROOT_PID exists" + sleep 1 + done + # sometimes cloudcore/edgecore cant be stopped(one kill command + # local-up-kubeedge.sh is not enough), + # so to ensure this cleanup we clean it manully. + for bin in cloudcore edgecore; do + pid=$(get_kubeedge_pid $bin) + if [ -n "$pid" ]; then + echo "found $bin: $pid, kill it" + kill $pid + kill $pid + fi + done + ' + + # wait ${MASTER_NODENAME} container to be running + while ! docker ps --filter=name=${MASTER_NODENAME} | grep -q ${MASTER_NODENAME}; do + # errexit when kubeedge-local pid exited + kill -0 "$KUBEEDGE_ROOT_PID" + sleep 3 + done + + # wait edgecore + while [ -z "$(get_kubeedge_pid edgecore)" ]; do + # errexit when kubeedge-local pid exited + kill -0 "$KUBEEDGE_ROOT_PID" + sleep 3 + done + + local parent=$$ + { + # healthcheck for kubeedge-local pid + # if it died, we died. + while true; do + if ! kill -0 "$KUBEEDGE_ROOT_PID"; then + kill -INT $parent + break + fi + sleep 1 + done + }& + popd + +} + +build_component_image() { + local bin + for bin; do + echo "building $bin image" + make -C "${NEPTUNE_ROOT}" ${bin}image IMAGE_REPO=$IMAGE_REPO IMAGE_TAG=$IMAGE_TAG + eval ${bin^^}_IMAGE="'${IMAGE_REPO}/${bin}:${IMAGE_TAG}'" + done + # no clean up for images +} + +build_worker_base_images() { + echo "building worker base images" + # build tensorflow1.15 image + WORKER_TF1_IMAGE=$IMAGE_REPO/worker-tensorflow:1.15 + docker build -f build/worker/base_images/tensorflow/tensorflow-1.15.Dockerfile -t $WORKER_TF1_IMAGE . 
+ + WORKER_IMAGE_HUB="'tensorflow:1.15': $WORKER_TF1_IMAGE" + # add more base images +} + +load_images_to_master() { + local image + for image in $GM_IMAGE; do + # just use the docker-image command of kind instead of ctr + # docker save $image | docker exec -i $MASTER_NODENAME ctr --namespace k8s.io image import - + kind load --name $CLUSTER_NAME docker-image $image + done +} + +prepare_k8s_env() { + kind get kubeconfig --name $CLUSTER_NAME > $TMP_DIR/kubeconfig + export KUBECONFIG=$(realpath $TMP_DIR/kubeconfig) + # prepare our k8s environment + # create these crds including dataset, model, joint-inference etc. + kubectl apply -f build/crds/neptune/ + + # gm, lc will be created in this namespace + kubectl create namespace $NAMESPACE + + # create the cluster role for gm + kubectl apply -f build/gm/rbac/ + + add_cleanup " + kubectl delete -f build/crds/neptune/ + kubectl delete namespace $NAMESPACE --timeout=5s + " + load_images_to_master +} + +start_gm() { + # config gm and start as pod + + pushd $TMP_DIR >/dev/null + + local gm_node_name=${MASTER_NODENAME} + local gm_pod_name=gm-pod + + # prepare gm config + cat > gmconfig </dev/null + } + + " + + # start lc as daemonset + kubectl create -f- <=0;)); do + cmd=${CLEANUP_CMDS[idx]} + echo "calling $cmd:" + eval "$cmd" + done + + set -o errexit +} + +check_healthy() { + # TODO + true +} + +debug_infos="" +add_debug_info() { + debug_infos+="$@ +" +} + +check_prerequisites() { + # TODO + true +} + +NO_COLOR='\033[0m' +RED='\033[0;31m' +GREEN='\033[0;32m' +green_text() { + echo -ne "$GREEN$@$NO_COLOR" +} + +red_text() { + echo -ne "$RED$@$NO_COLOR" +} + +trap cleanup EXIT + +cleanup + +mkdir -p "$TMP_DIR" +add_cleanup 'rm -rf "$TMP_DIR"' + +build_component_image gm lc +build_worker_base_images + +check_prerequisites + +localup_kubeedge + +prepare_k8s_env + +start_gm +start_lc + +echo "Local Neptune cluster is $(green_text running). +Currently local-up script only support foreground running. 
+Press $(red_text Ctrl-C) to shut it down! + +You can use it with: kind export kubeconfig --name ${CLUSTER_NAME} + +$debug_infos +" + +while check_healthy; do sleep 5; done