Add local-up.sh

Based on the kubeedge local-up script, which builds a local k8s cluster and
kubeedge, our local-up script installs our package locally for simple
development and for preparing e2e tests.

It does:
1. build the gm/lc/worker images.
2. download the kubeedge source code and run its local-up script.
3. prepare our k8s env.
4. generate the gm config and start gm.
5. start lc.
6. register the cleanup steps.

For cleanup, our cleanup steps must run before the kubeedge cleanup;
otherwise the lc cleanup (via kubectl delete) gets stuck and the lc container
keeps running.
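
To try it out, a minimal invocation sketch (run from the repo root; NO_CLEANUP=true
is the script's own switch to skip the teardown on exit):

    bash hack/local-up.sh                   # bring up kubeedge, gm and lc; Ctrl-C tears it down
    NO_CLEANUP=true bash hack/local-up.sh   # keep everything running after exit, for debugging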
llhuii committed Jan 15, 2021
1 parent 5bf29f7 commit f2d1d99
Showing 2 changed files with 373 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .dockerignore
@@ -0,0 +1,8 @@
# temp build directories
_output

tmp
*-tmp

# dot files
.*
365 changes: 365 additions & 0 deletions hack/local-up.sh
@@ -0,0 +1,365 @@
#!/bin/bash

set -o errexit
set -o nounset
set -o pipefail

NEPTUNE_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd -P)"

cd "$NEPTUNE_ROOT"

NO_CLEANUP=${NO_CLEANUP:-false}

IMAGE_REPO=localhost/edgeai-neptune/neptune
IMAGE_TAG=localup

# local k8s cluster name for local-up-kubeedge.sh
CLUSTER_NAME=nt
MASTER_NODENAME=${CLUSTER_NAME}-control-plane
EDGE_NODENAME=edge-node
NAMESPACE=nt

KUBEEDGE_VERSION=master
TMP_DIR=local-up-tmp

GM_BIND_PORT=9000
LC_BIND_PORT=9100

arch() {
  local arch=$(uname -m)
  case "$arch" in
    x86_64) arch=amd64;;
    *);;
  esac
  echo "$arch"
}

download_and_extract_kubeedge() {

  [ -d kubeedge ] && return
  local version=${1:-$KUBEEDGE_VERSION}

  # the master branch doesn't work with git clone --depth 1
  git clone -b $version https://github.com/kubeedge/kubeedge
  return

  # the release archive doesn't work either, since local-up-kubeedge.sh depends on git tags
  # https://github.com/kubeedge/kubeedge/archive/${version}.tar.gz
}

get_kubeedge_pid() {
  ps -e -o pid,comm,args |
    grep -F "$TMP_DIR" |
    # match the executable name and print only the pid:
    # NF=$2==bin sets NF to 1 on a match, truncating the record to the pid
    # field, and the truthy value triggers awk's default print action
    awk -v bin="${1:-edgecore}" 'NF=$2==bin'
}

localup_kubeedge() {
  pushd $TMP_DIR >/dev/null
  download_and_extract_kubeedge
  # Without setsid, edgecore/cloudcore would be terminated on Ctrl-C
  # before our cleanup is called.
  # But we need cloudcore/edgecore alive to clean up our containers (mainly lc),
  # so start a new session to run local-up-kubeedge.sh.
  setsid bash -c "
    cd kubeedge
    # don't use ENABLE_DAEMON=true since it doesn't clean up completely.
    TIMEOUT=90 CLUSTER_NAME=$CLUSTER_NAME ENABLE_DAEMON=false
    source hack/local-up-kubeedge.sh
  " &
  KUBEEDGE_ROOT_PID=$!
  add_cleanup '
    echo "found kubeedge pid, kill it: $KUBEEDGE_ROOT_PID"
    for((i=0;i<60;i++)); do
      ((i%15==0)) && kill "$KUBEEDGE_ROOT_PID"
      kill -0 "$KUBEEDGE_ROOT_PID" || break
      echo "waiting for $KUBEEDGE_ROOT_PID to exit"
      sleep 1
    done
    # sometimes cloudcore/edgecore cannot be stopped (one kill of
    # local-up-kubeedge.sh is not enough),
    # so to ensure this cleanup we kill them manually.
    for bin in cloudcore edgecore; do
      pid=$(get_kubeedge_pid $bin)
      if [ -n "$pid" ]; then
        echo "found $bin: $pid, kill it"
        kill $pid
        kill $pid
      fi
    done
  '

  # wait for the ${MASTER_NODENAME} container to be ready
  while ! docker ps --filter=name=${MASTER_NODENAME} | grep -q ${MASTER_NODENAME}; do
    # errexit when the kubeedge local-up pid has exited
    kill -0 "$KUBEEDGE_ROOT_PID"
    sleep 3
  done

  # wait for edgecore
  while [ -z "$(get_kubeedge_pid edgecore)" ]; do
    # errexit when the kubeedge local-up pid has exited
    kill -0 "$KUBEEDGE_ROOT_PID"
    sleep 3
  done

  local parent=$$
  {
    # health check for the kubeedge local-up pid:
    # if it dies, we die.
    while true; do
      if ! kill -0 "$KUBEEDGE_ROOT_PID"; then
        kill -INT $parent
        break
      fi
      sleep 1
    done
  }&
  popd

}

build_component_image() {
  local bin
  for bin; do
    echo "building $bin image"
    make -C "${NEPTUNE_ROOT}" ${bin}image IMAGE_REPO=$IMAGE_REPO IMAGE_TAG=$IMAGE_TAG
    eval ${bin^^}_IMAGE="'${IMAGE_REPO}/${bin}:${IMAGE_TAG}'"
  done
  # no cleanup for images
}

build_worker_base_images() {
  echo "building worker base images"
  # build the tensorflow 1.15 image
  WORKER_TF1_IMAGE=$IMAGE_REPO/worker-tensorflow:1.15
  docker build -f build/worker/base_images/tensorflow/tensorflow-1.15.Dockerfile -t $WORKER_TF1_IMAGE .

  WORKER_IMAGE_HUB="'tensorflow:1.15': $WORKER_TF1_IMAGE"
  # add more base images here
}

load_images_to_master() {
  local image
  for image in $GM_IMAGE; do
    docker save $image | docker exec -i $MASTER_NODENAME ctr --namespace k8s.io image import -
  done
}

prepare_k8s_env() {
  kind get kubeconfig --name $CLUSTER_NAME > $TMP_DIR/kubeconfig
  export KUBECONFIG=$(realpath $TMP_DIR/kubeconfig)
  # prepare our k8s environment
  # create our crds, including dataset, model, joint-inference etc.
  kubectl apply -f build/crds/neptune/

  # gm and lc will be created in this namespace
  kubectl create namespace $NAMESPACE

  # create the cluster role for gm
  kubectl apply -f build/gm/rbac/neptune-roles.yaml
  kubectl create clusterrolebinding neptune-role-binding --clusterrole=neptune-role --serviceaccount=$NAMESPACE:default

  add_cleanup "
    kubectl delete clusterrolebinding neptune-role-binding
    kubectl delete -f build/gm/rbac/neptune-roles.yaml
    kubectl delete -f build/crds/neptune/
    kubectl delete namespace $NAMESPACE
  "
  load_images_to_master
}

start_gm() {
  # config gm and start it as a pod

  pushd $TMP_DIR >/dev/null

  local gm_node_name=${MASTER_NODENAME}
  local gm_pod_name=gm-pod

  # prepare the gm config
  cat > gmconfig <<EOF
kubeConfig: ""
namespace: ""
imageHub:
  $WORKER_IMAGE_HUB
websocket:
  port: $GM_BIND_PORT
localController:
  server: http://localhost:$LC_BIND_PORT
EOF

  add_cleanup "kubectl delete cm config -n $NAMESPACE"

  # create the config configmap from the gm config file
  kubectl create -n $NAMESPACE configmap config --from-file=gmconfig

  add_cleanup "kubectl delete pod $gm_pod_name -n $NAMESPACE"

  # start gm as a pod on the specified node
  # TODO: create a k8s service, but kubeedge can't support this yet.
  kubectl create -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: $gm_pod_name
  namespace: $NAMESPACE
spec:
  restartPolicy: OnFailure
  hostNetwork: true
  nodeName: $gm_node_name
  containers:
  - name: gm
    image: $GM_IMAGE
    command: ["neptune-gm", "--config", "/config/gmconfig", "-v2"]
    volumeMounts:
    - name: config
      mountPath: /config
  volumes:
  - name: config
    configMap:
      name: config
EOF

  GM_IP=$(kubectl get node $gm_node_name -o jsonpath='{ .status.addresses[?(@.type=="InternalIP")].address }')
  GM_ADDRESS=$GM_IP:$GM_BIND_PORT

  add_debug_info "see GM status: kubectl get pod -n $NAMESPACE $gm_pod_name"
  popd
}

start_lc() {
  local lc_ds_name=edge-lc

  add_cleanup "kubectl delete ds $lc_ds_name -n $NAMESPACE"

  # start lc as a daemonset
  kubectl create -f- <<EOF
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    k8s-app: neptune-lc
  name: $lc_ds_name
  namespace: $NAMESPACE
spec:
  selector:
    matchLabels:
      k8s-app: $lc_ds_name
  template:
    metadata:
      labels:
        k8s-app: $lc_ds_name
    spec:
      nodeSelector:
        # only schedule to the edge node
        node-role.kubernetes.io/edge: ""
      containers:
      - name: $lc_ds_name
        image: $LC_IMAGE
        env:
        - name: GM_ADDRESS
          value: $GM_ADDRESS
        - name: BIND_PORT
          value: "$LC_BIND_PORT"
        - name: NODENAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        - name: ROOTFS_MOUNT_DIR
          # the value of ROOTFS_MOUNT_DIR is the same as the mount path of the volume
          value: /rootfs
        volumeMounts:
        - name: localcontroller
          mountPath: /rootfs
      volumes:
      - name: localcontroller
        hostPath:
          path: /
      restartPolicy: Always
      hostNetwork: true
EOF
  add_debug_info "see LC status: kubectl get ds -n $NAMESPACE $lc_ds_name"

}

declare -a CLEANUP_CMDS=()
add_cleanup() {
  CLEANUP_CMDS+=("$@")
}

cleanup() {
  if [[ "${NO_CLEANUP}" = true ]]; then
    echo "No clean up..."
    return
  fi

  set +o errexit

  echo "Cleaning up neptune..."

  local idx=${#CLEANUP_CMDS[@]} cmd
  # run the registered cleanup commands in reverse order
  for((;--idx>=0;)); do
    cmd=${CLEANUP_CMDS[idx]}
    echo "calling $cmd:"
    eval "$cmd"
  done

  set -o errexit
}

check_healthy() {
  # TODO
  true
}

debug_infos=""
add_debug_info() {
  debug_infos+="$@
"
}

check_prerequisites() {
  # TODO
  true
}

NO_COLOR='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
green_text() {
  echo -ne "$GREEN$@$NO_COLOR"
}

red_text() {
  echo -ne "$RED$@$NO_COLOR"
}

trap cleanup EXIT

cleanup

mkdir -p "$TMP_DIR"
add_cleanup 'rm -rf "$TMP_DIR"'

build_component_image gm lc
build_worker_base_images

check_prerequisites

localup_kubeedge

prepare_k8s_env

start_gm
start_lc

echo "Local Neptune cluster is $(green_text running).
Press $(red_text Ctrl-C) to shut it down:
$debug_infos
"

while check_healthy; do sleep 5; done
