Skip to content

Commit

Permalink
add staroid
Browse files Browse the repository at this point in the history
fix skaffold

print dir

update

no tty

use local whl build instead of prebuilt from url

try build only one version

configure git

build 1 python version for now

update

correct whl file name

update

python 37

gpu image build

update

update

add gpu tag

tag image

build whl

full python version name in image

3.8.4 -> 3.8.5

update 3.8.3

install cmake

install make

dependencies to install atari-py

fix sed

try differently

apt-get clean

update

update

build whl

create deployment

add nfs pvc
  • Loading branch information
Leemoonsoo committed Oct 3, 2020
1 parent ff6d412 commit a386c05
Show file tree
Hide file tree
Showing 7 changed files with 365 additions and 0 deletions.
69 changes: 69 additions & 0 deletions .staroid/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/bin/bash
# Custom builder script for Skaffold
# https://skaffold.dev/docs/pipeline-stages/builders/custom/
#
# usage) build.sh PYTHON_VERSION      (e.g. build.sh 3.7.7)
#
# Environment variables read by this script:
#   BUILD_WHEEL  "true" to build the ray wheel from source, anything else to
#                use a pre-built wheel url. Defaults to "false".
#   IMAGE        name the built image is tagged with (and pushed as).
#   PUSH_IMAGE   "true" to push $IMAGE after the build. Defaults to "false".
#

set -x
set -e
pwd

PYTHON_VERSION=$1
if [ -z "$PYTHON_VERSION" ]; then
  echo "usage) $0 [PYTHON_VERSION]" >&2
  exit 1
fi

# keep only the major and minor digits, e.g. 3.7.7 -> 37
SHORT_VER=$(echo "$PYTHON_VERSION" | sed "s/\([0-9]*\)[.]\([0-9]*\)[.][0-9]*/\1\2/g")

# true to build .whl from source (will take about 3 hours).
# false to use pre-built whl file from http(s) url.
BUILD_WHEEL=${BUILD_WHEEL:-false}

if [ "$BUILD_WHEEL" == "true" ]; then
  if [ ! -d ".whl" ]; then # check if already built.
    # Uncomment followings to build wheel for only single python version.
    #sed -ie "/^PYTHONS=/,+2d" python/build-wheel-manylinux1.sh
    #sed -ie "/^chmod/a PYTHONS=\(\"cp37-cp37m\"\)" python/build-wheel-manylinux1.sh
    #git config user.name "build"
    #git config user.email "ci@build.com"
    #git commit python/build-wheel-manylinux1.sh -m "update"
    #cat python/build-wheel-manylinux1.sh

    # current commit
    COMMIT=$(git rev-parse HEAD)

    # build the wheels inside the builder image, with this checkout mounted at /ray
    docker run \
      -e TRAVIS_COMMIT="$COMMIT" \
      --rm -i \
      -w /ray \
      -v "$(pwd)":/ray \
      rayproject/arrow_linux_x86_64_base:python-3.8.0 \
      /ray/python/build-wheel-manylinux1.sh
  fi

  # pick the wheel built for the requested python version
  WHEEL=$(ls .whl/*-cp"$SHORT_VER"-*)
else
  case "$SHORT_VER" in
    36)
      WHEEL="https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-1.1.0.dev0-cp36-cp36m-manylinux1_x86_64.whl"
      ;;
    37)
      WHEEL="https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-1.1.0.dev0-cp37-cp37m-manylinux1_x86_64.whl"
      ;;
    38)
      WHEEL="https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-1.1.0.dev0-cp38-cp38-manylinux1_x86_64.whl"
      ;;
    *)
      # fail here with a clear message instead of passing an empty WHEEL
      # to ray_patch.sh, which would only print its usage text.
      echo "No pre-built wheel is known for python version '$PYTHON_VERSION'" >&2
      exit 1
      ;;
  esac
fi

# apply non-root docker image patch
# ('reset' ignores its last argument, so '.' is passed as a placeholder)
./.staroid/ray_patch.sh reset . .
./.staroid/ray_patch.sh patch . "$WHEEL"

# print patched files
git diff

cat docker/ray/Dockerfile
cat docker/ray-deps/Dockerfile

# build docker image
./build-docker.sh --no-cache-build --gpu --python-version "$PYTHON_VERSION"

# tag the freshly built image with the requested image name
docker tag rayproject/ray:latest-gpu "$IMAGE"

# print images
docker images

# Compare against the literal "true": a bare `if $PUSH_IMAGE` runs an empty
# command when the variable is unset, which succeeds and would push the image.
if [ "${PUSH_IMAGE:-false}" == "true" ]; then
  docker push "$IMAGE"
fi
83 changes: 83 additions & 0 deletions .staroid/k8s/minikube.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Pod security policy for the minikube profile: no privileged containers,
# no privilege escalation, no host namespaces, and containers must run as a
# non-root user.
kind: PodSecurityPolicy
apiVersion: policy/v1beta1
metadata:
  name: staroid-psp
spec:
  # privilege and host access
  privileged: false
  allowPrivilegeEscalation: false
  hostNetwork: false
  hostIPC: false
  hostPID: false
  readOnlyRootFilesystem: false
  # user / group constraints
  runAsUser:
    rule: MustRunAsNonRoot
  runAsGroup:
    rule: MustRunAs
    ranges:
      - min: 1
        max: 65535
  seLinux:
    rule: RunAsAny
  supplementalGroups:
    rule: MustRunAs
    ranges:
      - min: 1
        max: 65535
  fsGroup:
    rule: RunAsAny
  # volume types pods are allowed to mount
  volumes:
    - configMap
    - emptyDir
    - secret
    - persistentVolumeClaim
---
# Role granting management of the core, apps and batch resources listed
# below, plus permission to use the 'staroid-psp' pod security policy.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: staroid-role
rules:
  # core API group
  - apiGroups:
      - ""
    resources:
      - pods
      - pods/log
      - pods/exec
      - pods/binding
      - services
      - secrets
      - configmaps
      - persistentvolumeclaims
    verbs:
      - create
      - get
      - update
      - patch
      - list
      - delete
      - watch
  - apiGroups:
      - apps
    resources:
      - deployments
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  - apiGroups:
      - batch
    resources:
      - jobs
      - cronjobs
    verbs:
      - create
      - get
      - update
      - patch
      - list
      - delete
      - watch
  # allow use of the pod security policy by name
  - apiGroups:
      - policy
    resources:
      - podsecuritypolicies
    verbs:
      - use
    resourceNames:
      - staroid-psp
---
# Binds 'staroid-role' to the 'default' service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: staroid-rolebinding
subjects:
  - kind: ServiceAccount
    name: default
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: staroid-role
---
# hostPath-backed persistent volume (1Gi, ReadWriteMany).
kind: PersistentVolume
apiVersion: v1
metadata:
  name: nfs-pv
spec:
  capacity:
    storage: 1Gi
  accessModes:
    - ReadWriteMany
  hostPath:
    path: ./nfs-pv
---
# Storage class named 'nfs', served by the minikube hostpath provisioner.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: nfs
  labels:
    addonmanager.kubernetes.io/mode: EnsureExists
provisioner: k8s.io/minikube-hostpath
18 changes: 18 additions & 0 deletions .staroid/k8s/nfs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
# NFS volume shared among all ray nodes.
# See https://docs.staroid.com/ske/storage.html for more details.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: nfs
  annotations:
    storage.staroid.com/scope: Instance
    # uid:gid of the ray user
    storage.staroid.com/file-manager: "1000:100"
spec:
  storageClassName: nfs
  volumeMode: Filesystem
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
44 changes: 44 additions & 0 deletions .staroid/k8s/ray.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# derived from https://github.com/ray-project/ray/blob/master/doc/kubernetes/ray-cluster.yaml

# Service exposing the Ray dashboard (port 8265) of the ray-head pods.
kind: Service
apiVersion: v1
metadata:
  name: ray-dashboard
  annotations:
    service.staroid.com/link: show
spec:
  selector:
    component: ray-head
  ports:
    - name: ray-dashboard
      port: 8265
---
# The 'ray up' command creates the head and worker nodes itself, so it needs
# to know which image name to use.
# Skaffold appends a hash-based version tag to every image it builds
# (e.g. ray-py3-7-7:aser2oww), so 'ray up' can not rely on a static image
# name like 'ray-py3-7-7'.
# This 0-replica Deployment therefore lists all the images built in this
# project; 'ray up' can read it to find the image names to use.
kind: Deployment
apiVersion: apps/v1
metadata:
  name: ray-images
spec:
  # never scheduled: the resource only carries the image names
  replicas: 0
  selector:
    matchLabels:
      app: ray-images
  template:
    metadata:
      labels:
        app: ray-images
    spec:
      containers:
        # skaffold adds the tag postfix to each image below
        - name: ray-py3-7-7
          image: ray-py3-7-7
        - name: ray-py3-6-9
          image: ray-py3-6-9
        - name: ray-py-3-8-3
          image: ray-py-3-8-3
96 changes: 96 additions & 0 deletions .staroid/ray_patch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/bin/bash
#
# Patch (or restore) ray's docker build files so that the images are built
# for, and run as, a non-root 'ray' user.
#
# usage) ray_patch.sh [patch|reset] [RAY_HOME] [WHEEL]
#   patch   edit build-docker.sh and docker/*/Dockerfile under RAY_HOME in place.
#           WHEEL is either an http(s) url or a path to a .whl file.
#   reset   'git checkout' the files that 'patch' edits. WHEEL must still be
#           given but is not used.
#
# Exits with 1 on wrong argument count or unknown operation.

if [ $# -ne 3 ]; then
  echo "usage) $0 [patch|reset] [RAY_HOME] [WHEEL]"
  exit 1
fi

OP=$1
RAY_HOME=$2
WHEEL=$3

# uid/gid of the 'ray' user created in the base image
RAY_UID=1000
RAY_GID=100

# files touched by this script
BUILD_DOCKER_SH="$RAY_HOME/build-docker.sh"
BASE_DEPS_DOCKERFILE="$RAY_HOME/docker/base-deps/Dockerfile"
RAY_DEPS_DOCKERFILE="$RAY_HOME/docker/ray-deps/Dockerfile"
RAY_DOCKERFILE="$RAY_HOME/docker/ray/Dockerfile"
RAY_ML_DOCKERFILE="$RAY_HOME/docker/ray-ml/Dockerfile"

# in-place sed command, kept as an array so that paths can be quoted safely.
# On Darwin, sed -i takes a backup suffix as a separate argument.
SED_INPLACE=(sed -i)
if uname | grep Darwin > /dev/null; then
  SED_INPLACE=(sed -i .bak)
fi

if [ "$OP" == "patch" ]; then
  # patch wheel url
  case "$WHEEL" in
    http*)
      # url is given
      "${SED_INPLACE[@]}" "s|^WHEEL_URL=.*|WHEEL_URL=\"$WHEEL\"|g" "$BUILD_DOCKER_SH"
      ;;
    *)
      # path is given: use the local file and drop the wget download
      "${SED_INPLACE[@]}" "s|set -x|set -x; set -e|g" "$BUILD_DOCKER_SH"
      "${SED_INPLACE[@]}" "s|^WHEEL=.*|WHEEL=$WHEEL|g" "$BUILD_DOCKER_SH"
      "${SED_INPLACE[@]}" "s|wget.*||g" "$BUILD_DOCKER_SH"
      ;;
  esac

  # patch GPU tag https://github.com/ray-project/ray/pull/10909
  "${SED_INPLACE[@]}" "s/:latest/:latest\$GPU/g" "$BUILD_DOCKER_SH"

  # patch PATH
  "${SED_INPLACE[@]}" "s/\/root/\/home\/ray/g" "$BASE_DEPS_DOCKERFILE"

  # patch PATH in profile
  "${SED_INPLACE[@]}" "s/ \/etc\/profile.d\/conda.sh/\> \/home\/ray\/.bash_profile/g" "$BASE_DEPS_DOCKERFILE"

  # patch kubectl installation section
  "${SED_INPLACE[@]}" "s/apt-key add/sudo apt-key add/g" "$BASE_DEPS_DOCKERFILE"
  "${SED_INPLACE[@]}" "s/touch \/etc/sudo touch \/etc/g" "$BASE_DEPS_DOCKERFILE"
  "${SED_INPLACE[@]}" "s/tee -a \/etc/sudo tee -a \/etc/g" "$BASE_DEPS_DOCKERFILE"

  # patch apt-get
  "${SED_INPLACE[@]}" "s/apt-get/sudo apt-get/g" "$BASE_DEPS_DOCKERFILE"
  "${SED_INPLACE[@]}" "s/rm -rf \/var/sudo rm -rf \/var/g" "$BASE_DEPS_DOCKERFILE"
  "${SED_INPLACE[@]}" "s/apt-get/sudo apt-get/g" "$RAY_ML_DOCKERFILE"

  # patch rm
  "${SED_INPLACE[@]}" "s/ rm / sudo rm /g" "$RAY_DEPS_DOCKERFILE"
  "${SED_INPLACE[@]}" "s/ rm / sudo rm /g" "$RAY_DOCKERFILE"

  # Add ray user & install sudo.
  # The new base-deps Dockerfile is assembled in a private temp file
  # (a fixed name under /tmp could collide with a concurrent build).
  TMP_DOCKERFILE=$(mktemp "${TMPDIR:-/tmp}/ray_tmp_docker.XXXXXX") || exit 1

  # lines until 'ARG DEBIAN_FRONTEND ...'
  sed '/ARG DEBIAN/q' "$BASE_DEPS_DOCKERFILE" > "$TMP_DOCKERFILE"

  # install tzdata here to initialize tzdata in non-interactive mode.
  # otherwise, tzdata will be installed as a transitive dependency later and show keyboard prompt
  #
  # The heredoc is unquoted on purpose so that $RAY_UID / $RAY_GID are expanded.
  cat <<EOF >> "$TMP_DOCKERFILE"
RUN apt-get update -y && apt-get install -y sudo tzdata \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
RUN useradd -ms /bin/bash -d /home/ray ray --uid $RAY_UID --gid $RAY_GID \
&& usermod -aG sudo ray \
&& echo 'ray ALL=NOPASSWD: ALL' >> /etc/sudoers
USER $RAY_UID
ENV HOME=/home/ray
EOF

  # lines after 'ARG DEBIAN_FRONTEND ...'
  sed '1,/ARG DEBIAN/d' "$BASE_DEPS_DOCKERFILE" >> "$TMP_DOCKERFILE"

  # write the content back into the existing file (keeps its permissions,
  # mktemp files are created owner-only) and drop the temp file.
  cat "$TMP_DOCKERFILE" > "$BASE_DEPS_DOCKERFILE"
  rm -f "$TMP_DOCKERFILE"

  # in case of py38, atari-py package installation fails without few os packages
  "${SED_INPLACE[@]}" "s/RUN \$HOME/RUN sudo apt-get update \&\& sudo apt-get install -y g++ cmake zlib1g-dev \&\& \$HOME/g" "$RAY_DEPS_DOCKERFILE"
  "${SED_INPLACE[@]}" "s/RUN \$HOME/RUN sudo apt-get update \&\& sudo apt-get install -y g++ cmake zlib1g-dev \&\& \$HOME/g" "$RAY_DOCKERFILE"
  "${SED_INPLACE[@]}" "s/\(\&\& sudo rm.*\)/\1 \&\& sudo apt-get autoremove -y cmake g++ \&\& sudo rm -rf \/var\/lib\/apt\/lists\/\* \&\& sudo apt-get clean/g" "$RAY_DEPS_DOCKERFILE"
  "${SED_INPLACE[@]}" "s/\(\&\& sudo rm.*\)/ \1 \&\& sudo apt-get autoremove -y cmake g++ \&\& sudo rm -rf \/var\/lib\/apt\/lists\/\* \&\& sudo apt-get clean/g" "$RAY_DOCKERFILE"

elif [ "$OP" == "reset" ]; then
  # restore every file that 'patch' edits; '--' marks the arguments as paths
  git checkout -- "$RAY_DOCKERFILE"
  git checkout -- "$RAY_DEPS_DOCKERFILE"
  git checkout -- "$BASE_DEPS_DOCKERFILE"
  git checkout -- "$RAY_ML_DOCKERFILE"
  git checkout -- "$BUILD_DOCKER_SH"
else
  echo "Invalid operation $OP"
  exit 1
fi

28 changes: 28 additions & 0 deletions .staroid/skaffold.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Skaffold pipeline: builds one ray image per python version with the custom
# build script, then deploys the manifests under .staroid/k8s with kubectl.
kind: Config
apiVersion: skaffold/v2beta1
build:
  artifacts:
    # the argument of build.sh is the python version of the image
    - image: ray-py3-7-7
      context: '.'
      custom:
        buildCommand: '.staroid/build.sh 3.7.7'
    - image: ray-py3-6-9
      context: '.'
      custom:
        buildCommand: '.staroid/build.sh 3.6.9'
    - image: ray-py-3-8-3
      context: '.'
      custom:
        buildCommand: '.staroid/build.sh 3.8.3'
deploy:
  statusCheckDeadlineSeconds: 300
  kubectl:
    manifests:
      - '.staroid/k8s/ray.yaml'
      - '.staroid/k8s/nfs.yaml'
profiles:
  # the minikube profile puts minikube.yaml at the front of the manifest list
  - name: minikube
    patches:
      - op: add
        path: '/deploy/kubectl/manifests/0'
        value: '.staroid/k8s/minikube.yaml'
27 changes: 27 additions & 0 deletions .staroid/staroid.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Staroid project configuration file.
# Reference https://docs.staroid.com/references/staroid_yaml.html.
# Use online validator https://staroid.com/site/validator.
apiVersion: beta/v1
starRank: # Learn more about StarRank https://staroid.com/site/starrank.
rate: 1.0 # community rate.
# NOTE(review): upstream project(s) and their weight; presumably part of the
# StarRank settings above - confirm the nesting against the Staroid reference.
upstream:
- project: ray-project/ray
weight: 100
# Build this project with the Skaffold configuration file given below.
build:
skaffold:
file: .staroid/skaffold.yaml
deploy:
paramGroups: # launch parameters
- name: Misc
collapsed: true
params:
# String parameter offering the two options "true" and "false"; defaults to "true".
- name: start-head
description: Start ray head node on start. 'true' or 'false'
type: STRING
optional: false
defaultValue: "true"
options:
- name: "True"
value: "true"
- name: "False"
value: "false"

0 comments on commit a386c05

Please sign in to comment.