Merge pull request #2 from volcano-sh/master

update
volcano-sh · Jun 28, 2019 · 03120f0 · 03120f0
2 parents a619c2f + 2ee41aa
commit 03120f0
Show file tree

Hide file tree

Showing 76 changed files with 5,975 additions and 3,295 deletions.
diff --git a/.cid/volcano.yml b/.cid/volcano.yml
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "installer/helm"]
+	path = installer/helm
+	url = https://github.com/volcano-sh/charts.git
diff --git a/README.md b/README.md
@@ -13,7 +13,7 @@ Volcano is a batch system built on Kubernetes. It provides a suite of mechanisms
 Kubernetes that are commonly required by many classes of batch & elastic workload including:
 
 1. machine learning/deep learning,
-2. bioinformatics/genomics, and 
+2. bioinformatics/genomics,
 3. other "big data" applications.
 
 These types of applications typically run on generalized domain
@@ -53,16 +53,34 @@ the open source community.
 
 ## Quick Start Guide
 
-The easiest way to deploy Volcano is to use the Helm chart.
+The easiest way to deploy Volcano is to use the Helm chart. You can install it either from the Volcano Helm repo or from a local clone of the source code.
 
+## Using Volcano Helm Repo
+
+Add the Helm repo using the following command:
+
+```
+helm repo add volcano https://volcano-sh.github.io/charts
+```
+
+Install Volcano using the following command:
+
+```
+helm install volcano/volcano --namespace <namespace> --name <specified-name>
+
+e.g.:
+helm install volcano/volcano --namespace volcano-trial --name volcano-trial
+```
+
+## Cloning Code
 ### Pre-requisites
 
 First of all, clone the repo to your local path:
 
 ```
 # mkdir -p $GOPATH/src/volcano.sh/
 # cd $GOPATH/src/volcano.sh/
-# git clone https://github.com/volcano-sh/volcano.git
+# git clone --recursive https://github.com/volcano-sh/volcano.git
 ```
 
 ### 1. Volcano Image
@@ -92,14 +110,14 @@ try command ```kind load docker-image <image-name>:<tag> ``` for each of the ima
 Secondly, install helm chart.
 
 ```
-helm install installer/chart --namespace <namespace> --name <specified-name>
+helm install installer/helm/chart/volcano --namespace <namespace> --name <specified-name>
 
-For eg :
-helm install installer/chart --namespace volcano-trial --name volcano-trial
+e.g.:
+helm install installer/helm/chart/volcano --namespace volcano-trial --name volcano-trial
 
 ```
 
-To Verify your installation run the following commands:
+To verify your installation, run the following commands:
 
 ```
 #1. Verify the Running Pods

diff --git a/docs/samples/kubecon-2019-china/drf/nginx-1.yaml b/docs/samples/kubecon-2019-china/drf/nginx-1.yaml
@@ -0,0 +1,26 @@
+apiVersion: apps/v1
+kind: ReplicaSet
+metadata:
+  name: nginx-1
+  labels:
+    app: nginx-1
+spec:
+  # Number of pod replicas to create; adjust to suit your environment.
+  replicas: 8
+  selector:
+    matchLabels:
+      app: nginx-1  # same label as the pod template below
+  template:
+    metadata:
+      labels:
+        app: nginx-1
+    spec:
+      schedulerName: volcano  # schedule these pods with the scheduler named "volcano"
+      containers:
+      - name: nginx-1
+        image: nginx
+        resources:  # CPU request and CPU limit are set to the same value
+          requests:
+            cpu: "1000m"
+          limits:
+            cpu: "1000m"
diff --git a/docs/samples/kubecon-2019-china/drf/nginx-2.yaml b/docs/samples/kubecon-2019-china/drf/nginx-2.yaml
@@ -0,0 +1,26 @@
+apiVersion: apps/v1
+kind: ReplicaSet
+metadata:
+  name: nginx-2
+  labels:
+    app: nginx-2
+spec:
+  # Number of pod replicas to create; adjust to suit your environment.
+  replicas: 8
+  selector:
+    matchLabels:
+      app: nginx-2  # same label as the pod template below
+  template:
+    metadata:
+      labels:
+        app: nginx-2
+    spec:
+      schedulerName: volcano  # schedule these pods with the scheduler named "volcano"
+      containers:
+      - name: nginx-2
+        image: nginx
+        resources:  # CPU request and CPU limit are set to the same value
+          requests:
+            cpu: "1000m"
+          limits:
+            cpu: "1000m"
diff --git a/docs/samples/kubecon-2019-china/drf/nginx.yaml b/docs/samples/kubecon-2019-china/drf/nginx.yaml
@@ -0,0 +1,25 @@
+apiVersion: apps/v1
+kind: ReplicaSet
+metadata:
+  name: nginx
+  labels:
+    app: nginx
+spec:
+  # Number of pod replicas to create; adjust to suit your environment.
+  replicas: 8
+  selector:
+    matchLabels:
+      app: nginx  # same label as the pod template below
+  template:
+    metadata:
+      labels:
+        app: nginx
+    spec:  # no schedulerName is set in this pod spec
+      containers:
+      - name: nginx
+        image: nginx
+        resources:  # CPU request and CPU limit are set to the same value
+          requests:
+            cpu: "1000m"
+          limits:
+            cpu: "1000m"
diff --git a/docs/samples/kubecon-2019-china/gang/mpi-example.yaml b/docs/samples/kubecon-2019-china/gang/mpi-example.yaml
@@ -0,0 +1,77 @@
+apiVersion: batch.volcano.sh/v1alpha1
+kind: Job
+metadata:
+  name: lm-mpi-job
+  labels:
+    # Set the job type according to your workload's needs.
+    "volcano.sh/job-type": "MPI"
+spec:
+  # Minimum number of pods the job needs; here it equals the total replica count (1 + 3 = 4).
+  minAvailable: 4
+  schedulerName: volcano
+  plugins:
+    # Provides password-less SSH authentication.
+    ssh: []
+    # Provides the network information the job needs to run: hosts file, headless service, etc.
+    svc: []
+  # If a pod is killed (evicted), restart the whole job.
+  policies:
+    - event: PodEvicted
+      action: RestartJob
+  tasks:
+    - replicas: 1
+      name: mpimaster
+      # When mpiexec finishes, treat the whole MPI job as finished.
+      policies:
+        - event: TaskCompleted
+          action: CompleteJob
+      template:
+        spec:
+          # Volcano's information is placed under the /etc/volcano directory.
+          containers:
+            - command:
+                - /bin/sh
+                - -c
+                - |
+                  MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
+                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
+                  mpiexec --allow-run-as-root --host ${MPI_HOST} -np 3 mpi_hello_world;
+              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
+              name: mpimaster
+              ports:
+                - containerPort: 22  # port 22, named mpijob-port
+                  name: mpijob-port
+              workingDir: /home
+              resources:  # CPU request and CPU limit are set to the same value
+                requests:
+                  cpu: "500m"
+                limits:
+                  cpu: "500m"
+          restartPolicy: OnFailure
+          imagePullSecrets:
+            - name: default-secret
+    - replicas: 3  # matches the "-np 3" passed to mpiexec in the mpimaster task
+      name: mpiworker
+      template:
+        spec:
+          containers:
+            - command:
+                - /bin/sh
+                - -c
+                - |
+                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
+              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
+              name: mpiworker
+              ports:
+                - containerPort: 22  # port 22, named mpijob-port
+                  name: mpijob-port
+              workingDir: /home
+              resources:  # CPU request and CPU limit are set to the same value
+                requests:
+                  cpu: "1000m"
+                limits:
+                  cpu: "1000m"
+          restartPolicy: OnFailure
+          imagePullSecrets:
+            - name: default-secret
+
diff --git a/docs/samples/kubecon-2019-china/gang/nginx.yaml b/docs/samples/kubecon-2019-china/gang/nginx.yaml
@@ -0,0 +1,25 @@
+apiVersion: apps/v1
+kind: ReplicaSet
+metadata:
+  name: nginx
+  labels:
+    app: nginx
+spec:
+  # Number of pod replicas to create; adjust to suit your environment.
+  replicas: 6
+  selector:
+    matchLabels:
+      app: nginx  # same label as the pod template below
+  template:
+    metadata:
+      labels:
+        app: nginx
+    spec:  # no schedulerName is set in this pod spec
+      containers:
+      - name: nginx
+        image: nginx
+        resources:  # CPU request and CPU limit are set to the same value
+          requests:
+            cpu: "1000m"
+          limits:
+            cpu: "1000m"
diff --git a/docs/samples/kubecon-2019-china/horovod-sample/lm-horovod-tf-mnist-v0.5.yaml b/docs/samples/kubecon-2019-china/horovod-sample/lm-horovod-tf-mnist-v0.5.yaml
@@ -0,0 +1,73 @@
+apiVersion: batch.volcano.sh/v1alpha1
+kind: Job
+metadata:
+  name: lm-horovod-job
+  labels:
+    "volcano.sh/job-type": Horovod
+spec:
+  minAvailable: 4  # equals the total replica count of the tasks below (1 + 3)
+  schedulerName: volcano
+  plugins:
+    ssh: []
+    svc: []
+  # If a pod is killed (evicted), restart the whole job.
+  policies:
+    - event: PodEvicted
+      action: RestartJob
+  tasks:
+    - replicas: 1
+      name: master
+      policies:  # when this task completes, the whole job is marked complete
+        - event: TaskCompleted
+          action: CompleteJob
+      template:
+        spec:
+          containers:
+            - command:
+                - /bin/sh
+                - -c
+                - |
+                  WORKER_HOST=`cat /etc/volcano/worker.host | tr "\n" ","`;
+                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
+                  mpiexec --allow-run-as-root --host ${WORKER_HOST} -np 3 python tensorflow_mnist_lm.py;
+              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/horovod-tf-mnist:0.5
+              name: master
+              ports:
+                - containerPort: 22  # port 22, named job-port
+                  name: job-port
+              resources:  # requests and limits are set to the same values
+                requests:
+                  cpu: "500m"
+                  memory: "1024Mi"
+                limits:
+                  cpu: "500m"
+                  memory: "1024Mi"
+          restartPolicy: OnFailure
+          imagePullSecrets:
+            - name: default-secret
+    - replicas: 3  # matches the "-np 3" passed to mpiexec in the master task
+      name: worker
+      template:
+        spec:
+          containers:
+            - command:
+                - /bin/sh
+                - -c
+                - |
+                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
+              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/horovod-tf-mnist:0.5
+              name: worker
+              ports:
+                - containerPort: 22  # port 22, named job-port
+                  name: job-port
+              resources:  # requests and limits are set to the same values
+                requests:
+                  cpu: "1000m"
+                  memory: "2048Mi"
+                limits:
+                  cpu: "1000m"
+                  memory: "2048Mi"
+          restartPolicy: OnFailure
+          imagePullSecrets:
+            - name: default-secret
+---