Skip to content

Commit

Permalink
update crd
Browse files Browse the repository at this point in the history
Signed-off-by: ii2day <ji.li@daocloud.io>
  • Loading branch information
ii2day committed Aug 24, 2023
1 parent 52b93dc commit b8e45e8
Show file tree
Hide file tree
Showing 22 changed files with 154 additions and 89 deletions.
1 change: 0 additions & 1 deletion charts/crds/kdoctor.io_apphttphealthies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1111,7 +1111,6 @@ spec:
type: object
type: object
terminationGracePeriodMinutes:
default: 60
format: int64
type: integer
type: object
Expand Down
1 change: 0 additions & 1 deletion charts/crds/kdoctor.io_netdnses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1109,7 +1109,6 @@ spec:
type: object
type: object
terminationGracePeriodMinutes:
default: 60
format: int64
type: integer
type: object
Expand Down
1 change: 0 additions & 1 deletion charts/crds/kdoctor.io_netreaches.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1111,7 +1111,6 @@ spec:
type: object
type: object
terminationGracePeriodMinutes:
default: 60
format: int64
type: integer
type: object
Expand Down
1 change: 1 addition & 0 deletions charts/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ data:
nethttp_defaultRequest_PerRequestTimeoutInMS: {{ .Values.feature.nethttp_defaultRequest_PerRequestTimeoutInMS }}
netdns_defaultConcurrency: {{ .Values.feature.netdns_defaultConcurrency }}
multusPodAnnotationKey: {{ .Values.feature.multusPodAnnotationKey }}
agentDefaultTerminationGracePeriodMinutes: {{ .Values.feature.agentDefaultTerminationGracePeriodMinutes }}
crdMaxHistory: {{ .Values.feature.crdMaxHistory }}
{{- if .Values.feature.enableIPv4 }}
agentSerivceIpv4Name: {{ include "project.kdoctorAgent.serviceIpv4Name" . }}
Expand Down
28 changes: 16 additions & 12 deletions charts/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ global:
## @param global.configName the configmap name
configName: "kdoctor"

## @param global.configAppTemplate the configmap name of agent
configAppTemplate: "kdoctor-app-config-template"


Expand Down Expand Up @@ -61,6 +62,9 @@ feature:
## @param feature.netdns_defaultConcurrency concurrency for kind netdns
netdns_defaultConcurrency: 50

## @param feature.agentDefaultTerminationGracePeriodMinutes default number of minutes after task completion before the agent workload is terminated
agentDefaultTerminationGracePeriodMinutes: 60

## @param feature.taskPollIntervalInSecond the interval to poll the task in controller and agent pod
taskPollIntervalInSecond: 5

Expand All @@ -77,7 +81,7 @@ feature:

## @param feature.aggregateReport.cleanAgedReportIntervalInMinute the interval in minute for removing aged report
cleanAgedReportIntervalInMinute: "10"

## aggregate report from agent
agent:
## @param feature.aggregateReport.agent.reportPath the path where the agent pod temporarily store task report.
reportPath: "/report"
Expand Down Expand Up @@ -182,6 +186,7 @@ kdoctorAgent:
## @param kdoctorAgent.priorityClassName the priority Class Name for kdoctorAgent
priorityClassName: "system-node-critical"

## @param kdoctorAgent.reportHostPath storage path when pvc is disabled
reportHostPath: "/var/run/kdoctor/agent"

## @param kdoctorAgent.affinity the affinity of kdoctorAgent
Expand All @@ -208,14 +213,12 @@ kdoctorAgent:
resources:
## @param kdoctorAgent.resources.limits.cpu the cpu limit of kdoctorAgent pod
## @param kdoctorAgent.resources.limits.memory the memory limit of kdoctorAgent pod
## @param kdoctorAgent.resources.requests.cpu the cpu requests of kdoctorAgent pod
## @param kdoctorAgent.resources.requests.memory the memory requests of kdoctorAgent pod
limits:
cpu: 1000m
memory: 1024Mi
requests:
cpu: 100m
memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi

## @param kdoctorAgent.securityContext the security Context of kdoctorAgent pod
securityContext: {}
Expand All @@ -228,9 +231,9 @@ kdoctorAgent:
httpServer:
## @param kdoctorAgent.httpServer.healthPort the http Port for kdoctorAgent, for health checking
healthPort: 5710
## @param kdoctorAgent.httpServer.appPort the http Port for kdoctorAgent, testing connect
## @param kdoctorAgent.httpServer.appHttpPort the http Port for kdoctorAgent, testing connect
appHttpPort: 80
## @param kdoctorAgent.httpServer.appPort the https Port for kdoctorAgent, testing connect
## @param kdoctorAgent.httpServer.appHttpsPort the https Port for kdoctorAgent, testing connect
appHttpsPort: 443

startupProbe:
Expand Down Expand Up @@ -310,7 +313,7 @@ kdoctorController:
## @param kdoctorController.replicas the replicas number of kdoctorController pod
replicas: 1

## @param kdoctorController.binName the binName name of kdoctorController
## @param kdoctorController.cmdBinName the binName name of kdoctorController
cmdBinName: "/usr/bin/controller"

## @param kdoctorController.hostnetwork enable hostnetwork mode of kdoctorController pod. Notice, if no CNI available before template installation, must enable this
Expand Down Expand Up @@ -409,7 +412,7 @@ kdoctorController:
minAvailable: 1

httpServer:
## @param kdoctorController.port the http Port for kdoctorController, for health checking and http service
## @param kdoctorController.httpServer.port the http Port for kdoctorController, for health checking and http service
port: 80

startupProbe:
Expand Down Expand Up @@ -490,18 +493,19 @@ kdoctorController:
gopsPort: 5724

apiserver:
## @param kdoctorApiserver.name the kdoctorApiserver name
## @param kdoctorController.apiserver.name the kdoctorApiserver name
name: "kdoctor-apiserver"

## TLS configuration for kdoctor
tls:
## TLS ca for kdoctor
ca:
## @param tls.ca.secretName the secret name for storing TLS certificates
secretName: "kdoctor-ca"

## TLS configuration for kdoctor client
client:
## @param tls.ca.client.secretName the secret name for storing TLS certificates
## @param tls.client.secretName the secret name for storing TLS certificates
secretName: "kdoctor-client-cert"

## TLS configuration for webhook
Expand Down
68 changes: 68 additions & 0 deletions docs/concepts/runtime-zh_CN.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
## runtime

[**English**](./runtime.md) | **简体中文**

当下发任务 CR 后,kdoctor-controller 会根据 CR 中的 AgentSpec 生成对应的任务载体(DaemonSet 或 Deployment)。当所有 Pod 就绪后,开始按照 Spec 中的任务定义执行任务,每一个任务独立使用一个载体。

### 载体资源

当任务 CR 下发后,kdoctor-controller 会创建如下资源来执行任务。

### 工作负载

工作负载为 DaemonSet 或 Deployment,默认为 DaemonSet。负载中的每一个 Pod 根据任务配置执行请求,并将执行结果落盘到 Pod 中。可通过 AgentSpec 设置
工作负载的销毁时间,默认在任务执行完 60 分钟后销毁工作负载;当删除 CR 任务时,工作负载会一并被删除。

### Service

在创建工作负载时,kdoctor-controller 同时会根据 IP Family 的配置,创建对应的 service 并与工作负载的 pod 绑定,用于测试 service 网络连通性。其销毁逻辑与工作负载
相同。

### Ingress

当任务为 NetReach 时,若测试目标包含 Ingress,会创建一个 Ingress,用于测试 Ingress 的网络连通性,其销毁逻辑与工作负载相同。

### 报告收取

当任务 CR 下发后,kdoctor-controller 会将任务注册进 ReportManager,ReportManager 会定期去每一个任务负载中通过 GRPC 接口获取报告,并聚合
在 kdoctor-controller 中,聚合后可通过命令 `kubectl get kdoctorreport` 获取报告结果,因此,若报告未收集完成就将工作负载删除将影响报告聚合结果。


### 生命周期

```mermaid
sequenceDiagram
participant cr 任务
participant kdoctor_controller
participant workload
participant pod
participant service
participant ingress
cr 任务 ->>kdoctor_controller: cr 任务下发
kdoctor_controller ->>workload: 创建 ownerReferences 为任务 cr 工作负载(daemonSet 或 Deployment)
workload ->>pod: 创建任务执行 pod
kdoctor_controller ->>service: 创建 ownerReferences 为工作负载的 service
kdoctor_controller ->>ingress: 创建 ownerReferences 为工作负载的 ingress
workload ->>kdoctor_controller: workload 就绪
service ->>kdoctor_controller: service 就绪
ingress ->>kdoctor_controller: ingress 就绪
kdoctor_controller ->>pod: 任务执行
kdoctor_controller ->>pod: 定时收取报告
pod ->>kdoctor_controller: 任务执行完成
kdoctor_controller ->>pod: 报告收取完成
pod ->>pod: 报告收取完成默认 10 分钟后,自动清理报告
kdoctor_controller ->>workload: 到达 runtime 销毁时间,销毁 workload
workload ->>service: 到达 runtime 销毁时间,销毁 service
workload ->>ingress: 到达 runtime 销毁时间,销毁 ingress
cr 任务 ->>kdoctor_controller: cr 任务删除
kdoctor_controller ->> workload: cr 任务删除,workload 删除
workload ->> pod: workload 删除,pod 删除
workload ->>service: workload 删除,service 删除
workload ->>ingress: workload 删除,ingress 删除
```

* 任务开始后,kdoctor-controller 会定时向任务中的 pod 收取报告,任务完成后,报告收集完成,不会再进行报告收集。
* 任务负载执行完任务后,报告被 kdoctor-controller 收取完成,默认 10 分钟后会自动清理掉负载中的报告。
* 当删除掉已经完成的任务 CR 后,报告依然存在 kdoctor-controller 报告目录下,但无法通过 k8s 聚合 api 查看,需要手动才能进行查看。
* 当删除执行中的任务 CR 时,任务会终止,创建 CR 时生成的资源会一并删除,已经收集好的报告依然存放在 kdoctor-controller 报告目录下。
3 changes: 3 additions & 0 deletions docs/concepts/runtime.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## runtime

[**简体中文**](./runtime-zh_CN.md) | **English**
1 change: 1 addition & 0 deletions docs/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ nav:
- NetDns: usage/netdns.md
- Concepts:
- Architecture: reference/arch.md
- Runtime: concepts/runtime.md
- Reference:
- AppHttpHealthy: reference/apphttphealthy.md
- NetReach: reference/netreach.md
Expand Down
23 changes: 11 additions & 12 deletions docs/reference/apphttphealthy-zh_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

## 基本描述

对于这种任务,kdoctor-controller 会根据 agentSpec 生成对应的 agent,每一个 agent pod 都会向指定的目标发送http请求,并获得成功率和平均延迟。它可以指定成功条件来判断结果是否成功。并且,可以通过聚合API获取详细的报告。
对于这种任务,kdoctor-controller 会根据 agentSpec 生成对应的 [agent](../concepts/runtime-zh_CN.md) 等资源,每一个 agent pod 都会向指定的目标发送http请求,并获得成功率和平均延迟。它可以指定成功条件来判断结果是否成功。并且,可以通过聚合API获取详细的报告。

## AppHttpHealthy 示例

Expand Down Expand Up @@ -75,17 +75,16 @@ status:
#### AgentSpec
| 字段 | 描述 | 结构 | 验证 | 取值 | 默认值 |
|-------------------------------|------------------------|----------------------------------------------------------------------------------------------------------------------------------------|-----|----------------------|--------------------|
| annotation | agent 工作负载的 annotation | map[string]string | 可选 | | |
| kind | agent 工作负载的类型 | string | 可选 | Deployment、DaemonSet | DaemonSet |
| deploymentReplicas | agent 工作负载类型为 deployment 时的期望副本数 | int | 可选 | 大于等于 0 | 0 |
| affinity | agent 工作负载亲和性 | [labelSelector](https://github.com/kubernetes/kubernetes/blob/v1.27.0/staging/src/k8s.io/apimachinery/pkg/apis/meta/v1/types.go#L1195) | 可选 | | |
| env | agent 工作负载环境变量 | [env](https://github.com/kubernetes/kubernetes/blob/v1.27.0/staging/src/k8s.io/api/core/v1/types.go#L2012) | 可选 | | |
| hostNetwork | agent 工作负载是否使用宿主机网络 | bool | 可选 | true、false | false |
| resources | agent 工作负载资源使用配置 | [resources](https://github.com/kubernetes/kubernetes/blob/v1.27.0/staging/src/k8s.io/api/core/v1/types.go#L2333) | 可选 | | cpu:100m,memory:128Mi |
| terminationGracePeriodMinutes | agent 工作负载完成任务后多少分钟之后终止 | int | 可选 | 大于等于 0 | 60 |
| 字段 | 描述 | 结构 | 验证 | 取值 | 默认值 |
|-------------------------------|------------------------|----------------------------------------------------------------------------------------------------------------------------------|-----|----------------------|-------------------------------|
| annotation | agent 工作负载的 annotation | map[string]string | 可选 | | |
| kind | agent 工作负载的类型 | string | 可选 | Deployment、DaemonSet | DaemonSet |
| deploymentReplicas | agent 工作负载类型为 deployment 时的期望副本数 | int | 可选 | 大于等于 0 | 0 |
| affinity | agent 工作负载亲和性 | labelSelector | 可选 | | |
| env | agent 工作负载环境变量 | env | 可选 | | |
| hostNetwork | agent 工作负载是否使用宿主机网络 | bool | 可选 | true、false | false |
| resources | agent 工作负载资源使用配置 | resources | 可选 | | limit cpu:1000m,memory:1024Mi |
| terminationGracePeriodMinutes | agent 工作负载完成任务后多少分钟之后终止 | int | 可选 | 大于等于 0 | 60 |
#### Schedule
Expand Down
Loading

0 comments on commit b8e45e8

Please sign in to comment.