Skip to content

Commit 451c70e

Browse files
authored
fix: return empty deployment if not found (#2716)
## Description Restarts on grafloki start over k8s were not working because of the way the not found error was being handled. This change returns a nil object if a deployment isn't found by k8s instead of returning an error.
1 parent e31c0d4 commit 451c70e

File tree

7 files changed

+125
-33
lines changed

7 files changed

+125
-33
lines changed

cli/cli/commands/service/service_helpers/service_helpers.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ func GetAddServiceStarlarkScript(serviceName string, serviceConfigStarlark strin
130130
}
131131

132132
func RunAddServiceStarlarkScript(ctx context.Context, serviceName, enclaveIdentifier, starlarkScript string, enclaveCtx *enclaves.EnclaveContext) (*enclaves.StarlarkRunResult, error) {
133-
logrus.Infof("ADD SERVICE STARLARK:\n%v", starlarkScript)
133+
logrus.Debugf("Add service starlark:\n%v", starlarkScript)
134134
starlarkRunResult, err := enclaveCtx.RunStarlarkScriptBlocking(ctx, starlarkScript, starlark_run_config.NewRunStarlarkConfig())
135135
if err != nil {
136136
return nil, stacktrace.Propagate(err, "An error has occurred when running Starlark to add service")

cli/cli/helpers/grafloki/docker_grafloki.go

+35-15
Original file line numberDiff line numberDiff line change
@@ -50,29 +50,38 @@ func StartGrafLokiInDocker(ctx context.Context, graflokiConfig resolved_config.G
5050
}
5151

5252
var lokiHost string
53+
var removeGrafanaAndLokiFunc func()
54+
shouldRemoveGrafanaAndLoki := false
5355
doesGrafanaAndLokiExist, lokiHost, err := checkGrafanaAndLokiContainerExistence(ctx, dockerManager, lokiContainerLabels, grafanaContainerLabels)
5456
if err != nil {
5557
return "", "", stacktrace.Propagate(err, "An error occurred checking if Grafana and Loki exist.")
5658
}
5759
if !doesGrafanaAndLokiExist {
5860
logrus.Infof("No running Grafana and Loki containers found. Creating them...")
59-
lokiHost, err = createGrafanaAndLokiContainers(ctx, dockerManager, graflokiConfig)
61+
lokiHost, removeGrafanaAndLokiFunc, err = createGrafanaAndLokiContainers(ctx, dockerManager, graflokiConfig)
6062
if err != nil {
6163
return "", "", stacktrace.Propagate(err, "An error occurred creating Grafana and Loki containers.")
6264
}
65+
shouldRemoveGrafanaAndLoki = true
66+
defer func() {
67+
if shouldRemoveGrafanaAndLoki {
68+
removeGrafanaAndLokiFunc()
69+
}
70+
}()
6371
}
6472

6573
grafanaUrl := fmt.Sprintf("http://%v:%v", localhostAddr, grafanaPort)
74+
shouldRemoveGrafanaAndLoki = false
6675
return lokiHost, grafanaUrl, nil
6776
}
6877

69-
func createGrafanaAndLokiContainers(ctx context.Context, dockerManager *docker_manager.DockerManager, graflokConfig resolved_config.GrafanaLokiConfig) (string, error) {
78+
func createGrafanaAndLokiContainers(ctx context.Context, dockerManager *docker_manager.DockerManager, graflokConfig resolved_config.GrafanaLokiConfig) (string, func(), error) {
7079
lokiNatPort := nat.Port(strconv.Itoa(lokiPort) + "/tcp")
7180
grafanaNatPort := nat.Port(strconv.Itoa(grafanaPort) + "/tcp")
7281

7382
bridgeNetworkId, err := dockerManager.GetNetworkIdByName(ctx, bridgeNetworkName)
7483
if err != nil {
75-
return "", stacktrace.Propagate(err, "An error occurred getting Docker network id by Name: %v", bridgeNetworkName)
84+
return "", nil, stacktrace.Propagate(err, "An error occurred getting Docker network id by Name: %v", bridgeNetworkName)
7685
}
7786

7887
lokiImage := defaultLokiImage
@@ -81,7 +90,7 @@ func createGrafanaAndLokiContainers(ctx context.Context, dockerManager *docker_m
8190
}
8291
lokiUuid, err := uuid_generator.GenerateUUIDString()
8392
if err != nil {
84-
return "", stacktrace.Propagate(err, "An error occurred generating a uuid for Loki.")
93+
return "", nil, stacktrace.Propagate(err, "An error occurred generating a uuid for Loki.")
8594
}
8695
lokiContainerName := fmt.Sprintf("%v%v", LokiContainerNamePrefix, lokiUuid)
8796
lokiArgs := docker_manager.NewCreateAndStartContainerArgsBuilder(lokiImage, lokiContainerName, bridgeNetworkId).
@@ -95,29 +104,32 @@ func createGrafanaAndLokiContainers(ctx context.Context, dockerManager *docker_m
95104
Build()
96105
lokiContainerId, _, err := dockerManager.CreateAndStartContainer(ctx, lokiArgs)
97106
if err != nil {
98-
return "", stacktrace.Propagate(err, "An error occurred creating '%v' container.", lokiContainerName)
107+
return "", nil, stacktrace.Propagate(err, "An error occurred creating '%v' container.", lokiContainerName)
99108
}
100109
shouldDestroyLokiContainer := true
101-
defer func() {
110+
removeLokiContainerFunc := func() {
102111
if shouldDestroyLokiContainer {
103112
err := dockerManager.RemoveContainer(ctx, lokiContainerId)
104113
if err != nil {
105114
logrus.Warnf("Attempted to remove Loki container after an error occurred creating it but an error occurred removing it.")
106115
logrus.Warnf("Manually remove Loki container with id: %v", lokiContainerId)
107116
}
108117
}
118+
}
119+
defer func() {
120+
removeLokiContainerFunc()
109121
}()
110122
logrus.Infof("Loki container started.")
111123

112124
lokiBridgeNetworkIpAddr, err := dockerManager.GetContainerIPOnNetwork(ctx, lokiContainerId, bridgeNetworkName)
113125
if err != nil {
114-
return "", stacktrace.Propagate(err, "An error occurred getting container '%v' ip address on network '%v'.", lokiContainerId, bridgeNetworkName)
126+
return "", nil, stacktrace.Propagate(err, "An error occurred getting container '%v' ip address on network '%v'.", lokiContainerId, bridgeNetworkName)
115127
}
116128

117129
lokiBridgeNetworkIpAddress := fmt.Sprintf("http://%v:%v", lokiBridgeNetworkIpAddr, lokiPort)
118130
lokiHostNetworkIpAddress := fmt.Sprintf("http://%v:%v", localhostAddr, lokiPort)
119131
if err := waitForLokiReadiness(lokiHostNetworkIpAddress, lokiReadinessPath); err != nil {
120-
return "", stacktrace.Propagate(err, "An error occurred waiting for Loki container to become ready.")
132+
return "", nil, stacktrace.Propagate(err, "An error occurred waiting for Loki container to become ready.")
121133
}
122134

123135
grafanaDatasource := &GrafanaDatasources{
@@ -134,17 +146,17 @@ func createGrafanaAndLokiContainers(ctx context.Context, dockerManager *docker_m
134146
}}
135147
grafanaDatasourceYaml, err := yaml.Marshal(grafanaDatasource)
136148
if err != nil {
137-
return "", stacktrace.Propagate(err, "An error occurred serializing Grafana datasource to yaml: %v", grafanaDatasourceYaml)
149+
return "", nil, stacktrace.Propagate(err, "An error occurred serializing Grafana datasource to yaml: %v", grafanaDatasourceYaml)
138150
}
139151
logrus.Debugf("Grafana data source yaml %v", string(grafanaDatasourceYaml))
140152

141153
tmpFile, err := os.CreateTemp("", "grafana-datasource-*.yaml")
142154
if err != nil {
143-
return "", stacktrace.Propagate(err, "An error occurred creating temp datasource config.")
155+
return "", nil, stacktrace.Propagate(err, "An error occurred creating temp datasource config.")
144156
}
145157
defer tmpFile.Close()
146158
if _, err := tmpFile.WriteString(string(grafanaDatasourceYaml)); err != nil {
147-
return "", stacktrace.Propagate(err, "An error occurred writing config.")
159+
return "", nil, stacktrace.Propagate(err, "An error occurred writing config.")
148160
}
149161

150162
grafanaImage := defaultGrafanaImage
@@ -154,7 +166,7 @@ func createGrafanaAndLokiContainers(ctx context.Context, dockerManager *docker_m
154166
root := service_user.NewServiceUser(rootUserUid)
155167
grafanaUuid, err := uuid_generator.GenerateUUIDString()
156168
if err != nil {
157-
return "", stacktrace.Propagate(err, "An error occurred generating a uuid for Grafana.")
169+
return "", nil, stacktrace.Propagate(err, "An error occurred generating a uuid for Grafana.")
158170
}
159171
grafanaContainerName := fmt.Sprintf("%v%v", GrafanaContainerNamePrefix, grafanaUuid)
160172
grafanaArgs := docker_manager.NewCreateAndStartContainerArgsBuilder(grafanaImage, grafanaContainerName, bridgeNetworkId).
@@ -177,23 +189,31 @@ func createGrafanaAndLokiContainers(ctx context.Context, dockerManager *docker_m
177189
Build()
178190
grafanaContainerId, _, err := dockerManager.CreateAndStartContainer(ctx, grafanaArgs)
179191
if err != nil {
180-
return "", stacktrace.Propagate(err, "An error creating creating '%v' container.", grafanaContainerName)
192+
return "", nil, stacktrace.Propagate(err, "An error creating creating '%v' container.", grafanaContainerName)
181193
}
182194
shouldDestroyGrafanaContainer := true
183-
defer func() {
195+
removeGrafanaContainerFunc := func() {
184196
if shouldDestroyGrafanaContainer {
185197
err := dockerManager.RemoveContainer(ctx, grafanaContainerId)
186198
if err != nil {
187199
logrus.Warnf("Attempted to remove Grafana container after an error occurred creating it but an error occurred removing it.")
188200
logrus.Warnf("Manually remove Grafana container with id: %v", grafanaContainerId)
189201
}
190202
}
203+
}
204+
defer func() {
205+
removeGrafanaContainerFunc()
191206
}()
192207
logrus.Infof("Grafana container started.")
193208

209+
removeGrafanaAndLokiContainersFunc := func() {
210+
removeLokiContainerFunc()
211+
removeGrafanaContainerFunc()
212+
}
213+
194214
shouldDestroyLokiContainer = false
195215
shouldDestroyGrafanaContainer = false
196-
return lokiBridgeNetworkIpAddress, nil
216+
return lokiBridgeNetworkIpAddress, removeGrafanaAndLokiContainersFunc, nil
197217
}
198218

199219
func waitForLokiReadiness(lokiHost string, readyPath string) error {

cli/cli/helpers/grafloki/kubernetes_grafloki.go

+17-11
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ const (
3030
lokiProbeTimeoutSeconds = 10
3131

3232
// takes around 30 seconds for loki pod to become ready
33-
lokiDeploymentMaxRetries = 40
33+
lokiDeploymentMaxRetries = 60
3434
lokiDeploymentRetryInterval = 1 * time.Second
3535
defaultStorageClass = ""
3636
)
@@ -53,8 +53,12 @@ func StartGrafLokiInKubernetes(ctx context.Context, graflokiConfig resolved_conf
5353
var lokiHost string
5454
var removeGrafanaAndLokiFunc func()
5555
shouldRemoveGrafanaAndLoki := false
56-
doesGrafanaAndLokiExist, lokiHost := checkGrafanaAndLokiDeploymentExistence(ctx, k8sManager)
56+
doesGrafanaAndLokiExist, lokiHost, err := checkGrafanaAndLokiDeploymentExistence(ctx, k8sManager)
57+
if err != nil {
58+
return "", "", stacktrace.Propagate(err, "An error occurred checking if Grafana and Loki exist.")
59+
}
5760
if !doesGrafanaAndLokiExist {
61+
logrus.Infof("No running Grafana and Loki deployments found. Creating them...")
5862
lokiHost, removeGrafanaAndLokiFunc, err = createGrafanaAndLokiDeployments(ctx, k8sManager, graflokiConfig)
5963
if err != nil {
6064
return "", "", stacktrace.Propagate(err, "An error occurred creating Grafana and Loki deployments.")
@@ -445,27 +449,29 @@ func createGrafanaAndLokiDeployments(ctx context.Context, k8sManager *kubernetes
445449
return lokiHost, removeGrafanaAndLokiDeploymentsFunc, nil
446450
}
447451

448-
func checkGrafanaAndLokiDeploymentExistence(ctx context.Context, k8sManager *kubernetes_manager.KubernetesManager) (bool, string) {
452+
func checkGrafanaAndLokiDeploymentExistence(ctx context.Context, k8sManager *kubernetes_manager.KubernetesManager) (bool, string, error) {
449453
existsLoki := false
450454
existsGrafana := false
451455
var lokiHost string
452456

453457
lokiDeployment, err := k8sManager.GetDeployment(ctx, graflokiNamespace, lokiDeploymentName)
454-
if err == nil && lokiDeployment != nil {
458+
if err != nil {
459+
return false, "", stacktrace.Propagate(err, "An error occurred getting Loki deployment '%v'", lokiDeploymentName)
460+
}
461+
if lokiDeployment != nil {
455462
existsLoki = true
456463
lokiHost = getLokiUrlInsideK8sCluster(lokiServiceName, graflokiNamespace, lokiNodePort)
457-
} else {
458-
return existsLoki, "" // loki doesn't in this case so eject early
459464
}
460465

461466
grafanaDeployment, err := k8sManager.GetDeployment(ctx, graflokiNamespace, grafanaDeploymentName)
462-
if err == nil && grafanaDeployment != nil {
463-
existsGrafana = false
464-
} else {
465-
return existsGrafana, ""
467+
if err != nil {
468+
return false, "", stacktrace.Propagate(err, "An error occurred getting Grafana deployment '%v'", grafanaDeploymentName)
469+
}
470+
if grafanaDeployment != nil {
471+
existsGrafana = true
466472
}
467473

468-
return existsLoki && existsGrafana, lokiHost
474+
return existsLoki && existsGrafana, lokiHost, nil
469475
}
470476

471477
func StopGrafLokiInKubernetes(ctx context.Context) error {

container-engine-lib/lib/backend_impls/kubernetes/kubernetes_manager/kubernetes_manager.go

+8-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"encoding/json"
1212
"fmt"
1313
"io"
14+
apierrors "k8s.io/apimachinery/pkg/api/errors"
1415
"net/http"
1516
"net/url"
1617
"os"
@@ -1444,20 +1445,23 @@ func (manager *KubernetesManager) RemoveDeployment(ctx context.Context, namespac
14441445
}
14451446

14461447
func (manager *KubernetesManager) GetDeployment(ctx context.Context, namespace string, name string) (*v1.Deployment, error) {
1447-
daemonSetClient := manager.kubernetesClientSet.AppsV1().Deployments(namespace)
1448+
deploymentClient := manager.kubernetesClientSet.AppsV1().Deployments(namespace)
14481449

1449-
daemonSet, err := daemonSetClient.Get(ctx, name, metav1.GetOptions{
1450+
deployment, err := deploymentClient.Get(ctx, name, metav1.GetOptions{
14501451
TypeMeta: metav1.TypeMeta{
14511452
Kind: "",
14521453
APIVersion: "",
14531454
},
14541455
ResourceVersion: "",
14551456
})
1457+
if apierrors.IsNotFound(err) {
1458+
return nil, nil // in the case the deployment doesn't exist, simply return a nil object
1459+
}
14561460
if err != nil {
1457-
return nil, stacktrace.Propagate(err, "Failed to get daemon set with name '%s'", name)
1461+
return nil, stacktrace.Propagate(err, "Failed to get deployment with name '%s'", name)
14581462
}
14591463

1460-
return daemonSet, nil
1464+
return deployment, nil
14611465
}
14621466

14631467
func (manager *KubernetesManager) CreateDeployment(

docs/docs/cli-reference/service-add.md

+20-2
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,13 @@ To add a service to an enclave, run:
1010
kurtosis service add $THE_ENCLAVE_IDENTIFIER $THE_SERVICE_IDENTIFIER $CONTAINER_IMAGE
1111
```
1212

13-
where `$THE_ENCLAVE_IDENTIFIER` and the `$THE_SERVICE_IDENTIFIER` are [resource identifiers](../advanced-concepts/resource-identifier.md) for the enclave and service, respectively.
13+
where `$THE_ENCLAVE_IDENTIFIER` and the `$THE_SERVICE_IDENTIFIER` are [resource identifiers](../advanced-concepts/resource-identifier.md) for the enclave and service, respectively.
1414
Note, the service identifier needs to be formatted according to RFC 1035. Specifically, it must be 1-63 characters long, contain only lowercase alphanumeric characters and dashes, and cannot start or end with a dash. Also, service names
15-
have to start with a lowercase alphabet.
15+
have to start with a lowercase letter.
1616

1717
Much like `docker run`, this command has multiple options available to customize the service that's started:
1818

19+
1. The `--cmd` flag can be used to override the default command that the container runs
1920
1. The `--entrypoint` flag can be passed in to override the binary the service runs
2021
1. The `--env` flag can be used to specify a set of environment variables that should be set when running the service
2122
1. The `--ports` flag can be used to set the ports that the service will listen on
@@ -25,3 +26,20 @@ To override the service's CMD, add a `--` after the image name and then pass in
2526
```bash
2627
kurtosis service add --entrypoint sh my-enclave test-service alpine -- -c "echo 'Hello world'"
2728
```
29+
30+
Alternatively, if you have an existing service config in JSON format (for example, one that was output using `kurtosis service inspect`), you can use the `--json-service-config` flag to add a service using that config:
31+
32+
```bash
33+
kurtosis service add my-enclave test-service --json-service-config ./my-service-config.json
34+
```
35+
36+
To read the JSON config from stdin, use:
37+
38+
```bash
39+
kurtosis service add my-enclave test-service --json-service-config - < ./my-service-config.json
40+
```
41+
42+
:::note Override
43+
When using `--json-service-config`, the standard flags and args like `--image`, `--cmd`, `--entrypoint`, `--env`, and `$CONTAINER_IMAGE` will be ignored in favor of the provided config.
44+
:::
45+

docs/docs/cli-reference/service-inspect.md

+5
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,8 @@ Running the above command will print detailed information about:
2222

2323
By default, the service UUID is shortened. To view the full UUID of your service, add the following flag:
2424
* `--full-uuid`
25+
26+
You can also control the output format using the `--output` (`-o`) flag:
27+
* `--output yaml` will print the service config in YAML format
28+
* `--output json` will print the service config in JSON format (this can be piped into `service add` via `--json-service-config`)
29+
* If `--output` is omitted, the result will be printed in a human-readable format
+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
---
2+
title: service update
3+
sidebar_label: service update
4+
slug: /service-update
5+
---
6+
7+
To update an existing service in an enclave, run:
8+
9+
```bash
10+
kurtosis service update $THE_ENCLAVE_IDENTIFIER $THE_SERVICE_IDENTIFIER [flags]
11+
```
12+
13+
where `$THE_ENCLAVE_IDENTIFIER` and `$THE_SERVICE_IDENTIFIER` are [resource identifiers](../advanced-concepts/resource-identifier.md) for the enclave and service, respectively.
14+
15+
This command updates a service in-place by modifying its configuration. Only the specified parameters will be changed — the rest of the service config will remain as-is.
16+
17+
Much like `docker run`, this command has multiple options available to customize the updated service:
18+
19+
1. The `--image` flag can be used to update the service’s container image
20+
1. The `--entrypoint` flag can override the binary the service runs
21+
1. The `--env` flag can be used to set or override environment variables. Env var overrides with the same key will override existing env vars.
22+
1. The `--ports` flag can be used to add or override private port definitions. Port overrides with the same port id will override existing port bindings.
23+
1. The `--files` flag can be used to mount new files artifacts. Files artifact overrides with the same key will override existing files artifact mounts.
24+
1. The `--cmd` flag can be used to override the CMD that is run when the container starts
25+
26+
Example:
27+
28+
```bash
29+
kurtosis service update my-enclave test-service \
30+
--image my-custom-image \
31+
--entrypoint my-binary \
32+
--env "FOO:bar,BAR:baz" \
33+
--ports "port1:8080/tcp"
34+
```
35+
36+
:::note Restarted Container
37+
This command replaces the existing service with a new container using the updated configuration. The service will be briefly stopped and restarted as part of this process.
38+
:::
39+

0 commit comments

Comments
 (0)