Skip to content

Commit

Permalink
chore: add container id to talosctl -k containers and `talosctl -k …
Browse files Browse the repository at this point in the history
…logs`

This PR takes the first 12 characters of each container ID and appends them to the output of `talosctl -k containers` for every container.
That way we can be sure we are getting the logs from the proper container, even if a newer one with the same name exists.

Closes #6886

Co-authored-by: Utku Ozdemir <utku.ozdemir@siderolabs.com>
Signed-off-by: Dmitriy Matrenichev <dmitry.matrenichev@siderolabs.com>
  • Loading branch information
DmitriyMV and utkuozdemir committed Mar 7, 2023
1 parent 22ef81c commit ebc92f3
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 44 deletions.
7 changes: 7 additions & 0 deletions hack/release.toml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,13 @@ and has the capability to wipe the node's system disk (partial wipe is not suppo
Talos now supports a new `os:operator` role for the Talos API.
This role allows everything `os:reader` role allows plus access to maintenance APIs:
rebooting, shutting down a node, accessing packet capture, etcd alarm APIs, etcd backup, etc.
"""

[notes.containers-output]
title = "Talosctl containers"
description = """\
`talosctl logs -k` and `talosctl containers -k` now support and output container display names suffixed with their container IDs.
This makes it possible to distinguish between containers with the same name.
"""

[make_deps]
Expand Down
65 changes: 49 additions & 16 deletions internal/pkg/containers/cri/cri.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package cri
import (
"context"
"encoding/json"
"fmt"
"strings"
"syscall"
"time"
Expand Down Expand Up @@ -85,28 +86,30 @@ func (i *inspector) Images() (map[string]string, error) {
return result, nil
}

// parseContainerDisplay parses a container display ID of the form
// "namespace/pod:name:containerID".
//
// Trailing components may be omitted ("namespace/pod:name",
// "namespace/pod"); missing parts come back as empty strings.
// An ID without a "/" is treated as a bare container name.
func parseContainerDisplay(id string) (namespace, pod, name, containerID string) {
	namespace, pod, ok := strings.Cut(id, "/")
	if !ok {
		return "", "", id, ""
	}

	pod, name, ok = strings.Cut(pod, ":")
	if !ok {
		return namespace, pod, "", ""
	}

	name, containerID, ok = strings.Cut(name, ":")
	if !ok {
		return namespace, pod, name, ""
	}

	return namespace, pod, name, containerID
}

// Container returns info about a single container.
//
// If container is not found, Container returns nil.
func (i *inspector) Container(id string) (*ctrs.Container, error) {
namespace, pod, name := parseContainerDisplay(id)
namespace, pod, name, cntID := parseContainerDisplay(id)
if pod == "" {
return nil, nil
}
Expand Down Expand Up @@ -157,9 +160,30 @@ func (i *inspector) Container(id string) (*ctrs.Container, error) {
return nil, nil
}

if cntID != "" {
cnt, ok := findContainer(cntID, containers)
if !ok {
return nil, fmt.Errorf("container %q not found", id)
}

return i.buildContainer(cnt)
}

return i.buildContainer(containers[0])
}

// findContainer returns the first container whose CRI container ID
// contains cntID (in practice a truncated ID prefix — see safeCut),
// along with whether a match was found.
func findContainer(cntID string, containers []*runtimeapi.Container) (*runtimeapi.Container, bool) {
	// We could probably find the container using CRI labels, but it is
	// unclear whether that would work with partial IDs, so do a simple
	// substring scan instead.
	for _, cnt := range containers {
		if strings.Contains(cnt.Id, cntID) {
			return cnt, true
		}
	}

	return nil, false
}

func (i *inspector) buildPod(sandbox *runtimeapi.PodSandbox) (*ctrs.Pod, error) {
sandboxStatus, sandboxInfo, err := i.client.PodSandboxStatus(i.ctx, sandbox.Id)
if err != nil {
Expand Down Expand Up @@ -214,9 +238,10 @@ func (i *inspector) buildContainer(container *runtimeapi.Container) (*ctrs.Conta
}

podName := container.Labels["io.kubernetes.pod.namespace"] + "/" + container.Labels["io.kubernetes.pod.name"]

ctr := &ctrs.Container{
Inspector: i,
Display: podName + ":" + container.Metadata.Name,
Display: podName + ":" + container.Metadata.Name + ":" + safeCut(container.Id, 12),
Name: container.Metadata.Name,
ID: container.Id,
Digest: container.ImageRef,
Expand All @@ -242,6 +267,14 @@ func (i *inspector) buildContainer(container *runtimeapi.Container) (*ctrs.Conta
return ctr, nil
}

// safeCut returns at most the first i bytes of id, leaving id
// unchanged when it is already short enough.
func safeCut(id string, i int) string {
	if len(id) <= i {
		return id
	}

	return id[:i]
}

// Pods collects information about running pods & containers.
//
//nolint:gocyclo
Expand Down Expand Up @@ -330,13 +363,13 @@ func (i *inspector) Pods() ([]*ctrs.Pod, error) {
}

// GetProcessStderr returns process stderr.
func (i *inspector) GetProcessStderr(id string) (string, error) {
func (i *inspector) GetProcessStderr(string) (string, error) {
// CRI doesn't seem to have an easy way to do that
return "", nil
}

// Kill sends signal to container task.
func (i *inspector) Kill(id string, isPodSandbox bool, signal syscall.Signal) error {
func (i *inspector) Kill(id string, isPodSandbox bool, _ syscall.Signal) error {
if isPodSandbox {
return i.client.StopPodSandbox(i.ctx, id)
}
Expand Down
8 changes: 4 additions & 4 deletions website/content/v1.4/advanced/static-pods.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,16 @@ If the API server is not available, status of the static pod can also be inspect

```bash
$ talosctl containers --kubernetes
NODE NAMESPACE ID IMAGE PID STATUS
172.20.0.3 k8s.io default/nginx-talos-default-controlplane-2 registry.k8s.io/pause:3.6 4886 SANDBOX_READY
172.20.0.3 k8s.io └─ default/nginx-talos-default-controlplane-2:nginx docker.io/library/nginx:latest
NODE NAMESPACE ID IMAGE PID STATUS
172.20.0.3 k8s.io default/nginx-talos-default-controlplane-2 registry.k8s.io/pause:3.6 4886 SANDBOX_READY
172.20.0.3 k8s.io └─ default/nginx-talos-default-controlplane-2:nginx:4183a7d7a771 docker.io/library/nginx:latest
...
```

Logs of static pods can be retrieved with `talosctl logs --kubernetes`:

```bash
$ talosctl logs --kubernetes default/nginx-talos-default-controlplane-2:nginx
$ talosctl logs --kubernetes default/nginx-talos-default-controlplane-2:nginx:4183a7d7a771
172.20.0.3: 2022-02-10T15:26:01.289208227Z stderr F 2022/02/10 15:26:01 [notice] 1#1: using the "epoll" event method
172.20.0.3: 2022-02-10T15:26:01.2892466Z stderr F 2022/02/10 15:26:01 [notice] 1#1: nginx/1.21.6
172.20.0.3: 2022-02-10T15:26:01.28925723Z stderr F 2022/02/10 15:26:01 [notice] 1#1: built by gcc 10.2.1 20210110 (Debian 10.2.1-6)
Expand Down
16 changes: 8 additions & 8 deletions website/content/v1.4/advanced/troubleshooting-control-plane.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,14 @@ In any case, the status of the control plane components on each control plane no
$ talosctl -n <IP> containers --kubernetes
NODE NAMESPACE ID IMAGE PID STATUS
172.20.0.2 k8s.io kube-system/kube-apiserver-talos-default-controlplane-1 registry.k8s.io/pause:3.2 2539 SANDBOX_READY
172.20.0.2 k8s.io └─ kube-system/kube-apiserver-talos-default-controlplane-1:kube-apiserver registry.k8s.io/kube-apiserver:v{{< k8s_release >}} 2572 CONTAINER_RUNNING
172.20.0.2 k8s.io └─ kube-system/kube-apiserver-talos-default-controlplane-1:kube-apiserver:51c3aad7a271 registry.k8s.io/kube-apiserver:v{{< k8s_release >}} 2572 CONTAINER_RUNNING
```

If `kube-apiserver` shows as `CONTAINER_EXITED`, it might have exited due to configuration error.
Logs can be checked with `talosctl logs --kubernetes` (or with `-k` as a shorthand):

```bash
$ talosctl -n <IP> logs -k kube-system/kube-apiserver-talos-default-controlplane-1:kube-apiserver
$ talosctl -n <IP> logs -k kube-system/kube-apiserver-talos-default-controlplane-1:kube-apiserver:51c3aad7a271
172.20.0.2: 2021-03-05T20:46:13.133902064Z stderr F 2021/03/05 20:46:13 Running command:
172.20.0.2: 2021-03-05T20:46:13.133933824Z stderr F Command env: (log-file=, also-stdout=false, redirect-stderr=true)
172.20.0.2: 2021-03-05T20:46:13.133938524Z stderr F Run from directory:
Expand Down Expand Up @@ -271,12 +271,12 @@ If the control plane endpoint is not yet up, the container status of the control
```bash
$ talosctl -n <IP> c -k
NODE NAMESPACE ID IMAGE PID STATUS
NODE NAMESPACE ID IMAGE PID STATUS
...
172.20.0.2 k8s.io kube-system/kube-controller-manager-talos-default-controlplane-1 registry.k8s.io/pause:3.2 2547 SANDBOX_READY
172.20.0.2 k8s.io └─ kube-system/kube-controller-manager-talos-default-controlplane-1:kube-controller-manager registry.k8s.io/kube-controller-manager:v{{< k8s_release >}} 2580 CONTAINER_RUNNING
172.20.0.2 k8s.io kube-system/kube-scheduler-talos-default-controlplane-1 registry.k8s.io/pause:3.2 2638 SANDBOX_READY
172.20.0.2 k8s.io └─ kube-system/kube-scheduler-talos-default-controlplane-1:kube-scheduler registry.k8s.io/kube-scheduler:v{{< k8s_release >}} 2670 CONTAINER_RUNNING
172.20.0.2 k8s.io kube-system/kube-controller-manager-talos-default-controlplane-1 registry.k8s.io/pause:3.2 2547 SANDBOX_READY
172.20.0.2 k8s.io └─ kube-system/kube-controller-manager-talos-default-controlplane-1:kube-controller-manager:84fc77c59e17 registry.k8s.io/kube-controller-manager:v{{< k8s_release >}} 2580 CONTAINER_RUNNING
172.20.0.2 k8s.io kube-system/kube-scheduler-talos-default-controlplane-1 registry.k8s.io/pause:3.2 2638 SANDBOX_READY
172.20.0.2 k8s.io └─ kube-system/kube-scheduler-talos-default-controlplane-1:kube-scheduler:4182a7d7f779 registry.k8s.io/kube-scheduler:v{{< k8s_release >}} 2670 CONTAINER_RUNNING
...
```
Expand All @@ -285,7 +285,7 @@ Otherwise the process might be crashing.
The logs can be checked with `talosctl logs --kubernetes <containerID>`:
```bash
$ talosctl -n <IP> logs -k kube-system/kube-controller-manager-talos-default-controlplane-1:kube-controller-manager
$ talosctl -n <IP> logs -k kube-system/kube-controller-manager-talos-default-controlplane-1:kube-controller-manager:84fc77c59e17
172.20.0.3: 2021-03-09T13:59:34.291667526Z stderr F 2021/03/09 13:59:34 Running command:
172.20.0.3: 2021-03-09T13:59:34.291702262Z stderr F Command env: (log-file=, also-stdout=false, redirect-stderr=true)
172.20.0.3: 2021-03-09T13:59:34.291707121Z stderr F Run from directory:
Expand Down
14 changes: 7 additions & 7 deletions website/content/v1.4/talos-guides/configuration/containerd.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ Now the `pause` image is set to `registry.k8s.io/pause:3.8`:

```bash
$ talosctl containers --kubernetes
NODE NAMESPACE ID IMAGE PID STATUS
172.20.0.5 k8s.io kube-system/kube-flannel-6hfck registry.k8s.io/pause:3.8 1773 SANDBOX_READY
172.20.0.5 k8s.io └─ kube-system/kube-flannel-6hfck:install-cni ghcr.io/siderolabs/install-cni:v1.3.0-alpha.0-2-gb155fa0 0 CONTAINER_EXITED
172.20.0.5 k8s.io └─ kube-system/kube-flannel-6hfck:install-config ghcr.io/siderolabs/flannel:v0.20.1 0 CONTAINER_EXITED
172.20.0.5 k8s.io └─ kube-system/kube-flannel-6hfck:kube-flannel ghcr.io/siderolabs/flannel:v0.20.1 2092 CONTAINER_RUNNING
172.20.0.5 k8s.io kube-system/kube-proxy-xp7jq registry.k8s.io/pause:3.8 1780 SANDBOX_READY
172.20.0.5 k8s.io └─ kube-system/kube-proxy-xp7jq:kube-proxy registry.k8s.io/kube-proxy:v1.26.0-alpha.3 1843 CONTAINER_RUNNING
NODE NAMESPACE ID IMAGE PID STATUS
172.20.0.5 k8s.io kube-system/kube-flannel-6hfck registry.k8s.io/pause:3.8 1773 SANDBOX_READY
172.20.0.5 k8s.io └─ kube-system/kube-flannel-6hfck:install-cni:bc39fec3cbac ghcr.io/siderolabs/install-cni:v1.3.0-alpha.0-2-gb155fa0 0 CONTAINER_EXITED
172.20.0.5 k8s.io └─ kube-system/kube-flannel-6hfck:install-config:5c3989353b98 ghcr.io/siderolabs/flannel:v0.20.1 0 CONTAINER_EXITED
172.20.0.5 k8s.io └─ kube-system/kube-flannel-6hfck:kube-flannel:116c67b50da8 ghcr.io/siderolabs/flannel:v0.20.1 2092 CONTAINER_RUNNING
172.20.0.5 k8s.io kube-system/kube-proxy-xp7jq registry.k8s.io/pause:3.8 1780 SANDBOX_READY
172.20.0.5 k8s.io └─ kube-system/kube-proxy-xp7jq:kube-proxy:84fc77c59e17 registry.k8s.io/kube-proxy:v1.26.0-alpha.3 1843 CONTAINER_RUNNING
```
18 changes: 9 additions & 9 deletions website/content/v1.4/talos-guides/configuration/logging.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ Container logs for Kubernetes pods can be retrieved with `talosctl logs -k` comm

```sh
$ talosctl -n 172.20.1.2 containers -k
NODE NAMESPACE ID IMAGE PID STATUS
172.20.1.2 k8s.io kube-system/kube-flannel-dk6d5 registry.k8s.io/pause:3.6 1329 SANDBOX_READY
172.20.1.2 k8s.io └─ kube-system/kube-flannel-dk6d5:install-cni ghcr.io/siderolabs/install-cni:v0.7.0-alpha.0-1-g2bb2efc 0 CONTAINER_EXITED
172.20.1.2 k8s.io └─ kube-system/kube-flannel-dk6d5:install-config quay.io/coreos/flannel:v0.13.0 0 CONTAINER_EXITED
172.20.1.2 k8s.io └─ kube-system/kube-flannel-dk6d5:kube-flannel quay.io/coreos/flannel:v0.13.0 1610 CONTAINER_RUNNING
172.20.1.2 k8s.io kube-system/kube-proxy-gfkqj registry.k8s.io/pause:3.5 1311 SANDBOX_READY
172.20.1.2 k8s.io └─ kube-system/kube-proxy-gfkqj:kube-proxy registry.k8s.io/kube-proxy:v{{< k8s_release >}} 1379 CONTAINER_RUNNING

$ talosctl -n 172.20.1.2 logs -k kube-system/kube-proxy-gfkqj:kube-proxy
NODE NAMESPACE ID IMAGE PID STATUS
172.20.1.2 k8s.io kube-system/kube-flannel-dk6d5 registry.k8s.io/pause:3.6 1329 SANDBOX_READY
172.20.1.2 k8s.io └─ kube-system/kube-flannel-dk6d5:install-cni:f1d4cf68feb9 ghcr.io/siderolabs/install-cni:v0.7.0-alpha.0-1-g2bb2efc 0 CONTAINER_EXITED
172.20.1.2 k8s.io └─ kube-system/kube-flannel-dk6d5:install-config:bc39fec3cbac quay.io/coreos/flannel:v0.13.0 0 CONTAINER_EXITED
172.20.1.2 k8s.io └─ kube-system/kube-flannel-dk6d5:kube-flannel:5c3989353b98 quay.io/coreos/flannel:v0.13.0 1610 CONTAINER_RUNNING
172.20.1.2 k8s.io kube-system/kube-proxy-gfkqj registry.k8s.io/pause:3.5 1311 SANDBOX_READY
172.20.1.2 k8s.io └─ kube-system/kube-proxy-gfkqj:kube-proxy:ad5e8ddc7e7f registry.k8s.io/kube-proxy:v{{< k8s_release >}} 1379 CONTAINER_RUNNING

$ talosctl -n 172.20.1.2 logs -k kube-system/kube-proxy-gfkqj:kube-proxy:ad5e8ddc7e7f
172.20.1.2: 2021-11-30T19:13:20.567825192Z stderr F I1130 19:13:20.567737 1 server_others.go:138] "Detected node IP" address="172.20.0.3"
172.20.1.2: 2021-11-30T19:13:20.599684397Z stderr F I1130 19:13:20.599613 1 server_others.go:206] "Using iptables Proxier"
[...]
Expand Down

0 comments on commit ebc92f3

Please sign in to comment.