diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c4c0f51e96ea0..a934694517af4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -379,7 +379,7 @@ jobs:
   integration-linux:
     name: Linux Integration
     runs-on: ${{ matrix.os }}
-    timeout-minutes: 40
+    timeout-minutes: 60
     needs: [project, linters, protos, man]
 
     strategy:
@@ -412,10 +412,6 @@ jobs:
           script/setup/install-failpoint-binaries
 
       - name: Install criu
-        # NOTE: Required arm64 enable CONFIG_CHECKPOINT_RESTORE (need to confirm GitHub action runners config)
-        #
-        # REF: https://criu.org/Linux_kernel
-        if: matrix.os != 'arm64-8core-32gb'
        run: |
           sudo add-apt-repository -y ppa:criu/ppa
           sudo apt-get update
@@ -499,6 +495,22 @@ jobs:
           env
           sudo -E PATH=$PATH ./script/critest.sh "${{github.workspace}}/report"
 
+      - name: Install tools
+        # The GitHub Actions images already include buildah and podman, but
+        # the arm64-8core-32gb images do not. buildah and podman are needed
+        # to convert the Kubernetes checkpoint archive to an OCI image.
+        # See contrib/checkpoint/checkpoint-restore-cri-test.sh
+        if: matrix.os == 'arm64-8core-32gb'
+        run: sudo apt-get install -y buildah podman
+
+      - name: Checkpoint/Restore via CRI
+        env:
+          TEST_RUNTIME: ${{ matrix.runtime }}
+          CGROUP_DRIVER: ${{ matrix.cgroup_driver }}
+        run: |
+          env
+          sudo -E PATH=$PATH ./contrib/checkpoint/checkpoint-restore-cri-test.sh
+
       # Log the status of this VM to investigate issues like
       # https://github.com/containerd/containerd/issues/4969
       - name: Host Status
diff --git a/api/go.mod b/api/go.mod
index e7ad7e4d50f5e..20163ce1f74c2 100644
--- a/api/go.mod
+++ b/api/go.mod
@@ -20,4 +20,5 @@ require (
 	golang.org/x/net v0.23.0 // indirect
 	golang.org/x/sys v0.18.0 // indirect
 	golang.org/x/text v0.14.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/api/go.sum b/api/go.sum
index 0311d1c1a2271..327b575eb3d76 100644
--- a/api/go.sum
+++ b/api/go.sum
@@ -76,5 +76,5 @@ google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGm
 google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
-gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/client/container.go b/client/container.go
index b9cf25e9370f7..dc2abe975402d 100644
--- a/client/container.go
+++ b/client/container.go
@@ -85,6 +85,9 @@ type Container interface {
 	Update(context.Context, ...UpdateContainerOpts) error
 	// Checkpoint creates a checkpoint image of the current container
 	Checkpoint(context.Context, string, ...CheckpointOpts) (Image, error)
+	// Restore restores a container and returns the PID of the
+	// restored container's init process.
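+	//
+	// A minimal usage sketch (checkpointDir is illustrative):
+	//
+	//	pid, err := container.Restore(ctx, cio.NewCreator(cio.WithStdio), checkpointDir)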
+	Restore(context.Context, cio.Creator, string) (int, error)
 }
 
 func containerFromRecord(client *Client, c containers.Container) *container {
@@ -242,41 +245,13 @@ func (c *container) NewTask(ctx context.Context, ioCreate cio.Creator, opts ...N
 		Stdout: cfg.Stdout,
 		Stderr: cfg.Stderr,
 	}
-	r, err := c.get(ctx)
-	if err != nil {
+	if err := c.handleMounts(ctx, request); err != nil {
 		return nil, err
 	}
-	if r.SnapshotKey != "" {
-		if r.Snapshotter == "" {
-			return nil, fmt.Errorf("unable to resolve rootfs mounts without snapshotter on container: %w", errdefs.ErrInvalidArgument)
-		}
-		// get the rootfs from the snapshotter and add it to the request
-		s, err := c.client.getSnapshotter(ctx, r.Snapshotter)
-		if err != nil {
-			return nil, err
-		}
-		mounts, err := s.Mounts(ctx, r.SnapshotKey)
-		if err != nil {
-			return nil, err
-		}
-		spec, err := c.Spec(ctx)
-		if err != nil {
-			return nil, err
-		}
-		for _, m := range mounts {
-			if spec.Linux != nil && spec.Linux.MountLabel != "" {
-				if ml := label.FormatMountLabel("", spec.Linux.MountLabel); ml != "" {
-					m.Options = append(m.Options, ml)
-				}
-			}
-			request.Rootfs = append(request.Rootfs, &types.Mount{
-				Type:    m.Type,
-				Source:  m.Source,
-				Target:  m.Target,
-				Options: m.Options,
-			})
-		}
+	r, err := c.get(ctx)
+	if err != nil {
+		return nil, err
 	}
 	info := TaskInfo{
 		runtime: r.Runtime.Name,
@@ -348,6 +323,94 @@ func (c *container) Update(ctx context.Context, opts ...UpdateContainerOpts) err
 	return nil
 }
 
+func (c *container) handleMounts(ctx context.Context, request *tasks.CreateTaskRequest) error {
+	r, err := c.get(ctx)
+	if err != nil {
+		return err
+	}
+
+	if r.SnapshotKey != "" {
+		if r.Snapshotter == "" {
+			return fmt.Errorf("unable to resolve rootfs mounts without snapshotter on container: %w", errdefs.ErrInvalidArgument)
+		}
+
+		// get the rootfs from the snapshotter and add it to the request
+		s, err := c.client.getSnapshotter(ctx, r.Snapshotter)
+		if err != nil {
+			return err
+		}
+		mounts, err := s.Mounts(ctx, r.SnapshotKey)
+		if err != nil {
+			return err
+		}
+		spec, err := c.Spec(ctx)
+		if err != nil {
+			return err
+		}
+		for _, m := range mounts {
+			if spec.Linux != nil && spec.Linux.MountLabel != "" {
+				if ml := label.FormatMountLabel("", spec.Linux.MountLabel); ml != "" {
+					m.Options = append(m.Options, ml)
+				}
+			}
+			request.Rootfs = append(request.Rootfs, &types.Mount{
+				Type:    m.Type,
+				Source:  m.Source,
+				Target:  m.Target,
+				Options: m.Options,
+			})
+		}
+	}
+
+	return nil
+}
+
+func (c *container) Restore(ctx context.Context, ioCreate cio.Creator, rootDir string) (int, error) {
+	errorPid := -1
+	i, err := ioCreate(c.id)
+	if err != nil {
+		return errorPid, err
+	}
+	defer func() {
+		if err != nil && i != nil {
+			i.Cancel()
+			i.Close()
+		}
+	}()
+	cfg := i.Config()
+
+	request := &tasks.CreateTaskRequest{
+		ContainerID: c.id,
+		Terminal:    cfg.Terminal,
+		Stdin:       cfg.Stdin,
+		Stdout:      cfg.Stdout,
+		Stderr:      cfg.Stderr,
+	}
+
+	if err := c.handleMounts(ctx, request); err != nil {
+		return errorPid, err
+	}
+
+	request.Checkpoint = &types.Descriptor{
+		Annotations: map[string]string{
+			// The following annotation is used to restore a checkpoint
+			// via CRI. This is mainly used to restore a container
+			// in Kubernetes.
+			"RestoreFromPath": rootDir,
+		},
+	}
+	// (adrianreber): it is not totally clear to me, but it seems the only
+	// way to restore a container in containerd is to go through Create().
+	// This function sets up Create() in such a way that it can handle a
+	// container restore coming through the CRI.
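+	// Note: the task service treats a Create() request that carries a
+	// Checkpoint descriptor as a restore. The "RestoreFromPath" annotation
+	// set above surfaces in the runtime layer as the new
+	// CreateOpts.RestoreFromPath flag (core/runtime/runtime.go), which the
+	// shim acts on in Create().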
+	response, err := c.client.TaskService().Create(ctx, request)
+	if err != nil {
+		return errorPid, errgrpc.ToNative(err)
+	}
+
+	return int(response.GetPid()), nil
+}
+
 func (c *container) Checkpoint(ctx context.Context, ref string, opts ...CheckpointOpts) (Image, error) {
 	index := &ocispec.Index{
 		Versioned: ver.Versioned{
diff --git a/contrib/checkpoint/checkcriu.go b/contrib/checkpoint/checkcriu.go
new file mode 100644
index 0000000000000..58527332c5d85
--- /dev/null
+++ b/contrib/checkpoint/checkcriu.go
@@ -0,0 +1,11 @@
+package main
+
+import (
+	criu "github.com/checkpoint-restore/go-criu/v7/utils"
+)
+
+func main() {
+	if err := criu.CheckForCriu(criu.PodCriuVersion); err != nil {
+		panic(err)
+	}
+}
diff --git a/contrib/checkpoint/checkpoint-restore-cri-test.sh b/contrib/checkpoint/checkpoint-restore-cri-test.sh
new file mode 100755
index 0000000000000..ee9dbd556140e
--- /dev/null
+++ b/contrib/checkpoint/checkpoint-restore-cri-test.sh
@@ -0,0 +1,213 @@
+#!/usr/bin/env bash
+
+set -eu -o pipefail
+
+DIR=$(dirname "${0}")
+
+cd "${DIR}"
+go build -o checkcriu
+
+if ! "./checkcriu"; then
+	echo "ERROR: CRIU check failed"
+	exit 1
+fi
+
+if [ ! -e "$(command -v crictl)" ]; then
+	echo "ERROR: crictl binary not found"
+	exit 1
+fi
+
+TESTDIR=$(mktemp -d -p "${PWD}")
+
+SUCCESS=0
+
+function cleanup() {
+	rm -f ./checkcriu
+	crictl ps -a || true
+	pkill containerd || true
+	if [ "${SUCCESS}" == "1" ]; then
+		echo PASS
+	else
+		echo "--> containerd logs"
+		sed 's/^/----> \t/' <"${TESTDIR}/containerd.log"
+		echo FAIL
+	fi
+	umount "$(find "${TESTDIR}" -name shm -type d | head -1)" >/dev/null 2>&1 || true
+	umount "$(find "${TESTDIR}" -name rootfs -type d | head -1)" >/dev/null 2>&1 || true
+	rm -rf "${TESTDIR}" || true
+}
+trap cleanup EXIT
+
+TESTDATA=testdata
+# shellcheck disable=SC2034
+export CONTAINERD_ADDRESS="$TESTDIR/c.sock"
+export CONTAINER_RUNTIME_ENDPOINT="unix:///${CONTAINERD_ADDRESS}"
+TEST_IMAGE=docker.io/library/alpine:latest
+
+function test_from_archive() {
+	echo "--> Deleting all pods: "
+	crictl -t 5s rmp -fa | sed 's/^/----> \t/'
+	echo -n "--> Pulling base image ${TEST_IMAGE}: "
+	crictl pull "${TEST_IMAGE}"
+	POD_JSON=$(mktemp)
+	# adapt the log directory
+	jq ".log_directory=\"${TESTDIR}\"" "$TESTDATA"/sandbox_config.json >"$POD_JSON"
+	echo -n "--> Start pod: "
+	pod_id=$(crictl runp "$POD_JSON")
+	echo "$pod_id"
+	echo -n "--> Create container: "
+	ctr_id=$(crictl create "$pod_id" "$TESTDATA"/container_sleep.json "$POD_JSON")
+	echo "$ctr_id"
+	echo -n "--> Start container: "
+	crictl start "$ctr_id"
+	lines_before=$(crictl logs "$ctr_id" | wc -l)
+	# change the file system to see if the changes are included in the checkpoint
+	echo "--> Modifying container rootfs"
+	crictl exec "$ctr_id" touch /root/testfile
+	crictl exec "$ctr_id" rm /etc/motd
+	echo -n "--> Checkpointing container: "
+	crictl -t 10s checkpoint --export="$TESTDIR"/cp.tar "$ctr_id"
+	echo "--> Cleanup container: "
+	crictl rm -f "$ctr_id" | sed 's/^/----> \t/'
+	echo "--> Cleanup pod: "
+	crictl rmp -f "$pod_id" | sed 's/^/----> \t/'
+	echo "--> Cleanup images: "
+	crictl rmi "${TEST_IMAGE}" | sed 's/^/----> \t/'
+	echo -n "--> Start pod: "
+	pod_id=$(crictl runp "$POD_JSON")
+	echo "$pod_id"
+	# Replace original container with checkpoint image
+	RESTORE_JSON=$(mktemp)
+	jq ".image.image=\"$TESTDIR/cp.tar\"" "$TESTDATA"/container_sleep.json >"$RESTORE_JSON"
+	echo -n "--> Create container from checkpoint: "
+	# This requires a larger timeout as we just deleted the image and
+	# pulling can take some time.
+	ctr_id=$(crictl -t 20s create "$pod_id" "$RESTORE_JSON" "$POD_JSON")
+	echo "$ctr_id"
+	rm -f "$RESTORE_JSON" "$POD_JSON"
+	echo -n "--> Start container from checkpoint: "
+	crictl start "$ctr_id"
+	sleep 1
+	lines_after=$(crictl logs "$ctr_id" | wc -l)
+	if [ "$lines_before" -ge "$lines_after" ]; then
+		echo "number of lines after checkpointing ($lines_after) " \
+			"should be larger than before checkpointing ($lines_before)"
+		false
+	fi
+	# Cleanup
+	echo "--> Cleanup images: "
+	crictl rmi "${TEST_IMAGE}" | sed 's/^/----> \t/'
+	echo -n "--> Verifying container rootfs: "
+	crictl exec "$ctr_id" ls -la /root/testfile
+	if crictl exec "$ctr_id" ls -la /etc/motd >/dev/null 2>&1; then
+		echo "error: file /etc/motd should not exist but it does"
+		exit 1
+	fi
+	echo "--> Deleting all pods: "
+	crictl -t 5s rmp -fa | sed 's/^/----> \t/'
+	SUCCESS=1
+}
+
+function test_from_oci() {
+	echo "--> Deleting all pods: "
+	crictl -t 5s rmp -fa | sed 's/^/----> \t/'
+	echo -n "--> Pulling base image ${TEST_IMAGE}: "
+	crictl pull "${TEST_IMAGE}"
+	echo -n "--> Start pod: "
+	pod_id=$(crictl runp "$TESTDATA"/sandbox_config.json)
+	echo "$pod_id"
+	echo -n "--> Create container: "
+	ctr_id=$(crictl create "$pod_id" "$TESTDATA"/container_sleep.json "$TESTDATA"/sandbox_config.json)
+	echo "$ctr_id"
+	echo -n "--> Start container: "
+	crictl start "$ctr_id"
+	echo -n "--> Checkpointing container: "
+	crictl -t 10s checkpoint --export="$TESTDIR"/cp.tar "$ctr_id"
+	echo "--> Cleanup container: "
+	crictl rm -f "$ctr_id" | sed 's/^/----> \t/'
+	echo "--> Cleanup pod: "
+	crictl rmp -f "$pod_id" | sed 's/^/----> \t/'
+	echo "--> Cleanup images: "
+	crictl rmi "${TEST_IMAGE}" | sed 's/^/----> \t/'
+	# Change cgroup of new sandbox
+	RESTORE_POD_JSON=$(mktemp)
+	jq ".linux.cgroup_parent=\"different_cgroup_789\"" "$TESTDATA"/sandbox_config.json >"$RESTORE_POD_JSON"
+	echo -n "--> Start pod: "
+	pod_id=$(crictl runp "$RESTORE_POD_JSON")
+	echo "$pod_id"
+	# Replace original container with checkpoint image
+	RESTORE_JSON=$(mktemp)
+	# Convert tar checkpoint archive to OCI image
+	echo "--> Convert checkpoint archive $TESTDIR/cp.tar to OCI image 'checkpoint-image:latest': "
+	newcontainer=$(buildah from scratch)
+	echo -n "----> Add checkpoint archive to new OCI image: "
+	buildah add "$newcontainer" "$TESTDIR"/cp.tar /
+	echo "----> Add checkpoint annotation to new OCI image: "
+	buildah config --annotation=org.criu.checkpoint.container.name=test "$newcontainer"
+	echo "----> Save new OCI image: "
+	buildah commit -q "$newcontainer" checkpoint-image:latest 2>&1 | sed 's/^/------> \t/'
+	echo "----> Cleanup temporary images: "
+	buildah rm "$newcontainer" | sed 's/^/------> \t/'
+	# Export OCI image to disk
+	echo "----> Export OCI image to disk: "
+	podman image save -q --format oci-archive -o "$TESTDIR"/oci.tar localhost/checkpoint-image:latest | sed 's/^/------> \t/'
+	echo "----> Cleanup temporary images: "
+	buildah rmi localhost/checkpoint-image:latest | sed 's/^/------> \t/'
+	# Remove potentially old version of the checkpoint image
+	echo "----> Cleanup potential old copies: "
+	../../bin/ctr -n k8s.io images rm --sync localhost/checkpoint-image:latest 2>&1 | sed 's/^/------> \t/'
+	# Import image
+	echo "----> Import new image: "
+	../../bin/ctr -n k8s.io images import "$TESTDIR"/oci.tar 2>&1 | sed 's/^/------> \t/'
+	jq ".image.image=\"localhost/checkpoint-image:latest\"" "$TESTDATA"/container_sleep.json >"$RESTORE_JSON"
+	echo -n "--> Create container from checkpoint: "
+	ctr_id=$(crictl -t 10s create "$pod_id" "$RESTORE_JSON" "$RESTORE_POD_JSON")
+	echo "$ctr_id"
+	rm -f "$RESTORE_JSON" "$RESTORE_POD_JSON"
+	echo -n "--> Start container from checkpoint: "
+	crictl start "$ctr_id"
+	# Cleanup
+	echo "--> Cleanup images: "
+	../../bin/ctr -n k8s.io images rm localhost/checkpoint-image:latest | sed 's/^/----> \t/'
+	echo "--> Cleanup images: "
+	crictl rmi "${TEST_IMAGE}" | sed 's/^/----> \t/'
+	echo "--> Deleting all pods: "
+	crictl -t 5s rmp -fa | sed 's/^/----> \t/'
+	SUCCESS=1
+}
+
+cat >"${TESTDIR}/config.toml" <<EOF
+EOF
+
+echo "--> Starting containerd: "
+../../bin/containerd \
+	--address "${TESTDIR}/c.sock" \
+	--config "${TESTDIR}/config.toml" \
+	--root "${TESTDIR}/root" \
+	--state "${TESTDIR}/state" \
+	--log-level trace &>"${TESTDIR}/containerd.log" &
+# Make sure containerd is ready before calling critest.
+retry_counter=0
+max_retries=10
+while true; do
+	((retry_counter += 1))
+	if crictl info 2>&1 | sed 's/^/----> \t/'; then
+		break
+	else
+		sleep 1.5
+	fi
+	if [ "${retry_counter}" -gt "${max_retries}" ]; then
+		echo "--> Failed to start containerd"
+		exit 1
+	fi
+
+done
+
+test_from_archive
+SUCCESS=0
+test_from_oci
diff --git a/contrib/checkpoint/checkpoint-restore-kubernetes-test.sh b/contrib/checkpoint/checkpoint-restore-kubernetes-test.sh
new file mode 100755
index 0000000000000..8de48bf805bc3
--- /dev/null
+++ b/contrib/checkpoint/checkpoint-restore-kubernetes-test.sh
@@ -0,0 +1,219 @@
+#!/usr/bin/env bash
+
+set -eu -o pipefail
+
+DIR=$(dirname "${0}")
+
+cd "${DIR}"
+go build -o checkcriu
+
+if ! "./checkcriu"; then
+	echo "ERROR: CRIU check failed"
+	exit 1
+fi
+
+if [ ! -e "$(command -v crictl)" ]; then
+	echo "ERROR: crictl binary not found"
+	exit 1
+fi
+
+if [ ! -e "$(command -v kubectl)" ]; then
+	echo "ERROR: kubectl binary not found"
+	exit 1
+fi
+
+if [ ! -e "$(command -v ctr)" ]; then
+	echo "ERROR: ctr binary not found"
+	exit 1
+fi
+
+TESTDIR=$(mktemp -d)
+OUTPUT=$(mktemp)
+SUCCESS=0
+
+function cleanup() {
+	# shellcheck disable=SC2317
+	rm -f ./checkcriu
+	# shellcheck disable=SC2317
+	rm -rf "${TESTDIR}" "${OUTPUT}"
+	# shellcheck disable=SC2317
+	if [ "${SUCCESS}" == "1" ]; then
+		# shellcheck disable=SC2317
+		echo PASS
+	else
+		# shellcheck disable=SC2317
+		echo FAIL
+	fi
+}
+trap cleanup EXIT
+
+TESTDATA=testdata
+# shellcheck disable=SC2034
+CONTAINER_RUNTIME_ENDPOINT="unix:///run/containerd/containerd.sock"
+# shellcheck disable=SC2034
+KUBECONFIG=/var/run/kubernetes/admin.kubeconfig
+
+echo -n "--> Cleanup test pod/containers from previous run: "
+kubectl delete pod sleeper --grace-period=1 || true
+echo -n "--> Create new test pod/containers: "
+kubectl apply -f $TESTDATA/sleep.yaml
+
+echo "--> Wait until test pod/containers are ready: "
+while [ "$(kubectl get pod sleeper -o jsonpath="{.status.containerStatuses[0].started}")" == "false" ]; do
+	echo "----> Waiting for pod/container sleeper/sleep to get ready"
+	sleep 0.5
+done
+
+echo "--> Do curl request to the test container: "
+curl -s "$(kubectl get pod sleeper --template '{{.status.podIP}}'):8088" | tee "${OUTPUT}" | sed 's/^/----> \t/'
+COUNTER_BEFORE=$(cut -d\  -f2 <"${OUTPUT}")
+echo "--> Check logs of test container: "
+kubectl logs sleeper -c sleep | tee "${OUTPUT}" | sed 's/^/----> \t/'
+LINES_BEFORE=$(wc -l <"${OUTPUT}")
+
+if [ ! -e /var/run/kubernetes/client-admin.crt ]; then
+	echo "Missing /var/run/kubernetes/client-admin.crt. Exiting"
+	exit 1
+fi
+
+if [ ! -e /var/run/kubernetes/client-admin.key ]; then
+	echo "Missing /var/run/kubernetes/client-admin.key. Exiting"
+	exit 1
+fi
+
+echo -n "--> Creating checkpoint: "
+CP=$(curl -s --insecure --cert /var/run/kubernetes/client-admin.crt --key /var/run/kubernetes/client-admin.key -X POST "https://localhost:10250/checkpoint/default/sleeper/sleep" | jq -r ".items[0]")
+echo "$CP"
+
+echo -n "--> Cleanup test pod/containers: "
+kubectl delete pod sleeper --grace-period=1
+
+OCI="localhost/checkpoint-image:latest"
+
+echo "--> Converting checkpoint archive $CP to OCI image $OCI"
+
+newcontainer=$(buildah from scratch)
+echo -n "----> Add checkpoint archive to new OCI image: "
+buildah add "$newcontainer" "$CP" /
+echo "----> Add checkpoint annotation to new OCI image: "
+buildah config --annotation=org.criu.checkpoint.container.name=test "$newcontainer"
+echo "----> Save new OCI image: "
+buildah commit -q "$newcontainer" "$OCI" 2>&1 | sed 's/^/------> \t/'
+echo "----> Cleanup temporary images: "
+buildah rm "$newcontainer" | sed 's/^/------> \t/'
+# Export OCI image to disk
+echo "----> Export OCI image to disk: "
+podman image save -q --format oci-archive -o "$TESTDIR"/oci.tar "$OCI" | sed 's/^/------> \t/'
+echo "----> Cleanup temporary images: "
+buildah rmi "$OCI" | sed 's/^/------> \t/'
+# Remove potentially old version of the checkpoint image
+echo "----> Cleanup potential old copies: "
+ctr -n k8s.io images rm --sync localhost/checkpoint-image:latest 2>&1 | sed 's/^/------> \t/'
+# Import image
+echo "----> Import checkpoint image: "
+ctr -n k8s.io images import "$TESTDIR"/oci.tar 2>&1 | sed 's/^/------> \t/'
+
+echo -n "--> Create pod/containers from checkpoint: "
+kubectl apply -f $TESTDATA/sleep-restore.yaml
+
+echo "--> Wait until test pod/containers are ready: "
+while [ "$(kubectl get pod sleeper -o jsonpath="{.status.containerStatuses[0].started}")" == "false" ]; do
+	echo "----> Waiting for pod/container sleeper/sleep to get ready"
+	sleep 0.5
+done
+
+echo "--> Do curl request to the test container: "
+curl -s "$(kubectl get pod sleeper --template '{{.status.podIP}}'):8088" | tee "${OUTPUT}" | sed 's/^/----> \t/'
+COUNTER_AFTER=$(cut -d\  -f2 <"${OUTPUT}")
+echo "--> Check logs of test container: "
+kubectl logs sleeper -c sleep | tee "${OUTPUT}" | sed 's/^/----> \t/'
+LINES_AFTER=$(wc -l <"${OUTPUT}")
+
+if [ "$LINES_BEFORE" -ge "$LINES_AFTER" ]; then
+	echo "number of lines after checkpointing ($LINES_AFTER) " \
+		"should be larger than before checkpointing ($LINES_BEFORE)"
+	false
+fi
+
+if [ "$COUNTER_BEFORE" -ge "$COUNTER_AFTER" ]; then
+	echo "counter value after checkpointing ($COUNTER_AFTER) " \
+		"should be larger than before checkpointing ($COUNTER_BEFORE)"
+	false
+fi
+
+# Let's see if container restart also works correctly
+CONTAINER_ID=$(kubectl get pod sleeper -o jsonpath="{.status.containerStatuses[0].containerID}" | cut -d '/' -f 3)
+CONTAINER_PID=$(crictl inspect "${CONTAINER_ID}" | jq .info.pid)
+
+echo "--> Kill container $CONTAINER_ID by killing PID $CONTAINER_PID"
+# Kill PID in container
+kill -9 "${CONTAINER_PID}"
+
+# wait for the replacement container to come up
+echo "--> Wait until replacement container is started: "
+while [ "$(kubectl get pod sleeper -o jsonpath="{.status.containerStatuses[0].restartCount}")" != "1" ]; do
+	echo "----> Waiting for pod/container sleeper/sleep to get ready"
+	sleep 1
+done
+
+echo "--> Do curl request to the test container: "
+curl -s "$(kubectl get pod sleeper --template '{{.status.podIP}}'):8088" | tee "${OUTPUT}" | sed 's/^/----> \t/'
+COUNTER_AFTER=$(cut -d\  -f2 <"${OUTPUT}")
+echo "--> Check logs of test container: "
+kubectl logs sleeper -c sleep | tee "${OUTPUT}" | sed 's/^/----> \t/'
+LINES_AFTER=$(wc -l <"${OUTPUT}")
+
+if [ "$LINES_BEFORE" -ge "$LINES_AFTER" ]; then
+	echo "number of lines after checkpointing ($LINES_AFTER) " \
+		"should be larger than before checkpointing ($LINES_BEFORE)"
+	false
+fi
+
+if [ "$COUNTER_BEFORE" -ge "$COUNTER_AFTER" ]; then
+	echo "counter value after checkpointing ($COUNTER_AFTER) " \
+		"should be larger than before checkpointing ($COUNTER_BEFORE)"
+	false
+fi
+
+# Let's see if container restart also works correctly a second time
+
+CONTAINER_ID=$(kubectl get pod sleeper -o jsonpath="{.status.containerStatuses[0].containerID}" | cut -d '/' -f 3)
+CONTAINER_PID=$(crictl inspect "${CONTAINER_ID}" | jq .info.pid)
+
+# Kill PID in container
+echo "--> Kill container $CONTAINER_ID by killing PID $CONTAINER_PID"
+kill -9 "${CONTAINER_PID}"
+
+# wait for the replacement container to come up
+echo "--> Wait until replacement container is started: "
+while [ "$(kubectl get pod sleeper -o jsonpath="{.status.containerStatuses[0].restartCount}")" != "2" ]; do
+	echo "----> Waiting for pod/container sleeper/sleep to get ready"
+	sleep 1
+done
+
+echo "--> Do curl request to the test container: "
+curl -s "$(kubectl get pod sleeper --template '{{.status.podIP}}'):8088" | tee "${OUTPUT}" | sed 's/^/----> \t/'
+COUNTER_AFTER=$(cut -d\  -f2 <"${OUTPUT}")
+echo "--> Check logs of test container: "
+kubectl logs sleeper -c sleep | tee "${OUTPUT}" | sed 's/^/----> \t/'
+LINES_AFTER=$(wc -l <"${OUTPUT}")
+
+if [ "$LINES_BEFORE" -ge "$LINES_AFTER" ]; then
+	echo "number of lines after checkpointing ($LINES_AFTER) " \
+		"should be larger than before checkpointing ($LINES_BEFORE)"
+	false
+fi
+
+if [ "$COUNTER_BEFORE" -ge "$COUNTER_AFTER" ]; then
+	echo "counter value after checkpointing ($COUNTER_AFTER) " \
+		"should be larger than before checkpointing ($COUNTER_BEFORE)"
+	false
+fi
+
+echo -n "--> Creating checkpoint from restored container: "
+CP=$(curl -s --insecure --cert /var/run/kubernetes/client-admin.crt --key /var/run/kubernetes/client-admin.key -X POST "https://localhost:10250/checkpoint/default/sleeper/sleep" | jq -r ".items[0]")
+echo "$CP"
+
+SUCCESS=1
+
+exit 0
diff --git a/contrib/checkpoint/testdata/container_sleep.json b/contrib/checkpoint/testdata/container_sleep.json
new file mode 100644
index 0000000000000..4ef2c17754756
--- /dev/null
+++ b/contrib/checkpoint/testdata/container_sleep.json
@@ -0,0 +1,49 @@
+{
+  "metadata": {
+    "name": "podsandbox-sleep"
+  },
+  "image": {
+    "image": "docker.io/library/alpine:latest"
+  },
+  "command": [
+    "/bin/ash"
+  ],
+  "args": [
+    "-c",
+    "while true; do echo -n 'hello: '; date; sleep 0.5;done"
+
+  ],
+  "working_dir": "/",
+  "envs": [
+    {
+      "key": "PATH",
+      "value": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+    },
+    {
+      "key": "GLIBC_TUNABLES",
+      "value": "glibc.pthread.rseq=0"
+    }
+  ],
+  "annotations": {
+    "pod": "podsandbox"
+  },
+  "log_path": "log.file",
+  "stdin": false,
+  "stdin_once": false,
+  "tty": false,
+  "linux": {
+    "security_context": {
+      "namespace_options": {
+        "pid": 1
+      },
+      "readonly_rootfs": false
+    },
+    "resources": {
+      "cpu_period": 10000,
+      "cpu_quota": 20000,
+      "cpu_shares": 512,
+      "oom_score_adj": 30,
+      "memory_limit_in_bytes": 268435456
+    }
+  }
+}
diff --git a/contrib/checkpoint/testdata/sandbox_config.json b/contrib/checkpoint/testdata/sandbox_config.json
new file mode 100644
index 0000000000000..033653f13e277
--- /dev/null
+++ b/contrib/checkpoint/testdata/sandbox_config.json
@@ -0,0 +1,50 @@
+{
+  "metadata": {
+    "name": "podsandbox1",
+    "uid": "redhat-test-crio",
+    "namespace": "redhat.test.crio",
+    "attempt": 1
+  },
+  "hostname": "crictl_host",
+  "log_directory": "",
+  "dns_config": {
+    "servers": [
+      "8.8.8.8"
+    ]
+  },
+  "port_mappings": [],
+  "resources": {
+    "cpu": {
+      "limits": 3,
+      "requests": 2
+    },
+    "memory": {
+      "limits": 50000000,
+      "requests": 2000000
+    }
+  },
+  "labels": {
+    "group": "test"
+  },
+  "annotations": {
+    "owner": "hmeng",
+    "security.alpha.kubernetes.io/seccomp/pod": "unconfined",
+    "com.example.test": "sandbox annotation"
+  },
+  "linux": {
+    "cgroup_parent": "pod_123-456.slice",
+    "security_context": {
+      "namespace_options": {
+        "network": 0,
+        "pid": 1,
+        "ipc": 0
+      },
+      "selinux_options": {
+        "user": "system_u",
+        "role": "system_r",
+        "type": "svirt_lxc_net_t",
+        "level": "s0:c4,c5"
+      }
+    }
+  }
+}
diff --git a/contrib/checkpoint/testdata/sleep-restore.yaml b/contrib/checkpoint/testdata/sleep-restore.yaml
new file mode 100644
index 0000000000000..79937f399fa2e
--- /dev/null
+++ b/contrib/checkpoint/testdata/sleep-restore.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: sleeper
+spec:
+  containers:
+  - name: sleep
+    image: localhost/checkpoint-image:latest
+    imagePullPolicy: IfNotPresent
+  restartPolicy: Always
diff --git a/contrib/checkpoint/testdata/sleep.yaml b/contrib/checkpoint/testdata/sleep.yaml
new file mode 100644
index 0000000000000..19b472523ec4d
--- /dev/null
+++ b/contrib/checkpoint/testdata/sleep.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: sleeper
+spec:
+  containers:
+  - name: sleep
+    image: quay.io/adrianreber/counter:latest
diff --git a/core/runtime/runtime.go b/core/runtime/runtime.go
index 438752f76fa6e..5fea15ea7826f 100644
--- a/core/runtime/runtime.go
+++ b/core/runtime/runtime.go
@@ -42,6 +42,8 @@ type CreateOpts struct {
 	IO IO
 	// Checkpoint digest to restore container state
 	Checkpoint string
+	// Mark this as a restore via a local checkpoint archive (most likely via CRI)
+	RestoreFromPath bool
 	// RuntimeOptions for the runtime
 	RuntimeOptions typeurl.Any
 	// TaskOptions received for the task
diff --git a/core/runtime/v2/shim.go b/core/runtime/v2/shim.go
index db7307bc0ce9f..b2a21e44333cd 100644
--- a/core/runtime/v2/shim.go
+++ b/core/runtime/v2/shim.go
@@ -33,6 +33,7 @@ import (
 	"google.golang.org/grpc/connectivity"
 	"google.golang.org/grpc/credentials/insecure"
 
+	crmetadata "github.com/checkpoint-restore/checkpointctl/lib"
 	eventstypes "github.com/containerd/containerd/api/events"
 	task "github.com/containerd/containerd/api/runtime/task/v3"
 	"github.com/containerd/containerd/api/types"
@@ -45,6 +46,8 @@ import (
 	"github.com/containerd/containerd/v2/core/events/exchange"
 	"github.com/containerd/containerd/v2/core/runtime"
+	"github.com/containerd/containerd/v2/pkg/archive"
+	"github.com/containerd/containerd/v2/pkg/archive/compression"
 	"github.com/containerd/containerd/v2/pkg/atomicfile"
 	"github.com/containerd/containerd/v2/pkg/dialer"
 	"github.com/containerd/containerd/v2/pkg/identifiers"
@@ -586,6 +589,50 @@ func (s *shimTask) Create(ctx context.Context, opts runtime.CreateOpts) (runtime
 		return nil, errgrpc.ToNative(err)
 	}
 
+	if opts.RestoreFromPath {
+		// Unpack rootfs-diff.tar if it exists.
+		// This needs to happen between the 'Create()' from above and before the 'Start()' from below.
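+		// crmetadata.RootFsDiffTar ("rootfs-diff.tar") is stored next to
+		// the checkpoint directory in the unpacked archive, hence the
+		// ".." relative to opts.Checkpoint.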
+		rootfsDiff := filepath.Join(opts.Checkpoint, "..", crmetadata.RootFsDiffTar)
+
+		_, err = os.Stat(rootfsDiff)
+		if err == nil {
+			rootfsDiffTar, err := os.Open(rootfsDiff)
+			if err != nil {
+				return nil, fmt.Errorf("failed to open rootfs-diff archive %s for import: %w", rootfsDiff, err)
+			}
+			defer func(f *os.File) {
+				if err := f.Close(); err != nil {
+					log.G(ctx).Errorf("Unable to close file %s: %q", f.Name(), err)
+				}
+			}(rootfsDiffTar)
+
+			decompressed, err := compression.DecompressStream(rootfsDiffTar)
+			if err != nil {
+				return nil, fmt.Errorf("failed to decompress archive %s for import: %w", rootfsDiffTar.Name(), err)
+			}
+
+			rootfs := filepath.Join(s.Bundle(), "rootfs")
+			_, err = archive.Apply(
+				ctx,
+				rootfs,
+				decompressed,
+			)
+
+			if err != nil {
+				return nil, fmt.Errorf("unpacking of rootfs-diff archive %s into %s failed: %w", rootfsDiffTar.Name(), rootfs, err)
+			}
+			log.G(ctx).Debugf("Unpacked checkpoint in %s", rootfs)
+		}
+		// (adrianreber): This is unclear to me. But it works (and it is necessary).
+		// This is probably connected to my misunderstanding why
+		// restoring a container goes through Create().
+		log.G(ctx).Infof("About to start with opts.Checkpoint %s", opts.Checkpoint)
+		err = s.Start(ctx)
+		if err != nil {
+			return nil, err
+		}
+	}
+
 	return s, nil
 }
diff --git a/internal/cri/server/container_checkpoint.go b/internal/cri/server/container_checkpoint.go
index 1c433bc09e594..3eebd89fd5a0e 100644
--- a/internal/cri/server/container_checkpoint.go
+++ b/internal/cri/server/container_checkpoint.go
@@ -22,11 +22,27 @@ import (
 	"context"
 	"time"
 
+	containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
+	"github.com/containerd/containerd/v2/internal/cri/store/sandbox"
+	"github.com/containerd/errdefs"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
 	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
 )
 
+func (c *criService) checkIfCheckpointOCIImage(ctx context.Context, input string) (string, error) {
+	return "", nil
+}
+
+func (c *criService) CRImportCheckpoint(
+	ctx context.Context,
+	meta *containerstore.Metadata,
+	sandbox *sandbox.Sandbox,
+	sandboxConfig *runtime.PodSandboxConfig,
+) (ctrID string, retErr error) {
+	return "", errdefs.ErrNotImplemented
+}
+
 func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (res *runtime.CheckpointContainerResponse, err error) {
 	// The next line is just needed to make the linter happy.
 	containerCheckpointTimer.WithValues("no-runtime").UpdateSince(time.Now())
diff --git a/internal/cri/server/container_checkpoint_linux.go b/internal/cri/server/container_checkpoint_linux.go
index baabee3486ce2..4884e15ab0d90 100644
--- a/internal/cri/server/container_checkpoint_linux.go
+++ b/internal/cri/server/container_checkpoint_linux.go
@@ -31,49 +31,428 @@ import (
 	"time"
 
 	crmetadata "github.com/checkpoint-restore/checkpointctl/lib"
-	"github.com/checkpoint-restore/go-criu/v7"
+	"github.com/checkpoint-restore/go-criu/v7/stats"
+	"github.com/checkpoint-restore/go-criu/v7/utils"
 	"github.com/containerd/containerd/api/types/runc/options"
+	"github.com/containerd/containerd/v2/client"
 	"github.com/containerd/containerd/v2/core/content"
 	"github.com/containerd/containerd/v2/core/images"
+	"github.com/containerd/containerd/v2/core/mount"
+	"github.com/containerd/containerd/v2/internal/cri/annotations"
+	containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
+	imagestore "github.com/containerd/containerd/v2/internal/cri/store/image"
+	"github.com/containerd/containerd/v2/internal/cri/store/sandbox"
 	"github.com/containerd/containerd/v2/pkg/archive"
 	"github.com/containerd/containerd/v2/pkg/protobuf/proto"
 	ptypes "github.com/containerd/containerd/v2/pkg/protobuf/types"
 	"github.com/containerd/containerd/v2/plugins"
+	"github.com/containerd/continuity/fs"
+	"github.com/containerd/errdefs"
 	"github.com/containerd/log"
-	v1 "github.com/opencontainers/image-spec/specs-go/v1"
+	"github.com/containerd/platforms"
 
-	"github.com/containerd/containerd/v2/client"
+	"github.com/distribution/reference"
+	"github.com/opencontainers/image-spec/identity"
+	v1 "github.com/opencontainers/image-spec/specs-go/v1"
+	spec "github.com/opencontainers/runtime-spec/specs-go"
 	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
+	kubetypes "k8s.io/kubelet/pkg/types"
 )
 
-// PodCriuVersion is the version of CRIU needed for
-// checkpointing and restoring containers out of and into Pods.
-const podCriuVersion = 31600
+// checkIfCheckpointOCIImage checks if the input refers to a checkpoint image.
+// It returns the ID of the image the input resolves to, or an empty string otherwise.
+func (c *criService) checkIfCheckpointOCIImage(ctx context.Context, input string) (string, error) {
+	if input == "" {
+		return "", nil
+	}
+	if _, err := os.Stat(input); err == nil {
+		return "", nil
+	}
+
+	image, err := c.LocalResolve(input)
+	if err != nil {
+		return "", fmt.Errorf("failed to resolve image %q: %w", input, err)
+	}
+	containerdImage, err := c.toContainerdImage(ctx, image)
+	if err != nil {
+		return "", fmt.Errorf("failed to get image from containerd %q: %w", input, err)
+	}
+	input = containerdImage.Name()
+	images, err := c.client.ImageService().Get(ctx, input)
+	if err != nil {
+		return "", fmt.Errorf("failed to get image from containerd %q: %w", input, err)
+	}
 
-// CheckForCriu uses CRIU's go bindings to check if the CRIU
-// binary exists and if it at least the version Podman needs.
-func checkForCriu(version int) error {
-	c := criu.MakeCriu()
-	criuVersion, err := c.GetCriuVersion()
+	rawIndex, err := content.ReadBlob(ctx, c.client.ContentStore(), images.Target)
 	if err != nil {
-		return fmt.Errorf("failed to check for criu version: %w", err)
+		return "", fmt.Errorf("failed to read image blob from containerd %q: %w", input, err)
 	}
-	if criuVersion >= version {
-		return nil
+	var index v1.Index
+	if err = json.Unmarshal(rawIndex, &index); err != nil {
+		return "", fmt.Errorf("failed to unmarshal blob into OCI index: %w", err)
+	}
+
+	if index.Annotations == nil {
+		return "", nil
+	}
+
+	ann, ok := index.Annotations[crmetadata.CheckpointAnnotationName]
+	if !ok {
+		return "", nil
+	}
+
+	log.G(ctx).Infof("Found checkpoint of container %v in %v", ann, input)
+
+	return image.ID, nil
+}
+
+func (c *criService) CRImportCheckpoint(
+	ctx context.Context,
+	meta *containerstore.Metadata,
+	sandbox *sandbox.Sandbox,
+	sandboxConfig *runtime.PodSandboxConfig,
+) (ctrID string, retErr error) {
+	var mountPoint string
+	start := time.Now()
+	// Ensure that the image to restore the checkpoint from has been provided.
+	if meta.Config.Image == nil || meta.Config.Image.Image == "" {
+		return "", errors.New(`attribute "image" missing from container definition`)
+	}
+
+	inputImage := meta.Config.Image.Image
+	createAnnotations := meta.Config.Annotations
+	createLabels := meta.Config.Labels
+
+	restoreStorageImageID, err := c.checkIfCheckpointOCIImage(ctx, inputImage)
+	if err != nil {
+		return "", err
+	}
+
+	mountPoint, err = os.MkdirTemp("", "checkpoint")
+	if err != nil {
+		return "", err
+	}
+	defer func() {
+		if err := os.RemoveAll(mountPoint); err != nil {
+			log.G(ctx).Errorf("Could not recursively remove %s: %q", mountPoint, err)
+		}
+	}()
+	var archiveFile *os.File
+	if restoreStorageImageID != "" {
+		log.G(ctx).Debugf("Restoring from oci image %s", inputImage)
+		platform, err := c.sandboxService.SandboxPlatform(ctx, sandbox.Sandboxer, sandbox.ID)
+		if err != nil {
+			return "", fmt.Errorf("failed to query sandbox platform: %w", err)
+		}
+		img, err := c.client.ImageService().Get(ctx, restoreStorageImageID)
+		if err != nil {
+			return "", err
+		}
+
+		i := client.NewImageWithPlatform(c.client, img, platforms.Only(platform))
+		diffIDs, err := i.RootFS(ctx)
+		if err != nil {
+			return "", err
+		}
+		chainID := identity.ChainID(diffIDs).String()
+		ociRuntime, err := c.config.GetSandboxRuntime(sandboxConfig, sandbox.Metadata.RuntimeHandler)
+		if err != nil {
+			return "", fmt.Errorf("failed to get sandbox runtime: %w", err)
+		}
+		s := c.client.SnapshotService(c.RuntimeSnapshotter(ctx, ociRuntime))
+
+		mounts, err := s.View(ctx, mountPoint, chainID)
+		if err != nil {
+			if errdefs.IsAlreadyExists(err) {
+				mounts, err = s.Mounts(ctx, mountPoint)
+			}
+			if err != nil {
+				return "", err
+			}
+		}
+		if err := mount.All(mounts, mountPoint); err != nil {
+			return "", err
+		}
+	} else {
+
+		archiveFile, err = os.Open(inputImage)
+		if err != nil {
+			return "", fmt.Errorf("failed to open checkpoint archive %s for import: %w", inputImage, err)
+		}
+		defer func(f *os.File) {
+			if err := f.Close(); err != nil {
+				log.G(ctx).Errorf("Unable to close file %s: %q", f.Name(), err)
+			}
+		}(archiveFile)
+
+		filter := archive.WithFilter(func(hdr *tar.Header) (bool, error) {
+			// The checkpoint archive is unpacked twice if using a tar file directly.
+			// The first time only the metadata files are relevant to prepare the
+			// restore operation. This filter function ignores the large parts of
+			// the checkpoint archive. This is usually the actual checkpoint
+			// coming from CRIU as well as the rootfs diff tar file.
+			excludePatterns := []string{
+				"artifacts",
+				"ctr.log",
+				crmetadata.RootFsDiffTar,
+				crmetadata.NetworkStatusFile,
+				crmetadata.DeletedFilesFile,
+				crmetadata.CheckpointDirectory,
+			}
+			for _, pattern := range excludePatterns {
+				if strings.HasPrefix(hdr.Name, pattern) {
+					return false, nil
+				}
+			}
+			return true, nil
+		})
+
+		_, err = archive.Apply(
+			ctx,
+			mountPoint,
+			archiveFile,
+			[]archive.ApplyOpt{filter}...,
+		)
+
+		if err != nil {
+			return "", fmt.Errorf("unpacking of checkpoint archive %s failed: %w", mountPoint, err)
+		}
+		log.G(ctx).Debugf("Unpacked checkpoint in %s", mountPoint)
+	}
+	// Load spec.dump from temporary directory
+	dumpSpec := new(spec.Spec)
+	if _, err := crmetadata.ReadJSONFile(dumpSpec, mountPoint, crmetadata.SpecDumpFile); err != nil {
+		return "", fmt.Errorf("failed to read %q: %w", crmetadata.SpecDumpFile, err)
+	}
+
+	// Load config.dump from temporary directory
+	config := new(crmetadata.ContainerConfig)
+	if _, err := crmetadata.ReadJSONFile(config, mountPoint, crmetadata.ConfigDumpFile); err != nil {
+		return "", fmt.Errorf("failed to read %q: %w", crmetadata.ConfigDumpFile, err)
+	}
+
+	// Load status.dump from temporary directory
+	containerStatus := new(runtime.ContainerStatus)
+	if _, err := crmetadata.ReadJSONFile(containerStatus, mountPoint, crmetadata.StatusDumpFile); err != nil {
+		return "", fmt.Errorf("failed to read %q: %w", crmetadata.StatusDumpFile, err)
+	}
+
+	if meta.SandboxID == "" {
+		// restore into previous sandbox
+		meta.SandboxID = dumpSpec.Annotations[annotations.SandboxID]
+		ctrID = config.ID
+	} else {
+		ctrID = ""
+	}
+
+	ctrMetadata := runtime.ContainerMetadata{}
+
+	if meta.Config.Metadata != nil && meta.Config.Metadata.Name != "" {
+		ctrMetadata.Name = containerStatus.GetMetadata().GetName()
+	}
+
+	originalAnnotations := containerStatus.GetAnnotations()
+	originalLabels := containerStatus.GetLabels()
+
+	sandboxUID := sandboxConfig.GetMetadata().GetUid()
+
+	if sandboxUID != "" {
+		if _, ok := originalLabels[kubetypes.KubernetesPodUIDLabel]; ok {
+			originalLabels[kubetypes.KubernetesPodUIDLabel] = sandboxUID
+		}
+		if _, ok := originalAnnotations[kubetypes.KubernetesPodUIDLabel]; ok {
+			originalAnnotations[kubetypes.KubernetesPodUIDLabel] = sandboxUID
+		}
+	}
+
+	if createLabels != nil {
+		fixupLabels := []string{
+			// Update the container name. It has already been updated in metadata.Name.
+			// It also needs to be updated in the container labels.
+			kubetypes.KubernetesContainerNameLabel,
+			// Update pod name in the labels.
+			kubetypes.KubernetesPodNameLabel,
+			// Also update namespace.
+			kubetypes.KubernetesPodNamespaceLabel,
+		}
+
+		for _, annotation := range fixupLabels {
+			_, ok1 := createLabels[annotation]
+			_, ok2 := originalLabels[annotation]
+
+			// If the value is not set in the original container or
+			// if it is not set in the new container, just skip
+			// the step of updating metadata.
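+			// Example: kubetypes.KubernetesPodNameLabel is rewritten from
+			// the checkpointed pod's name to the name of the pod that is
+			// being restored into.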
+			if ok1 && ok2 {
+				originalLabels[annotation] = createLabels[annotation]
+			}
+		}
+	}
+
+	if createAnnotations != nil {
+		// The hash also needs to be updated, or Kubernetes thinks the container needs to be restarted
+		_, ok1 := createAnnotations["io.kubernetes.container.hash"]
+		_, ok2 := originalAnnotations["io.kubernetes.container.hash"]
+
+		if ok1 && ok2 {
+			originalAnnotations["io.kubernetes.container.hash"] = createAnnotations["io.kubernetes.container.hash"]
+		}
+
+		// The restart count also needs to be correctly updated
+		_, ok1 = createAnnotations["io.kubernetes.container.restartCount"]
+		_, ok2 = originalAnnotations["io.kubernetes.container.restartCount"]
+
+		if ok1 && ok2 {
+			originalAnnotations["io.kubernetes.container.restartCount"] = createAnnotations["io.kubernetes.container.restartCount"]
+		}
+	}
+
+	// Pulling the image the checkpoint is based on. This is a bit different
+	// than automatic image pulling. The checkpoint image is not automatically
+	// pulled, but the image the checkpoint is based on.
+	// During checkpointing the base image of the checkpoint is stored in the
+	// checkpoint archive as NAME@DIGEST. The checkpoint archive also contains
+	// the tag with which it was initially pulled.
+	// First step is to pull NAME@DIGEST
+	containerdImage, err := c.client.Pull(ctx, config.RootfsImageRef)
+	if err != nil {
+		return "", fmt.Errorf("failed to pull checkpoint base image %s: %w", config.RootfsImageRef, err)
+	}
+	if _, err := reference.ParseAnyReference(config.RootfsImageName); err != nil {
+		return "", fmt.Errorf("error parsing reference: %q is not a valid repository/tag %v", config.RootfsImageName, err)
+	}
+	tagImage, err := c.client.ImageService().Get(ctx, config.RootfsImageRef)
+	if err != nil {
+		return "", fmt.Errorf("failed to get checkpoint base image %s: %w", config.RootfsImageRef, err)
+	}
+	// Second step is to tag the image with the same tag it used to have
+	// during checkpointing. The error that the image NAME:TAG already
+	// exists is ignored. It could happen that NAME:TAG now belongs to
+	// another NAME@DIGEST than during checkpointing and the restore will
+	// happen on another image.
+	// TODO: handle if NAME:TAG points to a different NAME@DIGEST
+	tagImage.Name = config.RootfsImageName
+	_, err = c.client.ImageService().Create(ctx, tagImage)
+	if err != nil {
+		if !errdefs.IsAlreadyExists(err) {
+			return "", fmt.Errorf("failed to tag checkpoint base image %s with %s: %w", config.RootfsImageRef, config.RootfsImageName, err)
+		}
+	}
+
+	var image imagestore.Image
+	for i := 1; i < 500; i++ {
+		// This is probably wrong. Not sure how to wait for an image to appear in
+		// the image (or content) store.
+		log.G(ctx).Debugf("Trying to resolve %s:%d", containerdImage.Name(), i)
+		image, err = c.LocalResolve(containerdImage.Name())
+		if err == nil {
+			break
+		}
+		time.Sleep(time.Microsecond * time.Duration(i))
+	}
+	if err != nil {
+		return "", fmt.Errorf("failed to resolve image %q during checkpoint import: %w", config.RootfsImageName, err)
+	}
+	imageConfig := image.ImageSpec.Config
+	env := append([]string{}, imageConfig.Env...)
+	for _, e := range meta.Config.GetEnvs() {
+		env = append(env, e.GetKey()+"="+e.GetValue())
+	}
+	imageConfig.Env = env
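+
+	// Record restore provenance on the container. The checkpoint image
+	// name stays reachable through these annotations even though the
+	// image in the metadata is rewritten to the base image below.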
+	originalAnnotations["restored"] = "true"
+	originalAnnotations["checkpointedAt"] = config.CheckpointedAt.Format(time.RFC3339Nano)
+	originalAnnotations["checkpointImage"] = meta.Config.Image.GetUserSpecifiedImage()
+
+	meta.Config.Annotations = originalAnnotations
+
+	// Remove the checkpoint image name and show the base image name in the metadata.
+	// The checkpoint image name is still available in the annotations.
+	meta.Config.Image.Image = containerStatus.Image.GetImage()
+	meta.Config.Image.UserSpecifiedImage = containerStatus.Image.GetUserSpecifiedImage()
+
+	cstatus, err := c.sandboxService.SandboxStatus(ctx, sandbox.Sandboxer, sandbox.ID, false)
+	if err != nil {
+		return "", fmt.Errorf("failed to get controller status: %w", err)
+	}
+
+	containerRootDir, err := c.createContainer(
+		&createContainerRequest{
+			ctx:                   ctx,
+			containerID:           meta.ID,
+			sandbox:               sandbox,
+			sandboxID:             meta.SandboxID,
+			imageID:               image.ID,
+			containerConfig:       meta.Config,
+			imageConfig:           &imageConfig,
+			podSandboxConfig:      sandboxConfig,
+			sandboxRuntimeHandler: sandbox.Metadata.RuntimeHandler,
+			sandboxPid:            cstatus.Pid,
+			NetNSPath:             sandbox.NetNSPath,
+			containerName:         containerName,
+			containerdImage:       &containerdImage,
+			meta:                  meta,
+			restore:               true,
+			start:                 start,
+		},
+	)
+	if err != nil {
+		return "", err
+	}
+
+	if restoreStorageImageID != "" {
+		if err := fs.CopyDir(containerRootDir, mountPoint); err != nil {
+			return "", err
+		}
+		if err := mount.UnmountAll(mountPoint, 0); err != nil {
+			return "", err
+		}
+	} else {
+		// unpack the checkpoint archive
+		filter := archive.WithFilter(func(hdr *tar.Header) (bool, error) {
+			excludePatterns := []string{
+				crmetadata.ConfigDumpFile,
+				crmetadata.SpecDumpFile,
+				crmetadata.StatusDumpFile,
+			}
+
+			for _, pattern := range excludePatterns {
+				if strings.HasPrefix(hdr.Name, pattern) {
+					return false, nil
+				}
+			}
+
+			return true, nil
+		})
+
+		// Start from the beginning of the checkpoint archive
+		archiveFile.Seek(0, 0)
+		_, err = archive.Apply(ctx, containerRootDir, archiveFile, []archive.ApplyOpt{filter}...)
+
+		if err != nil {
+			return "", fmt.Errorf("unpacking of checkpoint archive %s failed: %w", containerRootDir, err)
+		}
+	}
+	log.G(ctx).Debugf("Unpacked checkpoint in %s", containerRootDir)
+
+	// Restore container log file (if it exists)
+	containerLog := filepath.Join(containerRootDir, "container.log")
+	_, err = c.os.Stat(containerLog)
+	if err == nil {
+		if err := c.os.CopyFile(containerLog, meta.LogPath, 0600); err != nil {
+			return "", fmt.Errorf("restoring container log file %s failed: %w", containerLog, err)
+		}
 	}
 
-	return fmt.Errorf("checkpoint/restore requires at least CRIU %d, current version is %d", version, criuVersion)
+	return meta.ID, nil
 }
 
 func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (*runtime.CheckpointContainerResponse, error) {
 	start := time.Now()
-	if err := checkForCriu(podCriuVersion); err != nil {
-		// This is the wrong error message and needs to be adapted once
-		// Kubernetes (the e2e_node/checkpoint) test has been changed to
-		// handle too old or missing CRIU error messages.
+	if err := utils.CheckForCriu(utils.PodCriuVersion); err != nil {
 		errorMessage := fmt.Sprintf(
 			"CRIU binary not found or too old (<%d). Failed to checkpoint container %q",
-			podCriuVersion,
+			utils.PodCriuVersion,
 			r.GetContainerId(),
 		)
 		log.G(ctx).WithError(err).Error(errorMessage)
@@ -84,9 +463,16 @@ func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.Checkpo
 		)
 	}
 
+	criContainerStatus, err := c.ContainerStatus(ctx, &runtime.ContainerStatusRequest{
+		ContainerId: r.GetContainerId(),
+	})
+	if err != nil {
+		return nil, fmt.Errorf("an error occurred when trying to get the container status of %q: %w", r.GetContainerId(), err)
+	}
+
 	container, err := c.containerStore.Get(r.GetContainerId())
 	if err != nil {
-		return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err)
+		return nil, fmt.Errorf("an error occurred when trying to find container %q: %w", r.GetContainerId(), err)
 	}
 
 	state := container.Status.Get().State()
@@ -99,31 +485,20 @@ func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.Checkpo
 		)
 	}
 
-	imageRef := container.ImageRef
-	image, err := c.GetImage(imageRef)
-	if err != nil {
-		return nil, fmt.Errorf("getting container image failed: %w", err)
-	}
-
 	i, err := container.Container.Info(ctx)
 	if err != nil {
 		return nil, fmt.Errorf("get container info: %w", err)
 	}
 
 	configJSON, err := json.Marshal(&crmetadata.ContainerConfig{
-		ID:   container.ID,
-		Name: container.Name,
-		RootfsImageName: func() string {
-			if len(image.References) > 0 {
-				return image.References[0]
-			}
-			return ""
-		}(),
-		RootfsImageRef: imageRef,
-		OCIRuntime:     i.Runtime.Name,
-		RootfsImage:    container.Config.GetImage().UserSpecifiedImage,
-		CheckpointedAt: time.Now(),
-		CreatedTime:    i.CreatedAt,
+		ID:              container.ID,
+		Name:            container.Name,
+		RootfsImageName: criContainerStatus.GetStatus().GetImage().GetImage(),
+		RootfsImageRef:  criContainerStatus.GetStatus().GetImageRef(),
+		OCIRuntime:      i.Runtime.Name,
+		RootfsImage:     criContainerStatus.GetStatus().GetImage().GetImage(),
+		CheckpointedAt:  time.Now(),
+		CreatedTime:     i.CreatedAt,
 	})
 	if err != nil {
 		return nil, fmt.Errorf("generating container config JSON failed: %w", err)
@@ -146,6 +521,10 @@ func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.Checkpo
 		contentStore = img.ContentStore()
 	)
 
+	// Once all content from the checkpoint image has been saved, the
+	// checkpoint image can be removed from the local image store.
+	defer c.client.ImageService().Delete(ctx, img.Metadata().Name)
+
 	rawIndex, err = content.ReadBlob(ctx, contentStore, targetDesc)
 	if err != nil {
 		return nil, fmt.Errorf("failed to retrieve checkpoint index blob from content store: %w", err)
@@ -160,10 +539,63 @@ func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.Checkpo
 	}
 	defer os.RemoveAll(cpPath)
 
+	// This internal containerd file is used by checkpointctl for
+	// checkpoint archive analysis.
+	if err := c.os.CopyFile(
+		filepath.Join(c.getContainerRootDir(r.GetContainerId()), crmetadata.StatusFile),
+		filepath.Join(cpPath, crmetadata.StatusFile),
+		0o600,
+	); err != nil {
+		return nil, err
+	}
+
+	// This file is created by CRIU and includes timing analysis.
+	// Also used by checkpointctl.
+	if err := c.os.CopyFile(
+		filepath.Join(c.getContainerRootDir(r.GetContainerId()), stats.StatsDump),
+		filepath.Join(cpPath, stats.StatsDump),
+		0o600,
+	); err != nil {
+		return nil, err
+	}
+
+	// The log file created by CRIU. This file could be missing.
+	// Let's ignore errors if the file is missing.
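+	// (crmetadata.DumpLogFile resolves to CRIU's "dump.log".)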
+	if err := c.os.CopyFile(
+		filepath.Join(c.getContainerRootDir(r.GetContainerId()), crmetadata.DumpLogFile),
+		filepath.Join(cpPath, crmetadata.DumpLogFile),
+		0o600,
+	); err != nil {
+		if !errors.Is(errors.Unwrap(err), os.ErrNotExist) {
+			return nil, err
+		}
+	}
+
+	// Save the existing container log file
+	_, err = c.os.Stat(criContainerStatus.GetStatus().GetLogPath())
+	if err == nil {
+		if err := c.os.CopyFile(
+			criContainerStatus.GetStatus().GetLogPath(),
+			filepath.Join(cpPath, "container.log"),
+			0o600,
+		); err != nil {
+			return nil, err
+		}
+	}
+
 	if err := os.WriteFile(filepath.Join(cpPath, crmetadata.ConfigDumpFile), configJSON, 0o600); err != nil {
 		return nil, err
 	}
 
+	containerStatus, err := json.Marshal(criContainerStatus.GetStatus())
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal container status: %w", err)
+	}
+
+	if err := os.WriteFile(filepath.Join(cpPath, crmetadata.StatusDumpFile), containerStatus, 0o600); err != nil {
+		return nil, err
+	}
+
 	// walk the manifests and pull out the blobs that we need to save in the checkpoint tarball:
 	// - the checkpoint criu data
 	// - the rw diff tarball
@@ -189,7 +621,7 @@ func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.Checkpo
 
 	// write final tarball of all content
 	tar := archive.Diff(ctx, "", cpPath)
-	outFile, err := os.OpenFile(r.Location, os.O_RDWR|os.O_CREATE, 0600)
+	outFile, err := os.OpenFile(r.Location, os.O_RDWR|os.O_CREATE, 0o600)
 	if err != nil {
 		return nil, err
 	}
@@ -204,6 +636,8 @@ func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.Checkpo
 
 	containerCheckpointTimer.WithValues(i.Runtime.Name).UpdateSince(start)
 
+	log.G(ctx).Infof("Wrote checkpoint archive to %s for %s", outFile.Name(), r.GetContainerId())
+
 	return &runtime.CheckpointContainerResponse{}, nil
 }
diff --git a/internal/cri/server/container_create.go b/internal/cri/server/container_create.go
index 50f0fc715d194..4c0786963b916 100644
--- a/internal/cri/server/container_create.go
+++ b/internal/cri/server/container_create.go
@@ -25,15 +25,6 @@ import (
 	"strings"
 	"time"
 
-	"github.com/containerd/log"
-	"github.com/containerd/typeurl/v2"
-	"github.com/davecgh/go-spew/spew"
-	imagespec "github.com/opencontainers/image-spec/specs-go/v1"
-	runtimespec "github.com/opencontainers/runtime-spec/specs-go"
-	"github.com/opencontainers/selinux/go-selinux"
-	"github.com/opencontainers/selinux/go-selinux/label"
-	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
-
 	containerd "github.com/containerd/containerd/v2/client"
 	"github.com/containerd/containerd/v2/core/containers"
 	"github.com/containerd/containerd/v2/internal/cri/annotations"
@@ -42,11 +33,21 @@ import (
 	crilabels "github.com/containerd/containerd/v2/internal/cri/labels"
 	customopts "github.com/containerd/containerd/v2/internal/cri/opts"
 	containerstore "github.com/containerd/containerd/v2/internal/cri/store/container"
+	"github.com/containerd/containerd/v2/internal/cri/store/sandbox"
 	"github.com/containerd/containerd/v2/internal/cri/util"
 	"github.com/containerd/containerd/v2/pkg/blockio"
 	"github.com/containerd/containerd/v2/pkg/oci"
 	"github.com/containerd/containerd/v2/pkg/tracing"
+	"github.com/containerd/log"
 	"github.com/containerd/platforms"
+	"github.com/containerd/typeurl/v2"
+	"github.com/davecgh/go-spew/spew"
+	imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+	v1 "github.com/opencontainers/image-spec/specs-go/v1"
+	runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/opencontainers/selinux/go-selinux"
+	"github.com/opencontainers/selinux/go-selinux/label"
+	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
 )
 
 func init() {
@@ -111,6 +112,52 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
 		Config: config,
 	}
 
+	// Check if the image is a file. If it is a file it might be a checkpoint archive.
+	checkpointImage, err := func() (bool, error) {
+		if _, err := c.os.Stat(config.GetImage().GetImage()); err == nil {
+			log.G(ctx).Infof(
+				"%q is a file. Assuming it is a checkpoint archive",
+				config.GetImage().GetImage(),
+			)
+			return true, nil
+		}
+		// Check if this is an OCI checkpoint image
+		imageID, err := c.checkIfCheckpointOCIImage(ctx, config.GetImage().GetImage())
+		if err != nil {
+			return false, fmt.Errorf("failed to check if this is a checkpoint image: %w", err)
+		}
+
+		return imageID != "", nil
+	}()
+	if err != nil {
+		return nil, err
+	}
+
+	if checkpointImage {
+		// This might be a checkpoint image. Let's pass
+		// it to the checkpoint code.
+
+		if sandboxConfig.GetMetadata() == nil {
+			return nil, fmt.Errorf("sandboxConfig must not be empty")
+		}
+
+		ctrID, err := c.CRImportCheckpoint(
+			ctx,
+			&meta,
+			&sandbox,
+			sandboxConfig,
+		)
+		if err != nil {
+			log.G(ctx).Errorf("failed to prepare %s for restore: %q", ctrID, err)
+			return nil, err
+		}
+		log.G(ctx).Infof("Prepared %s for restore", ctrID)
+
+		return &runtime.CreateContainerResponse{
+			ContainerId: id,
+		}, nil
+	}
+
 	// Prepare container image snapshot. For container, the image should have
 	// been pulled before creating the container, so do not ensure the image.
 	image, err := c.LocalResolve(config.GetImage().GetImage())
@@ -121,98 +168,155 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
 	if err != nil {
 		return nil, fmt.Errorf("failed to get image from containerd %q: %w", image.ID, err)
 	}
+
 	span.SetAttributes(
 		tracing.Attribute("container.image.ref", containerdImage.Name()),
 	)
-	start := time.Now()
+	_, err = c.createContainer(
+		&createContainerRequest{
+			ctx:                   ctx,
+			containerID:           id,
+			sandbox:               &sandbox,
+			sandboxID:             sandboxID,
+			imageID:               image.ID,
+			containerConfig:       config,
+			imageConfig:           &image.ImageSpec.Config,
+			podSandboxConfig:      sandboxConfig,
+			sandboxRuntimeHandler: sandbox.Metadata.RuntimeHandler,
+			sandboxPid:            sandboxPid,
+			NetNSPath:             sandbox.NetNSPath,
+			containerName:         containerName,
+			containerdImage:       &containerdImage,
+			meta:                  &meta,
+			start:                 time.Now(),
+		},
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	return &runtime.CreateContainerResponse{ContainerId: id}, nil
+}
+
+type createContainerRequest struct {
+	ctx                   context.Context
+	containerID           string
+	sandbox               *sandbox.Sandbox
+	sandboxID             string
+	imageID               string
+	containerConfig       *runtime.ContainerConfig
+	imageConfig           *v1.ImageConfig
+	podSandboxConfig      *runtime.PodSandboxConfig
+	sandboxRuntimeHandler string
+	sandboxPid            uint32
+	NetNSPath             string
+	containerName         string
+	containerdImage       *containerd.Image
+	meta                  *containerstore.Metadata
+	restore               bool
+	start                 time.Time
+}
+
+func (c *criService) createContainer(r *createContainerRequest) (_ string, retErr error) {
+	span := tracing.SpanFromContext(r.ctx)
 	// Create container root directory.
-	containerRootDir := c.getContainerRootDir(id)
-	if err = c.os.MkdirAll(containerRootDir, 0755); err != nil {
-		return nil, fmt.Errorf("failed to create container root directory %q: %w",
-			containerRootDir, err)
+	containerRootDir := c.getContainerRootDir(r.containerID)
+	if err := c.os.MkdirAll(containerRootDir, 0755); err != nil {
+		return "", fmt.Errorf(
+			"failed to create container root directory %q: %w",
+			containerRootDir,
+			err,
+		)
 	}
 	defer func() {
 		if retErr != nil {
 			// Cleanup the container root directory.
-			if err = c.os.RemoveAll(containerRootDir); err != nil {
-				log.G(ctx).WithError(err).Errorf("Failed to remove container root directory %q",
-					containerRootDir)
+			if err := c.os.RemoveAll(containerRootDir); err != nil {
+				log.G(r.ctx).WithError(err).Errorf(
+					"Failed to remove container root directory %q",
+					containerRootDir,
+				)
 			}
 		}
 	}()
-	volatileContainerRootDir := c.getVolatileContainerRootDir(id)
-	if err = c.os.MkdirAll(volatileContainerRootDir, 0755); err != nil {
-		return nil, fmt.Errorf("failed to create volatile container root directory %q: %w",
-			volatileContainerRootDir, err)
+	volatileContainerRootDir := c.getVolatileContainerRootDir(r.containerID)
+	if err := c.os.MkdirAll(volatileContainerRootDir, 0755); err != nil {
+		return "", fmt.Errorf(
+			"failed to create volatile container root directory %q: %w",
+			volatileContainerRootDir,
+			err,
+		)
 	}
 	defer func() {
 		if retErr != nil {
 			// Cleanup the volatile container root directory.
-			if err = c.os.RemoveAll(volatileContainerRootDir); err != nil {
-				log.G(ctx).WithError(err).Errorf("Failed to remove volatile container root directory %q",
-					volatileContainerRootDir)
+			if err := c.os.RemoveAll(volatileContainerRootDir); err != nil {
+				log.G(r.ctx).WithError(err).Errorf(
+					"Failed to remove volatile container root directory %q",
+					volatileContainerRootDir,
+				)
 			}
 		}
 	}()
 
-	platform, err := c.sandboxService.SandboxPlatform(ctx, sandbox.Sandboxer, sandboxID)
+	platform, err := c.sandboxService.SandboxPlatform(r.ctx, r.sandbox.Sandboxer, r.sandboxID)
 	if err != nil {
-		return nil, fmt.Errorf("failed to query sandbox platform: %w", err)
+		return "", fmt.Errorf("failed to query sandbox platform: %w", err)
 	}
+
 	var volumeMounts []*runtime.Mount
 	if !c.config.IgnoreImageDefinedVolumes {
 		// Create container image volumes mounts.
- volumeMounts = c.volumeMounts(platform, containerRootDir, config, &image.ImageSpec.Config) - } else if len(image.ImageSpec.Config.Volumes) != 0 { - log.G(ctx).Debugf("Ignoring volumes defined in image %v because IgnoreImageDefinedVolumes is set", image.ID) + volumeMounts = c.volumeMounts(platform, containerRootDir, r.containerConfig, r.imageConfig) + } else if len(r.imageConfig.Volumes) != 0 { + log.G(r.ctx).Debugf("Ignoring volumes defined in image %v because IgnoreImageDefinedVolumes is set", r.imageID) } - ociRuntime, err := c.config.GetSandboxRuntime(sandboxConfig, sandbox.Metadata.RuntimeHandler) + ociRuntime, err := c.config.GetSandboxRuntime(r.podSandboxConfig, r.sandboxRuntimeHandler) if err != nil { - return nil, fmt.Errorf("failed to get sandbox runtime: %w", err) + return "", fmt.Errorf("failed to get sandbox runtime: %w", err) } var runtimeHandler *runtime.RuntimeHandler for _, f := range c.runtimeHandlers { - if f.Name == sandbox.Metadata.RuntimeHandler { + if f.Name == r.sandboxRuntimeHandler { runtimeHandler = f break } } - log.G(ctx).Debugf("Use OCI runtime %+v for sandbox %q and container %q", ociRuntime, sandboxID, id) + log.G(r.ctx).Debugf("Use OCI runtime %+v for sandbox %q and container %q", ociRuntime, r.sandboxID, r.containerID) - imageName := containerdImage.Name() - if name := config.GetImage().GetUserSpecifiedImage(); name != "" { + imageName := (*r.containerdImage).Name() + if name := r.containerConfig.GetImage().GetUserSpecifiedImage(); name != "" { imageName = name } + spec, err := c.buildContainerSpec( platform, - id, - sandboxID, - sandboxPid, - sandbox.NetNSPath, - containerName, + r.containerID, + r.sandboxID, + r.sandboxPid, + r.NetNSPath, + r.containerName, imageName, - config, - sandboxConfig, - &image.ImageSpec.Config, + r.containerConfig, + r.podSandboxConfig, + r.imageConfig, volumeMounts, ociRuntime, runtimeHandler, ) if err != nil { - return nil, fmt.Errorf("failed to generate container %q spec: %w", id, err) + return "", fmt.Errorf("failed to generate container %q spec: %w", r.containerID, err) } - meta.ProcessLabel = spec.Process.SelinuxLabel + r.meta.ProcessLabel = spec.Process.SelinuxLabel // handle any KVM based runtime if err := modifyProcessLabel(ociRuntime.Type, spec); err != nil { - return nil, err + return "", err } - if config.GetLinux().GetSecurityContext().GetPrivileged() { + if r.containerConfig.GetLinux().GetSecurityContext().GetPrivileged() { // If privileged don't set the SELinux label but still record it on the container so // the unused MCS label can be release later spec.Process.SelinuxLabel = "" @@ -223,23 +327,23 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta } }() - log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec)) + log.G(r.ctx).Debugf("Container %q spec: %#+v", r.containerID, spew.NewFormatter(spec)) // Grab any platform specific snapshotter opts. - sOpts, err := snapshotterOpts(config) + sOpts, err := snapshotterOpts(r.containerConfig) if err != nil { - return nil, err + return "", err } // Set snapshotter before any other options. opts := []containerd.NewContainerOpts{ - containerd.WithSnapshotter(c.RuntimeSnapshotter(ctx, ociRuntime)), + containerd.WithSnapshotter(c.RuntimeSnapshotter(r.ctx, ociRuntime)), // Prepare container rootfs. This is always writeable even if // the container wants a readonly rootfs since we want to give // the runtime (runc) a chance to modify (e.g. 
to create mount // points corresponding to spec.Mounts) before making the // rootfs readonly (requested by spec.Root.Readonly). - customopts.WithNewSnapshot(id, containerdImage, sOpts...), + customopts.WithNewSnapshot(r.containerID, *r.containerdImage, sOpts...), } if len(volumeMounts) > 0 { mountMap := make(map[string]string) @@ -248,121 +352,122 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta } opts = append(opts, customopts.WithVolumes(mountMap, platform)) } - meta.ImageRef = image.ID - meta.StopSignal = image.ImageSpec.Config.StopSignal + r.meta.ImageRef = r.imageID + r.meta.StopSignal = r.imageConfig.StopSignal // Validate log paths and compose full container log path. - if sandboxConfig.GetLogDirectory() != "" && config.GetLogPath() != "" { - meta.LogPath = filepath.Join(sandboxConfig.GetLogDirectory(), config.GetLogPath()) - log.G(ctx).Debugf("Composed container full log path %q using sandbox log dir %q and container log path %q", - meta.LogPath, sandboxConfig.GetLogDirectory(), config.GetLogPath()) + if r.podSandboxConfig.GetLogDirectory() != "" && r.containerConfig.GetLogPath() != "" { + r.meta.LogPath = filepath.Join(r.podSandboxConfig.GetLogDirectory(), r.containerConfig.GetLogPath()) + log.G(r.ctx).Debugf("Composed container full log path %q using sandbox log dir %q and container log path %q", + r.meta.LogPath, r.podSandboxConfig.GetLogDirectory(), r.containerConfig.GetLogPath()) } else { - log.G(ctx).Infof("Logging will be disabled due to empty log paths for sandbox (%q) or container (%q)", - sandboxConfig.GetLogDirectory(), config.GetLogPath()) + log.G(r.ctx).Infof("Logging will be disabled due to empty log paths for sandbox (%q) or container (%q)", + r.podSandboxConfig.GetLogDirectory(), r.containerConfig.GetLogPath()) } var containerIO *cio.ContainerIO switch ociRuntime.IOType { case criconfig.IOTypeStreaming: - containerIO, err = cio.NewContainerIO(id, - cio.WithStreams(sandbox.Endpoint.Address, config.GetTty(), config.GetStdin())) + containerIO, err = cio.NewContainerIO(r.containerID, + cio.WithStreams(r.sandbox.Endpoint.Address, r.containerConfig.GetTty(), r.containerConfig.GetStdin())) default: - containerIO, err = cio.NewContainerIO(id, - cio.WithNewFIFOs(volatileContainerRootDir, config.GetTty(), config.GetStdin())) + containerIO, err = cio.NewContainerIO(r.containerID, + cio.WithNewFIFOs(volatileContainerRootDir, r.containerConfig.GetTty(), r.containerConfig.GetStdin())) } if err != nil { - return nil, fmt.Errorf("failed to create container io: %w", err) + return "", fmt.Errorf("failed to create container io: %w", err) } defer func() { if retErr != nil { if err := containerIO.Close(); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to close container io %q", id) + log.G(r.ctx).WithError(err).Errorf("Failed to close container io %q", r.containerID) } } }() - specOpts, err := c.platformSpecOpts(platform, config, &image.ImageSpec.Config) + specOpts, err := c.platformSpecOpts(platform, r.containerConfig, r.imageConfig) if err != nil { - return nil, fmt.Errorf("failed to get container spec opts: %w", err) + return "", fmt.Errorf("failed to get container spec opts: %w", err) } - containerLabels := util.BuildLabels(config.Labels, image.ImageSpec.Config.Labels, crilabels.ContainerKindContainer) + containerLabels := util.BuildLabels(r.containerConfig.Labels, r.imageConfig.Labels, crilabels.ContainerKindContainer) // TODO the sandbox in the cache should hold this info - runtimeName, runtimeOption, err := c.runtimeInfo(ctx, sandboxID) 
+ runtimeName, runtimeOption, err := c.runtimeInfo(r.ctx, r.sandboxID) if err != nil { - return nil, fmt.Errorf("unable to get sandbox %q runtime info: %w", sandboxID, err) + return "", fmt.Errorf("unable to get sandbox %q runtime info: %w", r.sandboxID, err) } opts = append(opts, containerd.WithSpec(spec, specOpts...), containerd.WithRuntime(runtimeName, runtimeOption), containerd.WithContainerLabels(containerLabels), - containerd.WithContainerExtension(crilabels.ContainerMetadataExtension, &meta), + containerd.WithContainerExtension(crilabels.ContainerMetadataExtension, r.meta), ) - opts = append(opts, containerd.WithSandbox(sandboxID)) + opts = append(opts, containerd.WithSandbox(r.sandboxID)) opts = append(opts, c.nri.WithContainerAdjustment()) defer func() { if retErr != nil { deferCtx, deferCancel := util.DeferContext() defer deferCancel() - c.nri.UndoCreateContainer(deferCtx, &sandbox, id, spec) + c.nri.UndoCreateContainer(deferCtx, r.sandbox, r.containerID, spec) } }() var cntr containerd.Container - if cntr, err = c.client.NewContainer(ctx, id, opts...); err != nil { - return nil, fmt.Errorf("failed to create containerd container: %w", err) + if cntr, err = c.client.NewContainer(r.ctx, r.containerID, opts...); err != nil { + return "", fmt.Errorf("failed to create containerd container: %w", err) } defer func() { if retErr != nil { deferCtx, deferCancel := util.DeferContext() defer deferCancel() if err := cntr.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id) + log.G(r.ctx).WithError(err).Errorf("Failed to delete containerd container %q", r.containerID) } } }() - status := containerstore.Status{CreatedAt: time.Now().UnixNano()} + status := containerstore.Status{CreatedAt: time.Now().UnixNano(), Restore: r.restore} status = copyResourcesToStatus(spec, status) - container, err := containerstore.NewContainer(meta, + container, err := containerstore.NewContainer(*r.meta, containerstore.WithStatus(status, containerRootDir), containerstore.WithContainer(cntr), containerstore.WithContainerIO(containerIO), ) if err != nil { - return nil, fmt.Errorf("failed to create internal container object for %q: %w", id, err) + return "", fmt.Errorf("failed to create internal container object for %q: %w", r.containerID, err) } defer func() { if retErr != nil { // Cleanup container checkpoint on error. if err := container.Delete(); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to cleanup container checkpoint for %q", id) + log.G(r.ctx).WithError(err).Errorf("Failed to cleanup container checkpoint for %q", r.containerID) } } }() // Add container into container store. 
if err := c.containerStore.Add(container); err != nil { - return nil, fmt.Errorf("failed to add container %q into store: %w", id, err) + return "", fmt.Errorf("failed to add container %q into store: %w", r.containerID, err) } - c.generateAndSendContainerEvent(ctx, id, sandboxID, runtime.ContainerEventType_CONTAINER_CREATED_EVENT) + c.generateAndSendContainerEvent(r.ctx, r.containerID, r.sandboxID, runtime.ContainerEventType_CONTAINER_CREATED_EVENT) - err = c.nri.PostCreateContainer(ctx, &sandbox, &container) + err = c.nri.PostCreateContainer(r.ctx, r.sandbox, &container) if err != nil { - log.G(ctx).WithError(err).Errorf("NRI post-create notification failed") + log.G(r.ctx).WithError(err).Errorf("NRI post-create notification failed") } - containerCreateTimer.WithValues(ociRuntime.Type).UpdateSince(start) + containerCreateTimer.WithValues(ociRuntime.Type).UpdateSince(r.start) span.AddEvent("container created", - tracing.Attribute("container.create.duration", time.Since(start).String()), + tracing.Attribute("container.create.duration", time.Since(r.start).String()), ) - return &runtime.CreateContainerResponse{ContainerId: id}, nil + + return containerRootDir, nil } // volumeMounts sets up image volumes for container. Rely on the removal of container diff --git a/internal/cri/server/container_start.go b/internal/cri/server/container_start.go index 36e4d72469915..9f3ce614e86e9 100644 --- a/internal/cri/server/container_start.go +++ b/internal/cri/server/container_start.go @@ -21,6 +21,8 @@ import ( "errors" "fmt" "io" + "os" + "path/filepath" "time" "github.com/containerd/containerd/v2/pkg/tracing" @@ -28,6 +30,8 @@ import ( "github.com/containerd/log" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + crmetadata "github.com/checkpoint-restore/checkpointctl/lib" + "github.com/checkpoint-restore/go-criu/v7/stats" containerd "github.com/containerd/containerd/v2/client" cio "github.com/containerd/containerd/v2/internal/cri/io" containerstore "github.com/containerd/containerd/v2/internal/cri/store/container" @@ -91,6 +95,91 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain } span.SetAttributes(tracing.Attribute("sandbox.id", sandboxID)) + ioCreation := func(id string) (_ containerdio.IO, err error) { + stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, config.GetTty()) + if err != nil { + return nil, fmt.Errorf("failed to create container loggers: %w", err) + } + cntr.IO.AddOutput("log", stdoutWC, stderrWC) + cntr.IO.Pipe() + return cntr.IO, nil + } + + if cntr.Status.Get().Restore { + // If the container was created from a checkpoint, its status is marked + // with Restore == true. In that case the normal start path is skipped + // and the container is restored from the checkpoint data instead. + pid, err := container.Restore( + ctx, + ioCreation, + filepath.Join(c.getContainerRootDir(r.GetContainerId()), crmetadata.CheckpointDirectory), + ) + + if err != nil { + return nil, fmt.Errorf("failed to restore containerd task: %w", err) + } + // Update container start timestamp.
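+ // Restore() returns the PID of the restored task's init process; record + // it in the container status just as a regular start records task.Pid().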
+ if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { + if pid < 0 { + return status, fmt.Errorf("restore returned a PID < 0 (%d); that should not happen", pid) + } + status.Pid = uint32(pid) + status.StartedAt = time.Now().UnixNano() + return status, nil + }); err != nil { + return nil, fmt.Errorf("failed to update container %q state: %w", id, err) + } + + c.generateAndSendContainerEvent(ctx, id, sandboxID, runtime.ContainerEventType_CONTAINER_STARTED_EVENT) + + task, err := cntr.Container.Task(ctx, nil) + if err != nil { + return nil, fmt.Errorf("failed to get task for container %q: %w", id, err) + } + // wait is a long running background request, no timeout needed. + exitCh, err := task.Wait(ctrdutil.NamespacedContext()) + if err != nil { + return nil, fmt.Errorf("failed to wait for containerd task: %w", err) + } + + defer func() { + if retErr != nil { + deferCtx, deferCancel := ctrdutil.DeferContext() + defer deferCancel() + err = c.nri.StopContainer(deferCtx, &sandbox, &cntr) + if err != nil { + log.G(ctx).WithError(err).Errorf("NRI stop failed for failed container %q", id) + } + } + }() + // It handles the TaskExit event and updates the container state after this. + c.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh) + + // Clean up checkpoint artifacts after the restore. + cleanup := [...]string{ + crmetadata.RestoreLogFile, + crmetadata.DumpLogFile, + stats.StatsDump, + stats.StatsRestore, + crmetadata.NetworkStatusFile, + crmetadata.RootFsDiffTar, + crmetadata.DeletedFilesFile, + crmetadata.CheckpointDirectory, + crmetadata.StatusDumpFile, + crmetadata.ConfigDumpFile, + crmetadata.SpecDumpFile, + "container.log", + } + for _, del := range cleanup { + file := filepath.Join(c.getContainerRootDir(r.GetContainerId()), del) + err = os.RemoveAll(file) + if err != nil { + log.G(ctx).Infof("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err) + } + } + log.G(ctx).Infof("Restored container %s successfully", r.GetContainerId()) + return &runtime.StartContainerResponse{}, nil + } // Recheck target container validity in Linux namespace options. if linux := config.GetLinux(); linux != nil { nsOpts := linux.GetSecurityContext().GetNamespaceOptions() @@ -102,16 +191,6 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain } } - ioCreation := func(id string) (_ containerdio.IO, err error) { - stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, config.GetTty()) - if err != nil { - return nil, fmt.Errorf("failed to create container loggers: %w", err) - } - cntr.IO.AddOutput("log", stdoutWC, stderrWC) - cntr.IO.Pipe() - return cntr.IO, nil - } - ociRuntime, err := c.config.GetSandboxRuntime(sandbox.Config, sandbox.Metadata.RuntimeHandler) if err != nil { return nil, fmt.Errorf("failed to get sandbox runtime: %w", err) diff --git a/internal/cri/server/container_status_test.go b/internal/cri/server/container_status_test.go index b584d54e38fbd..f53fb1aa601fd 100644 --- a/internal/cri/server/container_status_test.go +++ b/internal/cri/server/container_status_test.go @@ -395,3 +395,9 @@ func (c *fakeSpecOnlyContainer) Update(context.Context, ...containerd.UpdateCont c.t.Error("fakeSpecOnlyContainer.Update: not implemented") return errors.New("not implemented") } + +// Restore implements client.Container.
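+// It reports a test failure when called, since these tests never exercise restore.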
+func (c *fakeSpecOnlyContainer) Restore(context.Context, cio.Creator, string) (int, error) { + c.t.Error("fakeSpecOnlyContainer.Restore: not implemented") + return -1, errors.New("not implemented") +} diff --git a/internal/cri/server/podsandbox/recover_test.go b/internal/cri/server/podsandbox/recover_test.go index 003f0839de22b..3d6b076b71ae6 100644 --- a/internal/cri/server/podsandbox/recover_test.go +++ b/internal/cri/server/podsandbox/recover_test.go @@ -198,6 +198,10 @@ func (f *fakeContainer) Checkpoint(ctx context.Context, s string, opts ...contai return nil, errdefs.ErrNotImplemented } +func (f *fakeContainer) Restore(context.Context, cio.Creator, string) (int, error) { + return -1, errdefs.ErrNotImplemented +} + func sandboxExtension(id string) map[string]typeurl.Any { metadata := sandbox.Metadata{ ID: id, diff --git a/internal/cri/store/container/status.go b/internal/cri/store/container/status.go index 0f6d38b4c9e1d..d28069e0a0c7a 100644 --- a/internal/cri/store/container/status.go +++ b/internal/cri/store/container/status.go @@ -98,6 +98,9 @@ type Status struct { Unknown bool `json:"-"` // Resources has container runtime resource constraints Resources *runtime.ContainerResources + // Restore marks this container as a container to be restored from a + // checkpoint and not started. + Restore bool } // State returns current state of the container based on the container status. diff --git a/plugins/services/tasks/local.go b/plugins/services/tasks/local.go index c4737c25da4e4..30385e0bb481b 100644 --- a/plugins/services/tasks/local.go +++ b/plugins/services/tasks/local.go @@ -180,6 +180,16 @@ func (l *local) Create(ctx context.Context, r *api.CreateTaskRequest, _ ...grpc. taskAPIVersion = taskOptions.TaskApiVersion } + restoreFromPath := false + // For a restore via CRI, the checkpoint path is passed down via the + // RestoreFromPath annotation on the checkpoint descriptor. + if r.Checkpoint != nil && r.Checkpoint.Annotations != nil { + ann, ok := r.Checkpoint.Annotations["RestoreFromPath"] + if ok { + checkpointPath = ann + restoreFromPath = true + } + } + // jump get checkpointPath from checkpoint image if checkpointPath == "" && r.Checkpoint != nil { checkpointPath, err = os.MkdirTemp(os.Getenv("XDG_RUNTIME_DIR"), "ctrd-checkpoint") @@ -204,6 +214,7 @@ func (l *local) Create(ctx context.Context, r *api.CreateTaskRequest, _ ...grpc. return nil, err } } + opts := runtime.CreateOpts{ Spec: container.Spec, IO: runtime.IO{ Stdin: r.Stdin, Stdout: r.Stdout, Stderr: r.Stderr, Terminal: r.Terminal, }, - Checkpoint: checkpointPath, - Runtime: container.Runtime.Name, - RuntimeOptions: container.Runtime.Options, - TaskOptions: r.Options, - SandboxID: container.SandboxID, - Address: taskAPIAddress, - Version: taskAPIVersion, + Checkpoint: checkpointPath, + RestoreFromPath: restoreFromPath, + Runtime: container.Runtime.Name, + RuntimeOptions: container.Runtime.Options, + TaskOptions: r.Options, + SandboxID: container.SandboxID, + Address: taskAPIAddress, + Version: taskAPIVersion, } if r.RuntimePath != "" { opts.Runtime = r.RuntimePath diff --git a/vendor/github.com/checkpoint-restore/go-criu/v7/stats/stats.pb.go b/vendor/github.com/checkpoint-restore/go-criu/v7/stats/stats.pb.go new file mode 100644 index 0000000000000..ffec3809b94f0 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v7/stats/stats.pb.go @@ -0,0 +1,462 @@ +// SPDX-License-Identifier: MIT + +// Code generated by protoc-gen-go. DO NOT EDIT.
+// versions: +// protoc-gen-go v1.30.0 +// protoc v4.23.4 +// source: stats/stats.proto + +package stats + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This one contains statistics about dump/restore process +type DumpStatsEntry struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + FreezingTime *uint32 `protobuf:"varint,1,req,name=freezing_time,json=freezingTime" json:"freezing_time,omitempty"` + FrozenTime *uint32 `protobuf:"varint,2,req,name=frozen_time,json=frozenTime" json:"frozen_time,omitempty"` + MemdumpTime *uint32 `protobuf:"varint,3,req,name=memdump_time,json=memdumpTime" json:"memdump_time,omitempty"` + MemwriteTime *uint32 `protobuf:"varint,4,req,name=memwrite_time,json=memwriteTime" json:"memwrite_time,omitempty"` + PagesScanned *uint64 `protobuf:"varint,5,req,name=pages_scanned,json=pagesScanned" json:"pages_scanned,omitempty"` + PagesSkippedParent *uint64 `protobuf:"varint,6,req,name=pages_skipped_parent,json=pagesSkippedParent" json:"pages_skipped_parent,omitempty"` + PagesWritten *uint64 `protobuf:"varint,7,req,name=pages_written,json=pagesWritten" json:"pages_written,omitempty"` + IrmapResolve *uint32 `protobuf:"varint,8,opt,name=irmap_resolve,json=irmapResolve" json:"irmap_resolve,omitempty"` + PagesLazy *uint64 `protobuf:"varint,9,req,name=pages_lazy,json=pagesLazy" json:"pages_lazy,omitempty"` + PagePipes *uint64 `protobuf:"varint,10,opt,name=page_pipes,json=pagePipes" json:"page_pipes,omitempty"` + PagePipeBufs *uint64 `protobuf:"varint,11,opt,name=page_pipe_bufs,json=pagePipeBufs" json:"page_pipe_bufs,omitempty"` + ShpagesScanned *uint64 `protobuf:"varint,12,opt,name=shpages_scanned,json=shpagesScanned" json:"shpages_scanned,omitempty"` + ShpagesSkippedParent *uint64 `protobuf:"varint,13,opt,name=shpages_skipped_parent,json=shpagesSkippedParent" json:"shpages_skipped_parent,omitempty"` + ShpagesWritten *uint64 `protobuf:"varint,14,opt,name=shpages_written,json=shpagesWritten" json:"shpages_written,omitempty"` +} + +func (x *DumpStatsEntry) Reset() { + *x = DumpStatsEntry{} + if protoimpl.UnsafeEnabled { + mi := &file_stats_stats_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DumpStatsEntry) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DumpStatsEntry) ProtoMessage() {} + +func (x *DumpStatsEntry) ProtoReflect() protoreflect.Message { + mi := &file_stats_stats_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DumpStatsEntry.ProtoReflect.Descriptor instead. 
+func (*DumpStatsEntry) Descriptor() ([]byte, []int) { + return file_stats_stats_proto_rawDescGZIP(), []int{0} +} + +func (x *DumpStatsEntry) GetFreezingTime() uint32 { + if x != nil && x.FreezingTime != nil { + return *x.FreezingTime + } + return 0 +} + +func (x *DumpStatsEntry) GetFrozenTime() uint32 { + if x != nil && x.FrozenTime != nil { + return *x.FrozenTime + } + return 0 +} + +func (x *DumpStatsEntry) GetMemdumpTime() uint32 { + if x != nil && x.MemdumpTime != nil { + return *x.MemdumpTime + } + return 0 +} + +func (x *DumpStatsEntry) GetMemwriteTime() uint32 { + if x != nil && x.MemwriteTime != nil { + return *x.MemwriteTime + } + return 0 +} + +func (x *DumpStatsEntry) GetPagesScanned() uint64 { + if x != nil && x.PagesScanned != nil { + return *x.PagesScanned + } + return 0 +} + +func (x *DumpStatsEntry) GetPagesSkippedParent() uint64 { + if x != nil && x.PagesSkippedParent != nil { + return *x.PagesSkippedParent + } + return 0 +} + +func (x *DumpStatsEntry) GetPagesWritten() uint64 { + if x != nil && x.PagesWritten != nil { + return *x.PagesWritten + } + return 0 +} + +func (x *DumpStatsEntry) GetIrmapResolve() uint32 { + if x != nil && x.IrmapResolve != nil { + return *x.IrmapResolve + } + return 0 +} + +func (x *DumpStatsEntry) GetPagesLazy() uint64 { + if x != nil && x.PagesLazy != nil { + return *x.PagesLazy + } + return 0 +} + +func (x *DumpStatsEntry) GetPagePipes() uint64 { + if x != nil && x.PagePipes != nil { + return *x.PagePipes + } + return 0 +} + +func (x *DumpStatsEntry) GetPagePipeBufs() uint64 { + if x != nil && x.PagePipeBufs != nil { + return *x.PagePipeBufs + } + return 0 +} + +func (x *DumpStatsEntry) GetShpagesScanned() uint64 { + if x != nil && x.ShpagesScanned != nil { + return *x.ShpagesScanned + } + return 0 +} + +func (x *DumpStatsEntry) GetShpagesSkippedParent() uint64 { + if x != nil && x.ShpagesSkippedParent != nil { + return *x.ShpagesSkippedParent + } + return 0 +} + +func (x *DumpStatsEntry) GetShpagesWritten() uint64 { + if x != nil && x.ShpagesWritten != nil { + return *x.ShpagesWritten + } + return 0 +} + +type RestoreStatsEntry struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + PagesCompared *uint64 `protobuf:"varint,1,req,name=pages_compared,json=pagesCompared" json:"pages_compared,omitempty"` + PagesSkippedCow *uint64 `protobuf:"varint,2,req,name=pages_skipped_cow,json=pagesSkippedCow" json:"pages_skipped_cow,omitempty"` + ForkingTime *uint32 `protobuf:"varint,3,req,name=forking_time,json=forkingTime" json:"forking_time,omitempty"` + RestoreTime *uint32 `protobuf:"varint,4,req,name=restore_time,json=restoreTime" json:"restore_time,omitempty"` + PagesRestored *uint64 `protobuf:"varint,5,opt,name=pages_restored,json=pagesRestored" json:"pages_restored,omitempty"` +} + +func (x *RestoreStatsEntry) Reset() { + *x = RestoreStatsEntry{} + if protoimpl.UnsafeEnabled { + mi := &file_stats_stats_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *RestoreStatsEntry) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*RestoreStatsEntry) ProtoMessage() {} + +func (x *RestoreStatsEntry) ProtoReflect() protoreflect.Message { + mi := &file_stats_stats_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use 
RestoreStatsEntry.ProtoReflect.Descriptor instead. +func (*RestoreStatsEntry) Descriptor() ([]byte, []int) { + return file_stats_stats_proto_rawDescGZIP(), []int{1} +} + +func (x *RestoreStatsEntry) GetPagesCompared() uint64 { + if x != nil && x.PagesCompared != nil { + return *x.PagesCompared + } + return 0 +} + +func (x *RestoreStatsEntry) GetPagesSkippedCow() uint64 { + if x != nil && x.PagesSkippedCow != nil { + return *x.PagesSkippedCow + } + return 0 +} + +func (x *RestoreStatsEntry) GetForkingTime() uint32 { + if x != nil && x.ForkingTime != nil { + return *x.ForkingTime + } + return 0 +} + +func (x *RestoreStatsEntry) GetRestoreTime() uint32 { + if x != nil && x.RestoreTime != nil { + return *x.RestoreTime + } + return 0 +} + +func (x *RestoreStatsEntry) GetPagesRestored() uint64 { + if x != nil && x.PagesRestored != nil { + return *x.PagesRestored + } + return 0 +} + +type StatsEntry struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Dump *DumpStatsEntry `protobuf:"bytes,1,opt,name=dump" json:"dump,omitempty"` + Restore *RestoreStatsEntry `protobuf:"bytes,2,opt,name=restore" json:"restore,omitempty"` +} + +func (x *StatsEntry) Reset() { + *x = StatsEntry{} + if protoimpl.UnsafeEnabled { + mi := &file_stats_stats_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StatsEntry) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StatsEntry) ProtoMessage() {} + +func (x *StatsEntry) ProtoReflect() protoreflect.Message { + mi := &file_stats_stats_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StatsEntry.ProtoReflect.Descriptor instead. 
+func (*StatsEntry) Descriptor() ([]byte, []int) { + return file_stats_stats_proto_rawDescGZIP(), []int{2} +} + +func (x *StatsEntry) GetDump() *DumpStatsEntry { + if x != nil { + return x.Dump + } + return nil +} + +func (x *StatsEntry) GetRestore() *RestoreStatsEntry { + if x != nil { + return x.Restore + } + return nil +} + +var File_stats_stats_proto protoreflect.FileDescriptor + +var file_stats_stats_proto_rawDesc = []byte{ + 0x0a, 0x11, 0x73, 0x74, 0x61, 0x74, 0x73, 0x2f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x22, 0xad, 0x04, 0x0a, 0x10, 0x64, 0x75, 0x6d, 0x70, 0x5f, 0x73, 0x74, 0x61, + 0x74, 0x73, 0x5f, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x23, 0x0a, 0x0d, 0x66, 0x72, 0x65, 0x65, + 0x7a, 0x69, 0x6e, 0x67, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x02, 0x28, 0x0d, 0x52, + 0x0c, 0x66, 0x72, 0x65, 0x65, 0x7a, 0x69, 0x6e, 0x67, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x1f, 0x0a, + 0x0b, 0x66, 0x72, 0x6f, 0x7a, 0x65, 0x6e, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x02, + 0x28, 0x0d, 0x52, 0x0a, 0x66, 0x72, 0x6f, 0x7a, 0x65, 0x6e, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x21, + 0x0a, 0x0c, 0x6d, 0x65, 0x6d, 0x64, 0x75, 0x6d, 0x70, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x03, + 0x20, 0x02, 0x28, 0x0d, 0x52, 0x0b, 0x6d, 0x65, 0x6d, 0x64, 0x75, 0x6d, 0x70, 0x54, 0x69, 0x6d, + 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x6d, 0x65, 0x6d, 0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x74, 0x69, + 0x6d, 0x65, 0x18, 0x04, 0x20, 0x02, 0x28, 0x0d, 0x52, 0x0c, 0x6d, 0x65, 0x6d, 0x77, 0x72, 0x69, + 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x61, 0x67, 0x65, 0x73, 0x5f, + 0x73, 0x63, 0x61, 0x6e, 0x6e, 0x65, 0x64, 0x18, 0x05, 0x20, 0x02, 0x28, 0x04, 0x52, 0x0c, 0x70, + 0x61, 0x67, 0x65, 0x73, 0x53, 0x63, 0x61, 0x6e, 0x6e, 0x65, 0x64, 0x12, 0x30, 0x0a, 0x14, 0x70, + 0x61, 0x67, 0x65, 0x73, 0x5f, 0x73, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x5f, 0x70, 0x61, 0x72, + 0x65, 0x6e, 0x74, 0x18, 0x06, 0x20, 0x02, 0x28, 0x04, 0x52, 0x12, 0x70, 0x61, 0x67, 0x65, 0x73, + 0x53, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x12, 0x23, 0x0a, + 0x0d, 0x70, 0x61, 0x67, 0x65, 0x73, 0x5f, 0x77, 0x72, 0x69, 0x74, 0x74, 0x65, 0x6e, 0x18, 0x07, + 0x20, 0x02, 0x28, 0x04, 0x52, 0x0c, 0x70, 0x61, 0x67, 0x65, 0x73, 0x57, 0x72, 0x69, 0x74, 0x74, + 0x65, 0x6e, 0x12, 0x23, 0x0a, 0x0d, 0x69, 0x72, 0x6d, 0x61, 0x70, 0x5f, 0x72, 0x65, 0x73, 0x6f, + 0x6c, 0x76, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x69, 0x72, 0x6d, 0x61, 0x70, + 0x52, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x61, 0x67, 0x65, 0x73, + 0x5f, 0x6c, 0x61, 0x7a, 0x79, 0x18, 0x09, 0x20, 0x02, 0x28, 0x04, 0x52, 0x09, 0x70, 0x61, 0x67, + 0x65, 0x73, 0x4c, 0x61, 0x7a, 0x79, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x61, 0x67, 0x65, 0x5f, 0x70, + 0x69, 0x70, 0x65, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x04, 0x52, 0x09, 0x70, 0x61, 0x67, 0x65, + 0x50, 0x69, 0x70, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x70, 0x61, 0x67, 0x65, 0x5f, 0x70, 0x69, + 0x70, 0x65, 0x5f, 0x62, 0x75, 0x66, 0x73, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, 0x70, + 0x61, 0x67, 0x65, 0x50, 0x69, 0x70, 0x65, 0x42, 0x75, 0x66, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x73, + 0x68, 0x70, 0x61, 0x67, 0x65, 0x73, 0x5f, 0x73, 0x63, 0x61, 0x6e, 0x6e, 0x65, 0x64, 0x18, 0x0c, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x0e, 0x73, 0x68, 0x70, 0x61, 0x67, 0x65, 0x73, 0x53, 0x63, 0x61, + 0x6e, 0x6e, 0x65, 0x64, 0x12, 0x34, 0x0a, 0x16, 0x73, 0x68, 0x70, 0x61, 0x67, 0x65, 0x73, 0x5f, + 0x73, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x5f, 0x70, 0x61, 0x72, 0x65, 0x6e, 
0x74, 0x18, 0x0d, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x14, 0x73, 0x68, 0x70, 0x61, 0x67, 0x65, 0x73, 0x53, 0x6b, 0x69, + 0x70, 0x70, 0x65, 0x64, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x73, 0x68, + 0x70, 0x61, 0x67, 0x65, 0x73, 0x5f, 0x77, 0x72, 0x69, 0x74, 0x74, 0x65, 0x6e, 0x18, 0x0e, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x0e, 0x73, 0x68, 0x70, 0x61, 0x67, 0x65, 0x73, 0x57, 0x72, 0x69, 0x74, + 0x74, 0x65, 0x6e, 0x22, 0xd5, 0x01, 0x0a, 0x13, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5f, + 0x73, 0x74, 0x61, 0x74, 0x73, 0x5f, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x25, 0x0a, 0x0e, 0x70, + 0x61, 0x67, 0x65, 0x73, 0x5f, 0x63, 0x6f, 0x6d, 0x70, 0x61, 0x72, 0x65, 0x64, 0x18, 0x01, 0x20, + 0x02, 0x28, 0x04, 0x52, 0x0d, 0x70, 0x61, 0x67, 0x65, 0x73, 0x43, 0x6f, 0x6d, 0x70, 0x61, 0x72, + 0x65, 0x64, 0x12, 0x2a, 0x0a, 0x11, 0x70, 0x61, 0x67, 0x65, 0x73, 0x5f, 0x73, 0x6b, 0x69, 0x70, + 0x70, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x77, 0x18, 0x02, 0x20, 0x02, 0x28, 0x04, 0x52, 0x0f, 0x70, + 0x61, 0x67, 0x65, 0x73, 0x53, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x43, 0x6f, 0x77, 0x12, 0x21, + 0x0a, 0x0c, 0x66, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x03, + 0x20, 0x02, 0x28, 0x0d, 0x52, 0x0b, 0x66, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x54, 0x69, 0x6d, + 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5f, 0x74, 0x69, 0x6d, + 0x65, 0x18, 0x04, 0x20, 0x02, 0x28, 0x0d, 0x52, 0x0b, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, + 0x54, 0x69, 0x6d, 0x65, 0x12, 0x25, 0x0a, 0x0e, 0x70, 0x61, 0x67, 0x65, 0x73, 0x5f, 0x72, 0x65, + 0x73, 0x74, 0x6f, 0x72, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0d, 0x70, 0x61, + 0x67, 0x65, 0x73, 0x52, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x64, 0x22, 0x64, 0x0a, 0x0b, 0x73, + 0x74, 0x61, 0x74, 0x73, 0x5f, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x25, 0x0a, 0x04, 0x64, 0x75, + 0x6d, 0x70, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x64, 0x75, 0x6d, 0x70, 0x5f, + 0x73, 0x74, 0x61, 0x74, 0x73, 0x5f, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x04, 0x64, 0x75, 0x6d, + 0x70, 0x12, 0x2e, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5f, 0x73, 0x74, 0x61, + 0x74, 0x73, 0x5f, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x07, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, + 0x65, +} + +var ( + file_stats_stats_proto_rawDescOnce sync.Once + file_stats_stats_proto_rawDescData = file_stats_stats_proto_rawDesc +) + +func file_stats_stats_proto_rawDescGZIP() []byte { + file_stats_stats_proto_rawDescOnce.Do(func() { + file_stats_stats_proto_rawDescData = protoimpl.X.CompressGZIP(file_stats_stats_proto_rawDescData) + }) + return file_stats_stats_proto_rawDescData +} + +var file_stats_stats_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_stats_stats_proto_goTypes = []interface{}{ + (*DumpStatsEntry)(nil), // 0: dump_stats_entry + (*RestoreStatsEntry)(nil), // 1: restore_stats_entry + (*StatsEntry)(nil), // 2: stats_entry +} +var file_stats_stats_proto_depIdxs = []int32{ + 0, // 0: stats_entry.dump:type_name -> dump_stats_entry + 1, // 1: stats_entry.restore:type_name -> restore_stats_entry + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name +} + +func init() { file_stats_stats_proto_init() } +func 
file_stats_stats_proto_init() { + if File_stats_stats_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_stats_stats_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DumpStatsEntry); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_stats_stats_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*RestoreStatsEntry); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_stats_stats_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StatsEntry); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_stats_stats_proto_rawDesc, + NumEnums: 0, + NumMessages: 3, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_stats_stats_proto_goTypes, + DependencyIndexes: file_stats_stats_proto_depIdxs, + MessageInfos: file_stats_stats_proto_msgTypes, + }.Build() + File_stats_stats_proto = out.File + file_stats_stats_proto_rawDesc = nil + file_stats_stats_proto_goTypes = nil + file_stats_stats_proto_depIdxs = nil +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/v7/stats/stats.proto b/vendor/github.com/checkpoint-restore/go-criu/v7/stats/stats.proto new file mode 100644 index 0000000000000..64e46181dad1a --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v7/stats/stats.proto @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: MIT + +syntax = "proto2"; + +// This one contains statistics about dump/restore process +message dump_stats_entry { + required uint32 freezing_time = 1; + required uint32 frozen_time = 2; + required uint32 memdump_time = 3; + required uint32 memwrite_time = 4; + + required uint64 pages_scanned = 5; + required uint64 pages_skipped_parent = 6; + required uint64 pages_written = 7; + + optional uint32 irmap_resolve = 8; + + required uint64 pages_lazy = 9; + optional uint64 page_pipes = 10; + optional uint64 page_pipe_bufs = 11; + + optional uint64 shpages_scanned = 12; + optional uint64 shpages_skipped_parent = 13; + optional uint64 shpages_written = 14; +} + +message restore_stats_entry { + required uint64 pages_compared = 1; + required uint64 pages_skipped_cow = 2; + + required uint32 forking_time = 3; + required uint32 restore_time = 4; + + optional uint64 pages_restored = 5; +} + +message stats_entry { + optional dump_stats_entry dump = 1; + optional restore_stats_entry restore = 2; +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/v7/stats/types.go b/vendor/github.com/checkpoint-restore/go-criu/v7/stats/types.go new file mode 100644 index 0000000000000..69d5b0cea4975 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v7/stats/types.go @@ -0,0 +1,14 @@ +package stats + +const ( + StatsDump = "stats-dump" + StatsRestore = "stats-restore" + + ImgServiceMagic = 0x55105940 /* Zlatoust */ + StatsMagic = 0x57093306 /* Ostashkov */ + + PrimaryMagicOffset = 0x0 + SecondaryMagicOffset = 0x4 + SizeOffset = 0x8 + PayloadOffset = 0xC +) diff --git a/vendor/github.com/checkpoint-restore/go-criu/v7/stats/utils.go b/vendor/github.com/checkpoint-restore/go-criu/v7/stats/utils.go new file mode 100644 index 
0000000000000..2b65cc2cdad7a --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v7/stats/utils.go @@ -0,0 +1,52 @@ +package stats + +import ( + "encoding/binary" + "errors" + "os" + "path/filepath" + + "google.golang.org/protobuf/proto" +) + +func readStatisticsFile(imgDir *os.File, fileName string) (*StatsEntry, error) { + buf, err := os.ReadFile(filepath.Join(imgDir.Name(), fileName)) + if err != nil { + return nil, err + } + + if binary.LittleEndian.Uint32(buf[PrimaryMagicOffset:SecondaryMagicOffset]) != ImgServiceMagic { + return nil, errors.New("primary magic not found") + } + + if binary.LittleEndian.Uint32(buf[SecondaryMagicOffset:SizeOffset]) != StatsMagic { + return nil, errors.New("secondary magic not found") + } + + payloadSize := binary.LittleEndian.Uint32(buf[SizeOffset:PayloadOffset]) + + st := &StatsEntry{} + if err := proto.Unmarshal(buf[PayloadOffset:PayloadOffset+payloadSize], st); err != nil { + return nil, err + } + + return st, nil +} + +func CriuGetDumpStats(imgDir *os.File) (*DumpStatsEntry, error) { + st, err := readStatisticsFile(imgDir, StatsDump) + if err != nil { + return nil, err + } + + return st.GetDump(), nil +} + +func CriuGetRestoreStats(imgDir *os.File) (*RestoreStatsEntry, error) { + st, err := readStatisticsFile(imgDir, StatsRestore) + if err != nil { + return nil, err + } + + return st.GetRestore(), nil +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/v7/utils/criu.go b/vendor/github.com/checkpoint-restore/go-criu/v7/utils/criu.go new file mode 100644 index 0000000000000..4daf9af802bd5 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v7/utils/criu.go @@ -0,0 +1,8 @@ +package utils + +// MinCriuVersion for Podman at least CRIU 3.11 is required +const MinCriuVersionPodman = 31100 + +// PodCriuVersion is the version of CRIU needed for +// checkpointing and restoring containers out of and into Pods. +const PodCriuVersion = 31600 diff --git a/vendor/github.com/checkpoint-restore/go-criu/v7/utils/criu_linux.go b/vendor/github.com/checkpoint-restore/go-criu/v7/utils/criu_linux.go new file mode 100644 index 0000000000000..1c3fe5141532d --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v7/utils/criu_linux.go @@ -0,0 +1,42 @@ +package utils + +import ( + "fmt" + + "github.com/checkpoint-restore/go-criu/v7" + "github.com/checkpoint-restore/go-criu/v7/rpc" + "google.golang.org/protobuf/proto" +) + +// CheckForCRIU checks if CRIU is available and if it is as least the +// version as specified in the "version" parameter. +func CheckForCriu(version int) error { + criuVersion, err := GetCriuVersion() + if err != nil { + return fmt.Errorf("failed to check for criu version: %w", err) + } + + if criuVersion >= version { + return nil + } + return fmt.Errorf("checkpoint/restore requires at least CRIU %d, current version is %d", version, criuVersion) +} + +// Convenience function to easily check if memory tracking is supported. 
+func IsMemTrack() bool { + features, err := criu.MakeCriu().FeatureCheck( + &rpc.CriuFeatures{ + MemTrack: proto.Bool(true), + }, + ) + if err != nil { + return false + } + + return features.GetMemTrack() +} + +func GetCriuVersion() (int, error) { + c := criu.MakeCriu() + return c.GetCriuVersion() +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/v7/utils/criu_unsupported.go b/vendor/github.com/checkpoint-restore/go-criu/v7/utils/criu_unsupported.go new file mode 100644 index 0000000000000..739da0ada38c8 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v7/utils/criu_unsupported.go @@ -0,0 +1,18 @@ +//go:build !linux +// +build !linux + +package utils + +import "fmt" + +func CheckForCriu(version int) error { + return fmt.Errorf("CRIU not supported on this platform") +} + +func IsMemTrack() bool { + return false +} + +func GetCriuVersion() (int, error) { + return 0, fmt.Errorf("CRIU not supported in this platform") +} diff --git a/vendor/k8s.io/kubelet/pkg/types/labels.go b/vendor/k8s.io/kubelet/pkg/types/labels.go new file mode 100644 index 0000000000000..79f80b1e62b22 --- /dev/null +++ b/vendor/k8s.io/kubelet/pkg/types/labels.go @@ -0,0 +1,52 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package types + +// Label keys for labels used in this package. +const ( + KubernetesPodNameLabel = "io.kubernetes.pod.name" + KubernetesPodNamespaceLabel = "io.kubernetes.pod.namespace" + KubernetesPodUIDLabel = "io.kubernetes.pod.uid" + KubernetesContainerNameLabel = "io.kubernetes.container.name" +) + +// Label value constants +const ( + // PodInfraContainerName is the KubernetesPodNameLabel value for infra + // containers. + PodInfraContainerName = "POD" +) + +// GetContainerName returns the value of the KubernetesContainerNameLabel. +func GetContainerName(labels map[string]string) string { + return labels[KubernetesContainerNameLabel] +} + +// GetPodName returns the value of the KubernetesPodNameLabel. +func GetPodName(labels map[string]string) string { + return labels[KubernetesPodNameLabel] +} + +// GetPodUID returns the value of the KubernetesPodUIDLabel. +func GetPodUID(labels map[string]string) string { + return labels[KubernetesPodUIDLabel] +} + +// GetPodNamespace returns the value of the KubernetesPodNamespaceLabel. 
+func GetPodNamespace(labels map[string]string) string { + return labels[KubernetesPodNamespaceLabel] +} diff --git a/vendor/modules.txt b/vendor/modules.txt index f1b23652be78a..42786ae92f02f 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -81,6 +81,8 @@ github.com/checkpoint-restore/checkpointctl/lib ## explicit; go 1.20 github.com/checkpoint-restore/go-criu/v7 github.com/checkpoint-restore/go-criu/v7/rpc +github.com/checkpoint-restore/go-criu/v7/stats +github.com/checkpoint-restore/go-criu/v7/utils # github.com/cilium/ebpf v0.16.0 ## explicit; go 1.21 github.com/cilium/ebpf @@ -857,6 +859,7 @@ k8s.io/klog/v2/internal/sloghandler k8s.io/kubelet/pkg/cri/streaming k8s.io/kubelet/pkg/cri/streaming/portforward k8s.io/kubelet/pkg/cri/streaming/remotecommand +k8s.io/kubelet/pkg/types # k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 ## explicit; go 1.18 k8s.io/utils/clock