Skip to content

Commit

Permalink
Add interlocks to ensure operations are not interrupted (#150)
Browse files Browse the repository at this point in the history
* Add interlocks to ensure system-agent does not get restarted when it is applying a plan and does not start applying a plan when a restart is pending
* Remove s390x from drone file
* Don't always set CROSS to true when building

Signed-off-by: Chris Kim <oats87g@gmail.com>
  • Loading branch information
Oats87 authored Dec 12, 2023
1 parent 57830e0 commit 806ef42
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 98 deletions.
188 changes: 94 additions & 94 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -160,96 +160,96 @@ volumes:
host:
path: /var/run/docker.sock

---
kind: pipeline
name: s390x

platform:
os: linux
arch: amd64

# Hack needed for s390x: https://gist.github.com/colstrom/c2f359f72658aaabb44150ac20b16d7c#gistcomment-3858388
node:
arch: s390x

steps:
- name: build
image: rancher/dapper:v0.6.0
commands:
- dapper ci
volumes:
- name: docker
path: /var/run/docker.sock

- name: github_binary_release
image: rancher/drone-images:github-release-s390x
settings:
api_key:
from_secret: github_token
prerelease: true
checksum:
- sha256
checksum_file: CHECKSUMsum-s390x.txt
checksum_flatten: true
files:
- "dist/artifacts/*"
when:
instance:
- drone-publish.rancher.io
ref:
- refs/head/master
- refs/tags/*
event:
- tag

- name: docker-publish
image: rancher/drone-images:docker-s390x
volumes:
- name: docker
path: /var/run/docker.sock
settings:
dockerfile: package/Dockerfile
password:
from_secret: docker_password
repo: "rancher/system-agent"
tag: "${DRONE_TAG}-s390x"
username:
from_secret: docker_username
when:
instance:
- drone-publish.rancher.io
ref:
- refs/head/master
- refs/tags/*
event:
- tag

- name: docker-publish-suc
image: rancher/drone-images:docker-s390x
volumes:
- name: docker
path: /var/run/docker.sock
settings:
dockerfile: package/Dockerfile.suc
password:
from_secret: docker_password
repo: "rancher/system-agent"
tag: "${DRONE_TAG}-suc-s390x"
username:
from_secret: docker_username
when:
instance:
- drone-publish.rancher.io
ref:
- refs/head/master
- refs/tags/*
event:
- tag

volumes:
- name: docker
host:
path: /var/run/docker.sock
#---
#kind: pipeline
#name: s390x
#
#platform:
# os: linux
# arch: amd64
#
## Hack needed for s390x: https://gist.github.com/colstrom/c2f359f72658aaabb44150ac20b16d7c#gistcomment-3858388
#node:
# arch: s390x
#
#steps:
#- name: build
# image: rancher/dapper:v0.6.0
# commands:
# - dapper ci
# volumes:
# - name: docker
# path: /var/run/docker.sock
#
#- name: github_binary_release
# image: rancher/drone-images:github-release-s390x
# settings:
# api_key:
# from_secret: github_token
# prerelease: true
# checksum:
# - sha256
# checksum_file: CHECKSUMsum-s390x.txt
# checksum_flatten: true
# files:
# - "dist/artifacts/*"
# when:
# instance:
# - drone-publish.rancher.io
# ref:
# - refs/head/master
# - refs/tags/*
# event:
# - tag
#
#- name: docker-publish
# image: rancher/drone-images:docker-s390x
# volumes:
# - name: docker
# path: /var/run/docker.sock
# settings:
# dockerfile: package/Dockerfile
# password:
# from_secret: docker_password
# repo: "rancher/system-agent"
# tag: "${DRONE_TAG}-s390x"
# username:
# from_secret: docker_username
# when:
# instance:
# - drone-publish.rancher.io
# ref:
# - refs/head/master
# - refs/tags/*
# event:
# - tag
#
#- name: docker-publish-suc
# image: rancher/drone-images:docker-s390x
# volumes:
# - name: docker
# path: /var/run/docker.sock
# settings:
# dockerfile: package/Dockerfile.suc
# password:
# from_secret: docker_password
# repo: "rancher/system-agent"
# tag: "${DRONE_TAG}-suc-s390x"
# username:
# from_secret: docker_username
# when:
# instance:
# - drone-publish.rancher.io
# ref:
# - refs/head/master
# - refs/tags/*
# event:
# - tag
#
#volumes:
#- name: docker
# host:
# path: /var/run/docker.sock

---
kind: pipeline
Expand All @@ -270,7 +270,7 @@ steps:
platforms:
- linux/amd64
- linux/arm64
- linux/s390x
# - linux/s390x
target: "rancher/system-agent:${DRONE_TAG}"
template: "rancher/system-agent:${DRONE_TAG}-ARCH"
when:
Expand All @@ -285,7 +285,7 @@ steps:
depends_on:
- amd64
- arm64
- s390x
#- s390x

---
kind: pipeline
Expand All @@ -306,7 +306,7 @@ steps:
platforms:
- linux/amd64
- linux/arm64
- linux/s390x
# - linux/s390x
target: "rancher/system-agent:${DRONE_TAG}-suc"
template: "rancher/system-agent:${DRONE_TAG}-suc-ARCH"
when:
Expand All @@ -321,4 +321,4 @@ steps:
depends_on:
- amd64
- arm64
- s390x
#- s390x
29 changes: 28 additions & 1 deletion install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ fi
FALLBACK=v0.2.9
CACERTS_PATH=cacerts
RETRYCOUNT=4500
APPLYINATOR_ACTIVE_WAIT_COUNT=60 # If the system-agent is unhealthy but had created an interlock file to indicate it was actively applying a plan, after 5 minutes, ignore the interlock.

# info logs the given argument at info log level.
info() {
Expand Down Expand Up @@ -426,11 +427,13 @@ setup_env() {
}

# ensure_directories creates the agent's var directory, its interlock
# subdirectory and the agent's config directory, then restricts each of them
# to mode 700 owned by root:root.
# Globals read: CATTLE_AGENT_VAR_DIR, CATTLE_AGENT_CONFIG_DIR
ensure_directories() {
    # Every expansion is quoted so that a path containing whitespace or glob
    # characters is handled as a single directory instead of being word-split.
    mkdir -p "${CATTLE_AGENT_VAR_DIR}"
    mkdir -p "${CATTLE_AGENT_VAR_DIR}/interlock"
    mkdir -p "${CATTLE_AGENT_CONFIG_DIR}"
    chmod 700 "${CATTLE_AGENT_VAR_DIR}"
    chmod 700 "${CATTLE_AGENT_VAR_DIR}/interlock"
    chmod 700 "${CATTLE_AGENT_CONFIG_DIR}"
    chown root:root "${CATTLE_AGENT_VAR_DIR}"
    chown root:root "${CATTLE_AGENT_VAR_DIR}/interlock"
    chown root:root "${CATTLE_AGENT_CONFIG_DIR}"
}

Expand Down Expand Up @@ -783,6 +786,7 @@ appliedPlanDirectory: ${CATTLE_AGENT_VAR_DIR}/applied
remoteEnabled: ${CATTLE_REMOTE_ENABLED}
localEnabled: ${CATTLE_LOCAL_ENABLED}
localPlanDirectory: ${CATTLE_AGENT_VAR_DIR}/plans
interlockDirectory: ${CATTLE_AGENT_VAR_DIR}/interlock
preserveWorkDirectory: ${CATTLE_PRESERVE_WORKDIR}
EOF
umask "${UMASK}"
Expand All @@ -796,6 +800,9 @@ generate_cattle_identifier() {
info "Generating Cattle ID"
if [ -f "${CATTLE_AGENT_CONFIG_DIR}/cattle-id" ]; then
CATTLE_ID=$(cat ${CATTLE_AGENT_CONFIG_DIR}/cattle-id);
if [ -z "${CATTLE_ID}" ]; then
fatal "Cattle ID was empty, aborting installation"
fi
info "Cattle ID was already detected as ${CATTLE_ID}. Not generating a new one."
return
fi
Expand All @@ -805,6 +812,9 @@ generate_cattle_identifier() {
umask 0177
echo "${CATTLE_ID}" > ${CATTLE_AGENT_CONFIG_DIR}/cattle-id
umask "${UMASK}"
if [ ! -s ${CATTLE_AGENT_CONFIG_DIR}/cattle-id ]; then
fatal "Cattle ID could not be persisted. Aborting installation"
fi
return
fi
info "Not generating Cattle ID"
Expand Down Expand Up @@ -832,6 +842,19 @@ create_env_file() {
done
}

# ensure_applyinator_not_active waits while the interlock file
# ${CATTLE_AGENT_VAR_DIR}/interlock/applyinator-active exists, checking it up
# to APPLYINATOR_ACTIVE_WAIT_COUNT times with a 5 second sleep after each
# check that finds the file. It returns as soon as the file is absent; once
# the checks are exhausted it logs that the interlock is being ignored and
# returns so the caller can proceed.
# Globals read: CATTLE_AGENT_VAR_DIR, APPLYINATOR_ACTIVE_WAIT_COUNT
ensure_applyinator_not_active() {
    # Count from 0 and compare with -lt so that exactly
    # APPLYINATOR_ACTIVE_WAIT_COUNT waits are performed, and so that a count
    # of 0 (or less) skips the wait instead of looping forever.
    i=0
    while [ "${i}" -lt "${APPLYINATOR_ACTIVE_WAIT_COUNT}" ]; do
        if [ ! -f "${CATTLE_AGENT_VAR_DIR}/interlock/applyinator-active" ]; then
            return 0
        fi
        i=$((i + 1))
        info "Active plan reconciliation detected. Sleeping for 5 seconds and retrying check"
        sleep 5
    done
    # Re-check after the final sleep so the message is only logged when the
    # interlock really is still held.
    if [ -f "${CATTLE_AGENT_VAR_DIR}/interlock/applyinator-active" ]; then
        info "Active plan reconciliation still detected after ${i} checks. Ignoring the interlock"
    fi
}

do_install() {
if [ $(id -u) != 0 ]; then
fatal "This script must be run as root."
Expand All @@ -843,6 +866,9 @@ do_install() {
ensure_directories
verify_downloader curl || fatal "can not find curl for downloading files"

touch ${CATTLE_AGENT_VAR_DIR}/interlock/restart-pending
ensure_applyinator_not_active

if [ -n "${CATTLE_CA_CHECKSUM}" ]; then
validate_ca_required
fi
Expand All @@ -865,6 +891,7 @@ do_install() {
systemctl enable rancher-system-agent
info "Starting/restarting rancher-system-agent.service"
systemctl restart rancher-system-agent
rm -f ${CATTLE_AGENT_VAR_DIR}/interlock/restart-pending
}

do_install "$@"
Expand Down
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func run(c *cli.Context) error {
logrus.Infof("Using directory %s for work", cf.WorkDir)

imageUtil := image.NewUtility(cf.ImagesDir, cf.ImageCredentialProviderConfig, cf.ImageCredentialProviderBinDir, cf.AgentRegistriesFile)
applyinator := applyinator.NewApplyinator(cf.WorkDir, cf.PreserveWorkDir, cf.AppliedPlanDir, imageUtil)
applyinator := applyinator.NewApplyinator(cf.WorkDir, cf.PreserveWorkDir, cf.AppliedPlanDir, cf.InterlockDir, imageUtil)

if cf.RemoteEnabled {
logrus.Infof("Starting remote watch of plans")
Expand Down
Loading

0 comments on commit 806ef42

Please sign in to comment.