Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add interlocks to ensure operations are not interrupted #150

Merged
merged 8 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 94 additions & 94 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -160,96 +160,96 @@ volumes:
host:
path: /var/run/docker.sock

---
# Pipeline for the s390x architecture: runs the dapper CI build and, for tag
# events on drone-publish.rancher.io, publishes the release binaries and the
# system-agent container images (regular and SUC variants).
kind: pipeline
name: s390x

platform:
  os: linux
  arch: amd64

# Hack needed for s390x: https://gist.github.com/colstrom/c2f359f72658aaabb44150ac20b16d7c#gistcomment-3858388
node:
  arch: s390x

steps:
- name: build
  image: rancher/dapper:v0.6.0
  commands:
  - dapper ci
  volumes:
  - name: docker
    path: /var/run/docker.sock

- name: github_binary_release
  image: rancher/drone-images:github-release-s390x
  settings:
    api_key:
      from_secret: github_token
    prerelease: true
    checksum:
    - sha256
    checksum_file: CHECKSUMsum-s390x.txt
    checksum_flatten: true
    files:
    - "dist/artifacts/*"
  when:
    instance:
    - drone-publish.rancher.io
    ref:
    # Branch refs live under refs/heads/ (plural); refs/head/ never matches.
    - refs/heads/master
    - refs/tags/*
    event:
    - tag

- name: docker-publish
  image: rancher/drone-images:docker-s390x
  volumes:
  - name: docker
    path: /var/run/docker.sock
  settings:
    dockerfile: package/Dockerfile
    password:
      from_secret: docker_password
    repo: "rancher/system-agent"
    tag: "${DRONE_TAG}-s390x"
    username:
      from_secret: docker_username
  when:
    instance:
    - drone-publish.rancher.io
    ref:
    # Branch refs live under refs/heads/ (plural); refs/head/ never matches.
    - refs/heads/master
    - refs/tags/*
    event:
    - tag

- name: docker-publish-suc
  image: rancher/drone-images:docker-s390x
  volumes:
  - name: docker
    path: /var/run/docker.sock
  settings:
    dockerfile: package/Dockerfile.suc
    password:
      from_secret: docker_password
    repo: "rancher/system-agent"
    tag: "${DRONE_TAG}-suc-s390x"
    username:
      from_secret: docker_username
  when:
    instance:
    - drone-publish.rancher.io
    ref:
    # Branch refs live under refs/heads/ (plural); refs/head/ never matches.
    - refs/heads/master
    - refs/tags/*
    event:
    - tag

# Host Docker socket shared with the steps that mount the "docker" volume.
volumes:
- name: docker
  host:
    path: /var/run/docker.sock
#---
#kind: pipeline
#name: s390x
#
#platform:
# os: linux
# arch: amd64
#
## Hack needed for s390x: https://gist.github.com/colstrom/c2f359f72658aaabb44150ac20b16d7c#gistcomment-3858388
#node:
# arch: s390x
#
#steps:
#- name: build
# image: rancher/dapper:v0.6.0
# commands:
# - dapper ci
# volumes:
# - name: docker
# path: /var/run/docker.sock
#
#- name: github_binary_release
# image: rancher/drone-images:github-release-s390x
# settings:
# api_key:
# from_secret: github_token
# prerelease: true
# checksum:
# - sha256
# checksum_file: CHECKSUMsum-s390x.txt
# checksum_flatten: true
# files:
# - "dist/artifacts/*"
# when:
# instance:
# - drone-publish.rancher.io
# ref:
# - refs/head/master
# - refs/tags/*
# event:
# - tag
#
#- name: docker-publish
# image: rancher/drone-images:docker-s390x
# volumes:
# - name: docker
# path: /var/run/docker.sock
# settings:
# dockerfile: package/Dockerfile
# password:
# from_secret: docker_password
# repo: "rancher/system-agent"
# tag: "${DRONE_TAG}-s390x"
# username:
# from_secret: docker_username
# when:
# instance:
# - drone-publish.rancher.io
# ref:
# - refs/head/master
# - refs/tags/*
# event:
# - tag
#
#- name: docker-publish-suc
# image: rancher/drone-images:docker-s390x
# volumes:
# - name: docker
# path: /var/run/docker.sock
# settings:
# dockerfile: package/Dockerfile.suc
# password:
# from_secret: docker_password
# repo: "rancher/system-agent"
# tag: "${DRONE_TAG}-suc-s390x"
# username:
# from_secret: docker_username
# when:
# instance:
# - drone-publish.rancher.io
# ref:
# - refs/head/master
# - refs/tags/*
# event:
# - tag
#
#volumes:
#- name: docker
# host:
# path: /var/run/docker.sock

---
kind: pipeline
Expand All @@ -270,7 +270,7 @@ steps:
platforms:
- linux/amd64
- linux/arm64
- linux/s390x
# - linux/s390x
target: "rancher/system-agent:${DRONE_TAG}"
template: "rancher/system-agent:${DRONE_TAG}-ARCH"
when:
Expand All @@ -285,7 +285,7 @@ steps:
depends_on:
- amd64
- arm64
- s390x
#- s390x

---
kind: pipeline
Expand All @@ -306,7 +306,7 @@ steps:
platforms:
- linux/amd64
- linux/arm64
- linux/s390x
# - linux/s390x
target: "rancher/system-agent:${DRONE_TAG}-suc"
template: "rancher/system-agent:${DRONE_TAG}-suc-ARCH"
when:
Expand All @@ -321,4 +321,4 @@ steps:
depends_on:
- amd64
- arm64
- s390x
#- s390x
29 changes: 28 additions & 1 deletion install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ fi
FALLBACK=v0.2.9
CACERTS_PATH=cacerts
RETRYCOUNT=4500
APPLYINATOR_ACTIVE_WAIT_COUNT=60 # If the system-agent is unhealthy but had created an interlock file to indicate it was actively applying a plan, after 5 minutes, ignore the interlock.

# info logs the given argument at info log level.
info() {
Expand Down Expand Up @@ -426,11 +427,13 @@ setup_env() {
}

# ensure_directories creates the agent var directory, its interlock
# subdirectory and the agent config directory, then restricts each of them to
# mode 700 and root:root ownership.
ensure_directories() {
    # Every expansion is quoted so a path containing whitespace or glob
    # characters is passed through as a single, literal argument.
    mkdir -p "${CATTLE_AGENT_VAR_DIR}"
    mkdir -p "${CATTLE_AGENT_VAR_DIR}/interlock"
    mkdir -p "${CATTLE_AGENT_CONFIG_DIR}"
    chmod 700 "${CATTLE_AGENT_VAR_DIR}"
    chmod 700 "${CATTLE_AGENT_VAR_DIR}/interlock"
    chmod 700 "${CATTLE_AGENT_CONFIG_DIR}"
    chown root:root "${CATTLE_AGENT_VAR_DIR}"
    chown root:root "${CATTLE_AGENT_VAR_DIR}/interlock"
    chown root:root "${CATTLE_AGENT_CONFIG_DIR}"
}

Expand Down Expand Up @@ -783,6 +786,7 @@ appliedPlanDirectory: ${CATTLE_AGENT_VAR_DIR}/applied
remoteEnabled: ${CATTLE_REMOTE_ENABLED}
localEnabled: ${CATTLE_LOCAL_ENABLED}
localPlanDirectory: ${CATTLE_AGENT_VAR_DIR}/plans
interlockDirectory: ${CATTLE_AGENT_VAR_DIR}/interlock
preserveWorkDirectory: ${CATTLE_PRESERVE_WORKDIR}
EOF
umask "${UMASK}"
Expand All @@ -796,6 +800,9 @@ generate_cattle_identifier() {
info "Generating Cattle ID"
if [ -f "${CATTLE_AGENT_CONFIG_DIR}/cattle-id" ]; then
CATTLE_ID=$(cat ${CATTLE_AGENT_CONFIG_DIR}/cattle-id);
if [ -z "${CATTLE_ID}" ]; then
fatal "Cattle ID was empty, aborting installation"
fi
info "Cattle ID was already detected as ${CATTLE_ID}. Not generating a new one."
return
fi
Expand All @@ -805,6 +812,9 @@ generate_cattle_identifier() {
umask 0177
echo "${CATTLE_ID}" > ${CATTLE_AGENT_CONFIG_DIR}/cattle-id
umask "${UMASK}"
if [ ! -s ${CATTLE_AGENT_CONFIG_DIR}/cattle-id ]; then
fatal "Cattle ID could not be persisted. Aborting installation"
fi
return
fi
info "Not generating Cattle ID"
Expand Down Expand Up @@ -832,6 +842,19 @@ create_env_file() {
done
}

# ensure_applyinator_not_active waits while the applyinator-active interlock
# file exists in ${CATTLE_AGENT_VAR_DIR}/interlock, re-checking every 5 seconds.
# The wait is bounded by APPLYINATOR_ACTIVE_WAIT_COUNT: once the attempt
# counter reaches that value the interlock is ignored and the function returns,
# so a stale interlock file cannot block the installation indefinitely.
ensure_applyinator_not_active() {
    i=1
    # Use -lt rather than -ne so that a wait count below 1 skips the wait
    # instead of looping forever while the interlock file is present.
    while [ "${i}" -lt "${APPLYINATOR_ACTIVE_WAIT_COUNT}" ]; do
        if [ ! -f "${CATTLE_AGENT_VAR_DIR}/interlock/applyinator-active" ]; then
            return 0
        fi
        i=$((i + 1))
        info "Active plan reconciliation detected. Sleeping for 5 seconds and retrying check"
        sleep 5
    done
    # Make it visible in the log when the wait was exhausted and the
    # interlock is being overridden rather than having cleared on its own.
    if [ -f "${CATTLE_AGENT_VAR_DIR}/interlock/applyinator-active" ]; then
        info "Active plan reconciliation interlock still present after waiting. Ignoring interlock and continuing"
    fi
}

do_install() {
if [ $(id -u) != 0 ]; then
fatal "This script must be run as root."
Expand All @@ -843,6 +866,9 @@ do_install() {
ensure_directories
verify_downloader curl || fatal "can not find curl for downloading files"

touch ${CATTLE_AGENT_VAR_DIR}/interlock/restart-pending
ensure_applyinator_not_active

if [ -n "${CATTLE_CA_CHECKSUM}" ]; then
validate_ca_required
fi
Expand All @@ -865,6 +891,7 @@ do_install() {
systemctl enable rancher-system-agent
info "Starting/restarting rancher-system-agent.service"
systemctl restart rancher-system-agent
rm -f ${CATTLE_AGENT_VAR_DIR}/interlock/restart-pending
}

do_install "$@"
Expand Down
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func run(c *cli.Context) error {
logrus.Infof("Using directory %s for work", cf.WorkDir)

imageUtil := image.NewUtility(cf.ImagesDir, cf.ImageCredentialProviderConfig, cf.ImageCredentialProviderBinDir, cf.AgentRegistriesFile)
applyinator := applyinator.NewApplyinator(cf.WorkDir, cf.PreserveWorkDir, cf.AppliedPlanDir, imageUtil)
applyinator := applyinator.NewApplyinator(cf.WorkDir, cf.PreserveWorkDir, cf.AppliedPlanDir, cf.InterlockDir, imageUtil)

if cf.RemoteEnabled {
logrus.Infof("Starting remote watch of plans")
Expand Down
Loading