Files
kamal/test/cli/healthcheck_test.rb
Donal McBreen 8a41d15b69 Zero downtime deployment with cord file
When replacing a container currently we:
1. Boot the new container
2. Wait for it to become healthy
3. Stop the old container

Traefik will send requests to the old container until it notices that it
is unhealthy. But it may have stopped serving requests before that point
which can result in errors.

To get round that the new boot process is:

1. Create a directory with a single file on the host
2. Boot the new container, mounting the cord file into /tmp and
including a check for the file in the docker healthcheck
3. Wait for it to become healthy
4. Delete the healthcheck file ("cut the cord") for the old container
5. Wait for it to become unhealthy and give Traefik a couple of seconds
to notice
6. Stop the old container

The extra steps ensure that Traefik stops sending requests before the
old container is shutdown.
2023-09-06 14:35:30 +01:00

73 lines
4.3 KiB
Ruby

require_relative "cli_test_case"
class CliHealthcheckTest < CliTestCase
test "perform" do
# Prevent expected failures from outputting to terminal
Thread.report_on_exception = false
Kamal::Utils::HealthcheckPoller.stubs(:sleep) # No sleeping when retrying
Kamal::Configuration.any_instance.stubs(:run_id).returns("12345678901234567890123456789012")
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "KAMAL_CONTAINER_NAME=\"healthcheck-app\"", "--env-file", ".kamal/env/roles/app-web.env", "--health-cmd", "\"curl -f http://localhost:3000/up || exit 1\"", "--health-interval", "\"1s\"", "dhh/app:999")
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false)
# Fail twice to test retry logic
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("starting")
.then
.returns("unhealthy")
.then
.returns("healthy")
run_command("perform").tap do |output|
assert_match "container not ready (starting), retrying in 1s (attempt 1/7)...", output
assert_match "container not ready (unhealthy), retrying in 2s (attempt 2/7)...", output
assert_match "Container is healthy!", output
end
end
test "perform failing to become healthy" do
# Prevent expected failures from outputting to terminal
Thread.report_on_exception = false
Kamal::Utils::HealthcheckPoller.stubs(:sleep) # No sleeping when retrying
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "KAMAL_CONTAINER_NAME=\"healthcheck-app\"", "--env-file", ".kamal/env/roles/app-web.env", "--health-cmd", "\"curl -f http://localhost:3000/up || exit 1\"", "--health-interval", "\"1s\"", "dhh/app:999")
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false)
# Continually report unhealthy
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("unhealthy")
# Capture logs when failing
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
.returns("some log output")
# Capture container health log when failing
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_pretty_json)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{json .State.Health}}'")
.returns('{"Status":"unhealthy","Log":[{"ExitCode": 1,"Output": "/bin/sh: 1: curl: not found\n"}]}"')
exception = assert_raises do
run_command("perform")
end
assert_match "container not ready (unhealthy)", exception.message
end
private
def run_command(*command)
stdouted { Kamal::Cli::Healthcheck.start([*command, "-c", "test/fixtures/deploy_with_accessories.yml"]) }
end
end