Move health checks into Docker
Replaces our current host-based HTTP healthchecks with Docker healthchecks, and adds a new `healthcheck.cmd` config option that can be used to define a custom health check command. Also removes Traefik's healthchecks, since they are no longer necessary. When deploying a container that has a healthcheck defined, we wait for it to report a healthy status before stopping the old container that it replaces. Containers that don't have a healthcheck defined continue to wait for `MRSK.config.readiness_delay`. There are some pros and cons to using Docker healthchecks rather than checking from the host. The main advantages are: - Supports non-HTTP checks, and app-specific check scripts provided by a container. - When booting a container, allows MRSK to wait for a container to be healthy before shutting down the old container it replaces. This should be safer than relying on a timeout. - Containers with healthchecks won't be active in Traefik until they reach a healthy state, which prevents any traffic from being routed to them before they are ready. The main _disadvantage_ is that containers are now required to provide some way to check their health. Our default check assumes that `curl` is available in the container which, while common, won't always be the case.
This commit is contained in:
@@ -5,62 +5,58 @@ class CliHealthcheckTest < CliTestCase
|
||||
# Prevent expected failures from outputting to terminal
|
||||
Thread.report_on_exception = false
|
||||
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:sleep) # No sleeping when retrying
|
||||
Mrsk::Utils::HealthcheckPoller.stubs(:sleep) # No sleeping when retrying
|
||||
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
|
||||
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
|
||||
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "MRSK_CONTAINER_NAME=\"healthcheck-app\"", "dhh/app:999")
|
||||
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "MRSK_CONTAINER_NAME=\"healthcheck-app\"", "--health-cmd", "\"curl -f http://localhost:3000/up || exit 1\"", "--health-interval", "\"1s\"", "dhh/app:999")
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
|
||||
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false)
|
||||
|
||||
# Fail twice to test retry logic
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
|
||||
.with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up")
|
||||
.raises(SSHKit::Command::Failed)
|
||||
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
|
||||
.returns("starting")
|
||||
.then
|
||||
.raises(SSHKit::Command::Failed)
|
||||
.returns("unhealthy")
|
||||
.then
|
||||
.returns("200")
|
||||
.returns("healthy")
|
||||
|
||||
run_command("perform").tap do |output|
|
||||
assert_match "Health check against /up failed to respond, retrying in 1s (attempt 1/7)...", output
|
||||
assert_match "Health check against /up failed to respond, retrying in 2s (attempt 2/7)...", output
|
||||
assert_match "Health check against /up succeeded with 200 OK!", output
|
||||
assert_match "container not ready (starting), retrying in 1s (attempt 1/7)...", output
|
||||
assert_match "container not ready (unhealthy), retrying in 2s (attempt 2/7)...", output
|
||||
assert_match "Container is healthy!", output
|
||||
end
|
||||
end
|
||||
|
||||
test "perform failing because of curl" do
|
||||
test "perform failing to become healthy" do
|
||||
# Prevent expected failures from outputting to terminal
|
||||
Thread.report_on_exception = false
|
||||
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:execute) # No need to execute anything here
|
||||
Mrsk::Utils::HealthcheckPoller.stubs(:sleep) # No sleeping when retrying
|
||||
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
|
||||
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
|
||||
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "MRSK_CONTAINER_NAME=\"healthcheck-app\"", "--health-cmd", "\"curl -f http://localhost:3000/up || exit 1\"", "--health-interval", "\"1s\"", "dhh/app:999")
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
|
||||
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false)
|
||||
|
||||
# Continually report unhealthy
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
|
||||
.with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up")
|
||||
.returns("curl: command not found")
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
|
||||
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
|
||||
|
||||
exception = assert_raises SSHKit::Runner::ExecuteError do
|
||||
run_command("perform")
|
||||
end
|
||||
assert_match "Health check against /up failed to return 200 OK!", exception.message
|
||||
end
|
||||
|
||||
test "perform failing for unknown reason" do
|
||||
# Prevent expected failures from outputting to terminal
|
||||
Thread.report_on_exception = false
|
||||
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:execute) # No need to execute anything here
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
|
||||
.with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up")
|
||||
.returns("500")
|
||||
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
|
||||
.returns("unhealthy")
|
||||
|
||||
# Capture logs when failing
|
||||
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
|
||||
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
|
||||
.returns("some log output")
|
||||
|
||||
exception = assert_raises do
|
||||
run_command("perform")
|
||||
end
|
||||
assert_match "Health check against /up failed with status 500", exception.message
|
||||
assert_match "container not ready (unhealthy)", exception.message
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
Reference in New Issue
Block a user