Merge pull request #219 from basecamp/docker-health-checks

This commit is contained in:
David Heinemeier Hansson
2023-04-28 11:43:33 +02:00
committed by GitHub
13 changed files with 186 additions and 115 deletions

View File

@@ -2,8 +2,11 @@ require_relative "cli_test_case"
class CliAppTest < CliTestCase
test "boot" do
# Stub current version fetch
SSHKit::Backend::Abstract.any_instance.stubs(:capture).returns("123") # old version
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info).returns("123") # old version
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("running") # health check
run_command("boot").tap do |output|
assert_match "docker tag dhh/app:latest dhh/app:latest", output
@@ -19,6 +22,10 @@ class CliAppTest < CliTestCase
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", raise_on_non_zero_exit: false)
.returns("12345678") # running version
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("running") # health check
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :ps, "--filter", "label=service=app", "--filter", "label=role=web", "--filter", "status=running", "--latest", "--format", "\"{{.Names}}\"", "|", "grep -oE \"\\-[^-]+$\"", "|", "cut -c 2-", raise_on_non_zero_exit: false)
.returns("123") # old version

View File

@@ -5,62 +5,58 @@ class CliHealthcheckTest < CliTestCase
# Prevent expected failures from outputting to terminal
Thread.report_on_exception = false
SSHKit::Backend::Abstract.any_instance.stubs(:sleep) # No sleeping when retrying
Mrsk::Utils::HealthcheckPoller.stubs(:sleep) # No sleeping when retrying
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "MRSK_CONTAINER_NAME=\"healthcheck-app\"", "dhh/app:999")
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "MRSK_CONTAINER_NAME=\"healthcheck-app\"", "--health-cmd", "\"curl -f http://localhost:3000/up || exit 1\"", "--health-interval", "\"1s\"", "dhh/app:999")
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false)
# Fail twice to test retry logic
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up")
.raises(SSHKit::Command::Failed)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("starting")
.then
.raises(SSHKit::Command::Failed)
.returns("unhealthy")
.then
.returns("200")
.returns("healthy")
run_command("perform").tap do |output|
assert_match "Health check against /up failed to respond, retrying in 1s (attempt 1/7)...", output
assert_match "Health check against /up failed to respond, retrying in 2s (attempt 2/7)...", output
assert_match "Health check against /up succeeded with 200 OK!", output
assert_match "container not ready (starting), retrying in 1s (attempt 1/7)...", output
assert_match "container not ready (unhealthy), retrying in 2s (attempt 2/7)...", output
assert_match "Container is healthy!", output
end
end
# NOTE(review): this span looks like a rendered diff whose +/- markers were stripped, so the
# removed "because of curl" test and the added "to become healthy" test appear interleaved
# line by line — confirm against the real test file before relying on statement order.
test "perform failing because of curl" do
test "perform failing to become healthy" do
# Prevent expected failures from outputting to terminal
Thread.report_on_exception = false
SSHKit::Backend::Abstract.any_instance.stubs(:execute) # No need to execute anything here
Mrsk::Utils::HealthcheckPoller.stubs(:sleep) # No sleeping when retrying
# Stub the `docker container ls ... | xargs docker stop` call for name healthcheck-app-999
# (passed with raise_on_non_zero_exit: false).
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
# Stub the `docker run` that starts healthcheck-app-999 publishing 3999:3000, with a
# curl-based --health-cmd against http://localhost:3000/up and --health-interval "1s".
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "MRSK_CONTAINER_NAME=\"healthcheck-app\"", "--health-cmd", "\"curl -f http://localhost:3000/up || exit 1\"", "--health-interval", "\"1s\"", "dhh/app:999")
# Stub the `docker container ls ... | xargs docker container rm` call for the same container name.
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false)
# Continually report unhealthy
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up")
.returns("curl: command not found")
# Stub the `docker logs --tail 50 2>&1` capture for the healthcheck container.
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
# Running the command must raise SSHKit::Runner::ExecuteError; its message is asserted below.
exception = assert_raises SSHKit::Runner::ExecuteError do
run_command("perform")
end
assert_match "Health check against /up failed to return 200 OK!", exception.message
end
# NOTE(review): this span looks like a rendered diff whose +/- markers were stripped, so the
# curl-based (HTTP status) expectations and the docker-inspect (health status) expectations
# appear interleaved — confirm against the real test file which lines are current.
test "perform failing for unknown reason" do
# Prevent expected failures from outputting to terminal
Thread.report_on_exception = false
SSHKit::Backend::Abstract.any_instance.stubs(:execute) # No need to execute anything here
# Stub the status probe. Two argument lists are visible below: a curl call writing out the
# HTTP code for http://localhost:3999/up, and a `docker inspect` call whose --format prints
# .State.Health.Status when present, otherwise .State.Status.
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up")
.returns("500")
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("unhealthy")
# Capture logs when failing
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
.returns("some log output")
# No exception class is passed to assert_raises here; only the message text is checked below.
exception = assert_raises do
run_command("perform")
end
assert_match "Health check against /up failed with status 500", exception.message
assert_match "container not ready (unhealthy)", exception.message
end
private