To speed up deployments, we'll remove the healthcheck step. This adds some risk to deployments for non-web roles - if they don't have a Docker healthcheck configured, then the only check we do is whether the container is running. If there is a bad image, we might see the container running before it exits and deploy it. Previously the healthcheck step would have avoided this by ensuring a web container could boot and serve traffic first. To mitigate this, we'll add a deployment barrier. Until one of the primary role containers passes its healthcheck, we'll keep the barrier up and avoid stopping the containers on the non-primary roles. If the primary role container fails its healthcheck, we'll close the barrier and shut down the new containers on the waiting roles. We also have a new integration test to check we correctly handle a broken image. This highlighted that SSHKit's default runner will stop at the first error it encounters. We'll now have a custom runner that waits for all threads to finish, allowing them to clean up.
64 lines
1.6 KiB
Ruby
64 lines
1.6 KiB
Ruby
# Polls a caller-supplied status block until it reports an expected container
# state, retrying with a growing delay while it does not.
module Kamal::Cli::Healthcheck::Poller
  extend self

  # Seconds slept after a matching healthcheck status when the caller asks for
  # a pause. NOTE(review): presumably gives Traefik time to notice the
  # container's new state - confirm.
  TRAEFIK_UPDATE_DELAY = 5

  # Calls the block until it returns "healthy" or "running".
  #
  # pause_after_ready - when true, sleep once the ready status is seen:
  #                     TRAEFIK_UPDATE_DELAY for "healthy", or
  #                     KAMAL.config.readiness_delay for "running".
  # block             - returns the current container status.
  #
  # Raises Kamal::Cli::Healthcheck::Error when the retries are used up.
  def wait_for_healthy(pause_after_ready: false, &block)
    poll_for_status(block, "container not ready") do |status|
      case status
      when "healthy"
        sleep TRAEFIK_UPDATE_DELAY if pause_after_ready
        true
      when "running" # No health check configured
        sleep KAMAL.config.readiness_delay if pause_after_ready
        true
      else
        false
      end
    end

    info "Container is healthy!"
  end

  # Calls the block until it returns "unhealthy".
  #
  # pause_after_ready - when true, sleep TRAEFIK_UPDATE_DELAY once the
  #                     "unhealthy" status is seen.
  # block             - returns the current container status.
  #
  # Raises Kamal::Cli::Healthcheck::Error when the retries are used up.
  def wait_for_unhealthy(pause_after_ready: false, &block)
    poll_for_status(block, "container not unhealthy") do |status|
      case status
      when "unhealthy"
        sleep TRAEFIK_UPDATE_DELAY if pause_after_ready
        true
      else
        false
      end
    end

    info "Container is unhealthy!"
  end

  private

  # Shared retry loop behind both wait_for_* methods.
  #
  # probe           - callable returning the current status.
  # failure_message - prefix of the error raised for a rejected status; the
  #                   status itself is appended in parentheses.
  #
  # Yields each status to the given block, which returns truthy to accept it
  # (after doing any post-ready pause) or falsy to reject it. A rejected
  # status, or a Kamal::Cli::Healthcheck::Error raised by the probe itself,
  # triggers a retry after sleeping `attempt` seconds (1, 2, 3, ...).
  #
  # The failure is re-raised only once `attempt` exceeds the configured
  # healthcheck "max_attempts", so the probe may run max_attempts + 1 times.
  def poll_for_status(probe, failure_message)
    attempt = 1
    max_attempts = KAMAL.config.healthcheck["max_attempts"]

    begin
      status = probe.call
      raise Kamal::Cli::Healthcheck::Error, "#{failure_message} (#{status})" unless yield(status)
    rescue Kamal::Cli::Healthcheck::Error => e
      # Bare raise re-raises the error currently being rescued.
      raise if attempt > max_attempts

      info "#{e.message}, retrying in #{attempt}s (attempt #{attempt}/#{max_attempts})..."
      sleep attempt
      attempt += 1
      retry
    end
  end

  # Writes an info-level message through SSHKit's configured output.
  def info(message)
    SSHKit.config.output.info(message)
  end
end
|