Move health checks into Docker
Replaces our current host-based HTTP healthchecks with Docker healthchecks, and adds a new `healthcheck.cmd` config option that can be used to define a custom health check command. Also removes Traefik's healthchecks, since they are no longer necessary. When deploying a container that has a healthcheck defined, we wait for it to report a healthy status before stopping the old container that it replaces. Containers that don't have a healthcheck defined continue to wait for `MRSK.config.readiness_delay`. There are some pros and cons to using Docker healthchecks rather than checking from the host. The main advantages are: - Supports non-HTTP checks, and app-specific check scripts provided by a container. - When booting a container, allows MRSK to wait for a container to be healthy before shutting down the old container it replaces. This should be safer than relying on a timeout. - Containers with healthchecks won't be active in Traefik until they reach a healthy state, which prevents any traffic from being routed to them before they are ready. The main _disadvantage_ is that containers are now required to provide some way to check their health. Our default check assumes that `curl` is available in the container which, while common, won't always be the case.
This commit is contained in:
@@ -6,8 +6,6 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
|
||||
using_version(version_or_latest) do |version|
|
||||
say "Start container with version #{version} using a #{MRSK.config.readiness_delay}s readiness delay (or reboot if already running)...", :magenta
|
||||
|
||||
cli = self
|
||||
|
||||
on(MRSK.hosts) do
|
||||
execute *MRSK.auditor.record("Tagging #{MRSK.config.absolute_image} as the latest image"), verbosity: :debug
|
||||
execute *MRSK.app.tag_current_as_latest
|
||||
@@ -17,19 +15,24 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
|
||||
roles = MRSK.roles_on(host)
|
||||
|
||||
roles.each do |role|
|
||||
execute *MRSK.auditor(role: role).record("Booted app version #{version}"), verbosity: :debug
|
||||
app = MRSK.app(role: role)
|
||||
auditor = MRSK.auditor(role: role)
|
||||
|
||||
if capture_with_info(*MRSK.app(role: role).container_id_for_version(version), raise_on_non_zero_exit: false).present?
|
||||
execute *auditor.record("Booted app version #{version}"), verbosity: :debug
|
||||
|
||||
if capture_with_info(*app.container_id_for_version(version), raise_on_non_zero_exit: false).present?
|
||||
tmp_version = "#{version}_#{SecureRandom.hex(8)}"
|
||||
info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}"
|
||||
execute *MRSK.auditor(role: role).record("Renaming container #{version} to #{tmp_version}"), verbosity: :debug
|
||||
execute *MRSK.app(role: role).rename_container(version: version, new_version: tmp_version)
|
||||
execute *auditor.record("Renaming container #{version} to #{tmp_version}"), verbosity: :debug
|
||||
execute *app.rename_container(version: version, new_version: tmp_version)
|
||||
end
|
||||
|
||||
old_version = capture_with_info(*MRSK.app(role: role).current_running_version, raise_on_non_zero_exit: false).strip
|
||||
execute *MRSK.app(role: role).run
|
||||
sleep MRSK.config.readiness_delay
|
||||
execute *MRSK.app(role: role).stop(version: old_version), raise_on_non_zero_exit: false if old_version.present?
|
||||
old_version = capture_with_info(*app.current_running_version, raise_on_non_zero_exit: false).strip
|
||||
execute *app.run
|
||||
|
||||
Mrsk::Utils::HealthcheckPoller.wait_for_healthy(pause_after_ready: true) { capture_with_info(*app.status(version: version)) }
|
||||
|
||||
execute *app.stop(version: old_version), raise_on_non_zero_exit: false if old_version.present?
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
class Mrsk::Cli::Healthcheck < Mrsk::Cli::Base
|
||||
|
||||
class HealthcheckError < StandardError; end
|
||||
|
||||
default_command :perform
|
||||
|
||||
desc "perform", "Health check current app version"
|
||||
@@ -9,38 +6,10 @@ class Mrsk::Cli::Healthcheck < Mrsk::Cli::Base
|
||||
on(MRSK.primary_host) do
|
||||
begin
|
||||
execute *MRSK.healthcheck.run
|
||||
|
||||
target = "Health check against #{MRSK.config.healthcheck["path"]}"
|
||||
attempt = 1
|
||||
max_attempts = MRSK.config.healthcheck["max_attempts"]
|
||||
|
||||
begin
|
||||
status = capture_with_info(*MRSK.healthcheck.curl)
|
||||
|
||||
if status == "200"
|
||||
info "#{target} succeeded with 200 OK!"
|
||||
else
|
||||
raise HealthcheckError, "#{target} failed with status #{status}"
|
||||
end
|
||||
rescue SSHKit::Command::Failed
|
||||
if attempt <= max_attempts
|
||||
info "#{target} failed to respond, retrying in #{attempt}s (attempt #{attempt}/#{max_attempts})..."
|
||||
sleep attempt
|
||||
attempt += 1
|
||||
|
||||
retry
|
||||
else
|
||||
raise
|
||||
end
|
||||
end
|
||||
rescue SSHKit::Command::Failed, HealthcheckError => e
|
||||
Mrsk::Utils::HealthcheckPoller.wait_for_healthy { capture_with_info(*MRSK.healthcheck.status) }
|
||||
rescue Mrsk::Utils::HealthcheckPoller::HealthcheckError => e
|
||||
error capture_with_info(*MRSK.healthcheck.logs)
|
||||
|
||||
if e.message =~ /curl/
|
||||
raise SSHKit::Command::Failed, "#{target} failed to return 200 OK!"
|
||||
else
|
||||
raise
|
||||
end
|
||||
raise
|
||||
ensure
|
||||
execute *MRSK.healthcheck.stop, raise_on_non_zero_exit: false
|
||||
execute *MRSK.healthcheck.remove, raise_on_non_zero_exit: false
|
||||
|
||||
Reference in New Issue
Block a user