Merge pull request #740 from basecamp/remove-healthcheck-step

Remove the healthcheck step
This commit is contained in:
Donal McBreen
2024-05-21 12:00:25 +01:00
committed by GitHub
26 changed files with 300 additions and 345 deletions

View File

@@ -14,12 +14,16 @@ class Kamal::Cli::App < Kamal::Cli::Base
end
end
#  Primary hosts and roles are returned first, so they can open the barrier
barrier = Kamal::Cli::Healthcheck::Barrier.new if KAMAL.roles.many?
on(KAMAL.hosts, **KAMAL.boot_strategy) do |host|
KAMAL.roles_on(host).each do |role|
Kamal::Cli::App::Boot.new(host, role, version, self).run
Kamal::Cli::App::Boot.new(host, role, self, version, barrier).run
end
end
#  Tag once the app booted on all hosts
on(KAMAL.hosts) do |host|
execute *KAMAL.auditor.record("Tagging #{KAMAL.config.absolute_image} as the latest image"), verbosity: :debug
execute *KAMAL.app.tag_latest_image

View File

@@ -1,19 +1,30 @@
class Kamal::Cli::App::Boot
attr_reader :host, :role, :version, :sshkit
delegate :execute, :capture_with_info, :info, to: :sshkit
delegate :uses_cord?, :assets?, to: :role
attr_reader :host, :role, :version, :barrier, :sshkit
delegate :execute, :capture_with_info, :capture_with_pretty_json, :info, :error, to: :sshkit
delegate :uses_cord?, :assets?, :running_traefik?, to: :role
def initialize(host, role, version, sshkit)
def initialize(host, role, sshkit, version, barrier)
@host = host
@role = role
@version = version
@barrier = barrier
@sshkit = sshkit
end
def run
old_version = old_version_renamed_if_clashing
start_new_version
wait_at_barrier if queuer?
begin
start_new_version
rescue => e
close_barrier if gatekeeper?
stop_new_version
raise
end
release_barrier if gatekeeper?
if old_version
stop_old_version(old_version)
@@ -21,18 +32,6 @@ class Kamal::Cli::App::Boot
end
private
def app
@app ||= KAMAL.app(role: role, host: host)
end
def auditor
@auditor = KAMAL.auditor(role: role)
end
def audit(message)
execute *auditor.record(message), verbosity: :debug
end
def old_version_renamed_if_clashing
if capture_with_info(*app.container_id_for_version(version), raise_on_non_zero_exit: false).present?
renamed_version = "#{version}_replaced_#{SecureRandom.hex(8)}"
@@ -46,12 +45,17 @@ class Kamal::Cli::App::Boot
def start_new_version
audit "Booted app version #{version}"
execute *app.tie_cord(role.cord_host_file) if uses_cord?
hostname = "#{host.to_s[0...51].gsub(/\.+$/, '')}-#{SecureRandom.hex(6)}"
execute *app.run(hostname: hostname)
Kamal::Cli::Healthcheck::Poller.wait_for_healthy(pause_after_ready: true) { capture_with_info(*app.status(version: version)) }
end
def stop_new_version
execute *app.stop(version: version), raise_on_non_zero_exit: false
end
def stop_old_version(version)
if uses_cord?
cord = capture_with_info(*app.cord(version: version), raise_on_non_zero_exit: false).strip
@@ -65,4 +69,51 @@ class Kamal::Cli::App::Boot
execute *app.clean_up_assets if assets?
end
def release_barrier
if barrier.open
info "First #{KAMAL.primary_role} container is healthy on #{host}, booting other roles"
end
end
def wait_at_barrier
info "Waiting for the first healthy #{KAMAL.primary_role} container before booting #{role} on #{host}..."
barrier.wait
info "First #{KAMAL.primary_role} container is healthy, booting #{role} on #{host}..."
rescue Kamal::Cli::Healthcheck::Error
info "First #{KAMAL.primary_role} container is unhealthy, not booting #{role} on #{host}"
raise
end
def close_barrier
if barrier.close
info "First #{KAMAL.primary_role} container is unhealthy on #{host}, not booting other roles"
error capture_with_info(*app.logs(version: version))
error capture_with_info(*app.container_health_log(version: version))
end
end
def barrier_role?
role == KAMAL.primary_role
end
def app
@app ||= KAMAL.app(role: role, host: host)
end
def auditor
@auditor = KAMAL.auditor(role: role)
end
def audit(message)
execute *auditor.record(message), verbosity: :debug
end
def gatekeeper?
barrier && barrier_role?
end
def queuer?
barrier && !barrier_role?
end
end

View File

@@ -1,21 +0,0 @@
class Kamal::Cli::Healthcheck < Kamal::Cli::Base
default_command :perform
desc "perform", "Health check current app version"
def perform
raise "The primary host is not configured to run Traefik" unless KAMAL.config.role(KAMAL.config.primary_role).running_traefik?
on(KAMAL.primary_host) do
begin
execute *KAMAL.healthcheck.run
Poller.wait_for_healthy { capture_with_info(*KAMAL.healthcheck.status) }
rescue Poller::HealthcheckError => e
error capture_with_info(*KAMAL.healthcheck.logs)
error capture_with_pretty_json(*KAMAL.healthcheck.container_health_log)
raise
ensure
execute *KAMAL.healthcheck.stop, raise_on_non_zero_exit: false
execute *KAMAL.healthcheck.remove, raise_on_non_zero_exit: false
end
end
end
end

View File

@@ -0,0 +1,31 @@
class Kamal::Cli::Healthcheck::Barrier
def initialize
@ivar = Concurrent::IVar.new
end
def close
set(false)
end
def open
set(true)
end
def wait
unless opened?
raise Kamal::Cli::Healthcheck::Error.new("Halted at barrier")
end
end
private
def opened?
@ivar.value
end
def set(value)
@ivar.set(value)
true
rescue Concurrent::MultipleAssignmentError
false
end
end

View File

@@ -0,0 +1,2 @@
class Kamal::Cli::Healthcheck::Error < StandardError
end

View File

@@ -3,7 +3,6 @@ module Kamal::Cli::Healthcheck::Poller
TRAEFIK_UPDATE_DELAY = 5
class HealthcheckError < StandardError; end
def wait_for_healthy(pause_after_ready: false, &block)
attempt = 1
@@ -16,9 +15,9 @@ module Kamal::Cli::Healthcheck::Poller
when "running" # No health check configured
sleep KAMAL.config.readiness_delay if pause_after_ready
else
raise HealthcheckError, "container not ready (#{status})"
raise Kamal::Cli::Healthcheck::Error, "container not ready (#{status})"
end
rescue HealthcheckError => e
rescue Kamal::Cli::Healthcheck::Error => e
if attempt <= max_attempts
info "#{e.message}, retrying in #{attempt}s (attempt #{attempt}/#{max_attempts})..."
sleep attempt
@@ -41,9 +40,9 @@ module Kamal::Cli::Healthcheck::Poller
when "unhealthy"
sleep TRAEFIK_UPDATE_DELAY if pause_after_ready
else
raise HealthcheckError, "container not unhealthy (#{status})"
raise Kamal::Cli::Healthcheck::Error, "container not unhealthy (#{status})"
end
rescue HealthcheckError => e
rescue Kamal::Cli::Healthcheck::Error => e
if attempt <= max_attempts
info "#{e.message}, retrying in #{attempt}s (attempt #{attempt}/#{max_attempts})..."
sleep attempt

View File

@@ -41,11 +41,6 @@ class Kamal::Cli::Main < Kamal::Cli::Base
say "Ensure Traefik is running...", :magenta
invoke "kamal:cli:traefik:boot", [], invoke_options
if KAMAL.config.role(KAMAL.config.primary_role).running_traefik?
say "Ensure app can pass healthcheck...", :magenta
invoke "kamal:cli:healthcheck:perform", [], invoke_options
end
say "Detect stale containers...", :magenta
invoke "kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true)
@@ -76,9 +71,6 @@ class Kamal::Cli::Main < Kamal::Cli::Base
run_hook "pre-deploy"
say "Ensure app can pass healthcheck...", :magenta
invoke "kamal:cli:healthcheck:perform", [], invoke_options
say "Detect stale containers...", :magenta
invoke "kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true)
@@ -227,9 +219,6 @@ class Kamal::Cli::Main < Kamal::Cli::Base
desc "env", "Manage environment files"
subcommand "env", Kamal::Cli::Env
desc "healthcheck", "Healthcheck application"
subcommand "healthcheck", Kamal::Cli::Healthcheck
desc "lock", "Manage the deploy lock"
subcommand "lock", Kamal::Cli::Lock
@@ -254,7 +243,7 @@ class Kamal::Cli::Main < Kamal::Cli::Base
raise "Container not found" unless container_id.present?
end
end
rescue SSHKit::Runner::ExecuteError => e
rescue SSHKit::Runner::ExecuteError, SSHKit::Runner::MultipleExecuteError => e
if e.message =~ /Container not found/
say "Error looking for container version #{version}: #{e.message}"
return false

View File

@@ -1,4 +1,6 @@
module Kamal::Commands::App::Containers
DOCKER_HEALTH_LOG_FORMAT = "'{{json .State.Health}}'"
def list_containers
docker :container, :ls, "--all", *filter_args
end
@@ -20,4 +22,10 @@ module Kamal::Commands::App::Containers
def remove_containers
docker :container, :prune, "--force", *filter_args
end
def container_health_log(version:)
pipe \
container_id_for(container_name: container_name(version)),
xargs(docker(:inspect, "--format", DOCKER_HEALTH_LOG_FORMAT))
end
end

View File

@@ -1,7 +1,7 @@
module Kamal::Commands::App::Logging
def logs(since: nil, lines: nil, grep: nil)
def logs(version: nil, since: nil, lines: nil, grep: nil)
pipe \
current_running_container_id,
version ? container_id_for_version(version) : current_running_container_id,
"xargs docker logs#{" --since #{since}" if since}#{" --tail #{lines}" if lines} 2>&1",
("grep '#{grep}'" if grep)
end

View File

@@ -3,7 +3,6 @@ module Kamal::Commands
delegate :sensitive, :argumentize, to: Kamal::Utils
DOCKER_HEALTH_STATUS_FORMAT = "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'"
DOCKER_HEALTH_LOG_FORMAT = "'{{json .State.Health}}'"
attr_accessor :config

View File

@@ -1,59 +0,0 @@
class Kamal::Commands::Healthcheck < Kamal::Commands::Base
def run
primary = config.role(config.primary_role)
docker :run,
"--detach",
"--name", container_name_with_version,
"--publish", "#{exposed_port}:#{config.healthcheck["port"]}",
"--label", "service=#{config.healthcheck_service}",
"-e", "KAMAL_CONTAINER_NAME=\"#{config.healthcheck_service}\"",
*primary.env_args(config.primary_host),
*primary.health_check_args(cord: false),
*config.volume_args,
*primary.option_args,
config.absolute_image,
primary.cmd
end
def status
pipe container_id, xargs(docker(:inspect, "--format", DOCKER_HEALTH_STATUS_FORMAT))
end
def container_health_log
pipe container_id, xargs(docker(:inspect, "--format", DOCKER_HEALTH_LOG_FORMAT))
end
def logs
pipe container_id, xargs(docker(:logs, "--tail", log_lines, "2>&1"))
end
def stop
pipe container_id, xargs(docker(:stop))
end
def remove
pipe container_id, xargs(docker(:container, :rm))
end
private
def container_name_with_version
"#{config.healthcheck_service}-#{config.version}"
end
def container_id
container_id_for(container_name: container_name_with_version)
end
def health_url
"http://localhost:#{exposed_port}#{config.healthcheck["path"]}"
end
def exposed_port
config.healthcheck["exposed_port"]
end
def log_lines
config.healthcheck["log_lines"]
end
end

View File

@@ -188,7 +188,7 @@ class Kamal::Configuration
def healthcheck
{ "path" => "/up", "port" => 3000, "max_attempts" => 7, "exposed_port" => 3999, "cord" => "/tmp/kamal-cord", "log_lines" => 50 }.merge(raw_config.healthcheck || {})
{ "path" => "/up", "port" => 3000, "max_attempts" => 7, "cord" => "/tmp/kamal-cord", "log_lines" => 50 }.merge(raw_config.healthcheck || {})
end
def healthcheck_service

View File

@@ -103,3 +103,39 @@ class SSHKit::Backend::Netssh
prepend LimitConcurrentStartsInstance
end
class SSHKit::Runner::Parallel
# SSHKit joins the threads in sequence and fails on the first error it encounters, which means that we wait threads
# before the first failure to complete but not for ones after.
#
# We'll patch it to wait for them all to complete, and to record all the threads that errored so we can see when a
# problem occurs on multiple hosts.
module CompleteAll
def execute
threads = hosts.map do |host|
Thread.new(host) do |h|
backend(h, &block).run
rescue ::StandardError => e
e2 = SSHKit::Runner::ExecuteError.new e
raise e2, "Exception while executing #{host.user ? "as #{host.user}@" : "on host "}#{host}: #{e.message}"
end
end
exceptions = []
threads.each do |t|
begin
t.join
rescue SSHKit::Runner::ExecuteError => e
exceptions << e
end
end
if exceptions.one?
raise exceptions.first
elsif exceptions.many?
raise exceptions.first, [ "Exceptions on #{exceptions.count} hosts:", exceptions.map(&:message) ].join("\n")
end
end
end
prepend CompleteAll
end

View File

@@ -118,6 +118,56 @@ class CliAppTest < CliTestCase
end
end
test "boot with web barrier opened" do
Object.any_instance.stubs(:sleep)
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info).returns("123") # old version
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("running").at_least_once # web health check passing
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-123$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("unhealthy").at_least_once # web health check failing
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-workers-latest$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("running").at_least_once # workers health check
run_command("boot", config: :with_roles, host: nil).tap do |output|
assert_match "Waiting for the first healthy web container before booting workers on 1.1.1.3...", output
assert_match "Waiting for the first healthy web container before booting workers on 1.1.1.4...", output
assert_match "First web container is healthy, booting workers on 1.1.1.3", output
assert_match "First web container is healthy, booting workers on 1.1.1.4", output
end
end
test "boot with web barrier closed" do
Thread.report_on_exception = false
Object.any_instance.stubs(:sleep)
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info).returns("123") # old version
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("unhealthy").at_least_once # web health check failing
stderred do
run_command("boot", config: :with_roles, host: nil, allow_execute_error: true).tap do |output|
assert_match "Waiting for the first healthy web container before booting workers on 1.1.1.3...", output
assert_match "Waiting for the first healthy web container before booting workers on 1.1.1.4...", output
assert_match "First web container is unhealthy, not booting workers on 1.1.1.3", output
assert_match "First web container is unhealthy, not booting workers on 1.1.1.4", output
assert_match "Running docker container ls --all --filter name=^app-web-latest$ --quiet | xargs docker stop on 1.1.1.1", output
assert_match "Running docker container ls --all --filter name=^app-web-latest$ --quiet | xargs docker stop on 1.1.1.2", output
end
end
ensure
Thread.report_on_exception = true
end
test "start" do
run_command("start").tap do |output|
assert_match "docker start app-web-999", output
@@ -283,8 +333,12 @@ class CliAppTest < CliTestCase
end
private
def run_command(*command, config: :with_accessories)
stdouted { Kamal::Cli::App.start([ *command, "-c", "test/fixtures/deploy_#{config}.yml", "--hosts", "1.1.1.1" ]) }
def run_command(*command, config: :with_accessories, host: "1.1.1.1", allow_execute_error: false)
stdouted do
Kamal::Cli::App.start([ *command, "-c", "test/fixtures/deploy_#{config}.yml", *([ "--hosts", host ] if host) ])
rescue SSHKit::Runner::ExecuteError => e
raise e unless allow_execute_error
end
end
def stub_running

View File

@@ -1,82 +0,0 @@
require_relative "cli_test_case"
class CliHealthcheckTest < CliTestCase
test "perform" do
# Prevent expected failures from outputting to terminal
Thread.report_on_exception = false
Kamal::Cli::Healthcheck::Poller.stubs(:sleep) # No sleeping when retrying
Kamal::Configuration.any_instance.stubs(:run_id).returns("12345678901234567890123456789012")
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "KAMAL_CONTAINER_NAME=\"healthcheck-app\"", "--env-file", ".kamal/env/roles/app-web.env", "--health-cmd", "\"curl -f http://localhost:3000/up || exit 1\"", "--health-interval", "\"1s\"", "dhh/app:999")
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false)
# Fail twice to test retry logic
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("starting")
.then
.returns("unhealthy")
.then
.returns("healthy")
run_command("perform").tap do |output|
assert_match "container not ready (starting), retrying in 1s (attempt 1/7)...", output
assert_match "container not ready (unhealthy), retrying in 2s (attempt 2/7)...", output
assert_match "Container is healthy!", output
end
end
test "perform failing to become healthy" do
# Prevent expected failures from outputting to terminal
Thread.report_on_exception = false
Kamal::Cli::Healthcheck::Poller.stubs(:sleep) # No sleeping when retrying
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "KAMAL_CONTAINER_NAME=\"healthcheck-app\"", "--env-file", ".kamal/env/roles/app-web.env", "--health-cmd", "\"curl -f http://localhost:3000/up || exit 1\"", "--health-interval", "\"1s\"", "dhh/app:999")
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false)
# Continually report unhealthy
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'")
.returns("unhealthy")
# Capture logs when failing
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
.returns("some log output")
# Capture container health log when failing
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_pretty_json)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{json .State.Health}}'")
.returns('{"Status":"unhealthy","Log":[{"ExitCode": 1,"Output": "/bin/sh: 1: curl: not found\n"}]}"')
exception = assert_raises do
run_command("perform")
end
assert_match "container not ready (unhealthy)", exception.message
end
test "raises an exception if primary does not have traefik" do
SSHKit::Backend::Abstract.any_instance.expects(:execute).never
exception = assert_raises do
run_command("perform", config_file: "test/fixtures/deploy_workers_only.yml")
end
assert_equal "The primary host is not configured to run Traefik", exception.message
end
private
def run_command(*command, config_file: "test/fixtures/deploy_with_accessories.yml")
stdouted { Kamal::Cli::Healthcheck.start([ *command, "-c", config_file ]) }
end
end

View File

@@ -25,7 +25,6 @@ class CliMainTest < CliTestCase
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:registry:login", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:pull", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:traefik:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:prune:all", [], invoke_options)
@@ -38,7 +37,6 @@ class CliMainTest < CliTestCase
assert_match /Log into image registry/, output
assert_match /Pull app image/, output
assert_match /Ensure Traefik is running/, output
assert_match /Ensure app can pass healthcheck/, output
assert_match /Detect stale containers/, output
assert_match /Prune old containers and images/, output
assert_match /Releasing the deploy lock/, output
@@ -51,7 +49,6 @@ class CliMainTest < CliTestCase
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:registry:login", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:deliver", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:traefik:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:prune:all", [], invoke_options)
@@ -65,7 +62,6 @@ class CliMainTest < CliTestCase
assert_match /Build and push app image/, output
assert_hook_ran "pre-deploy", output, **hook_variables
assert_match /Ensure Traefik is running/, output
assert_match /Ensure app can pass healthcheck/, output
assert_match /Detect stale containers/, output
assert_match /Prune old containers and images/, output
assert_hook_ran "post-deploy", output, **hook_variables, runtime: true
@@ -78,7 +74,6 @@ class CliMainTest < CliTestCase
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:registry:login", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:pull", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:traefik:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:prune:all", [], invoke_options)
@@ -88,7 +83,6 @@ class CliMainTest < CliTestCase
assert_match /Log into image registry/, output
assert_match /Pull app image/, output
assert_match /Ensure Traefik is running/, output
assert_match /Ensure app can pass healthcheck/, output
assert_match /Detect stale containers/, output
assert_match /Prune old containers and images/, output
assert_match /Releasing the deploy lock/, output
@@ -154,7 +148,6 @@ class CliMainTest < CliTestCase
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:registry:login", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:deliver", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:traefik:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:prune:all", [], invoke_options)
@@ -185,7 +178,6 @@ class CliMainTest < CliTestCase
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:registry:login", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:deliver", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:traefik:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:prune:all", [], invoke_options)
@@ -197,7 +189,6 @@ class CliMainTest < CliTestCase
invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "version" => "999", "skip_hooks" => false, "verbose" => true }
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:deliver", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
@@ -210,7 +201,6 @@ class CliMainTest < CliTestCase
assert_match /Build and push app image/, output
assert_hook_ran "pre-deploy", output, **hook_variables
assert_match /Running the pre-deploy hook.../, output
assert_match /Ensure app can pass healthcheck/, output
assert_hook_ran "post-deploy", output, **hook_variables, runtime: true
end
end
@@ -219,13 +209,11 @@ class CliMainTest < CliTestCase
invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "version" => "999", "skip_hooks" => false }
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:pull", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
run_command("redeploy", "--skip_push").tap do |output|
assert_match /Pull app image/, output
assert_match /Ensure app can pass healthcheck/, output
end
end

View File

@@ -99,6 +99,11 @@ class CommanderTest < ActiveSupport::TestCase
assert_equal [ "workers" ], @kamal.roles_on("1.1.1.3").map(&:name)
end
test "roles_on web comes first" do
configure_with(:deploy_with_two_roles_one_host)
assert_equal [ "web", "workers" ], @kamal.roles_on("1.1.1.1").map(&:name)
end
test "default group strategy" do
assert_empty @kamal.boot_strategy
end

View File

@@ -1,124 +0,0 @@
require "test_helper"
class CommandsHealthcheckTest < ActiveSupport::TestCase
setup do
@config = {
service: "app", image: "dhh/app", registry: { "username" => "dhh", "password" => "secret" }, servers: [ "1.1.1.1" ],
traefik: { "args" => { "accesslog.format" => "json", "metrics.prometheus.buckets" => "0.1,0.3,1.2,5.0" } }
}
end
test "run" do
assert_equal \
"docker run --detach --name healthcheck-app-123 --publish 3999:3000 --label service=healthcheck-app -e KAMAL_CONTAINER_NAME=\"healthcheck-app\" --env-file .kamal/env/roles/app-web.env --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" dhh/app:123",
new_command.run.join(" ")
end
test "run with custom port" do
@config[:healthcheck] = { "port" => 3001 }
assert_equal \
"docker run --detach --name healthcheck-app-123 --publish 3999:3001 --label service=healthcheck-app -e KAMAL_CONTAINER_NAME=\"healthcheck-app\" --env-file .kamal/env/roles/app-web.env --health-cmd \"curl -f http://localhost:3001/up || exit 1\" --health-interval \"1s\" dhh/app:123",
new_command.run.join(" ")
end
test "run with destination" do
@destination = "staging"
assert_equal \
"docker run --detach --name healthcheck-app-staging-123 --publish 3999:3000 --label service=healthcheck-app-staging -e KAMAL_CONTAINER_NAME=\"healthcheck-app-staging\" --env-file .kamal/env/roles/app-web-staging.env --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" dhh/app:123",
new_command.run.join(" ")
end
test "run with custom healthcheck" do
@config[:healthcheck] = { "cmd" => "/bin/up" }
assert_equal \
"docker run --detach --name healthcheck-app-123 --publish 3999:3000 --label service=healthcheck-app -e KAMAL_CONTAINER_NAME=\"healthcheck-app\" --env-file .kamal/env/roles/app-web.env --health-cmd \"/bin/up\" --health-interval \"1s\" dhh/app:123",
new_command.run.join(" ")
end
test "run with custom options" do
@config[:servers] = { "web" => { "hosts" => [ "1.1.1.1" ], "options" => { "mount" => "somewhere" } } }
@config[:healthcheck] = { "exposed_port" => 4999 }
assert_equal \
"docker run --detach --name healthcheck-app-123 --publish 4999:3000 --label service=healthcheck-app -e KAMAL_CONTAINER_NAME=\"healthcheck-app\" --env-file .kamal/env/roles/app-web.env --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --mount \"somewhere\" dhh/app:123",
new_command.run.join(" ")
end
test "run with tags" do
@config[:servers] = [ { "1.1.1.1" => "tag1" } ]
@config[:env] = {}
@config[:env]["tags"] = { "tag1" => { "ENV1" => "value1" } }
assert_equal \
"docker run --detach --name healthcheck-app-123 --publish 3999:3000 --label service=healthcheck-app -e KAMAL_CONTAINER_NAME=\"healthcheck-app\" --env-file .kamal/env/roles/app-web.env --env ENV1=\"value1\" --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" dhh/app:123",
new_command.run.join(" ")
end
test "status" do
assert_equal \
"docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker inspect --format '{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'",
new_command.status.join(" ")
end
test "container_health_log" do
assert_equal \
"docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker inspect --format '{{json .State.Health}}'",
new_command.container_health_log.join(" ")
end
test "stop" do
assert_equal \
"docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker stop",
new_command.stop.join(" ")
end
test "stop with destination" do
@destination = "staging"
assert_equal \
"docker container ls --all --filter name=^healthcheck-app-staging-123$ --quiet | xargs docker stop",
new_command.stop.join(" ")
end
test "remove" do
assert_equal \
"docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker container rm",
new_command.remove.join(" ")
end
test "remove with destination" do
@destination = "staging"
assert_equal \
"docker container ls --all --filter name=^healthcheck-app-staging-123$ --quiet | xargs docker container rm",
new_command.remove.join(" ")
end
test "logs" do
assert_equal \
"docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker logs --tail 50 2>&1",
new_command.logs.join(" ")
end
test "logs with custom lines number" do
@config[:healthcheck] = { "log_lines" => 150 }
assert_equal \
"docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker logs --tail 150 2>&1",
new_command.logs.join(" ")
end
test "logs with destination" do
@destination = "staging"
assert_equal \
"docker container ls --all --filter name=^healthcheck-app-staging-123$ --quiet | xargs docker logs --tail 50 2>&1",
new_command.logs.join(" ")
end
private
def new_command
Kamal::Commands::Healthcheck.new(Kamal::Configuration.new(@config, destination: @destination, version: "123"))
end
end

View File

@@ -272,7 +272,7 @@ class ConfigurationTest < ActiveSupport::TestCase
volume_args: [ "--volume", "/local/path:/container/path" ],
builder: {},
logging: [ "--log-opt", "max-size=\"10m\"" ],
healthcheck: { "path"=>"/up", "port"=>3000, "max_attempts" => 7, "exposed_port" => 3999, "cord" => "/tmp/kamal-cord", "log_lines" => 50 } }
healthcheck: { "path"=>"/up", "port"=>3000, "max_attempts" => 7, "cord" => "/tmp/kamal-cord", "log_lines" => 50 } }
assert_equal expected_config, @config.to_h
end

View File

@@ -0,0 +1,15 @@
service: app
image: dhh/app
servers:
workers:
hosts:
- 1.1.1.1
web:
hosts:
- 1.1.1.1
env:
REDIS_URL: redis://x/y
registry:
server: registry.digitalocean.com
username: user
password: pw

View File

@@ -0,0 +1,34 @@
require_relative "integration_test"
class BrokenDeployTest < IntegrationTest
test "deploying a bad image" do
@app = "app_with_roles"
kamal :envify
first_version = latest_app_version
kamal :deploy
assert_app_is_up version: first_version
assert_container_running host: :vm3, name: "app-workers-#{first_version}"
second_version = break_app
output = kamal :deploy, raise_on_error: false, capture: true
assert_failed_deploy output
assert_app_is_up version: first_version
assert_container_running host: :vm3, name: "app-workers-#{first_version}"
assert_container_not_running host: :vm3, name: "app-workers-#{second_version}"
end
private
def assert_failed_deploy(output)
assert_match "Waiting for the first healthy web container before booting workers on vm3...", output
assert_match /First web container is unhealthy on vm[12], not booting other roles/, output
assert_match "First web container is unhealthy, not booting workers on vm3", output
assert_match "nginx: [emerg] unexpected end of file, expecting \";\" or \"}\" in /etc/nginx/conf.d/default.conf:2", output
assert_match 'ERROR {"Status":"unhealthy","FailingStreak":0,"Log":[]}', output
end
end

View File

@@ -28,6 +28,7 @@ builder:
COMMIT_SHA: <%= `git rev-parse HEAD` %>
healthcheck:
cmd: wget -qO- http://localhost > /dev/null || exit 1
max_attempts: 3
traefik:
args:
accesslog: true
@@ -41,3 +42,4 @@ accessories:
roles:
- web
stop_wait_time: 1
readiness_delay: 0

View File

@@ -22,6 +22,7 @@ builder:
COMMIT_SHA: <%= `git rev-parse HEAD` %>
healthcheck:
cmd: wget -qO- http://localhost > /dev/null || exit 1
max_attempts: 3
traefik:
args:
accesslog: true
@@ -35,3 +36,4 @@ accessories:
roles:
- web
stop_wait_time: 1
readiness_delay: 0

View File

@@ -0,0 +1,3 @@
#!/bin/bash
cd $1 && echo "bad nginx config" > default.conf && git commit -am 'Broken'

View File

@@ -78,6 +78,11 @@ class IntegrationTest < ActiveSupport::TestCase
latest_app_version
end
def break_app
deployer_exec "./break_app.sh #{@app}", workdir: "/"
latest_app_version
end
def latest_app_version
deployer_exec("git rev-parse HEAD", capture: true)
end
@@ -131,4 +136,16 @@ class IntegrationTest < ActiveSupport::TestCase
puts "Tried to get the response code again and got #{app_response.code}"
end
end
def assert_container_running(host:, name:)
assert container_running?(host: host, name: name)
end
def assert_container_not_running(host:, name:)
assert_not container_running?(host: host, name: name)
end
def container_running?(host:, name:)
docker_compose("exec #{host} docker ps --filter=name=#{name} | tail -n+2", capture: true).tap { |x| p [ x, x.strip, x.strip.present? ] }.strip.present?
end
end

View File

@@ -56,6 +56,12 @@ class MainTest < IntegrationTest
assert_app_is_up version: version
assert_hooks_ran "pre-connect", "pre-build", "pre-deploy", "post-deploy"
assert_container_running host: :vm3, name: "app-workers-#{version}"
second_version = update_app_rev
kamal :redeploy
assert_app_is_up version: second_version
assert_container_running host: :vm3, name: "app-workers-#{second_version}"
end
test "config" do
@@ -73,7 +79,7 @@ class MainTest < IntegrationTest
assert_equal({ user: "root", port: 22, keepalive: true, keepalive_interval: 30, log_level: :fatal }, config[:ssh_options])
assert_equal({ "multiarch" => false, "args" => { "COMMIT_SHA" => version } }, config[:builder])
assert_equal [ "--log-opt", "max-size=\"10m\"" ], config[:logging]
assert_equal({ "path" => "/up", "port" => 3000, "max_attempts" => 7, "exposed_port" => 3999, "cord"=>"/tmp/kamal-cord", "log_lines" => 50, "cmd"=>"wget -qO- http://localhost > /dev/null || exit 1" }, config[:healthcheck])
assert_equal({ "path" => "/up", "port" => 3000, "max_attempts" => 3, "cord"=>"/tmp/kamal-cord", "log_lines" => 50, "cmd"=>"wget -qO- http://localhost > /dev/null || exit 1" }, config[:healthcheck])
end
test "setup and remove" do
@@ -157,8 +163,4 @@ class MainTest < IntegrationTest
assert vm1_image_ids.any?
assert vm1_container_ids.any?
end
def assert_container_running(host:, name:)
assert docker_compose("exec #{host} docker ps --filter=name=#{name} -q", capture: true).strip.present?
end
end