diff --git a/bin/kamal b/bin/kamal index a8e2ac2b..96c0dc64 100755 --- a/bin/kamal +++ b/bin/kamal @@ -7,15 +7,6 @@ require "kamal" begin Kamal::Cli::Main.start(ARGV) -rescue SSHKit::Runner::MultipleExecuteError => e - e.execute_errors.each do |execute_error| - puts " \e[31mERROR (#{execute_error.cause.class}): #{execute_error.message}\e[0m" - end - if ENV["VERBOSE"] - puts "Backtrace for the first error:" - puts e.execute_errors.first.cause.backtrace - end - exit 1 rescue SSHKit::Runner::ExecuteError => e puts " \e[31mERROR (#{e.cause.class}): #{e.message}\e[0m" puts e.cause.backtrace if ENV["VERBOSE"] diff --git a/lib/kamal/cli/app/boot.rb b/lib/kamal/cli/app/boot.rb index 47f8c21c..c4125771 100644 --- a/lib/kamal/cli/app/boot.rb +++ b/lib/kamal/cli/app/boot.rb @@ -14,7 +14,17 @@ class Kamal::Cli::App::Boot def run old_version = old_version_renamed_if_clashing - start_new_version + wait_at_barrier if queuer? + + begin + start_new_version + rescue => e + close_barrier if gatekeeper? + stop_new_version + raise + end + + release_barrier if gatekeeper? if old_version stop_old_version(old_version) @@ -34,22 +44,16 @@ class Kamal::Cli::App::Boot end def start_new_version - wait_at_barrier if queuer? - audit "Booted app version #{version}" execute *app.tie_cord(role.cord_host_file) if uses_cord? hostname = "#{host.to_s[0...51].gsub(/\.+$/, '')}-#{SecureRandom.hex(6)}" execute *app.run(hostname: hostname) Kamal::Cli::Healthcheck::Poller.wait_for_healthy(pause_after_ready: true) { capture_with_info(*app.status(version: version)) } + end - release_barrier if gatekeeper? - rescue => e - close_barrier if gatekeeper? - + def stop_new_version execute *app.stop(version: version), raise_on_non_zero_exit: false - - raise end def stop_old_version(version) @@ -68,22 +72,22 @@ class Kamal::Cli::App::Boot def release_barrier if barrier.open - info "First #{KAMAL.primary_role} container healthy, continuing other roles (#{host})" + info "First #{KAMAL.primary_role} container is healthy on #{host}, booting other roles" end end def wait_at_barrier - info "Waiting for a healthy #{KAMAL.primary_role} container (#{host})..." + info "Waiting for the first healthy #{KAMAL.primary_role} container before booting #{role} on #{host}..." barrier.wait - info "First #{KAMAL.primary_role} container is healthy, continuing (#{host})" + info "First #{KAMAL.primary_role} container is healthy, booting #{role} on #{host}..." rescue Kamal::Cli::Healthcheck::Error - info "First #{KAMAL.primary_role} container is unhealthy, stopping (#{host})" + info "First #{KAMAL.primary_role} container is unhealthy, not booting #{role} on #{host}" raise end def close_barrier if barrier.close - info "First #{KAMAL.primary_role} container unhealthy, stopping other roles (#{host})" + info "First #{KAMAL.primary_role} container is unhealthy on #{host}, not booting other roles" error capture_with_info(*app.logs(version: version)) error capture_with_info(*app.container_health_log(version: version)) end diff --git a/lib/kamal/cli/main.rb b/lib/kamal/cli/main.rb index 594fb3fd..c05c1503 100644 --- a/lib/kamal/cli/main.rb +++ b/lib/kamal/cli/main.rb @@ -244,7 +244,7 @@ class Kamal::Cli::Main < Kamal::Cli::Base raise "Container not found" unless container_id.present? end end - rescue SSHKit::Runner::ExecuteError => e + rescue SSHKit::Runner::ExecuteError, SSHKit::Runner::MultipleExecuteError => e if e.message =~ /Container not found/ say "Error looking for container version #{version}: #{e.message}" return false diff --git a/lib/kamal/sshkit_with_ext.rb b/lib/kamal/sshkit_with_ext.rb index 373ae843..2d0257a8 100644 --- a/lib/kamal/sshkit_with_ext.rb +++ b/lib/kamal/sshkit_with_ext.rb @@ -104,14 +104,6 @@ class SSHKit::Backend::Netssh prepend LimitConcurrentStartsInstance end -class SSHKit::Runner::MultipleExecuteError < SSHKit::StandardError - attr_reader :execute_errors - - def initialize(execute_errors) - @execute_errors = execute_errors - end -end - class SSHKit::Runner::Parallel # SSHKit joins the threads in sequence and fails on the first error it encounters, which means that we wait threads # before the first failure to complete but not for ones after. @@ -140,7 +132,7 @@ class SSHKit::Runner::Parallel if exceptions.one? raise exceptions.first elsif exceptions.many? - raise SSHKit::Runner::MultipleExecuteError.new(exceptions) + raise exceptions.first, [ "Exceptions on #{exceptions.count} hosts:", exceptions.map(&:message) ].join("\n") end end end diff --git a/test/cli/app_test.rb b/test/cli/app_test.rb index f684deb8..7a2a266e 100644 --- a/test/cli/app_test.rb +++ b/test/cli/app_test.rb @@ -136,11 +136,11 @@ class CliAppTest < CliTestCase .returns("running").at_least_once # workers health check run_command("boot", config: :with_roles, host: nil).tap do |output| - assert_match "Waiting for a healthy web container (1.1.1.3)...", output - assert_match "Waiting for a healthy web container (1.1.1.4)...", output - assert_match "First web container is healthy, continuing (1.1.1.3)", output - assert_match "First web container is healthy, continuing (1.1.1.4)", output - end + assert_match "Waiting for the first healthy web container before booting workers on 1.1.1.3...", output + assert_match "Waiting for the first healthy web container before booting workers on 1.1.1.4...", output + assert_match "First web container is healthy, booting workers on 1.1.1.3", output + assert_match "First web container is healthy, booting workers on 1.1.1.4", output + end end test "boot with web barrier closed" do @@ -156,14 +156,12 @@ class CliAppTest < CliTestCase stderred do run_command("boot", config: :with_roles, host: nil, allow_execute_error: true).tap do |output| - assert_match "Waiting for a healthy web container (1.1.1.3)...", output - assert_match "Waiting for a healthy web container (1.1.1.4)...", output - assert_match "First web container is unhealthy, stopping (1.1.1.3)", output - assert_match "First web container is unhealthy, stopping (1.1.1.4)", output + assert_match "Waiting for the first healthy web container before booting workers on 1.1.1.3...", output + assert_match "Waiting for the first healthy web container before booting workers on 1.1.1.4...", output + assert_match "First web container is unhealthy, not booting workers on 1.1.1.3", output + assert_match "First web container is unhealthy, not booting workers on 1.1.1.4", output assert_match "Running docker container ls --all --filter name=^app-web-latest$ --quiet | xargs docker stop on 1.1.1.1", output assert_match "Running docker container ls --all --filter name=^app-web-latest$ --quiet | xargs docker stop on 1.1.1.2", output - assert_match "Running docker container ls --all --filter name=^app-workers-latest$ --quiet | xargs docker stop on 1.1.1.3", output - assert_match "Running docker container ls --all --filter name=^app-workers-latest$ --quiet | xargs docker stop on 1.1.1.4", output end end ensure diff --git a/test/integration/broken_deploy_test.rb b/test/integration/broken_deploy_test.rb index 6b71844e..a3f74d45 100644 --- a/test/integration/broken_deploy_test.rb +++ b/test/integration/broken_deploy_test.rb @@ -25,9 +25,9 @@ class BrokenDeployTest < IntegrationTest private def assert_failed_deploy(output) - assert_match "Waiting for a healthy web container (vm3)...", output - assert_match /First #{KAMAL.primary_role} container is unhealthy, stopping \(vm[12]\)/, output - assert_match "First #{KAMAL.primary_role} container unhealthy, stopping other roles (vm3)...", output + assert_match "Waiting for the first healthy web container before booting workers on vm3...", output + assert_match /First web container is unhealthy on vm[12], not booting other roles/, output + assert_match "First web container is unhealthy, not booting workers on vm3", output assert_match "nginx: [emerg] unexpected end of file, expecting \";\" or \"}\" in /etc/nginx/conf.d/default.conf:2", output assert_match 'ERROR {"Status":"unhealthy","FailingStreak":0,"Log":[]}', output end