From fa7e941648ce78e2ee46cb4e9b567078072557d6 Mon Sep 17 00:00:00 2001 From: Donal McBreen Date: Tue, 21 May 2024 09:31:08 +0100 Subject: [PATCH] Make SSHKit::Runner::Parallel fail slow Using a different SSHKit runner doesn't work well, because the group runner uses the Parallel runner internally. So instead we'll patch its behaviour to fail slow. We'll also get it to return all the errors so we can report on all the hosts that failed. --- bin/kamal | 9 ++++++ lib/kamal/commander.rb | 1 - lib/kamal/sshkit_with_ext.rb | 59 ++++++++++++++++++++++-------------- 3 files changed, 45 insertions(+), 24 deletions(-) diff --git a/bin/kamal b/bin/kamal index 96c0dc64..a8e2ac2b 100755 --- a/bin/kamal +++ b/bin/kamal @@ -7,6 +7,15 @@ require "kamal" begin Kamal::Cli::Main.start(ARGV) +rescue SSHKit::Runner::MultipleExecuteError => e + e.execute_errors.each do |execute_error| + puts " \e[31mERROR (#{execute_error.cause.class}): #{execute_error.message}\e[0m" + end + if ENV["VERBOSE"] + puts "Backtrace for the first error:" + puts e.execute_errors.first.cause.backtrace + end + exit 1 rescue SSHKit::Runner::ExecuteError => e puts " \e[31mERROR (#{e.cause.class}): #{e.message}\e[0m" puts e.cause.backtrace if ENV["VERBOSE"] diff --git a/lib/kamal/commander.rb b/lib/kamal/commander.rb index f4c54215..e7c5d21f 100644 --- a/lib/kamal/commander.rb +++ b/lib/kamal/commander.rb @@ -150,7 +150,6 @@ class Kamal::Commander sshkit.max_concurrent_starts = config.sshkit.max_concurrent_starts sshkit.ssh_options = config.ssh.options end - SSHKit.config.default_runner = SSHKit::Runner::ParallelCompleteAll SSHKit.config.command_map[:docker] = "docker" # No need to use /usr/bin/env, just clogs up the logs SSHKit.config.output_verbosity = verbosity end diff --git a/lib/kamal/sshkit_with_ext.rb b/lib/kamal/sshkit_with_ext.rb index c556774b..373ae843 100644 --- a/lib/kamal/sshkit_with_ext.rb +++ b/lib/kamal/sshkit_with_ext.rb @@ -104,33 +104,46 @@ class SSHKit::Backend::Netssh prepend 
LimitConcurrentStartsInstance end -require "thread" +class SSHKit::Runner::MultipleExecuteError < SSHKit::StandardError + attr_reader :execute_errors -module SSHKit - module Runner - class ParallelCompleteAll < Abstract - def execute - threads = hosts.map do |host| - Thread.new(host) do |h| - begin - backend(h, &block).run - rescue ::StandardError => e - e2 = SSHKit::Runner::ExecuteError.new e - raise e2, "Exception while executing #{host.user ? "as #{host.user}@" : "on host "}#{host}: #{e.message}" - end - end - end + def initialize(execute_errors) + @execute_errors = execute_errors + end +end - exception = nil - threads.each do |t| - begin - t.join - rescue SSHKit::Runner::ExecuteError => e - exception ||= e - end +class SSHKit::Runner::Parallel + # SSHKit joins the threads in sequence and fails on the first error it encounters, which means that we wait for threads + # before the first failure to complete but not for ones after. + # + # We'll patch it to wait for them all to complete, and to record all the threads that errored so we can see when a + # problem occurs on multiple hosts. + module CompleteAll + def execute + threads = hosts.map do |host| + Thread.new(host) do |h| + backend(h, &block).run + rescue ::StandardError => e + e2 = SSHKit::Runner::ExecuteError.new e + raise e2, "Exception while executing #{host.user ? "as #{host.user}@" : "on host "}#{host}: #{e.message}" end - raise exception if exception + end + + exceptions = [] + threads.each do |t| + begin + t.join + rescue SSHKit::Runner::ExecuteError => e + exceptions << e + end + end + if exceptions.one? + raise exceptions.first + elsif exceptions.many? + raise SSHKit::Runner::MultipleExecuteError.new(exceptions) end end end + + prepend CompleteAll end