From 051556674f03d1e9abd5c72e17c2ac1376c78b5f Mon Sep 17 00:00:00 2001 From: Donal McBreen Date: Wed, 12 Apr 2023 09:37:20 +0100 Subject: [PATCH] Minimise holding the deploy lock If we get an error we'll only hold the deploy lock if it occurs while trying to switch the running containers. We'll also move tagging the latest image from when the image is pulled to just before the container switch. This ensures that earlier errors don't leave the hosts with an updated latest tag while still running the older version. --- lib/mrsk/cli/app.rb | 27 ++++++++-------- lib/mrsk/cli/base.rb | 49 ++++++++++++++++++++--------- lib/mrsk/cli/main.rb | 51 +++++++++++++++++++------------ lib/mrsk/commander.rb | 13 ++++++-- lib/mrsk/commands/app.rb | 4 +++ lib/mrsk/commands/builder/base.rb | 1 - test/cli/app_test.rb | 1 + test/cli/build_test.rb | 2 +- test/cli/main_test.rb | 24 +++++++++++++-- test/commands/app_test.rb | 6 ++++ 10 files changed, 124 insertions(+), 54 deletions(-) diff --git a/lib/mrsk/cli/app.rb b/lib/mrsk/cli/app.rb index fe2599a1..7b003860 100644 --- a/lib/mrsk/cli/app.rb +++ b/lib/mrsk/cli/app.rb @@ -8,25 +8,28 @@ class Mrsk::Cli::App < Mrsk::Cli::Base cli = self + on(MRSK.hosts) do + execute *MRSK.auditor.record("Tagging #{MRSK.config.absolute_image} as the latest image"), verbosity: :debug + execute *MRSK.app.tag_current_as_latest + end + on(MRSK.hosts) do |host| roles = MRSK.roles_on(host) roles.each do |role| execute *MRSK.auditor(role: role).record("Booted app version #{version}"), verbosity: :debug - begin - if capture_with_info(*MRSK.app(role: role).container_id_for_version(version)).present? - tmp_version = "#{version}_#{SecureRandom.hex(8)}" - info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}" - execute *MRSK.auditor(role: role).record("Renaming container #{version} to #{tmp_version}"), verbosity: :debug - execute *MRSK.app(role: role).rename_container(version: version, new_version: tmp_version) - end - - old_version = capture_with_info(*MRSK.app(role: role).current_running_version).strip - execute *MRSK.app(role: role).run - sleep MRSK.config.readiness_delay - execute *MRSK.app(role: role).stop(version: old_version), raise_on_non_zero_exit: false if old_version.present? + if capture_with_info(*MRSK.app(role: role).container_id_for_version(version)).present? + tmp_version = "#{version}_#{SecureRandom.hex(8)}" + info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}" + execute *MRSK.auditor(role: role).record("Renaming container #{version} to #{tmp_version}"), verbosity: :debug + execute *MRSK.app(role: role).rename_container(version: version, new_version: tmp_version) end + + old_version = capture_with_info(*MRSK.app(role: role).current_running_version).strip + execute *MRSK.app(role: role).run + sleep MRSK.config.readiness_delay + execute *MRSK.app(role: role).stop(version: old_version), raise_on_non_zero_exit: false if old_version.present? end end end diff --git a/lib/mrsk/cli/base.rb b/lib/mrsk/cli/base.rb index a0eb200a..f772b821 100644 --- a/lib/mrsk/cli/base.rb +++ b/lib/mrsk/cli/base.rb @@ -77,22 +77,32 @@ module Mrsk::Cli end def with_lock - acquire_lock + if MRSK.holding_lock? + yield + else + acquire_lock - yield + begin + yield + rescue + if MRSK.hold_lock_on_error? + error " \e[31mDeploy lock was not released\e[0m" + else + release_lock + end - release_lock - rescue - error " \e[31mDeploy lock was not released\e[0m" if MRSK.lock_count > 0 - raise + raise + end + + release_lock + end end def acquire_lock - if MRSK.lock_count == 0 - say "Acquiring the deploy lock" - on(MRSK.primary_host) { execute *MRSK.lock.acquire("Automatic deploy lock", MRSK.config.version) } - end - MRSK.lock_count += 1 + say "Acquiring the deploy lock" + on(MRSK.primary_host) { execute *MRSK.lock.acquire("Automatic deploy lock", MRSK.config.version) } + + MRSK.holding_lock = true rescue SSHKit::Runner::ExecuteError => e if e.message =~ /cannot create directory/ invoke "mrsk:cli:lock:status", [] @@ -103,10 +113,19 @@ module Mrsk::Cli end def release_lock - MRSK.lock_count -= 1 - if MRSK.lock_count == 0 - say "Releasing the deploy lock" - on(MRSK.primary_host) { execute *MRSK.lock.release } + say "Releasing the deploy lock" + on(MRSK.primary_host) { execute *MRSK.lock.release } + + MRSK.holding_lock = false + end + + def hold_lock_on_error + if MRSK.hold_lock_on_error? + yield + else + MRSK.hold_lock_on_error = true + yield + MRSK.hold_lock_on_error = false end end end diff --git a/lib/mrsk/cli/main.rb b/lib/mrsk/cli/main.rb index a5dfdacb..d38e5a02 100644 --- a/lib/mrsk/cli/main.rb +++ b/lib/mrsk/cli/main.rb @@ -37,7 +37,9 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base say "Ensure app can pass healthcheck...", :magenta invoke "mrsk:cli:healthcheck:perform", [], invoke_options - invoke "mrsk:cli:app:boot", [], invoke_options + hold_lock_on_error do + invoke "mrsk:cli:app:boot", [], invoke_options + end say "Prune old containers and images...", :magenta invoke "mrsk:cli:prune:all", [], invoke_options @@ -65,7 +67,9 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base say "Ensure app can pass healthcheck...", :magenta invoke "mrsk:cli:healthcheck:perform", [], invoke_options - invoke "mrsk:cli:app:boot", [], invoke_options + hold_lock_on_error do + invoke "mrsk:cli:app:boot", [], invoke_options + end end audit_broadcast "Redeployed #{service_version} in #{runtime.round} seconds" unless options[:skip_broadcast] @@ -75,34 +79,41 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base desc "rollback [VERSION]", "Rollback app to VERSION" def rollback(version) with_lock do - MRSK.config.version = version + invoke_options = deploy_options - if container_available?(version) - say "Start version #{version}, then wait #{MRSK.config.readiness_delay}s for app to boot before stopping the old version...", :magenta - - cli = self + hold_lock_on_error do + MRSK.config.version = version old_version = nil - on(MRSK.hosts) do |host| - roles = MRSK.roles_on(host) + if container_available?(version) + say "Start version #{version}, then wait #{MRSK.config.readiness_delay}s for app to boot before stopping the old version...", :magenta - roles.each do |role| - app = MRSK.app(role: role) - old_version = capture_with_info(*app.current_running_version).strip.presence + on(MRSK.hosts) do + execute *MRSK.auditor.record("Tagging #{MRSK.config.absolute_image} as the latest image"), verbosity: :debug + execute *MRSK.app.tag_current_as_latest + end - execute *app.start + on(MRSK.hosts) do |host| + roles = MRSK.roles_on(host) - if old_version - sleep MRSK.config.readiness_delay + roles.each do |role| + app = MRSK.app(role: role) + old_version = capture_with_info(*app.current_running_version).strip.presence - execute *app.stop(version: old_version), raise_on_non_zero_exit: false + execute *app.start + + if old_version + sleep MRSK.config.readiness_delay + + execute *app.stop(version: old_version), raise_on_non_zero_exit: false + end end end - end - audit_broadcast "Rolled back #{service_version(Mrsk::Utils.abbreviate_version(old_version))} to #{service_version}" unless options[:skip_broadcast] - else - say "The app version '#{version}' is not available as a container (use 'mrsk app containers' for available versions)", :red + audit_broadcast "Rolled back #{service_version(Mrsk::Utils.abbreviate_version(old_version))} to #{service_version}" unless options[:skip_broadcast] + else + say "The app version '#{version}' is not available as a container (use 'mrsk app containers' for available versions)", :red + end end end end diff --git a/lib/mrsk/commander.rb b/lib/mrsk/commander.rb index 9dc557af..3dccdeae 100644 --- a/lib/mrsk/commander.rb +++ b/lib/mrsk/commander.rb @@ -2,11 +2,12 @@ require "active_support/core_ext/enumerable" require "active_support/core_ext/module/delegation" class Mrsk::Commander - attr_accessor :verbosity, :lock_count + attr_accessor :verbosity, :holding_lock, :hold_lock_on_error def initialize self.verbosity = :info - self.lock_count = 0 + self.holding_lock = false + self.hold_lock_on_error = false end def config @@ -115,6 +116,14 @@ class Mrsk::Commander SSHKit.config.output_verbosity = old_level end + def holding_lock? + self.holding_lock + end + + def hold_lock_on_error? + self.hold_lock_on_error + end + private # Lazy setup of SSHKit def configure_sshkit_with(config) diff --git a/lib/mrsk/commands/app.rb b/lib/mrsk/commands/app.rb index 224e7090..c6ddb858 100644 --- a/lib/mrsk/commands/app.rb +++ b/lib/mrsk/commands/app.rb @@ -128,6 +128,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base docker :image, :prune, "--all", "--force", *filter_args end + def tag_current_as_latest + docker :tag, config.absolute_image, config.latest_image + end + private def container_name(version = nil) diff --git a/lib/mrsk/commands/builder/base.rb b/lib/mrsk/commands/builder/base.rb index 92244510..15c6b188 100644 --- a/lib/mrsk/commands/builder/base.rb +++ b/lib/mrsk/commands/builder/base.rb @@ -7,7 +7,6 @@ class Mrsk::Commands::Builder::Base < Mrsk::Commands::Base def pull docker :pull, config.absolute_image - docker :pull, config.latest_image end def build_options diff --git a/test/cli/app_test.rb b/test/cli/app_test.rb index 2dc5ad64..da032dab 100644 --- a/test/cli/app_test.rb +++ b/test/cli/app_test.rb @@ -6,6 +6,7 @@ class CliAppTest < CliTestCase SSHKit::Backend::Abstract.any_instance.stubs(:capture).returns("123") # old version run_command("boot").tap do |output| + assert_match "docker tag dhh/app:latest dhh/app:latest", output assert_match "docker run --detach --restart unless-stopped", output assert_match "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop", output end diff --git a/test/cli/build_test.rb b/test/cli/build_test.rb index 1c06034b..a610d849 100644 --- a/test/cli/build_test.rb +++ b/test/cli/build_test.rb @@ -30,7 +30,7 @@ class CliBuildTest < CliTestCase test "pull" do run_command("pull").tap do |output| assert_match /docker image rm --force dhh\/app:999/, output - assert_match /docker pull dhh\/app:latest/, output + assert_match /docker pull dhh\/app:999/, output end end diff --git a/test/cli/main_test.rb b/test/cli/main_test.rb index e08462b3..2f73abfe 100644 --- a/test/cli/main_test.rb +++ b/test/cli/main_test.rb @@ -79,18 +79,35 @@ class CliMainTest < CliTestCase end end - test "deploy errors leave lock in place" do + test "deploy errors during critical section leave lock in place" do + invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" } + + Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:server:bootstrap", [], invoke_options) + Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:registry:login", [], invoke_options) + Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:build:deliver", [], invoke_options) + Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:traefik:boot", [], invoke_options) + Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:healthcheck:perform", [], invoke_options) + Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:app:boot", [], invoke_options).raises(RuntimeError) + + assert !MRSK.holding_lock? + assert_raises(RuntimeError) do + stderred { run_command("deploy") } + end + assert MRSK.holding_lock? + end + + test "deploy errors during outside section leave remove lock" do invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" } Mrsk::Cli::Main.any_instance.expects(:invoke) .with("mrsk:cli:server:bootstrap", [], invoke_options) .raises(RuntimeError) - assert_equal 0, MRSK.lock_count + assert !MRSK.holding_lock? assert_raises(RuntimeError) do stderred { run_command("deploy") } end - assert_equal 1, MRSK.lock_count + assert !MRSK.holding_lock? end test "redeploy" do @@ -136,6 +153,7 @@ class CliMainTest < CliTestCase run_command("rollback", "123", config_file: "deploy_with_accessories").tap do |output| assert_match "Start version 123", output + assert_match "docker tag dhh/app:123 dhh/app:latest", output assert_match "docker start app-web-123", output assert_match "docker container ls --all --filter name=^app-web-version-to-rollback$ --quiet | xargs docker stop", output, "Should stop the container that was previously running" end diff --git a/test/commands/app_test.rb b/test/commands/app_test.rb index 6f463d78..44177167 100644 --- a/test/commands/app_test.rb +++ b/test/commands/app_test.rb @@ -267,6 +267,12 @@ class CommandsAppTest < ActiveSupport::TestCase new_command.remove_images.join(" ") end + test "tag_current_as_latest" do + assert_equal \ + "docker tag dhh/app:999 dhh/app:latest", + new_command.tag_current_as_latest.join(" ") + end + private def new_command(role: "web") Mrsk::Commands::App.new(Mrsk::Configuration.new(@config, destination: @destination, version: "999"), role: role)