Minimise holding the deploy lock

If a deploy fails, we now keep holding the deploy lock only when the error
occurs while switching the running containers; an error at any other stage
releases the lock.

We'll also move tagging the latest image from when the image is pulled
to just before the container switch. This ensures that earlier errors
don't leave the hosts with an updated latest tag while still running the
older version.
This commit is contained in:
Donal McBreen
2023-04-12 09:37:20 +01:00
parent 60a19f0b30
commit 051556674f
10 changed files with 124 additions and 54 deletions

View File

@@ -8,13 +8,17 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
cli = self
on(MRSK.hosts) do
execute *MRSK.auditor.record("Tagging #{MRSK.config.absolute_image} as the latest image"), verbosity: :debug
execute *MRSK.app.tag_current_as_latest
end
on(MRSK.hosts) do |host|
roles = MRSK.roles_on(host)
roles.each do |role|
execute *MRSK.auditor(role: role).record("Booted app version #{version}"), verbosity: :debug
begin
if capture_with_info(*MRSK.app(role: role).container_id_for_version(version)).present?
tmp_version = "#{version}_#{SecureRandom.hex(8)}"
info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}"
@@ -31,7 +35,6 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
end
end
end
end
desc "start", "Start existing app container on servers"
def start

View File

@@ -77,22 +77,32 @@ module Mrsk::Cli
end
# Runs the given block while the deploy lock is held.
#
# If the lock is already held (MRSK.holding_lock?), the block simply runs
# under the existing lock and nothing is acquired or released here.
# Otherwise the lock is acquired first and released exactly once afterwards.
#
# When the block raises, the lock is released before the error is re-raised,
# unless MRSK.hold_lock_on_error? is set; in that case the lock is left in
# place on purpose and an error message reports it.
def with_lock
  if MRSK.holding_lock?
    yield
  else
    acquire_lock

    begin
      yield
    rescue
      if MRSK.hold_lock_on_error?
        error " \e[31mDeploy lock was not released\e[0m"
      else
        release_lock
      end

      # Always propagate the original failure to the caller.
      raise
    end

    # Success path: release once, outside the rescued region, so that a
    # failure while releasing is not treated as a failure of the block.
    release_lock
  end
end
def acquire_lock
if MRSK.lock_count == 0
say "Acquiring the deploy lock"
on(MRSK.primary_host) { execute *MRSK.lock.acquire("Automatic deploy lock", MRSK.config.version) }
end
MRSK.lock_count += 1
MRSK.holding_lock = true
rescue SSHKit::Runner::ExecuteError => e
if e.message =~ /cannot create directory/
invoke "mrsk:cli:lock:status", []
@@ -103,10 +113,19 @@ module Mrsk::Cli
end
def release_lock
MRSK.lock_count -= 1
if MRSK.lock_count == 0
say "Releasing the deploy lock"
on(MRSK.primary_host) { execute *MRSK.lock.release }
MRSK.holding_lock = false
end
# Runs the block with MRSK.hold_lock_on_error switched on.
#
# If the flag is already set, the block just runs and the flag is left
# untouched. Otherwise the flag is set for the duration of the block and
# cleared again once the block returns normally. If the block raises, the
# flag is NOT cleared, so it is still set while the error propagates
# (with_lock checks it in its rescue).
def hold_lock_on_error
  return yield if MRSK.hold_lock_on_error?

  MRSK.hold_lock_on_error = true
  yield
  MRSK.hold_lock_on_error = false
end
end

View File

@@ -37,7 +37,9 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
say "Ensure app can pass healthcheck...", :magenta
invoke "mrsk:cli:healthcheck:perform", [], invoke_options
hold_lock_on_error do
invoke "mrsk:cli:app:boot", [], invoke_options
end
say "Prune old containers and images...", :magenta
invoke "mrsk:cli:prune:all", [], invoke_options
@@ -65,8 +67,10 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
say "Ensure app can pass healthcheck...", :magenta
invoke "mrsk:cli:healthcheck:perform", [], invoke_options
hold_lock_on_error do
invoke "mrsk:cli:app:boot", [], invoke_options
end
end
audit_broadcast "Redeployed #{service_version} in #{runtime.round} seconds" unless options[:skip_broadcast]
end
@@ -75,13 +79,19 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
desc "rollback [VERSION]", "Rollback app to VERSION"
def rollback(version)
with_lock do
invoke_options = deploy_options
hold_lock_on_error do
MRSK.config.version = version
old_version = nil
if container_available?(version)
say "Start version #{version}, then wait #{MRSK.config.readiness_delay}s for app to boot before stopping the old version...", :magenta
cli = self
old_version = nil
on(MRSK.hosts) do
execute *MRSK.auditor.record("Tagging #{MRSK.config.absolute_image} as the latest image"), verbosity: :debug
execute *MRSK.app.tag_current_as_latest
end
on(MRSK.hosts) do |host|
roles = MRSK.roles_on(host)
@@ -106,6 +116,7 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
end
end
end
end
desc "details", "Show details about all containers"
def details

View File

@@ -2,11 +2,12 @@ require "active_support/core_ext/enumerable"
require "active_support/core_ext/module/delegation"
class Mrsk::Commander
attr_accessor :verbosity, :lock_count
attr_accessor :verbosity, :holding_lock, :hold_lock_on_error
# Sets the starting state: :info verbosity, a lock counter of zero, and both
# deploy-lock flags (holding_lock, hold_lock_on_error) cleared.
def initialize
  @verbosity = :info
  @lock_count = 0
  @holding_lock = false
  @hold_lock_on_error = false
end
def config
@@ -115,6 +116,14 @@ class Mrsk::Commander
SSHKit.config.output_verbosity = old_level
end
# Predicate form of the holding_lock attribute; returns its current value.
def holding_lock?
  holding_lock
end
# Predicate form of the hold_lock_on_error attribute; returns its current value.
def hold_lock_on_error?
  hold_lock_on_error
end
private
# Lazy setup of SSHKit
def configure_sshkit_with(config)

View File

@@ -128,6 +128,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
docker :image, :prune, "--all", "--force", *filter_args
end
# Returns the result of `docker :tag` for tagging config.absolute_image
# (the image for the configured version) as config.latest_image.
def tag_current_as_latest
  source_image = config.absolute_image
  latest_tag = config.latest_image

  docker(:tag, source_image, latest_tag)
end
private
def container_name(version = nil)

View File

@@ -7,7 +7,6 @@ class Mrsk::Commands::Builder::Base < Mrsk::Commands::Base
# Returns the `docker :pull` command for config.absolute_image, i.e. the
# image for the configured version.
#
# Only the method's last expression is returned to the caller, so a second
# `docker :pull` line here would silently replace this command rather than
# add to it.
def pull
  docker :pull, config.absolute_image
end
def build_options

View File

@@ -6,6 +6,7 @@ class CliAppTest < CliTestCase
SSHKit::Backend::Abstract.any_instance.stubs(:capture).returns("123") # old version
run_command("boot").tap do |output|
assert_match "docker tag dhh/app:latest dhh/app:latest", output
assert_match "docker run --detach --restart unless-stopped", output
assert_match "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop", output
end

View File

@@ -30,7 +30,7 @@ class CliBuildTest < CliTestCase
# `pull` removes any local copy of the versioned image and then pulls that
# same version (dhh/app:999). The :latest tag is not expected to be pulled.
test "pull" do
  run_command("pull").tap do |output|
    assert_match /docker image rm --force dhh\/app:999/, output
    assert_match /docker pull dhh\/app:999/, output
  end
end

View File

@@ -79,18 +79,35 @@ class CliMainTest < CliTestCase
end
end
# An error raised by the app boot step (which deploy wraps in
# hold_lock_on_error) must leave the deploy lock held.
test "deploy errors during critical section leave lock in place" do
  invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" }

  # Every step before the boot succeeds; only the boot itself fails.
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:server:bootstrap", [], invoke_options)
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:registry:login", [], invoke_options)
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:build:deliver", [], invoke_options)
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:traefik:boot", [], invoke_options)
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:healthcheck:perform", [], invoke_options)
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:app:boot", [], invoke_options).raises(RuntimeError)

  assert !MRSK.holding_lock?

  assert_raises(RuntimeError) do
    stderred { run_command("deploy") }
  end

  assert MRSK.holding_lock?
end
# An error raised before the app boot step (here: server bootstrap) must
# release the deploy lock again rather than leave it held.
test "deploy errors during outside section leave remove lock" do
  invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" }

  Mrsk::Cli::Main.any_instance.expects(:invoke)
    .with("mrsk:cli:server:bootstrap", [], invoke_options)
    .raises(RuntimeError)

  assert !MRSK.holding_lock?

  assert_raises(RuntimeError) do
    stderred { run_command("deploy") }
  end

  # The lock state is checked through holding_lock? only; asserting a lock
  # counter of 1 here would contradict the release on this error path.
  assert !MRSK.holding_lock?
end
test "redeploy" do
@@ -136,6 +153,7 @@ class CliMainTest < CliTestCase
run_command("rollback", "123", config_file: "deploy_with_accessories").tap do |output|
assert_match "Start version 123", output
assert_match "docker tag dhh/app:123 dhh/app:latest", output
assert_match "docker start app-web-123", output
assert_match "docker container ls --all --filter name=^app-web-version-to-rollback$ --quiet | xargs docker stop", output, "Should stop the container that was previously running"
end

View File

@@ -267,6 +267,12 @@ class CommandsAppTest < ActiveSupport::TestCase
new_command.remove_images.join(" ")
end
# The command for version 999 tags that image as :latest.
test "tag_current_as_latest" do
  expected = "docker tag dhh/app:999 dhh/app:latest"
  actual = new_command.tag_current_as_latest.join(" ")

  assert_equal expected, actual
end
private
def new_command(role: "web")
Mrsk::Commands::App.new(Mrsk::Configuration.new(@config, destination: @destination, version: "999"), role: role)