Minimise holding the deploy lock

If a deploy fails, we'll now keep holding the deploy lock only when the error
occurs while switching the running containers; errors elsewhere release it.

We'll also move tagging the latest image from when the image is pulled
to just before the container switch. This ensures that earlier errors
don't leave the hosts with an updated latest tag while still running the
older version.
This commit is contained in:
Donal McBreen
2023-04-12 09:37:20 +01:00
parent 60a19f0b30
commit 051556674f
10 changed files with 124 additions and 54 deletions

View File

@@ -8,13 +8,17 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
cli = self cli = self
on(MRSK.hosts) do
execute *MRSK.auditor.record("Tagging #{MRSK.config.absolute_image} as the latest image"), verbosity: :debug
execute *MRSK.app.tag_current_as_latest
end
on(MRSK.hosts) do |host| on(MRSK.hosts) do |host|
roles = MRSK.roles_on(host) roles = MRSK.roles_on(host)
roles.each do |role| roles.each do |role|
execute *MRSK.auditor(role: role).record("Booted app version #{version}"), verbosity: :debug execute *MRSK.auditor(role: role).record("Booted app version #{version}"), verbosity: :debug
begin
if capture_with_info(*MRSK.app(role: role).container_id_for_version(version)).present? if capture_with_info(*MRSK.app(role: role).container_id_for_version(version)).present?
tmp_version = "#{version}_#{SecureRandom.hex(8)}" tmp_version = "#{version}_#{SecureRandom.hex(8)}"
info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}" info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}"
@@ -31,7 +35,6 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
end end
end end
end end
end
desc "start", "Start existing app container on servers" desc "start", "Start existing app container on servers"
def start def start

View File

@@ -77,22 +77,32 @@ module Mrsk::Cli
end end
def with_lock def with_lock
if MRSK.holding_lock?
yield
else
acquire_lock acquire_lock
begin
yield yield
release_lock
rescue rescue
error " \e[31mDeploy lock was not released\e[0m" if MRSK.lock_count > 0 if MRSK.hold_lock_on_error?
error " \e[31mDeploy lock was not released\e[0m"
else
release_lock
end
raise raise
end end
release_lock
end
end
def acquire_lock def acquire_lock
if MRSK.lock_count == 0
say "Acquiring the deploy lock" say "Acquiring the deploy lock"
on(MRSK.primary_host) { execute *MRSK.lock.acquire("Automatic deploy lock", MRSK.config.version) } on(MRSK.primary_host) { execute *MRSK.lock.acquire("Automatic deploy lock", MRSK.config.version) }
end
MRSK.lock_count += 1 MRSK.holding_lock = true
rescue SSHKit::Runner::ExecuteError => e rescue SSHKit::Runner::ExecuteError => e
if e.message =~ /cannot create directory/ if e.message =~ /cannot create directory/
invoke "mrsk:cli:lock:status", [] invoke "mrsk:cli:lock:status", []
@@ -103,10 +113,19 @@ module Mrsk::Cli
end end
def release_lock def release_lock
MRSK.lock_count -= 1
if MRSK.lock_count == 0
say "Releasing the deploy lock" say "Releasing the deploy lock"
on(MRSK.primary_host) { execute *MRSK.lock.release } on(MRSK.primary_host) { execute *MRSK.lock.release }
MRSK.holding_lock = false
end
# Runs the given block with MRSK.hold_lock_on_error switched on.
# The rescue branch of with_lock checks MRSK.hold_lock_on_error? to decide
# whether to leave the deploy lock in place or release it before re-raising.
def hold_lock_on_error
if MRSK.hold_lock_on_error?
# Flag is already set (e.g. a nested call): run the block and leave the flag alone.
yield
else
MRSK.hold_lock_on_error = true
yield
# Only reached when the block returns normally; there is no ensure here, so
# if the block raises the flag stays true.
MRSK.hold_lock_on_error = false
end end
end end
end end

View File

@@ -37,7 +37,9 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
say "Ensure app can pass healthcheck...", :magenta say "Ensure app can pass healthcheck...", :magenta
invoke "mrsk:cli:healthcheck:perform", [], invoke_options invoke "mrsk:cli:healthcheck:perform", [], invoke_options
hold_lock_on_error do
invoke "mrsk:cli:app:boot", [], invoke_options invoke "mrsk:cli:app:boot", [], invoke_options
end
say "Prune old containers and images...", :magenta say "Prune old containers and images...", :magenta
invoke "mrsk:cli:prune:all", [], invoke_options invoke "mrsk:cli:prune:all", [], invoke_options
@@ -65,8 +67,10 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
say "Ensure app can pass healthcheck...", :magenta say "Ensure app can pass healthcheck...", :magenta
invoke "mrsk:cli:healthcheck:perform", [], invoke_options invoke "mrsk:cli:healthcheck:perform", [], invoke_options
hold_lock_on_error do
invoke "mrsk:cli:app:boot", [], invoke_options invoke "mrsk:cli:app:boot", [], invoke_options
end end
end
audit_broadcast "Redeployed #{service_version} in #{runtime.round} seconds" unless options[:skip_broadcast] audit_broadcast "Redeployed #{service_version} in #{runtime.round} seconds" unless options[:skip_broadcast]
end end
@@ -75,13 +79,19 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
desc "rollback [VERSION]", "Rollback app to VERSION" desc "rollback [VERSION]", "Rollback app to VERSION"
def rollback(version) def rollback(version)
with_lock do with_lock do
invoke_options = deploy_options
hold_lock_on_error do
MRSK.config.version = version MRSK.config.version = version
old_version = nil
if container_available?(version) if container_available?(version)
say "Start version #{version}, then wait #{MRSK.config.readiness_delay}s for app to boot before stopping the old version...", :magenta say "Start version #{version}, then wait #{MRSK.config.readiness_delay}s for app to boot before stopping the old version...", :magenta
cli = self on(MRSK.hosts) do
old_version = nil execute *MRSK.auditor.record("Tagging #{MRSK.config.absolute_image} as the latest image"), verbosity: :debug
execute *MRSK.app.tag_current_as_latest
end
on(MRSK.hosts) do |host| on(MRSK.hosts) do |host|
roles = MRSK.roles_on(host) roles = MRSK.roles_on(host)
@@ -106,6 +116,7 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
end end
end end
end end
end
desc "details", "Show details about all containers" desc "details", "Show details about all containers"
def details def details

View File

@@ -2,11 +2,12 @@ require "active_support/core_ext/enumerable"
require "active_support/core_ext/module/delegation" require "active_support/core_ext/module/delegation"
class Mrsk::Commander class Mrsk::Commander
attr_accessor :verbosity, :lock_count attr_accessor :verbosity, :holding_lock, :hold_lock_on_error
def initialize def initialize
self.verbosity = :info self.verbosity = :info
self.lock_count = 0 self.holding_lock = false
self.hold_lock_on_error = false
end end
def config def config
@@ -115,6 +116,14 @@ class Mrsk::Commander
SSHKit.config.output_verbosity = old_level SSHKit.config.output_verbosity = old_level
end end
# Predicate form of the holding_lock accessor; returns its current value as-is.
def holding_lock?
  holding_lock
end
# Predicate form of the hold_lock_on_error accessor; returns its current value as-is.
def hold_lock_on_error?
  hold_lock_on_error
end
private private
# Lazy setup of SSHKit # Lazy setup of SSHKit
def configure_sshkit_with(config) def configure_sshkit_with(config)

View File

@@ -128,6 +128,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
docker :image, :prune, "--all", "--force", *filter_args docker :image, :prune, "--all", "--force", *filter_args
end end
# Builds the `docker tag` command that tags config.absolute_image as
# config.latest_image. Returns whatever the `docker` helper produces.
def tag_current_as_latest
  source_image = config.absolute_image
  docker(:tag, source_image, config.latest_image)
end
private private
def container_name(version = nil) def container_name(version = nil)

View File

@@ -7,7 +7,6 @@ class Mrsk::Commands::Builder::Base < Mrsk::Commands::Base
def pull def pull
docker :pull, config.absolute_image docker :pull, config.absolute_image
docker :pull, config.latest_image
end end
def build_options def build_options

View File

@@ -6,6 +6,7 @@ class CliAppTest < CliTestCase
SSHKit::Backend::Abstract.any_instance.stubs(:capture).returns("123") # old version SSHKit::Backend::Abstract.any_instance.stubs(:capture).returns("123") # old version
run_command("boot").tap do |output| run_command("boot").tap do |output|
assert_match "docker tag dhh/app:latest dhh/app:latest", output
assert_match "docker run --detach --restart unless-stopped", output assert_match "docker run --detach --restart unless-stopped", output
assert_match "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop", output assert_match "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop", output
end end

View File

@@ -30,7 +30,7 @@ class CliBuildTest < CliTestCase
test "pull" do test "pull" do
run_command("pull").tap do |output| run_command("pull").tap do |output|
assert_match /docker image rm --force dhh\/app:999/, output assert_match /docker image rm --force dhh\/app:999/, output
assert_match /docker pull dhh\/app:latest/, output assert_match /docker pull dhh\/app:999/, output
end end
end end

View File

@@ -79,18 +79,35 @@ class CliMainTest < CliTestCase
end end
end end
test "deploy errors leave lock in place" do test "deploy errors during critical section leave lock in place" do
invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" }
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:server:bootstrap", [], invoke_options)
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:registry:login", [], invoke_options)
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:build:deliver", [], invoke_options)
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:traefik:boot", [], invoke_options)
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:healthcheck:perform", [], invoke_options)
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:app:boot", [], invoke_options).raises(RuntimeError)
assert !MRSK.holding_lock?
assert_raises(RuntimeError) do
stderred { run_command("deploy") }
end
assert MRSK.holding_lock?
end
test "deploy errors during outside section leave remove lock" do
invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" } invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" }
Mrsk::Cli::Main.any_instance.expects(:invoke) Mrsk::Cli::Main.any_instance.expects(:invoke)
.with("mrsk:cli:server:bootstrap", [], invoke_options) .with("mrsk:cli:server:bootstrap", [], invoke_options)
.raises(RuntimeError) .raises(RuntimeError)
assert_equal 0, MRSK.lock_count assert !MRSK.holding_lock?
assert_raises(RuntimeError) do assert_raises(RuntimeError) do
stderred { run_command("deploy") } stderred { run_command("deploy") }
end end
assert_equal 1, MRSK.lock_count assert !MRSK.holding_lock?
end end
test "redeploy" do test "redeploy" do
@@ -136,6 +153,7 @@ class CliMainTest < CliTestCase
run_command("rollback", "123", config_file: "deploy_with_accessories").tap do |output| run_command("rollback", "123", config_file: "deploy_with_accessories").tap do |output|
assert_match "Start version 123", output assert_match "Start version 123", output
assert_match "docker tag dhh/app:123 dhh/app:latest", output
assert_match "docker start app-web-123", output assert_match "docker start app-web-123", output
assert_match "docker container ls --all --filter name=^app-web-version-to-rollback$ --quiet | xargs docker stop", output, "Should stop the container that was previously running" assert_match "docker container ls --all --filter name=^app-web-version-to-rollback$ --quiet | xargs docker stop", output, "Should stop the container that was previously running"
end end

View File

@@ -267,6 +267,12 @@ class CommandsAppTest < ActiveSupport::TestCase
new_command.remove_images.join(" ") new_command.remove_images.join(" ")
end end
# Checks the exact docker command generated for retagging the current image as latest.
test "tag_current_as_latest" do
  expected_command = "docker tag dhh/app:999 dhh/app:latest"
  assert_equal expected_command, new_command.tag_current_as_latest.join(" ")
end
private private
def new_command(role: "web") def new_command(role: "web")
Mrsk::Commands::App.new(Mrsk::Configuration.new(@config, destination: @destination, version: "999"), role: role) Mrsk::Commands::App.new(Mrsk::Configuration.new(@config, destination: @destination, version: "999"), role: role)