Minimise holding the deploy lock

If we get an error, we'll only keep holding the deploy lock if the error
occurs while trying to switch the running containers; an error at any
other point releases the lock.

We'll also move tagging the latest image from when the image is pulled
to just before the container switch. This ensures that earlier errors
don't leave the hosts with an updated latest tag while still running the
older version.
This commit is contained in:
Donal McBreen
2023-04-12 09:37:20 +01:00
parent 60a19f0b30
commit 051556674f
10 changed files with 124 additions and 54 deletions

View File

@@ -8,25 +8,28 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
cli = self cli = self
on(MRSK.hosts) do
execute *MRSK.auditor.record("Tagging #{MRSK.config.absolute_image} as the latest image"), verbosity: :debug
execute *MRSK.app.tag_current_as_latest
end
on(MRSK.hosts) do |host| on(MRSK.hosts) do |host|
roles = MRSK.roles_on(host) roles = MRSK.roles_on(host)
roles.each do |role| roles.each do |role|
execute *MRSK.auditor(role: role).record("Booted app version #{version}"), verbosity: :debug execute *MRSK.auditor(role: role).record("Booted app version #{version}"), verbosity: :debug
begin if capture_with_info(*MRSK.app(role: role).container_id_for_version(version)).present?
if capture_with_info(*MRSK.app(role: role).container_id_for_version(version)).present? tmp_version = "#{version}_#{SecureRandom.hex(8)}"
tmp_version = "#{version}_#{SecureRandom.hex(8)}" info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}"
info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}" execute *MRSK.auditor(role: role).record("Renaming container #{version} to #{tmp_version}"), verbosity: :debug
execute *MRSK.auditor(role: role).record("Renaming container #{version} to #{tmp_version}"), verbosity: :debug execute *MRSK.app(role: role).rename_container(version: version, new_version: tmp_version)
execute *MRSK.app(role: role).rename_container(version: version, new_version: tmp_version)
end
old_version = capture_with_info(*MRSK.app(role: role).current_running_version).strip
execute *MRSK.app(role: role).run
sleep MRSK.config.readiness_delay
execute *MRSK.app(role: role).stop(version: old_version), raise_on_non_zero_exit: false if old_version.present?
end end
old_version = capture_with_info(*MRSK.app(role: role).current_running_version).strip
execute *MRSK.app(role: role).run
sleep MRSK.config.readiness_delay
execute *MRSK.app(role: role).stop(version: old_version), raise_on_non_zero_exit: false if old_version.present?
end end
end end
end end

View File

@@ -77,22 +77,32 @@ module Mrsk::Cli
end end
def with_lock def with_lock
acquire_lock if MRSK.holding_lock?
yield
else
acquire_lock
yield begin
yield
rescue
if MRSK.hold_lock_on_error?
error " \e[31mDeploy lock was not released\e[0m"
else
release_lock
end
release_lock raise
rescue end
error " \e[31mDeploy lock was not released\e[0m" if MRSK.lock_count > 0
raise release_lock
end
end end
def acquire_lock def acquire_lock
if MRSK.lock_count == 0 say "Acquiring the deploy lock"
say "Acquiring the deploy lock" on(MRSK.primary_host) { execute *MRSK.lock.acquire("Automatic deploy lock", MRSK.config.version) }
on(MRSK.primary_host) { execute *MRSK.lock.acquire("Automatic deploy lock", MRSK.config.version) }
end MRSK.holding_lock = true
MRSK.lock_count += 1
rescue SSHKit::Runner::ExecuteError => e rescue SSHKit::Runner::ExecuteError => e
if e.message =~ /cannot create directory/ if e.message =~ /cannot create directory/
invoke "mrsk:cli:lock:status", [] invoke "mrsk:cli:lock:status", []
@@ -103,10 +113,19 @@ module Mrsk::Cli
end end
def release_lock def release_lock
MRSK.lock_count -= 1 say "Releasing the deploy lock"
if MRSK.lock_count == 0 on(MRSK.primary_host) { execute *MRSK.lock.release }
say "Releasing the deploy lock"
on(MRSK.primary_host) { execute *MRSK.lock.release } MRSK.holding_lock = false
end
def hold_lock_on_error
if MRSK.hold_lock_on_error?
yield
else
MRSK.hold_lock_on_error = true
yield
MRSK.hold_lock_on_error = false
end end
end end
end end

View File

@@ -37,7 +37,9 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
say "Ensure app can pass healthcheck...", :magenta say "Ensure app can pass healthcheck...", :magenta
invoke "mrsk:cli:healthcheck:perform", [], invoke_options invoke "mrsk:cli:healthcheck:perform", [], invoke_options
invoke "mrsk:cli:app:boot", [], invoke_options hold_lock_on_error do
invoke "mrsk:cli:app:boot", [], invoke_options
end
say "Prune old containers and images...", :magenta say "Prune old containers and images...", :magenta
invoke "mrsk:cli:prune:all", [], invoke_options invoke "mrsk:cli:prune:all", [], invoke_options
@@ -65,7 +67,9 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
say "Ensure app can pass healthcheck...", :magenta say "Ensure app can pass healthcheck...", :magenta
invoke "mrsk:cli:healthcheck:perform", [], invoke_options invoke "mrsk:cli:healthcheck:perform", [], invoke_options
invoke "mrsk:cli:app:boot", [], invoke_options hold_lock_on_error do
invoke "mrsk:cli:app:boot", [], invoke_options
end
end end
audit_broadcast "Redeployed #{service_version} in #{runtime.round} seconds" unless options[:skip_broadcast] audit_broadcast "Redeployed #{service_version} in #{runtime.round} seconds" unless options[:skip_broadcast]
@@ -75,34 +79,41 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
desc "rollback [VERSION]", "Rollback app to VERSION" desc "rollback [VERSION]", "Rollback app to VERSION"
def rollback(version) def rollback(version)
with_lock do with_lock do
MRSK.config.version = version invoke_options = deploy_options
if container_available?(version) hold_lock_on_error do
say "Start version #{version}, then wait #{MRSK.config.readiness_delay}s for app to boot before stopping the old version...", :magenta MRSK.config.version = version
cli = self
old_version = nil old_version = nil
on(MRSK.hosts) do |host| if container_available?(version)
roles = MRSK.roles_on(host) say "Start version #{version}, then wait #{MRSK.config.readiness_delay}s for app to boot before stopping the old version...", :magenta
roles.each do |role| on(MRSK.hosts) do
app = MRSK.app(role: role) execute *MRSK.auditor.record("Tagging #{MRSK.config.absolute_image} as the latest image"), verbosity: :debug
old_version = capture_with_info(*app.current_running_version).strip.presence execute *MRSK.app.tag_current_as_latest
end
execute *app.start on(MRSK.hosts) do |host|
roles = MRSK.roles_on(host)
if old_version roles.each do |role|
sleep MRSK.config.readiness_delay app = MRSK.app(role: role)
old_version = capture_with_info(*app.current_running_version).strip.presence
execute *app.stop(version: old_version), raise_on_non_zero_exit: false execute *app.start
if old_version
sleep MRSK.config.readiness_delay
execute *app.stop(version: old_version), raise_on_non_zero_exit: false
end
end end
end end
end
audit_broadcast "Rolled back #{service_version(Mrsk::Utils.abbreviate_version(old_version))} to #{service_version}" unless options[:skip_broadcast] audit_broadcast "Rolled back #{service_version(Mrsk::Utils.abbreviate_version(old_version))} to #{service_version}" unless options[:skip_broadcast]
else else
say "The app version '#{version}' is not available as a container (use 'mrsk app containers' for available versions)", :red say "The app version '#{version}' is not available as a container (use 'mrsk app containers' for available versions)", :red
end
end end
end end
end end

View File

@@ -2,11 +2,12 @@ require "active_support/core_ext/enumerable"
require "active_support/core_ext/module/delegation" require "active_support/core_ext/module/delegation"
class Mrsk::Commander class Mrsk::Commander
attr_accessor :verbosity, :lock_count attr_accessor :verbosity, :holding_lock, :hold_lock_on_error
def initialize def initialize
self.verbosity = :info self.verbosity = :info
self.lock_count = 0 self.holding_lock = false
self.hold_lock_on_error = false
end end
def config def config
@@ -115,6 +116,14 @@ class Mrsk::Commander
SSHKit.config.output_verbosity = old_level SSHKit.config.output_verbosity = old_level
end end
def holding_lock?
self.holding_lock
end
def hold_lock_on_error?
self.hold_lock_on_error
end
private private
# Lazy setup of SSHKit # Lazy setup of SSHKit
def configure_sshkit_with(config) def configure_sshkit_with(config)

View File

@@ -128,6 +128,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
docker :image, :prune, "--all", "--force", *filter_args docker :image, :prune, "--all", "--force", *filter_args
end end
def tag_current_as_latest
docker :tag, config.absolute_image, config.latest_image
end
private private
def container_name(version = nil) def container_name(version = nil)

View File

@@ -7,7 +7,6 @@ class Mrsk::Commands::Builder::Base < Mrsk::Commands::Base
def pull def pull
docker :pull, config.absolute_image docker :pull, config.absolute_image
docker :pull, config.latest_image
end end
def build_options def build_options

View File

@@ -6,6 +6,7 @@ class CliAppTest < CliTestCase
SSHKit::Backend::Abstract.any_instance.stubs(:capture).returns("123") # old version SSHKit::Backend::Abstract.any_instance.stubs(:capture).returns("123") # old version
run_command("boot").tap do |output| run_command("boot").tap do |output|
assert_match "docker tag dhh/app:latest dhh/app:latest", output
assert_match "docker run --detach --restart unless-stopped", output assert_match "docker run --detach --restart unless-stopped", output
assert_match "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop", output assert_match "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop", output
end end

View File

@@ -30,7 +30,7 @@ class CliBuildTest < CliTestCase
test "pull" do test "pull" do
run_command("pull").tap do |output| run_command("pull").tap do |output|
assert_match /docker image rm --force dhh\/app:999/, output assert_match /docker image rm --force dhh\/app:999/, output
assert_match /docker pull dhh\/app:latest/, output assert_match /docker pull dhh\/app:999/, output
end end
end end

View File

@@ -79,18 +79,35 @@ class CliMainTest < CliTestCase
end end
end end
test "deploy errors leave lock in place" do test "deploy errors during critical section leave lock in place" do
invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" }
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:server:bootstrap", [], invoke_options)
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:registry:login", [], invoke_options)
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:build:deliver", [], invoke_options)
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:traefik:boot", [], invoke_options)
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:healthcheck:perform", [], invoke_options)
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:app:boot", [], invoke_options).raises(RuntimeError)
assert !MRSK.holding_lock?
assert_raises(RuntimeError) do
stderred { run_command("deploy") }
end
assert MRSK.holding_lock?
end
test "deploy errors during outside section leave remove lock" do
invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" } invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" }
Mrsk::Cli::Main.any_instance.expects(:invoke) Mrsk::Cli::Main.any_instance.expects(:invoke)
.with("mrsk:cli:server:bootstrap", [], invoke_options) .with("mrsk:cli:server:bootstrap", [], invoke_options)
.raises(RuntimeError) .raises(RuntimeError)
assert_equal 0, MRSK.lock_count assert !MRSK.holding_lock?
assert_raises(RuntimeError) do assert_raises(RuntimeError) do
stderred { run_command("deploy") } stderred { run_command("deploy") }
end end
assert_equal 1, MRSK.lock_count assert !MRSK.holding_lock?
end end
test "redeploy" do test "redeploy" do
@@ -136,6 +153,7 @@ class CliMainTest < CliTestCase
run_command("rollback", "123", config_file: "deploy_with_accessories").tap do |output| run_command("rollback", "123", config_file: "deploy_with_accessories").tap do |output|
assert_match "Start version 123", output assert_match "Start version 123", output
assert_match "docker tag dhh/app:123 dhh/app:latest", output
assert_match "docker start app-web-123", output assert_match "docker start app-web-123", output
assert_match "docker container ls --all --filter name=^app-web-version-to-rollback$ --quiet | xargs docker stop", output, "Should stop the container that was previously running" assert_match "docker container ls --all --filter name=^app-web-version-to-rollback$ --quiet | xargs docker stop", output, "Should stop the container that was previously running"
end end

View File

@@ -267,6 +267,12 @@ class CommandsAppTest < ActiveSupport::TestCase
new_command.remove_images.join(" ") new_command.remove_images.join(" ")
end end
test "tag_current_as_latest" do
assert_equal \
"docker tag dhh/app:999 dhh/app:latest",
new_command.tag_current_as_latest.join(" ")
end
private private
def new_command(role: "web") def new_command(role: "web")
Mrsk::Commands::App.new(Mrsk::Configuration.new(@config, destination: @destination, version: "999"), role: role) Mrsk::Commands::App.new(Mrsk::Configuration.new(@config, destination: @destination, version: "999"), role: role)