Merge pull request #211 from basecamp/minimise-lock-retention

Minimise holding the deploy lock
This commit is contained in:
David Heinemeier Hansson
2023-04-12 14:08:05 +02:00
committed by GitHub
10 changed files with 124 additions and 54 deletions

View File

@@ -8,13 +8,17 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
cli = self
on(MRSK.hosts) do
execute *MRSK.auditor.record("Tagging #{MRSK.config.absolute_image} as the latest image"), verbosity: :debug
execute *MRSK.app.tag_current_as_latest
end
on(MRSK.hosts) do |host|
roles = MRSK.roles_on(host)
roles.each do |role|
execute *MRSK.auditor(role: role).record("Booted app version #{version}"), verbosity: :debug
begin
if capture_with_info(*MRSK.app(role: role).container_id_for_version(version)).present?
tmp_version = "#{version}_#{SecureRandom.hex(8)}"
info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}"
@@ -31,7 +35,6 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
end
end
end
end
desc "start", "Start existing app container on servers"
def start

View File

@@ -77,22 +77,32 @@ module Mrsk::Cli
end
# Runs the given block while holding the deploy lock.
#
# When the lock is already held (a nested call), the block is simply yielded
# to and the outermost caller stays responsible for releasing it. Otherwise
# the lock is acquired, the block runs, and the lock is released exactly once.
#
# If the block raises, the lock is released before the error is re-raised,
# unless MRSK.hold_lock_on_error? is set; in that case the lock is left in
# place on purpose and an error message says so.
def with_lock
  if MRSK.holding_lock?
    yield
  else
    acquire_lock

    begin
      yield
    rescue
      if MRSK.hold_lock_on_error?
        error " \e[31mDeploy lock was not released\e[0m"
      else
        release_lock
      end

      # Always propagate the original failure to the caller.
      raise
    end

    # Success path: release once, after the block has finished.
    release_lock
  end
end
def acquire_lock
if MRSK.lock_count == 0
say "Acquiring the deploy lock"
on(MRSK.primary_host) { execute *MRSK.lock.acquire("Automatic deploy lock", MRSK.config.version) }
end
MRSK.lock_count += 1
MRSK.holding_lock = true
rescue SSHKit::Runner::ExecuteError => e
if e.message =~ /cannot create directory/
invoke "mrsk:cli:lock:status", []
@@ -103,10 +113,19 @@ module Mrsk::Cli
end
# Releases the deploy lock on the primary host and clears MRSK.holding_lock.
#
# NOTE(review): the lines below look like the removed and added sides of a
# diff shown together: the lock_count bookkeeping sits next to the
# holding_lock flag, and the `if` has no `end` of its own before the next
# `def`. Confirm which lines are current before relying on this as written.
def release_lock
MRSK.lock_count -= 1
if MRSK.lock_count == 0
say "Releasing the deploy lock"
on(MRSK.primary_host) { execute *MRSK.lock.release }
MRSK.holding_lock = false
end
# Marks the given block as a critical section: while it runs,
# MRSK.hold_lock_on_error? is true.
#
# If the flag is already set (nested use), the block is just yielded to and
# its value returned. Otherwise the flag is set, the block runs, and the flag
# is cleared again once the block returns normally.
#
# The flag is deliberately NOT cleared when the block raises: with_lock reads
# it in its rescue clause to decide whether the deploy lock stays in place.
def hold_lock_on_error
  return yield if MRSK.hold_lock_on_error?

  MRSK.hold_lock_on_error = true
  yield
  MRSK.hold_lock_on_error = false
end
end

View File

@@ -37,7 +37,9 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
say "Ensure app can pass healthcheck...", :magenta
invoke "mrsk:cli:healthcheck:perform", [], invoke_options
hold_lock_on_error do
invoke "mrsk:cli:app:boot", [], invoke_options
end
say "Prune old containers and images...", :magenta
invoke "mrsk:cli:prune:all", [], invoke_options
@@ -65,8 +67,10 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
say "Ensure app can pass healthcheck...", :magenta
invoke "mrsk:cli:healthcheck:perform", [], invoke_options
hold_lock_on_error do
invoke "mrsk:cli:app:boot", [], invoke_options
end
end
audit_broadcast "Redeployed #{service_version} in #{runtime.round} seconds" unless options[:skip_broadcast]
end
@@ -75,13 +79,19 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
desc "rollback [VERSION]", "Rollback app to VERSION"
def rollback(version)
with_lock do
invoke_options = deploy_options
hold_lock_on_error do
MRSK.config.version = version
old_version = nil
if container_available?(version)
say "Start version #{version}, then wait #{MRSK.config.readiness_delay}s for app to boot before stopping the old version...", :magenta
cli = self
old_version = nil
on(MRSK.hosts) do
execute *MRSK.auditor.record("Tagging #{MRSK.config.absolute_image} as the latest image"), verbosity: :debug
execute *MRSK.app.tag_current_as_latest
end
on(MRSK.hosts) do |host|
roles = MRSK.roles_on(host)
@@ -106,6 +116,7 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
end
end
end
end
desc "details", "Show details about all containers"
def details

View File

@@ -2,11 +2,12 @@ require "active_support/core_ext/enumerable"
require "active_support/core_ext/module/delegation"
class Mrsk::Commander
attr_accessor :verbosity, :lock_count
attr_accessor :verbosity, :holding_lock, :hold_lock_on_error
# Sets the commander's initial state: verbosity :info, lock_count 0, and both
# lock flags (holding_lock, hold_lock_on_error) false.
#
# NOTE(review): lock_count and holding_lock both describe whether the deploy
# lock is held; this view appears to show pre- and post-change lines of a diff
# together. Confirm whether lock_count is still meant to be initialised here.
def initialize
self.verbosity = :info
self.lock_count = 0
self.holding_lock = false
self.hold_lock_on_error = false
end
def config
@@ -115,6 +116,14 @@ class Mrsk::Commander
SSHKit.config.output_verbosity = old_level
end
# Predicate form of the holding_lock attribute: returns whatever value is
# currently stored there.
def holding_lock?
  holding_lock
end
# Predicate form of the hold_lock_on_error attribute: returns whatever value
# is currently stored there.
def hold_lock_on_error?
  hold_lock_on_error
end
private
# Lazy setup of SSHKit
def configure_sshkit_with(config)

View File

@@ -128,6 +128,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
docker :image, :prune, "--all", "--force", *filter_args
end
# Builds the `docker tag` invocation that tags the configured image
# (config.absolute_image) as the latest image (config.latest_image).
def tag_current_as_latest
  source_image = config.absolute_image
  target_image = config.latest_image

  docker(:tag, source_image, target_image)
end
private
def container_name(version = nil)

View File

@@ -7,7 +7,6 @@ class Mrsk::Commands::Builder::Base < Mrsk::Commands::Base
# Issues `docker :pull` for config.absolute_image and then for
# config.latest_image; the method's value is that of the last call.
#
# NOTE(review): if `docker` only builds and returns a command (presumably, as
# with the other command builders), the first line's result is discarded.
# This view also appears to show old and new diff lines together, so confirm
# which of the two pulls is current.
def pull
docker :pull, config.absolute_image
docker :pull, config.latest_image
end
def build_options

View File

@@ -6,6 +6,7 @@ class CliAppTest < CliTestCase
SSHKit::Backend::Abstract.any_instance.stubs(:capture).returns("123") # old version
run_command("boot").tap do |output|
assert_match "docker tag dhh/app:latest dhh/app:latest", output
assert_match "docker run --detach --restart unless-stopped", output
assert_match "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop", output
end

View File

@@ -30,7 +30,7 @@ class CliBuildTest < CliTestCase
# Runs the `pull` command and checks the docker commands in its output: an
# image removal for dhh/app:999 followed by docker pull lines.
#
# NOTE(review): both a :latest and a :999 pull are asserted here; this looks
# like the old and new expectation of a diff shown together. Confirm which
# one the current `pull` implementation should satisfy.
test "pull" do
run_command("pull").tap do |output|
assert_match /docker image rm --force dhh\/app:999/, output
assert_match /docker pull dhh\/app:latest/, output
assert_match /docker pull dhh\/app:999/, output
end
end

View File

@@ -79,18 +79,35 @@ class CliMainTest < CliTestCase
end
end
# A failure while booting the app must leave the deploy lock held: the CLI
# wraps the app boot invocation in hold_lock_on_error, so with_lock does not
# release the lock when that step raises.
test "deploy errors during critical section leave lock in place" do
  invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" }

  # Every step before the app boot is expected to be invoked and succeed.
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:server:bootstrap", [], invoke_options)
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:registry:login", [], invoke_options)
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:build:deliver", [], invoke_options)
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:traefik:boot", [], invoke_options)
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:healthcheck:perform", [], invoke_options)

  # The boot step itself fails.
  Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:app:boot", [], invoke_options).raises(RuntimeError)

  assert !MRSK.holding_lock?

  assert_raises(RuntimeError) do
    stderred { run_command("deploy") }
  end

  assert MRSK.holding_lock?
end
# A failure in a step that is not wrapped in hold_lock_on_error (here the
# server bootstrap) must not leave the deploy lock held afterwards.
test "deploy errors during outside section leave remove lock" do
  invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "skip_broadcast" => false, "version" => "999" }

  Mrsk::Cli::Main.any_instance.expects(:invoke)
    .with("mrsk:cli:server:bootstrap", [], invoke_options)
    .raises(RuntimeError)

  # Lock state is tracked through the holding_lock flag, before and after.
  assert !MRSK.holding_lock?

  assert_raises(RuntimeError) do
    stderred { run_command("deploy") }
  end

  assert !MRSK.holding_lock?
end
test "redeploy" do
@@ -136,6 +153,7 @@ class CliMainTest < CliTestCase
run_command("rollback", "123", config_file: "deploy_with_accessories").tap do |output|
assert_match "Start version 123", output
assert_match "docker tag dhh/app:123 dhh/app:latest", output
assert_match "docker start app-web-123", output
assert_match "docker container ls --all --filter name=^app-web-version-to-rollback$ --quiet | xargs docker stop", output, "Should stop the container that was previously running"
end

View File

@@ -267,6 +267,12 @@ class CommandsAppTest < ActiveSupport::TestCase
new_command.remove_images.join(" ")
end
# The tag command, joined with spaces, should tag the versioned image
# (dhh/app:999) as dhh/app:latest.
test "tag_current_as_latest" do
  expected = "docker tag dhh/app:999 dhh/app:latest"
  actual = new_command.tag_current_as_latest.join(" ")

  assert_equal expected, actual
end
private
def new_command(role: "web")
Mrsk::Commands::App.new(Mrsk::Configuration.new(@config, destination: @destination, version: "999"), role: role)