Zero downtime redeploys

When deploying, check whether a container with the same name already
exists. If it does, rename it so that the version part of its name becomes
"<version>_<random_hex_string>", which removes the name clash with the new
container we want to boot.

We can then do the normal zero downtime run/wait/stop.

While implementing this I discovered that `--filter name=foo` does a
substring match for foo, so I've updated those filters to do an exact
match (`--filter name=^foo$`) instead.
This commit is contained in:
Donal McBreen
2023-03-24 17:06:54 +00:00
parent 01a2b678d7
commit 05488e4c1e
12 changed files with 48 additions and 48 deletions

View File

@@ -15,22 +15,17 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
execute *MRSK.auditor(role: role).record("Booted app version #{version}"), verbosity: :debug execute *MRSK.auditor(role: role).record("Booted app version #{version}"), verbosity: :debug
begin begin
if capture_with_info(*MRSK.app(role: role).container_id_for_version(version)).present?
tmp_version = "#{version}_#{SecureRandom.hex(8)}"
info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}"
execute *MRSK.auditor(role: role).record("Renaming container #{version} to #{tmp_version}"), verbosity: :debug
execute *MRSK.app(role: role).rename_container(version: version, new_version: tmp_version)
end
old_version = capture_with_info(*MRSK.app(role: role).current_running_version).strip old_version = capture_with_info(*MRSK.app(role: role).current_running_version).strip
execute *MRSK.app(role: role).run execute *MRSK.app(role: role).run
sleep MRSK.config.readiness_delay sleep MRSK.config.readiness_delay
execute *MRSK.app(role: role).stop(version: old_version), raise_on_non_zero_exit: false if old_version.present? execute *MRSK.app(role: role).stop(version: old_version), raise_on_non_zero_exit: false if old_version.present?
rescue SSHKit::Command::Failed => e
if e.message =~ /already in use/
error "Rebooting container with same version #{version} already deployed on #{host} (may cause gap in zero-downtime promise!)"
execute *MRSK.auditor(role: role).record("Rebooted app version #{version}"), verbosity: :debug
execute *MRSK.app(role: role).stop(version: version)
execute *MRSK.app(role: role).remove_container(version: version)
execute *MRSK.app(role: role).run
else
raise
end
end end
end end
end end

View File

@@ -86,6 +86,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
docker :ps, "--quiet", *filter_args docker :ps, "--quiet", *filter_args
end end
def container_id_for_version(version)
container_id_for(container_name: container_name(version))
end
def current_running_version def current_running_version
# FIXME: Find more graceful way to extract the version from "app-version" than using sed and tail! # FIXME: Find more graceful way to extract the version from "app-version" than using sed and tail!
pipe \ pipe \
@@ -108,6 +112,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
xargs(docker(:container, :rm)) xargs(docker(:container, :rm))
end end
def rename_container(version:, new_version:)
docker :rename, container_name(version), container_name(new_version)
end
def remove_containers def remove_containers
docker :container, :prune, "--force", *filter_args docker :container, :prune, "--force", *filter_args
end end
@@ -126,10 +134,6 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
[ config.service, role, config.destination, version || config.version ].compact.join("-") [ config.service, role, config.destination, version || config.version ].compact.join("-")
end end
def container_id_for_version(version)
container_id_for(container_name: container_name(version))
end
def filter_args def filter_args
argumentize "--filter", filters argumentize "--filter", filters
end end

View File

@@ -16,7 +16,7 @@ module Mrsk::Commands
end end
def container_id_for(container_name:) def container_id_for(container_name:)
docker :container, :ls, "--all", "--filter", "name=#{container_name}", "--quiet" docker :container, :ls, "--all", "--filter", "name=^#{container_name}$", "--quiet"
end end
private private

View File

@@ -43,7 +43,7 @@ class Mrsk::Commands::Healthcheck < Mrsk::Commands::Base
end end
def container_id def container_id
container_id_for(container_name: container_name) container_id_for(container_name: container_name_with_version)
end end
def health_url def health_url

View File

@@ -26,7 +26,7 @@ class Mrsk::Commands::Traefik < Mrsk::Commands::Base
end end
def info def info
docker :ps, "--filter", "name=traefik" docker :ps, "--filter", "name=^traefik$"
end end
def logs(since: nil, lines: nil, grep: nil) def logs(since: nil, lines: nil, grep: nil)

View File

@@ -7,25 +7,26 @@ class CliAppTest < CliTestCase
run_command("boot").tap do |output| run_command("boot").tap do |output|
assert_match "docker run --detach --restart unless-stopped", output assert_match "docker run --detach --restart unless-stopped", output
assert_match "docker container ls --all --filter name=app-web-123 --quiet | xargs docker stop", output assert_match "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop", output
end end
end end
test "boot will reboot if same version is already running" do test "boot will rename if same version is already running" do
run_command("details") # Preheat MRSK const run_command("details") # Preheat MRSK const
# Prevent expected failures from outputting to terminal SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
Thread.report_on_exception = false .with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet")
.returns("12345678") # running version
Mrsk::Commands::App.any_instance.stubs(:run) SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.raises(SSHKit::Command::Failed.new("already in use")) .with(:docker, :ps, "--filter", "label=service=app", "--filter", "label=role=web", "--format", "\"{{.Names}}\"", "|", "sed 's/-/\\n/g'", "|", "tail -n 1")
.then .returns("123") # old version
.returns([ :docker, :run ])
run_command("boot").tap do |output| run_command("boot").tap do |output|
assert_match "Rebooting container with same version latest already deployed", output # Can't start what's already running assert_match /Renaming container .* to .* as already deployed on 1.1.1.1/, output # Rename
assert_match "docker container ls --all --filter name=app-web-latest --quiet | xargs docker container rm", output # Remove old container assert_match /docker rename .* .*/, output
assert_match "docker run", output # Start new container assert_match "docker run --detach --restart unless-stopped", output
assert_match "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop", output
end end
ensure ensure
Thread.report_on_exception = true Thread.report_on_exception = true
@@ -59,7 +60,7 @@ class CliAppTest < CliTestCase
test "remove_container" do test "remove_container" do
run_command("remove_container", "1234567").tap do |output| run_command("remove_container", "1234567").tap do |output|
assert_match "docker container ls --all --filter name=app-web-1234567 --quiet | xargs docker container rm", output assert_match "docker container ls --all --filter name=^app-web-1234567$ --quiet | xargs docker container rm", output
end end
end end

View File

@@ -7,11 +7,11 @@ class CliHealthcheckTest < CliTestCase
SSHKit::Backend::Abstract.any_instance.stubs(:sleep) # No sleeping when retrying SSHKit::Backend::Abstract.any_instance.stubs(:sleep) # No sleeping when retrying
SSHKit::Backend::Abstract.any_instance.stubs(:execute) SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=healthcheck-app", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false) .with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
SSHKit::Backend::Abstract.any_instance.stubs(:execute) SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "MRSK_CONTAINER_NAME=\"healthcheck-app\"", "dhh/app:999") .with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "MRSK_CONTAINER_NAME=\"healthcheck-app\"", "dhh/app:999")
SSHKit::Backend::Abstract.any_instance.stubs(:execute) SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=healthcheck-app", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false) .with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false)
# Fail twice to test retry logic # Fail twice to test retry logic
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info) SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
@@ -38,7 +38,7 @@ class CliHealthcheckTest < CliTestCase
.with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up") .with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up")
.returns("curl: command not found") .returns("curl: command not found")
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info) SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=healthcheck-app", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1") .with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
exception = assert_raises SSHKit::Runner::ExecuteError do exception = assert_raises SSHKit::Runner::ExecuteError do
run_command("perform") run_command("perform")
@@ -55,7 +55,7 @@ class CliHealthcheckTest < CliTestCase
.with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up") .with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up")
.returns("500") .returns("500")
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info) SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=healthcheck-app", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1") .with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
exception = assert_raises do exception = assert_raises do
run_command("perform") run_command("perform")

View File

@@ -95,7 +95,7 @@ class CliMainTest < CliTestCase
run_command("rollback", "123").tap do |output| run_command("rollback", "123").tap do |output|
assert_match "Start version 123", output assert_match "Start version 123", output
assert_match "docker start app-123", output assert_match "docker start app-123", output
assert_match "docker container ls --all --filter name=app-version-to-rollback --quiet | xargs docker stop", output, "Should stop the container that was previously running" assert_match "docker container ls --all --filter name=^app-version-to-rollback$ --quiet | xargs docker stop", output, "Should stop the container that was previously running"
end end
end end

View File

@@ -36,7 +36,7 @@ class CliTraefikTest < CliTestCase
test "details" do test "details" do
run_command("details").tap do |output| run_command("details").tap do |output|
assert_match "docker ps --filter name=traefik", output assert_match "docker ps --filter name=^traefik$", output
end end
end end

View File

@@ -76,7 +76,7 @@ class CommandsAppTest < ActiveSupport::TestCase
test "stop with version" do test "stop with version" do
assert_equal \ assert_equal \
"docker container ls --all --filter name=app-web-123 --quiet | xargs docker stop", "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop",
new_command.stop(version: "123").join(" ") new_command.stop(version: "123").join(" ")
end end
@@ -193,7 +193,7 @@ class CommandsAppTest < ActiveSupport::TestCase
test "container_id_for" do test "container_id_for" do
assert_equal \ assert_equal \
"docker container ls --all --filter name=app-999 --quiet", "docker container ls --all --filter name=^app-999$ --quiet",
new_command.container_id_for(container_name: "app-999").join(" ") new_command.container_id_for(container_name: "app-999").join(" ")
end end
@@ -224,14 +224,14 @@ class CommandsAppTest < ActiveSupport::TestCase
test "remove_container" do test "remove_container" do
assert_equal \ assert_equal \
"docker container ls --all --filter name=app-web-999 --quiet | xargs docker container rm", "docker container ls --all --filter name=^app-web-999$ --quiet | xargs docker container rm",
new_command.remove_container(version: "999").join(" ") new_command.remove_container(version: "999").join(" ")
end end
test "remove_container with destination" do test "remove_container with destination" do
@destination = "staging" @destination = "staging"
assert_equal \ assert_equal \
"docker container ls --all --filter name=app-web-staging-999 --quiet | xargs docker container rm", "docker container ls --all --filter name=^app-web-staging-999$ --quiet | xargs docker container rm",
new_command.remove_container(version: "999").join(" ") new_command.remove_container(version: "999").join(" ")
end end

View File

@@ -53,7 +53,7 @@ class CommandsHealthcheckTest < ActiveSupport::TestCase
test "stop" do test "stop" do
assert_equal \ assert_equal \
"docker container ls --all --filter name=healthcheck-app --quiet | xargs docker stop", "docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker stop",
new_command.stop.join(" ") new_command.stop.join(" ")
end end
@@ -61,13 +61,13 @@ class CommandsHealthcheckTest < ActiveSupport::TestCase
@destination = "staging" @destination = "staging"
assert_equal \ assert_equal \
"docker container ls --all --filter name=healthcheck-app-staging --quiet | xargs docker stop", "docker container ls --all --filter name=^healthcheck-app-staging-123$ --quiet | xargs docker stop",
new_command.stop.join(" ") new_command.stop.join(" ")
end end
test "remove" do test "remove" do
assert_equal \ assert_equal \
"docker container ls --all --filter name=healthcheck-app --quiet | xargs docker container rm", "docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker container rm",
new_command.remove.join(" ") new_command.remove.join(" ")
end end
@@ -75,13 +75,13 @@ class CommandsHealthcheckTest < ActiveSupport::TestCase
@destination = "staging" @destination = "staging"
assert_equal \ assert_equal \
"docker container ls --all --filter name=healthcheck-app-staging --quiet | xargs docker container rm", "docker container ls --all --filter name=^healthcheck-app-staging-123$ --quiet | xargs docker container rm",
new_command.remove.join(" ") new_command.remove.join(" ")
end end
test "logs" do test "logs" do
assert_equal \ assert_equal \
"docker container ls --all --filter name=healthcheck-app --quiet | xargs docker logs --tail 50 2>&1", "docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker logs --tail 50 2>&1",
new_command.logs.join(" ") new_command.logs.join(" ")
end end
@@ -89,7 +89,7 @@ class CommandsHealthcheckTest < ActiveSupport::TestCase
@destination = "staging" @destination = "staging"
assert_equal \ assert_equal \
"docker container ls --all --filter name=healthcheck-app-staging --quiet | xargs docker logs --tail 50 2>&1", "docker container ls --all --filter name=^healthcheck-app-staging-123$ --quiet | xargs docker logs --tail 50 2>&1",
new_command.logs.join(" ") new_command.logs.join(" ")
end end

View File

@@ -82,7 +82,7 @@ class CommandsTraefikTest < ActiveSupport::TestCase
test "traefik info" do test "traefik info" do
assert_equal \ assert_equal \
"docker ps --filter name=traefik", "docker ps --filter name=^traefik$",
new_command.info.join(" ") new_command.info.join(" ")
end end