Zero downtime redeploys

When deploying, check whether there is already a container with the same
name. If there is, rename it to "<version>_<random_hex_string>" to remove
the name clash with the new container we want to boot.

We can then do the normal zero downtime run/wait/stop.

While implementing this I discovered that `--filter name=foo` does a
substring match for foo, so I've updated those filters to do an exact
match instead (`--filter name=^foo$`).
This commit is contained in:
Donal McBreen
2023-03-24 17:06:54 +00:00
parent 01a2b678d7
commit 05488e4c1e
12 changed files with 48 additions and 48 deletions

View File

@@ -15,22 +15,17 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
execute *MRSK.auditor(role: role).record("Booted app version #{version}"), verbosity: :debug
begin
if capture_with_info(*MRSK.app(role: role).container_id_for_version(version)).present?
tmp_version = "#{version}_#{SecureRandom.hex(8)}"
info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}"
execute *MRSK.auditor(role: role).record("Renaming container #{version} to #{tmp_version}"), verbosity: :debug
execute *MRSK.app(role: role).rename_container(version: version, new_version: tmp_version)
end
old_version = capture_with_info(*MRSK.app(role: role).current_running_version).strip
execute *MRSK.app(role: role).run
sleep MRSK.config.readiness_delay
execute *MRSK.app(role: role).stop(version: old_version), raise_on_non_zero_exit: false if old_version.present?
rescue SSHKit::Command::Failed => e
if e.message =~ /already in use/
error "Rebooting container with same version #{version} already deployed on #{host} (may cause gap in zero-downtime promise!)"
execute *MRSK.auditor(role: role).record("Rebooted app version #{version}"), verbosity: :debug
execute *MRSK.app(role: role).stop(version: version)
execute *MRSK.app(role: role).remove_container(version: version)
execute *MRSK.app(role: role).run
else
raise
end
end
end
end

View File

@@ -86,6 +86,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
docker :ps, "--quiet", *filter_args
end
# Looks up the container for a specific version: resolves the version to a
# container name with container_name and hands that name to container_id_for.
# The boot command uses a non-empty result to decide that a container for this
# version already exists and must be renamed before the new one is run.
def container_id_for_version(version)
container_id_for(container_name: container_name(version))
end
def current_running_version
# FIXME: Find more graceful way to extract the version from "app-version" than using sed and tail!
pipe \
@@ -108,6 +112,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
xargs(docker(:container, :rm))
end
# Builds a `docker rename` command that moves the container named for
# `version` to the name derived from `new_version`. Both names are produced by
# container_name, so they share the same service/role/destination prefix.
def rename_container(version:, new_version:)
docker :rename, container_name(version), container_name(new_version)
end
# Builds a `docker container prune --force` command, narrowed by the
# arguments that filter_args supplies.
def remove_containers
docker :container, :prune, "--force", *filter_args
end
@@ -126,10 +134,6 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
[ config.service, role, config.destination, version || config.version ].compact.join("-")
end
def container_id_for_version(version)
container_id_for(container_name: container_name(version))
end
# Converts `filters` into "--filter" command-line arguments via argumentize.
# NOTE(review): presumably yields one "--filter <value>" pair per entry in
# `filters` -- confirm against argumentize, which is not visible here.
def filter_args
argumentize "--filter", filters
end

View File

@@ -16,7 +16,7 @@ module Mrsk::Commands
end
def container_id_for(container_name:)
docker :container, :ls, "--all", "--filter", "name=#{container_name}", "--quiet"
docker :container, :ls, "--all", "--filter", "name=^#{container_name}$", "--quiet"
end
private

View File

@@ -43,7 +43,7 @@ class Mrsk::Commands::Healthcheck < Mrsk::Commands::Base
end
def container_id
container_id_for(container_name: container_name)
container_id_for(container_name: container_name_with_version)
end
def health_url

View File

@@ -26,7 +26,7 @@ class Mrsk::Commands::Traefik < Mrsk::Commands::Base
end
def info
docker :ps, "--filter", "name=traefik"
docker :ps, "--filter", "name=^traefik$"
end
def logs(since: nil, lines: nil, grep: nil)

View File

@@ -7,25 +7,26 @@ class CliAppTest < CliTestCase
run_command("boot").tap do |output|
assert_match "docker run --detach --restart unless-stopped", output
assert_match "docker container ls --all --filter name=app-web-123 --quiet | xargs docker stop", output
assert_match "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop", output
end
end
test "boot will reboot if same version is already running" do
test "boot will rename if same version is already running" do
run_command("details") # Preheat MRSK const
# Prevent expected failures from outputting to terminal
Thread.report_on_exception = false
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet")
.returns("12345678") # running version
Mrsk::Commands::App.any_instance.stubs(:run)
.raises(SSHKit::Command::Failed.new("already in use"))
.then
.returns([ :docker, :run ])
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :ps, "--filter", "label=service=app", "--filter", "label=role=web", "--format", "\"{{.Names}}\"", "|", "sed 's/-/\\n/g'", "|", "tail -n 1")
.returns("123") # old version
run_command("boot").tap do |output|
assert_match "Rebooting container with same version latest already deployed", output # Can't start what's already running
assert_match "docker container ls --all --filter name=app-web-latest --quiet | xargs docker container rm", output # Remove old container
assert_match "docker run", output # Start new container
assert_match /Renaming container .* to .* as already deployed on 1.1.1.1/, output # Rename
assert_match /docker rename .* .*/, output
assert_match "docker run --detach --restart unless-stopped", output
assert_match "docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop", output
end
ensure
Thread.report_on_exception = true
@@ -59,7 +60,7 @@ class CliAppTest < CliTestCase
test "remove_container" do
run_command("remove_container", "1234567").tap do |output|
assert_match "docker container ls --all --filter name=app-web-1234567 --quiet | xargs docker container rm", output
assert_match "docker container ls --all --filter name=^app-web-1234567$ --quiet | xargs docker container rm", output
end
end

View File

@@ -7,11 +7,11 @@ class CliHealthcheckTest < CliTestCase
SSHKit::Backend::Abstract.any_instance.stubs(:sleep) # No sleeping when retrying
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=healthcheck-app", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :run, "--detach", "--name", "healthcheck-app-999", "--publish", "3999:3000", "--label", "service=healthcheck-app", "-e", "MRSK_CONTAINER_NAME=\"healthcheck-app\"", "dhh/app:999")
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=healthcheck-app", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false)
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :container, :rm, raise_on_non_zero_exit: false)
# Fail twice to test retry logic
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
@@ -38,7 +38,7 @@ class CliHealthcheckTest < CliTestCase
.with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up")
.returns("curl: command not found")
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=healthcheck-app", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
exception = assert_raises SSHKit::Runner::ExecuteError do
run_command("perform")
@@ -55,7 +55,7 @@ class CliHealthcheckTest < CliTestCase
.with(:curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", "--max-time", "2", "http://localhost:3999/up")
.returns("500")
SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=healthcheck-app", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
.with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :logs, "--tail", 50, "2>&1")
exception = assert_raises do
run_command("perform")

View File

@@ -95,7 +95,7 @@ class CliMainTest < CliTestCase
run_command("rollback", "123").tap do |output|
assert_match "Start version 123", output
assert_match "docker start app-123", output
assert_match "docker container ls --all --filter name=app-version-to-rollback --quiet | xargs docker stop", output, "Should stop the container that was previously running"
assert_match "docker container ls --all --filter name=^app-version-to-rollback$ --quiet | xargs docker stop", output, "Should stop the container that was previously running"
end
end

View File

@@ -36,7 +36,7 @@ class CliTraefikTest < CliTestCase
test "details" do
run_command("details").tap do |output|
assert_match "docker ps --filter name=traefik", output
assert_match "docker ps --filter name=^traefik$", output
end
end

View File

@@ -76,7 +76,7 @@ class CommandsAppTest < ActiveSupport::TestCase
test "stop with version" do
assert_equal \
"docker container ls --all --filter name=app-web-123 --quiet | xargs docker stop",
"docker container ls --all --filter name=^app-web-123$ --quiet | xargs docker stop",
new_command.stop(version: "123").join(" ")
end
@@ -193,7 +193,7 @@ class CommandsAppTest < ActiveSupport::TestCase
test "container_id_for" do
assert_equal \
"docker container ls --all --filter name=app-999 --quiet",
"docker container ls --all --filter name=^app-999$ --quiet",
new_command.container_id_for(container_name: "app-999").join(" ")
end
@@ -224,14 +224,14 @@ class CommandsAppTest < ActiveSupport::TestCase
test "remove_container" do
assert_equal \
"docker container ls --all --filter name=app-web-999 --quiet | xargs docker container rm",
"docker container ls --all --filter name=^app-web-999$ --quiet | xargs docker container rm",
new_command.remove_container(version: "999").join(" ")
end
test "remove_container with destination" do
@destination = "staging"
assert_equal \
"docker container ls --all --filter name=app-web-staging-999 --quiet | xargs docker container rm",
"docker container ls --all --filter name=^app-web-staging-999$ --quiet | xargs docker container rm",
new_command.remove_container(version: "999").join(" ")
end

View File

@@ -53,7 +53,7 @@ class CommandsHealthcheckTest < ActiveSupport::TestCase
test "stop" do
assert_equal \
"docker container ls --all --filter name=healthcheck-app --quiet | xargs docker stop",
"docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker stop",
new_command.stop.join(" ")
end
@@ -61,13 +61,13 @@ class CommandsHealthcheckTest < ActiveSupport::TestCase
@destination = "staging"
assert_equal \
"docker container ls --all --filter name=healthcheck-app-staging --quiet | xargs docker stop",
"docker container ls --all --filter name=^healthcheck-app-staging-123$ --quiet | xargs docker stop",
new_command.stop.join(" ")
end
test "remove" do
assert_equal \
"docker container ls --all --filter name=healthcheck-app --quiet | xargs docker container rm",
"docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker container rm",
new_command.remove.join(" ")
end
@@ -75,13 +75,13 @@ class CommandsHealthcheckTest < ActiveSupport::TestCase
@destination = "staging"
assert_equal \
"docker container ls --all --filter name=healthcheck-app-staging --quiet | xargs docker container rm",
"docker container ls --all --filter name=^healthcheck-app-staging-123$ --quiet | xargs docker container rm",
new_command.remove.join(" ")
end
test "logs" do
assert_equal \
"docker container ls --all --filter name=healthcheck-app --quiet | xargs docker logs --tail 50 2>&1",
"docker container ls --all --filter name=^healthcheck-app-123$ --quiet | xargs docker logs --tail 50 2>&1",
new_command.logs.join(" ")
end
@@ -89,7 +89,7 @@ class CommandsHealthcheckTest < ActiveSupport::TestCase
@destination = "staging"
assert_equal \
"docker container ls --all --filter name=healthcheck-app-staging --quiet | xargs docker logs --tail 50 2>&1",
"docker container ls --all --filter name=^healthcheck-app-staging-123$ --quiet | xargs docker logs --tail 50 2>&1",
new_command.logs.join(" ")
end

View File

@@ -82,7 +82,7 @@ class CommandsTraefikTest < ActiveSupport::TestCase
test "traefik info" do
assert_equal \
"docker ps --filter name=traefik",
"docker ps --filter name=^traefik$",
new_command.info.join(" ")
end