From 371f98d67f543f72509ca0f6c1d46d92660c4eca Mon Sep 17 00:00:00 2001 From: David Heinemeier Hansson Date: Wed, 22 Feb 2023 19:04:23 +0100 Subject: [PATCH 1/6] Start before stopping and longer timeouts --- lib/mrsk/cli/app.rb | 8 ++++++-- lib/mrsk/cli/healthcheck.rb | 2 +- lib/mrsk/cli/main.rb | 7 +++++-- lib/mrsk/commands/app.rb | 10 ++++++++-- lib/mrsk/configuration/role.rb | 2 +- test/cli/app_test.rb | 24 +++++++++++++++++++----- test/commands/app_test.rb | 6 +++--- test/configuration/role_test.rb | 4 ++-- 8 files changed, 45 insertions(+), 18 deletions(-) diff --git a/lib/mrsk/cli/app.rb b/lib/mrsk/cli/app.rb index f4ff7c67..0f16e5a7 100644 --- a/lib/mrsk/cli/app.rb +++ b/lib/mrsk/cli/app.rb @@ -10,13 +10,17 @@ class Mrsk::Cli::App < Mrsk::Cli::Base execute *MRSK.auditor.record("Booted app version #{version}"), verbosity: :debug begin - execute *MRSK.app.stop, raise_on_non_zero_exit: false + old_version = capture_with_info(*MRSK.app.current_running_version).strip execute *MRSK.app.run(role: role.name) + sleep 10 + execute *MRSK.app.stop(version: old_version), raise_on_non_zero_exit: false if old_version.present? + rescue SSHKit::Command::Failed => e if e.message =~ /already in use/ - error "Rebooting container with same version already deployed on #{host}" + error "Rebooting container with same version #{version} already deployed on #{host}" execute *MRSK.auditor.record("Rebooted app version #{version}"), verbosity: :debug + execute *MRSK.app.stop(version: version) execute *MRSK.app.remove_container(version: version) execute *MRSK.app.run(role: role.name) else diff --git a/lib/mrsk/cli/healthcheck.rb b/lib/mrsk/cli/healthcheck.rb index eb7bb9ab..3fda7754 100644 --- a/lib/mrsk/cli/healthcheck.rb +++ b/lib/mrsk/cli/healthcheck.rb @@ -1,5 +1,5 @@ class Mrsk::Cli::Healthcheck < Mrsk::Cli::Base - MAX_ATTEMPTS = 5 + MAX_ATTEMPTS = 7 class HealthcheckError < StandardError; end diff --git a/lib/mrsk/cli/main.rb b/lib/mrsk/cli/main.rb index 352e7f30..52020092 100644 --- a/lib/mrsk/cli/main.rb +++ b/lib/mrsk/cli/main.rb @@ -55,11 +55,14 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base MRSK.version = version if container_name_available?(MRSK.config.service_with_version) - say "Stop current version, then start version #{version}...", :magenta + say "Start version #{version}, then stop the old version...", :magenta on(MRSK.hosts) do |host| - execute *MRSK.app.stop, raise_on_non_zero_exit: false + old_version = capture_with_info(*MRSK.app.current_running_version).strip.presence + execute *MRSK.app.start + sleep 10 + execute *MRSK.app.stop(version: old_version), raise_on_non_zero_exit: false end audit_broadcast "Rolled back app to version #{version}" diff --git a/lib/mrsk/commands/app.rb b/lib/mrsk/commands/app.rb index 4c8f0c7e..4b1de91a 100644 --- a/lib/mrsk/commands/app.rb +++ b/lib/mrsk/commands/app.rb @@ -18,8 +18,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base docker :start, service_with_version end - def stop - pipe current_container_id, xargs(docker(:stop)) + def stop(version: nil) + pipe \ + version ? container_id_for_version(version) : current_container_id, + xargs(docker(:stop)) end def info @@ -132,6 +134,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base end end + def container_id_for_version(version) + container_id_for(container_name: service_with_version(version)) + end + def service_filter [ "--filter", "label=service=#{config.service}" ] end diff --git a/lib/mrsk/configuration/role.rb b/lib/mrsk/configuration/role.rb index 628299c0..1795649b 100644 --- a/lib/mrsk/configuration/role.rb +++ b/lib/mrsk/configuration/role.rb @@ -61,7 +61,7 @@ class Mrsk::Configuration::Role "traefik.http.routers.#{config.service}.rule" => "PathPrefix(`/`)", "traefik.http.services.#{config.service}.loadbalancer.healthcheck.path" => config.healthcheck["path"], "traefik.http.services.#{config.service}.loadbalancer.healthcheck.interval" => "1s", - "traefik.http.middlewares.#{config.service}.retry.attempts" => "3", + "traefik.http.middlewares.#{config.service}.retry.attempts" => "5", "traefik.http.middlewares.#{config.service}.retry.initialinterval" => "500ms" } else diff --git a/test/cli/app_test.rb b/test/cli/app_test.rb index f8b5da05..76f59a80 100644 --- a/test/cli/app_test.rb +++ b/test/cli/app_test.rb @@ -2,7 +2,16 @@ require_relative "cli_test_case" class CliAppTest < CliTestCase test "boot" do - assert_match /Running docker run --detach --restart unless-stopped/, run_command("boot") + # Stub current version fetch + SSHKit::Backend::Abstract.any_instance.stubs(:capture) + .returns("999") # new version + .then + .returns("123") # old version + + run_command("boot").tap do |output| + assert_match /docker run --detach --restart unless-stopped/, output + assert_match /docker container ls --all --filter name=app-123 --quiet | xargs docker stop/, output + end end test "boot will reboot if same version is already running" do @@ -11,12 +20,17 @@ class CliAppTest < CliTestCase # Prevent expected failures from outputting to terminal Thread.report_on_exception = false - MRSK.app.stubs(:run).raises(SSHKit::Command::Failed.new("already in use")).then.returns([ :docker, :run ]) + MRSK.app.stubs(:run) + .raises(SSHKit::Command::Failed.new("already in use")) + .then + .raises(SSHKit::Command::Failed.new("already in use")) + .then + .returns([ :docker, :run ]) run_command("boot").tap do |output| - assert_match /Rebooting container with same version already deployed/, output # Can't start what's already running - assert_match /docker ps --quiet --filter label=service=app \| xargs docker stop/, output # Stop what's running - assert_match /docker container ls --all --filter name=app-999 --quiet \| xargs docker container rm/, output # Remove old container + assert_match /Rebooting container with same version 999 already deployed/, output # Can't start what's already running + assert_match /docker container ls --all --filter name=app-999 --quiet | xargs docker container rm/, output # Stop old running + assert_match /docker container ls --all --filter name=app-999 --quiet | xargs docker container rm/, output # Remove old container assert_match /docker run/, output # Start new container end ensure diff --git a/test/commands/app_test.rb b/test/commands/app_test.rb index adce57ae..a62ab80c 100644 --- a/test/commands/app_test.rb +++ b/test/commands/app_test.rb @@ -14,7 +14,7 @@ class CommandsAppTest < ActiveSupport::TestCase test "run" do assert_equal \ - "docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"3\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999", + "docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"5\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999", @app.run.join(" ") end @@ -22,7 +22,7 @@ class CommandsAppTest < ActiveSupport::TestCase @config[:volumes] = ["/local/path:/container/path" ] assert_equal \ - "docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --volume /local/path:/container/path --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"3\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999", + "docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --volume /local/path:/container/path --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"5\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999", @app.run.join(" ") end @@ -30,7 +30,7 @@ class CommandsAppTest < ActiveSupport::TestCase @config[:healthcheck] = { "path" => "/healthz" } assert_equal \ - "docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/healthz\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"3\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999", + "docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/healthz\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"5\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999", @app.run.join(" ") end diff --git a/test/configuration/role_test.rb b/test/configuration/role_test.rb index b1916a52..1017221f 100644 --- a/test/configuration/role_test.rb +++ b/test/configuration/role_test.rb @@ -42,7 +42,7 @@ class ConfigurationRoleTest < ActiveSupport::TestCase end test "special label args for web" do - assert_equal [ "--label", "service=\"app\"", "--label", "role=\"web\"", "--label", "traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\"", "--label", "traefik.http.middlewares.app.retry.attempts=\"3\"", "--label", "traefik.http.middlewares.app.retry.initialinterval=\"500ms\""], @config.role(:web).label_args + assert_equal [ "--label", "service=\"app\"", "--label", "role=\"web\"", "--label", "traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\"", "--label", "traefik.http.middlewares.app.retry.attempts=\"5\"", "--label", "traefik.http.middlewares.app.retry.initialinterval=\"500ms\""], @config.role(:web).label_args end test "custom labels" do @@ -66,7 +66,7 @@ class ConfigurationRoleTest < ActiveSupport::TestCase c[:servers]["beta"] = { "traefik" => "true", "hosts" => [ "1.1.1.5" ] } }) - assert_equal [ "--label", "service=\"app\"", "--label", "role=\"beta\"", "--label", "traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\"", "--label", "traefik.http.middlewares.app.retry.attempts=\"3\"", "--label", "traefik.http.middlewares.app.retry.initialinterval=\"500ms\"" ], config.role(:beta).label_args + assert_equal [ "--label", "service=\"app\"", "--label", "role=\"beta\"", "--label", "traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\"", "--label", "traefik.http.middlewares.app.retry.attempts=\"5\"", "--label", "traefik.http.middlewares.app.retry.initialinterval=\"500ms\"" ], config.role(:beta).label_args end test "env overwritten by role" do From e12436a1db71dabc859a87cc7482921845f4a3ab Mon Sep 17 00:00:00 2001 From: David Heinemeier Hansson Date: Thu, 23 Feb 2023 12:02:49 +0100 Subject: [PATCH 2/6] Extract readiness_delay to config --- lib/mrsk/cli/app.rb | 2 +- lib/mrsk/cli/main.rb | 2 +- lib/mrsk/configuration.rb | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/mrsk/cli/app.rb b/lib/mrsk/cli/app.rb index 0f16e5a7..bab5c063 100644 --- a/lib/mrsk/cli/app.rb +++ b/lib/mrsk/cli/app.rb @@ -12,7 +12,7 @@ class Mrsk::Cli::App < Mrsk::Cli::Base begin old_version = capture_with_info(*MRSK.app.current_running_version).strip execute *MRSK.app.run(role: role.name) - sleep 10 + sleep MRSK.config.readiness_delay execute *MRSK.app.stop(version: old_version), raise_on_non_zero_exit: false if old_version.present? rescue SSHKit::Command::Failed => e diff --git a/lib/mrsk/cli/main.rb b/lib/mrsk/cli/main.rb index a5cb08c9..d0986bc5 100644 --- a/lib/mrsk/cli/main.rb +++ b/lib/mrsk/cli/main.rb @@ -61,7 +61,7 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base old_version = capture_with_info(*MRSK.app.current_running_version).strip.presence execute *MRSK.app.start - sleep 10 + sleep MRSK.config.readiness_delay execute *MRSK.app.stop(version: old_version), raise_on_non_zero_exit: false end diff --git a/lib/mrsk/configuration.rb b/lib/mrsk/configuration.rb index ab360844..650047a2 100644 --- a/lib/mrsk/configuration.rb +++ b/lib/mrsk/configuration.rb @@ -136,6 +136,9 @@ class Mrsk::Configuration { "path" => "/up", "port" => 3000 }.merge(raw_config.healthcheck || {}) end + def readiness_delay + raw_config.readiness_delay || 7 + end def valid? ensure_required_keys_present && ensure_env_available From 4c399a74bb42d30fc81f7f984e0b78d1f85a5d78 Mon Sep 17 00:00:00 2001 From: David Heinemeier Hansson Date: Thu, 23 Feb 2023 12:02:56 +0100 Subject: [PATCH 3/6] Update to match latest --- test/cli/main_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cli/main_test.rb b/test/cli/main_test.rb index 7c6de597..36dd0f33 100644 --- a/test/cli/main_test.rb +++ b/test/cli/main_test.rb @@ -19,7 +19,7 @@ class CliMainTest < CliTestCase Mrsk::Cli::Main.any_instance.stubs(:container_name_available?).returns(true) run_command("rollback", "123").tap do |output| - assert_match /Stop current version, then start version 123/, output + assert_match /Start version 123, then stop the old version/, output assert_match /docker ps -q --filter label=service=app | xargs docker stop/, output assert_match /docker start app-123/, output end From 817336df49ae6cf6b22bbd001376cdc135bbf5bc Mon Sep 17 00:00:00 2001 From: David Heinemeier Hansson Date: Thu, 23 Feb 2023 12:03:03 +0100 Subject: [PATCH 4/6] No readiness delay in testing --- test/fixtures/deploy_with_accessories.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/fixtures/deploy_with_accessories.yml b/test/fixtures/deploy_with_accessories.yml index ee9d84fb..0d0c86f3 100644 --- a/test/fixtures/deploy_with_accessories.yml +++ b/test/fixtures/deploy_with_accessories.yml @@ -27,3 +27,5 @@ accessories: port: 6379 directories: - data:/data + +readiness_delay: 0 \ No newline at end of file From f4f2b5cb1700bdc1a9c3e8c26ce9af10cb75df29 Mon Sep 17 00:00:00 2001 From: David Heinemeier Hansson Date: Thu, 23 Feb 2023 12:04:57 +0100 Subject: [PATCH 5/6] Communicate the readiness delay --- lib/mrsk/cli/app.rb | 5 +++++ lib/mrsk/cli/main.rb | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/lib/mrsk/cli/app.rb b/lib/mrsk/cli/app.rb index bab5c063..ebc9a4d9 100644 --- a/lib/mrsk/cli/app.rb +++ b/lib/mrsk/cli/app.rb @@ -5,6 +5,8 @@ class Mrsk::Cli::App < Mrsk::Cli::Base using_version(options[:version] || most_recent_version_available) do |version| say "Start container with version #{version} (or reboot if already running)...", :magenta + cli = self + MRSK.config.roles.each do |role| on(role.hosts) do |host| execute *MRSK.auditor.record("Booted app version #{version}"), verbosity: :debug @@ -12,7 +14,10 @@ class Mrsk::Cli::App < Mrsk::Cli::Base begin old_version = capture_with_info(*MRSK.app.current_running_version).strip execute *MRSK.app.run(role: role.name) + + cli.say "Waiting #{MRSK.config.readiness_delay}s for app to boot...", :magenta sleep MRSK.config.readiness_delay + execute *MRSK.app.stop(version: old_version), raise_on_non_zero_exit: false if old_version.present? rescue SSHKit::Command::Failed => e diff --git a/lib/mrsk/cli/main.rb b/lib/mrsk/cli/main.rb index d0986bc5..eb96bc38 100644 --- a/lib/mrsk/cli/main.rb +++ b/lib/mrsk/cli/main.rb @@ -57,11 +57,16 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base if container_name_available?(MRSK.config.service_with_version) say "Start version #{version}, then stop the old version...", :magenta + cli = self + on(MRSK.hosts) do |host| old_version = capture_with_info(*MRSK.app.current_running_version).strip.presence execute *MRSK.app.start + + cli.say "Waiting #{MRSK.config.readiness_delay}s for app to start...", :magenta sleep MRSK.config.readiness_delay + execute *MRSK.app.stop(version: old_version), raise_on_non_zero_exit: false end From bc6963e6bfdcbb58be994abc49675f0692c089f5 Mon Sep 17 00:00:00 2001 From: David Heinemeier Hansson Date: Thu, 23 Feb 2023 12:16:58 +0100 Subject: [PATCH 6/6] Note that rebooting may cause air gap --- lib/mrsk/cli/app.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/mrsk/cli/app.rb b/lib/mrsk/cli/app.rb index ebc9a4d9..fed1883f 100644 --- a/lib/mrsk/cli/app.rb +++ b/lib/mrsk/cli/app.rb @@ -22,7 +22,7 @@ class Mrsk::Cli::App < Mrsk::Cli::Base rescue SSHKit::Command::Failed => e if e.message =~ /already in use/ - error "Rebooting container with same version #{version} already deployed on #{host}" + error "Rebooting container with same version #{version} already deployed on #{host} (may cause gap in zero-downtime promise!)" execute *MRSK.auditor.record("Rebooted app version #{version}"), verbosity: :debug execute *MRSK.app.stop(version: version)