Remove the healthcheck step

To speed up deployments, we'll remove the healthcheck step.

This adds some risk to deployments for non-web roles - if they don't
have a Docker healthcheck configured then the only check we do is if
the container is running.

If there is a bad image we might see the container running before it
exits and deploy it. Previously the healthcheck step would have avoided
this by ensuring a web container could boot and serve traffic first.

To mitigate this, we'll add a deployment barrier. Until one of the
primary role containers passes its healthcheck, we'll keep the barrier
up and avoid stopping the containers on the non-primary roles.

It the primary role container fails its healthcheck, we'll close the
barrier and shut down the new containers on the waiting roles.

We also have a new integration test to check we correctly handle a
a broken image. This highlighted that SSHKit's default runner will
stop at the first error it encounters. We'll now have a custom runner
that waits for all threads to finish allowing them to clean up.
This commit is contained in:
Donal McBreen
2024-03-21 11:36:21 +00:00
parent 990f1b4413
commit 0efb5ccfff
24 changed files with 269 additions and 327 deletions

View File

@@ -27,7 +27,6 @@ class CliMainTest < CliTestCase
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:registry:login", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:pull", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:traefik:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:prune:all", [], invoke_options)
@@ -40,7 +39,6 @@ class CliMainTest < CliTestCase
assert_match /Log into image registry/, output
assert_match /Pull app image/, output
assert_match /Ensure Traefik is running/, output
assert_match /Ensure app can pass healthcheck/, output
assert_match /Detect stale containers/, output
assert_match /Prune old containers and images/, output
assert_match /Releasing the deploy lock/, output
@@ -53,7 +51,6 @@ class CliMainTest < CliTestCase
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:registry:login", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:deliver", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:traefik:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:prune:all", [], invoke_options)
@@ -67,7 +64,6 @@ class CliMainTest < CliTestCase
assert_match /Build and push app image/, output
assert_hook_ran "pre-deploy", output, **hook_variables
assert_match /Ensure Traefik is running/, output
assert_match /Ensure app can pass healthcheck/, output
assert_match /Detect stale containers/, output
assert_match /Prune old containers and images/, output
assert_hook_ran "post-deploy", output, **hook_variables, runtime: true
@@ -80,7 +76,6 @@ class CliMainTest < CliTestCase
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:registry:login", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:pull", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:traefik:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:prune:all", [], invoke_options)
@@ -90,7 +85,6 @@ class CliMainTest < CliTestCase
assert_match /Log into image registry/, output
assert_match /Pull app image/, output
assert_match /Ensure Traefik is running/, output
assert_match /Ensure app can pass healthcheck/, output
assert_match /Detect stale containers/, output
assert_match /Prune old containers and images/, output
assert_match /Releasing the deploy lock/, output
@@ -156,7 +150,6 @@ class CliMainTest < CliTestCase
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:registry:login", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:deliver", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:traefik:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:prune:all", [], invoke_options)
@@ -187,7 +180,6 @@ class CliMainTest < CliTestCase
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:registry:login", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:deliver", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:traefik:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:prune:all", [], invoke_options)
@@ -199,7 +191,6 @@ class CliMainTest < CliTestCase
invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "version" => "999", "skip_hooks" => false, "verbose" => true }
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:deliver", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
@@ -212,7 +203,6 @@ class CliMainTest < CliTestCase
assert_match /Build and push app image/, output
assert_hook_ran "pre-deploy", output, **hook_variables
assert_match /Running the pre-deploy hook.../, output
assert_match /Ensure app can pass healthcheck/, output
assert_hook_ran "post-deploy", output, **hook_variables, runtime: true
end
end
@@ -221,13 +211,11 @@ class CliMainTest < CliTestCase
invoke_options = { "config_file" => "test/fixtures/deploy_simple.yml", "version" => "999", "skip_hooks" => false }
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:build:pull", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:healthcheck:perform", [], invoke_options)
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true))
Kamal::Cli::Main.any_instance.expects(:invoke).with("kamal:cli:app:boot", [], invoke_options)
run_command("redeploy", "--skip_push").tap do |output|
assert_match /Pull app image/, output
assert_match /Ensure app can pass healthcheck/, output
end
end