Remove the healthcheck step
To speed up deployments, we'll remove the healthcheck step. This adds some risk to deployments for non-web roles - if they don't have a Docker healthcheck configured, then the only check we do is whether the container is running. If there is a bad image, we might see the container running before it exits and deploy it. Previously the healthcheck step would have avoided this by ensuring a web container could boot and serve traffic first. To mitigate this, we'll add a deployment barrier. Until one of the primary role containers passes its healthcheck, we'll keep the barrier up and avoid stopping the containers on the non-primary roles. If the primary role container fails its healthcheck, we'll close the barrier and shut down the new containers on the waiting roles. We also have a new integration test to check that we correctly handle a broken image. This highlighted that SSHKit's default runner will stop at the first error it encounters. We'll now have a custom runner that waits for all threads to finish, allowing them to clean up.
This commit is contained in:
24
test/integration/broken_deploy_test.rb
Normal file
24
test/integration/broken_deploy_test.rb
Normal file
@@ -0,0 +1,24 @@
|
||||
require_relative "integration_test"
|
||||
|
||||
class BrokenDeployTest < IntegrationTest
  # Deploys a working version, then a broken one, and checks that the first
  # version's app and worker containers are still up while the broken
  # version's worker container is not running.
  test "deploying a bad image" do
    @app = "app_with_roles"

    kamal :envify

    # First deploy: a known-good version.
    first_version = latest_app_version
    first_workers = "app-workers-#{first_version}"

    kamal :deploy

    assert_app_is_up version: first_version
    assert_container_running host: :vm3, name: first_workers

    # Second deploy: the app is broken first, so errors from the deploy
    # command are not raised here.
    second_version = break_app
    second_workers = "app-workers-#{second_version}"

    kamal(:deploy, raise_on_error: false)

    assert_app_is_up version: first_version
    assert_container_running host: :vm3, name: first_workers
    assert_container_not_running host: :vm3, name: second_workers
  end
end
|
||||
@@ -28,6 +28,7 @@ builder:
|
||||
COMMIT_SHA: <%= `git rev-parse HEAD` %>
|
||||
healthcheck:
|
||||
cmd: wget -qO- http://localhost > /dev/null || exit 1
|
||||
max_attempts: 3
|
||||
traefik:
|
||||
args:
|
||||
accesslog: true
|
||||
@@ -41,3 +42,4 @@ accessories:
|
||||
roles:
|
||||
- web
|
||||
stop_wait_time: 1
|
||||
readiness_delay: 0
|
||||
|
||||
@@ -22,6 +22,7 @@ builder:
|
||||
COMMIT_SHA: <%= `git rev-parse HEAD` %>
|
||||
healthcheck:
|
||||
cmd: wget -qO- http://localhost > /dev/null || exit 1
|
||||
max_attempts: 3
|
||||
traefik:
|
||||
args:
|
||||
accesslog: true
|
||||
@@ -35,3 +36,4 @@ accessories:
|
||||
roles:
|
||||
- web
|
||||
stop_wait_time: 1
|
||||
readiness_delay: 0
|
||||
|
||||
3
test/integration/docker/deployer/break_app.sh
Executable file
3
test/integration/docker/deployer/break_app.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/bash

# Usage: break_app.sh <app-dir>
# Replaces default.conf in <app-dir> with invalid content and commits the
# change (presumably so the next image built from this repo fails to serve
# traffic -- confirm against the app's Dockerfile).
# "$1" is quoted so a directory name containing whitespace or glob
# characters is passed to cd as a single argument.
cd "$1" && echo "bad nginx config" > default.conf && git commit -am 'Broken'
|
||||
@@ -78,6 +78,11 @@ class IntegrationTest < ActiveSupport::TestCase
|
||||
latest_app_version
|
||||
end
|
||||
|
||||
# Runs break_app.sh for the current @app on the deployer (from "/") and
# returns the app version reported afterwards by latest_app_version.
def break_app
  deployer_exec("./break_app.sh #{@app}", workdir: "/")

  latest_app_version
end
|
||||
|
||||
# Returns the captured output of `git rev-parse HEAD` run via deployer_exec.
def latest_app_version
  deployer_exec "git rev-parse HEAD", capture: true
end
|
||||
@@ -131,4 +136,16 @@ class IntegrationTest < ActiveSupport::TestCase
|
||||
puts "Tried to get the response code again and got #{app_response.code}"
|
||||
end
|
||||
end
|
||||
|
||||
# Fails the test unless container_running? is truthy for the given host and
# container name filter.
def assert_container_running(host:, name:)
  running = container_running?(host: host, name: name)
  assert running
end
|
||||
|
||||
# Fails the test if container_running? is truthy for the given host and
# container name filter.
def assert_container_not_running(host:, name:)
  running = container_running?(host: host, name: name)
  assert_not running
end
|
||||
|
||||
# Reports whether `docker ps` on +host+ lists any container matching the
# +name+ filter.
#
# The command output is piped through `tail -n+2` to drop the header row, so
# any remaining non-whitespace text means at least one container matched.
#
# host - compose service to exec into (interpolated into the command).
# name - value passed to docker's --filter=name=.
#
# Returns true when the remaining output is non-blank, false otherwise.
def container_running?(host:, name:)
  # The leftover debugging `p` call was removed so the helper no longer
  # writes to stdout on every invocation.
  listing = docker_compose("exec #{host} docker ps --filter=name=#{name} | tail -n+2", capture: true)
  listing.strip.present?
end
|
||||
end
|
||||
|
||||
@@ -56,6 +56,12 @@ class MainTest < IntegrationTest
|
||||
assert_app_is_up version: version
|
||||
assert_hooks_ran "pre-connect", "pre-build", "pre-deploy", "post-deploy"
|
||||
assert_container_running host: :vm3, name: "app-workers-#{version}"
|
||||
|
||||
second_version = update_app_rev
|
||||
|
||||
kamal :redeploy
|
||||
assert_app_is_up version: second_version
|
||||
assert_container_running host: :vm3, name: "app-workers-#{second_version}"
|
||||
end
|
||||
|
||||
test "config" do
|
||||
@@ -73,7 +79,7 @@ class MainTest < IntegrationTest
|
||||
assert_equal({ user: "root", port: 22, keepalive: true, keepalive_interval: 30, log_level: :fatal }, config[:ssh_options])
|
||||
assert_equal({ "multiarch" => false, "args" => { "COMMIT_SHA" => version } }, config[:builder])
|
||||
assert_equal [ "--log-opt", "max-size=\"10m\"" ], config[:logging]
|
||||
assert_equal({ "path" => "/up", "port" => 3000, "max_attempts" => 7, "exposed_port" => 3999, "cord"=>"/tmp/kamal-cord", "log_lines" => 50, "cmd"=>"wget -qO- http://localhost > /dev/null || exit 1" }, config[:healthcheck])
|
||||
assert_equal({ "path" => "/up", "port" => 3000, "max_attempts" => 3, "cord"=>"/tmp/kamal-cord", "log_lines" => 50, "cmd"=>"wget -qO- http://localhost > /dev/null || exit 1" }, config[:healthcheck])
|
||||
end
|
||||
|
||||
test "setup and remove" do
|
||||
@@ -157,8 +163,4 @@ class MainTest < IntegrationTest
|
||||
assert vm1_image_ids.any?
|
||||
assert vm1_container_ids.any?
|
||||
end
|
||||
|
||||
# Fails the test unless `docker ps -q` on the given host prints at least one
# container id for the name filter.
def assert_container_running(host:, name:)
  container_ids = docker_compose("exec #{host} docker ps --filter=name=#{name} -q", capture: true)
  assert container_ids.strip.present?
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user