To speed up deployments, we'll remove the healthcheck step. This adds some risk to deployments for non-web roles - if they don't have a Docker healthcheck configured then the only check we do is if the container is running. If there is a bad image we might see the container running before it exits and deploy it. Previously the healthcheck step would have avoided this by ensuring a web container could boot and serve traffic first. To mitigate this, we'll add a deployment barrier. Until one of the primary role containers passes its healthcheck, we'll keep the barrier up and avoid stopping the containers on the non-primary roles. If the primary role container fails its healthcheck, we'll close the barrier and shut down the new containers on the waiting roles. We also have a new integration test to check we correctly handle a broken image. This highlighted that SSHKit's default runner will stop at the first error it encounters. We'll now have a custom runner that waits for all threads to finish, allowing them to clean up.
152 lines
4.5 KiB
Ruby
152 lines
4.5 KiB
Ruby
require "net/http"
|
|
require "test_helper"
|
|
|
|
# Base class for integration tests that drive a docker compose environment
# (see test/integration) and run commands inside its `deployer` container.
class IntegrationTest < ActiveSupport::TestCase
  # Bring up a fresh compose environment under a unique TEST_ID, wait until
  # every container reports healthy, then prepare the deployer container.
  setup do
    ENV["TEST_ID"] = SecureRandom.hex
    docker_compose "up --build -d"
    wait_for_healthy
    setup_deployer
    @app = "app"
  end

  # On failure, dump the logs of every compose service before tearing the
  # environment down so the cause can be diagnosed from the test output.
  teardown do
    unless passed?
      [ :deployer, :vm1, :vm2, :shared, :load_balancer, :registry ].each do |container|
        puts
        puts "Logs for #{container}:"
        docker_compose :logs, container
      end
    end

    docker_compose "down -t 1"
  end

  private

  # Runs `docker compose <commands>` from test/integration with TEST_ID set.
  #
  # commands       - arguments, joined with spaces into one shell command line.
  # capture        - when true, run inside `stdouted` and return its result
  #                  (helper defined outside this file; presumably the
  #                  captured stdout - confirm in test_helper).
  # raise_on_error - when true (default), raise if the command did not succeed.
  #
  # Returns the captured result when capture is true, otherwise nil.
  # Raises RuntimeError when the command fails and raise_on_error is true.
  def docker_compose(*commands, capture: false, raise_on_error: true)
    command = "TEST_ID=#{ENV["TEST_ID"]} docker compose #{commands.join(" ")}"
    succeeded = false

    if capture
      result = stdouted { succeeded = system("cd test/integration && #{command}") }
    else
      succeeded = system("cd test/integration && #{command}")
    end

    raise "Command `#{command}` failed with error code `#{$?}`" if !succeeded && raise_on_error

    result
  end

  # Executes a command inside the deployer container.
  #
  # workdir - working directory in the container; defaults to "/<@app>".
  # options - forwarded to docker_compose (capture:, raise_on_error:).
  def deployer_exec(*commands, workdir: nil, **options)
    workdir ||= "/#{@app}"
    docker_compose("exec --workdir #{workdir} deployer #{commands.join(" ")}", **options)
  end

  # Runs `kamal <commands>` inside the deployer container.
  def kamal(*commands, **options)
    deployer_exec(:kamal, *commands, **options)
  end

  # Asserts that the app endpoint currently answers with a 502.
  def assert_app_is_down
    response = app_response
    debug_response_code(response, "502")
    assert_equal "502", response.code
  end

  # Asserts that the app endpoint answers with a 200 and, when a version is
  # given, that the response body matches it.
  def assert_app_is_up(version: nil)
    response = app_response
    debug_response_code(response, "200")
    assert_equal "200", response.code
    assert_app_version(version, response) if version
  end

  # Polls the app every 0.1s until it has answered 200 `up_count` times or
  # `timeout` seconds have elapsed, then asserts the required count was met.
  def wait_for_app_to_be_up(timeout: 20, up_count: 3)
    timeout_at = Time.now + timeout
    up_times = 0
    response = app_response

    while up_times < up_count && timeout_at > Time.now
      sleep 0.1
      up_times += 1 if response.code == "200"
      response = app_response
    end

    # Expected value first so a failure reads "Expected: 3, Actual: n".
    assert_equal up_count, up_times
  end

  # Performs a GET against the app's /version endpoint and returns the
  # Net::HTTP response.
  def app_response
    Net::HTTP.get_response(URI.parse("http://localhost:12345/version"))
  end

  # Runs ./update_app_rev.sh for the app from "/" in the deployer container
  # and returns the app's resulting HEAD revision.
  def update_app_rev
    deployer_exec "./update_app_rev.sh #{@app}", workdir: "/"
    latest_app_version
  end

  # Runs ./break_app.sh for the app from "/" in the deployer container and
  # returns the app's resulting HEAD revision.
  def break_app
    deployer_exec "./break_app.sh #{@app}", workdir: "/"
    latest_app_version
  end

  # Returns the captured output of `git rev-parse HEAD` run in the app
  # directory of the deployer container.
  def latest_app_version
    deployer_exec("git rev-parse HEAD", capture: true)
  end

  # Asserts that the (stripped) response body equals the expected version.
  def assert_app_version(version, response)
    assert_equal version, response.body.strip
  end

  # For each hook name, removes /tmp/<TEST_ID>/<hook> inside the deployer
  # container and asserts `rm -v` reported the removal, i.e. the file existed.
  # Presumably each hook creates its marker file when it runs - confirm
  # against the hook scripts.
  def assert_hooks_ran(*hooks)
    hooks.each do |hook|
      file = "/tmp/#{ENV["TEST_ID"]}/#{hook}"
      assert_equal "removed '#{file}'", deployer_exec("rm -v #{file}", capture: true).strip
    end
  end

  # Asserts the response code is 200, printing diagnostics first when it is not.
  def assert_200(response)
    debug_response_code(response, "200")
    assert_equal "200", response.code
  end

  # Blocks until `docker compose ps -a` lists no container lacking the
  # "(healthy)" marker. Once `timeout` seconds have passed it prints the
  # offending containers and raises.
  #
  # NOTE(review): the comparison with "0" relies on the captured output being
  # whitespace-stripped by `stdouted` - confirm in test_helper.
  def wait_for_healthy(timeout: 30)
    timeout_at = Time.now + timeout

    while docker_compose("ps -a | tail -n +2 | grep -v '(healthy)' | wc -l", capture: true) != "0"
      if timeout_at < Time.now
        docker_compose("ps -a | tail -n +2 | grep -v '(healthy)'")
        raise "Container not healthy after #{timeout} seconds"
      end

      sleep 0.1
    end
  end

  # Runs ./setup.sh in the deployer container the first time it is called in
  # this process; the $DEPLOYER_SETUP global guards against repeating it.
  def setup_deployer
    deployer_exec("./setup.sh", workdir: "/") unless $DEPLOYER_SETUP
    $DEPLOYER_SETUP = true
  end

  # When the response code differs from expected_code, prints the traefik and
  # load balancer logs and the code obtained from a fresh request.
  #
  # The parameter is deliberately not called `app_response`: a local of that
  # name would shadow the helper method and the final "again" line would just
  # re-read the same response instead of issuing a new request.
  def debug_response_code(response, expected_code)
    code = response.code
    return if code == expected_code

    puts "Got response code #{code}, here are the traefik logs:"
    kamal :traefik, :logs
    puts "And here are the load balancer logs"
    docker_compose :logs, :load_balancer
    puts "Tried to get the response code again and got #{app_response.code}"
  end

  # Asserts that a container matching `name` is running on `host`.
  def assert_container_running(host:, name:)
    assert container_running?(host: host, name: name)
  end

  # Asserts that no container matching `name` is running on `host`.
  def assert_container_not_running(host:, name:)
    assert_not container_running?(host: host, name: name)
  end

  # Returns true when `docker ps --filter=name=<name>` on the given compose
  # service lists at least one container (the header row is dropped by tail).
  def container_running?(host:, name:)
    docker_compose("exec #{host} docker ps --filter=name=#{name} | tail -n+2", capture: true).strip.present?
  end
end
|