Simplified deploy/drain timeouts

Remove `stop_wait_time` and `readiness_timeout` from the root config
and remove `deploy_timeout` and `drain_timeout` from the proxy config.

Instead we'll just have `deploy_timeout` and `drain_timeout` in the
root config.

For roles that run the proxy, they are passed to the kamal-proxy deploy
command. Once that returns we can assume the container is ready to
shut down.

For other roles, we'll use the `deploy_timeout` when polling the
container to see if it is ready and the `drain_timeout` when stopping
the container.
This commit is contained in:
Donal McBreen
2024-09-18 14:42:01 +01:00
parent 34effef70a
commit 8bcd896242
17 changed files with 85 additions and 48 deletions

View File

@@ -3,7 +3,7 @@ module Kamal::Cli::Healthcheck::Poller
def wait_for_healthy(role, &block)
attempt = 1
timeout_at = Time.now + KAMAL.config.readiness_timeout
timeout_at = Time.now + KAMAL.config.deploy_timeout
readiness_delay = KAMAL.config.readiness_delay
begin
@@ -19,7 +19,7 @@ module Kamal::Cli::Healthcheck::Poller
end
unless %w[ running healthy ].include?(status)
raise Kamal::Cli::Healthcheck::Error, "container not ready after #{KAMAL.config.readiness_timeout} seconds (#{status})"
raise Kamal::Cli::Healthcheck::Error, "container not ready after #{KAMAL.config.deploy_timeout} seconds (#{status})"
end
rescue Kamal::Cli::Healthcheck::Error => e
time_left = timeout_at - Time.now

View File

@@ -43,7 +43,7 @@ class Kamal::Commands::App < Kamal::Commands::Base
def stop(version: nil)
pipe \
version ? container_id_for_version(version) : current_running_container_id,
xargs(config.stop_wait_time ? docker(:stop, "-t", config.stop_wait_time) : docker(:stop))
xargs(docker(:stop, *role.stop_args))
end
def info

View File

@@ -6,7 +6,7 @@ require "erb"
require "net/ssh/proxy/jump"
class Kamal::Configuration
delegate :service, :image, :labels, :stop_wait_time, :hooks_path, to: :raw_config, allow_nil: true
delegate :service, :image, :labels, :hooks_path, to: :raw_config, allow_nil: true
delegate :argumentize, :optionize, to: Kamal::Utils
attr_reader :destination, :raw_config, :secrets
@@ -189,8 +189,12 @@ class Kamal::Configuration
raw_config.readiness_delay || 7
end
def readiness_timeout
raw_config.readiness_timeout || 30
def deploy_timeout
raw_config.deploy_timeout || 30
end
def drain_timeout
raw_config.drain_timeout || 30
end

View File

@@ -93,11 +93,6 @@ primary_role: workers
# Whether roles with no servers are allowed. Defaults to `false`.
allow_empty_roles: false
# Stop wait time
#
# How long we wait for a container to stop before killing it, defaults to 30 seconds
stop_wait_time: 60
# Retain containers
#
# How many old containers and images we retain, defaults to 5
@@ -114,11 +109,15 @@ minimum_version: 1.3.0
# This only applies to containers that do not run a proxy or specify a healthcheck
readiness_delay: 4
# Readiness timeout
# Deploy timeout
#
# How long to wait for a container to become ready, default 30
# This only applies to containers that do not run a proxy
readiness_timeout: 4
deploy_timeout: 10
# Drain timeout
#
# How long to wait for a containers to drain, default 30
drain_timeout: 10
# Run directory
#

View File

@@ -38,11 +38,6 @@ proxy:
# Defaults to false
ssl: true
# Deploy timeout
#
# How long to wait for the app to boot when deploying, defaults to 30 seconds
deploy_timeout: 10s
# Response timeout
#
# How long to wait for requests to complete before timing out, defaults to 30 seconds

View File

@@ -39,12 +39,12 @@ class Kamal::Configuration::Proxy
{
host: proxy_config["host"],
tls: proxy_config["ssl"],
"deploy-timeout": proxy_config["deploy_timeout"],
"drain-timeout": proxy_config["drain_timeout"],
"health-check-interval": proxy_config.dig("healthcheck", "interval"),
"health-check-timeout": proxy_config.dig("healthcheck", "timeout"),
"deploy-timeout": seconds_duration(config.deploy_timeout),
"drain-timeout": seconds_duration(config.drain_timeout),
"health-check-interval": seconds_duration(proxy_config.dig("healthcheck", "interval")),
"health-check-timeout": seconds_duration(proxy_config.dig("healthcheck", "timeout")),
"health-check-path": proxy_config.dig("healthcheck", "path"),
"target-timeout": proxy_config["response_timeout"],
"target-timeout": seconds_duration(proxy_config["response_timeout"]),
"buffer-requests": proxy_config.fetch("buffering", { "requests": true }).fetch("requests", true),
"buffer-responses": proxy_config.fetch("buffering", { "responses": true }).fetch("responses", true),
"buffer-memory": proxy_config.dig("buffering", "memory"),
@@ -68,4 +68,8 @@ class Kamal::Configuration::Proxy
private
attr_reader :config, :proxy_config
def seconds_duration(value)
value ? "#{value}s" : nil
end
end

View File

@@ -65,6 +65,12 @@ class Kamal::Configuration::Role
@logging ||= config.logging.merge(specialized_logging)
end
def stop_args
# When deploying with the proxy, kamal-proxy will drain request before returning so we don't need to wait.
timeout = running_proxy? ? nil : config.drain_timeout
[ *argumentize("-t", timeout) ]
end
def env(host)
@envs ||= {}

View File

@@ -128,9 +128,9 @@ class CliAppTest < CliTestCase
SSHKit::Backend::Abstract.any_instance.stubs(:execute).returns("")
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false).twice
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :exec, "kamal-proxy", "kamal-proxy", :deploy, "app-web", "--target", "\"123:80\"", "--buffer-requests", "--buffer-responses", "--log-request-header", "\"Cache-Control\"", "--log-request-header", "\"Last-Modified\"", "--log-request-header", "\"User-Agent\"").raises(SSHKit::Command::Failed.new("Failed to deploy"))
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
SSHKit::Backend::Abstract.any_instance.expects(:execute)
.with(:docker, :exec, "kamal-proxy", "kamal-proxy", :deploy, "app-web", "--target", "\"123:80\"", "--deploy-timeout", "\"1s\"", "--drain-timeout", "\"30s\"", "--buffer-requests", "--buffer-responses", "--log-request-header", "\"Cache-Control\"", "--log-request-header", "\"Last-Modified\"", "--log-request-header", "\"User-Agent\"").raises(SSHKit::Command::Failed.new("Failed to deploy"))
stderred do
run_command("boot", config: :with_roles, host: nil, allow_execute_error: true).tap do |output|
@@ -190,7 +190,7 @@ class CliAppTest < CliTestCase
run_command("start").tap do |output|
assert_match "docker start app-web-999", output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"999:80\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\"", output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"999:80\" --deploy-timeout \"30s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\"", output
end
end

View File

@@ -58,13 +58,13 @@ class CliProxyTest < CliTestCase
assert_match "Running docker container stop traefik ; docker container prune --force --filter label=org.opencontainers.image.title=Traefik && docker image prune --all --force --filter label=org.opencontainers.image.title=Traefik on 1.1.1.1", output
assert_match "docker container prune --force --filter label=org.opencontainers.image.title=kamal-proxy on 1.1.1.1", output
assert_match "docker run --name kamal-proxy --network kamal --detach --restart unless-stopped --publish 80:80 --publish 443:443 --volume /var/run/docker.sock:/var/run/docker.sock --volume $(pwd)/.kamal/proxy/config:/home/kamal-proxy/.config/kamal-proxy --log-opt max-size=\"10m\" #{Kamal::Configuration::Proxy::DEFAULT_IMAGE} on 1.1.1.1", output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.1", output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.1", output
assert_match "docker container stop kamal-proxy on 1.1.1.2", output
assert_match "Running docker container stop traefik ; docker container prune --force --filter label=org.opencontainers.image.title=Traefik && docker image prune --all --force --filter label=org.opencontainers.image.title=Traefik on 1.1.1.2", output
assert_match "docker container prune --force --filter label=org.opencontainers.image.title=kamal-proxy on 1.1.1.2", output
assert_match "docker run --name kamal-proxy --network kamal --detach --restart unless-stopped --publish 80:80 --publish 443:443 --volume /var/run/docker.sock:/var/run/docker.sock --volume $(pwd)/.kamal/proxy/config:/home/kamal-proxy/.config/kamal-proxy --log-opt max-size=\"10m\" #{Kamal::Configuration::Proxy::DEFAULT_IMAGE} on 1.1.1.2", output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.2", output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.2", output
end
end
@@ -211,7 +211,7 @@ class CliProxyTest < CliTestCase
assert_match "/usr/bin/env mkdir -p .kamal/apps/app/env/roles", output
assert_match %r{/usr/bin/env .* .kamal/apps/app/env/roles/web.env}, output
assert_match %r{docker run --detach --restart unless-stopped --name app-web-latest --network kamal --hostname 1.1.1.1-.* -e KAMAL_CONTAINER_NAME="app-web-latest" -e KAMAL_VERSION="latest" --env-file .kamal/apps/app/env/roles/web.env --log-opt max-size="10m" --label service="app" --label role="web" --label destination dhh/app:latest}, output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"12345678:80\" --deploy-timeout \"6s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"", output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"12345678:80\" --deploy-timeout \"6s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"", output
assert_match "docker container ls --all --filter name=^app-web-12345678$ --quiet | xargs docker stop", output
assert_match "docker tag dhh/app:latest dhh/app:latest", output
assert_match "/usr/bin/env mkdir -p .kamal", output

View File

@@ -4,7 +4,7 @@ class CommandsAppTest < ActiveSupport::TestCase
setup do
setup_test_secrets("secrets" => "RAILS_MASTER_KEY=456")
@config = { service: "app", image: "dhh/app", registry: { "username" => "dhh", "password" => "secret" }, servers: [ "1.1.1.1" ], env: { "secret" => [ "RAILS_MASTER_KEY" ] }, builder: { "arch" => "amd64" } }
@config = { service: "app", image: "dhh/app", registry: { "username" => "dhh", "password" => "secret" }, servers: { "web" => [ "1.1.1.1" ], "workers" => [ "1.1.1.2" ] }, env: { "secret" => [ "RAILS_MASTER_KEY" ] }, builder: { "arch" => "amd64" } }
end
teardown do
@@ -83,11 +83,15 @@ class CommandsAppTest < ActiveSupport::TestCase
new_command.stop.join(" ")
end
test "stop with custom stop wait time" do
@config[:stop_wait_time] = 30
test "stop with custom drain timeout" do
@config[:drain_timeout] = 20
assert_equal \
"sh -c 'docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting --filter ancestor=$(docker image ls --filter reference=dhh/app:latest --format '\\''{{.ID}}'\\'') ; docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting' | head -1 | xargs docker stop -t 30",
"sh -c 'docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting --filter ancestor=$(docker image ls --filter reference=dhh/app:latest --format '\\''{{.ID}}'\\'') ; docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting' | head -1 | xargs docker stop",
new_command.stop.join(" ")
assert_equal \
"sh -c 'docker ps --latest --quiet --filter label=service=app --filter label=role=workers --filter status=running --filter status=restarting --filter ancestor=$(docker image ls --filter reference=dhh/app:latest --format '\\''{{.ID}}'\\'') ; docker ps --latest --quiet --filter label=service=app --filter label=role=workers --filter status=running --filter status=restarting' | head -1 | xargs docker stop -t 20",
new_command(role: "workers").stop.join(" ")
end
test "stop with version" do

View File

@@ -109,7 +109,7 @@ class CommandsProxyTest < ActiveSupport::TestCase
test "deploy" do
assert_equal \
"docker exec kamal-proxy kamal-proxy deploy service --target \"172.1.0.2:80\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"",
"docker exec kamal-proxy kamal-proxy deploy service --target \"172.1.0.2:80\" --deploy-timeout \"30s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"",
new_command.deploy("service", target: "172.1.0.2").join(" ")
end

View File

@@ -242,6 +242,14 @@ class ConfigurationRoleTest < ActiveSupport::TestCase
ENV.delete("VERSION")
end
test "stop args with proxy" do
assert_equal [], config_with_roles.role(:web).stop_args
end
test "stop args with no proxy" do
assert_equal [ "-t", 30 ], config_with_roles.role(:workers).stop_args
end
private
def config
Kamal::Configuration.new(@deploy)

View File

@@ -14,7 +14,7 @@ class ConfigurationValidationTest < ActiveSupport::TestCase
assert_error "#{key}: should be a boolean", **{ key => "foo" }
end
[ :stop_wait_time, :retain_containers, :readiness_delay ].each do |key|
[ :deploy_timeout, :drain_timeout, :retain_containers, :readiness_delay ].each do |key|
assert_error "#{key}: should be an integer", **{ key => "foo" }
end

View File

@@ -13,8 +13,6 @@ registry:
builder:
arch: amd64
proxy:
deploy_timeout: 6s
accessories:
mysql:
@@ -39,4 +37,4 @@ accessories:
- data:/data
readiness_delay: 0
readiness_timeout: 1
deploy_timeout: 6

View File

@@ -16,4 +16,4 @@ registry:
password: pw
builder:
arch: amd64
readiness_timeout: 1
deploy_timeout: 1

View File

@@ -20,8 +20,9 @@ env:
secret:
- SECRET_TAG
asset_path: /usr/share/nginx/html/versions
proxy:
deploy_timeout: 2s
deploy_timeout: 2
drain_timeout: 2
readiness_delay: 0
registry:
server: registry:4443
@@ -39,5 +40,3 @@ accessories:
cmd: sh -c 'echo "Starting busybox..."; trap exit term; while true; do sleep 1; done'
roles:
- web
stop_wait_time: 1
readiness_delay: 0

View File

@@ -9,8 +9,30 @@ servers:
hosts:
- vm3
cmd: sleep infinity
deploy_timeout: 2
drain_timeout: 2
readiness_delay: 0
proxy:
deploy_timeout: 2s
healthcheck:
interval: 1
timeout: 1
path: "/up"
response_timeout: 2
buffering:
requests: true
responses: true
memory: 400_000
max_request_body: 40_000_000
max_response_body: 40_000_000
forward_headers: true
logging:
request_headers:
- Cache-Control
- X-Forwarded-Proto
response_headers:
- X-Request-ID
- X-Request-Start
asset_path: /usr/share/nginx/html/versions
@@ -30,8 +52,6 @@ accessories:
cmd: sh -c 'echo "Starting busybox..."; trap exit term; while true; do sleep 1; done'
roles:
- web
stop_wait_time: 1
readiness_delay: 0
aliases:
whome: version
worker_hostname: app exec -r workers -q --reuse hostname