Simplified deploy/drain timeouts

Remove `stop_wait_time` and `readiness_timeout` from the root config
and remove `deploy_timeout` and `drain_timeout` from the proxy config.

Instead we'll just have `deploy_timeout` and `drain_timeout` in the
root config.

For roles that run the proxy, they are passed to the kamal-proxy deploy
command. Once that returns we can assume the container is ready to
shut down.

For other roles, we'll use the `deploy_timeout` when polling the
container to see if it is ready and the `drain_timeout` when stopping
the container.
This commit is contained in:
Donal McBreen
2024-09-18 14:42:01 +01:00
parent 34effef70a
commit 8bcd896242
17 changed files with 85 additions and 48 deletions

View File

@@ -3,7 +3,7 @@ module Kamal::Cli::Healthcheck::Poller
def wait_for_healthy(role, &block) def wait_for_healthy(role, &block)
attempt = 1 attempt = 1
timeout_at = Time.now + KAMAL.config.readiness_timeout timeout_at = Time.now + KAMAL.config.deploy_timeout
readiness_delay = KAMAL.config.readiness_delay readiness_delay = KAMAL.config.readiness_delay
begin begin
@@ -19,7 +19,7 @@ module Kamal::Cli::Healthcheck::Poller
end end
unless %w[ running healthy ].include?(status) unless %w[ running healthy ].include?(status)
raise Kamal::Cli::Healthcheck::Error, "container not ready after #{KAMAL.config.readiness_timeout} seconds (#{status})" raise Kamal::Cli::Healthcheck::Error, "container not ready after #{KAMAL.config.deploy_timeout} seconds (#{status})"
end end
rescue Kamal::Cli::Healthcheck::Error => e rescue Kamal::Cli::Healthcheck::Error => e
time_left = timeout_at - Time.now time_left = timeout_at - Time.now

View File

@@ -43,7 +43,7 @@ class Kamal::Commands::App < Kamal::Commands::Base
def stop(version: nil) def stop(version: nil)
pipe \ pipe \
version ? container_id_for_version(version) : current_running_container_id, version ? container_id_for_version(version) : current_running_container_id,
xargs(config.stop_wait_time ? docker(:stop, "-t", config.stop_wait_time) : docker(:stop)) xargs(docker(:stop, *role.stop_args))
end end
def info def info

View File

@@ -6,7 +6,7 @@ require "erb"
require "net/ssh/proxy/jump" require "net/ssh/proxy/jump"
class Kamal::Configuration class Kamal::Configuration
delegate :service, :image, :labels, :stop_wait_time, :hooks_path, to: :raw_config, allow_nil: true delegate :service, :image, :labels, :hooks_path, to: :raw_config, allow_nil: true
delegate :argumentize, :optionize, to: Kamal::Utils delegate :argumentize, :optionize, to: Kamal::Utils
attr_reader :destination, :raw_config, :secrets attr_reader :destination, :raw_config, :secrets
@@ -189,8 +189,12 @@ class Kamal::Configuration
raw_config.readiness_delay || 7 raw_config.readiness_delay || 7
end end
def readiness_timeout def deploy_timeout
raw_config.readiness_timeout || 30 raw_config.deploy_timeout || 30
end
def drain_timeout
raw_config.drain_timeout || 30
end end

View File

@@ -93,11 +93,6 @@ primary_role: workers
# Whether roles with no servers are allowed. Defaults to `false`. # Whether roles with no servers are allowed. Defaults to `false`.
allow_empty_roles: false allow_empty_roles: false
# Stop wait time
#
# How long we wait for a container to stop before killing it, defaults to 30 seconds
stop_wait_time: 60
# Retain containers # Retain containers
# #
# How many old containers and images we retain, defaults to 5 # How many old containers and images we retain, defaults to 5
@@ -114,11 +109,15 @@ minimum_version: 1.3.0
# This only applies to containers that do not run a proxy or specify a healthcheck # This only applies to containers that do not run a proxy or specify a healthcheck
readiness_delay: 4 readiness_delay: 4
# Readiness timeout # Deploy timeout
# #
# How long to wait for a container to become ready, default 30 # How long to wait for a container to become ready, default 30
# This only applies to containers that do not run a proxy deploy_timeout: 10
readiness_timeout: 4
# Drain timeout
#
# How long to wait for a containers to drain, default 30
drain_timeout: 10
# Run directory # Run directory
# #

View File

@@ -38,11 +38,6 @@ proxy:
# Defaults to false # Defaults to false
ssl: true ssl: true
# Deploy timeout
#
# How long to wait for the app to boot when deploying, defaults to 30 seconds
deploy_timeout: 10s
# Response timeout # Response timeout
# #
# How long to wait for requests to complete before timing out, defaults to 30 seconds # How long to wait for requests to complete before timing out, defaults to 30 seconds

View File

@@ -39,12 +39,12 @@ class Kamal::Configuration::Proxy
{ {
host: proxy_config["host"], host: proxy_config["host"],
tls: proxy_config["ssl"], tls: proxy_config["ssl"],
"deploy-timeout": proxy_config["deploy_timeout"], "deploy-timeout": seconds_duration(config.deploy_timeout),
"drain-timeout": proxy_config["drain_timeout"], "drain-timeout": seconds_duration(config.drain_timeout),
"health-check-interval": proxy_config.dig("healthcheck", "interval"), "health-check-interval": seconds_duration(proxy_config.dig("healthcheck", "interval")),
"health-check-timeout": proxy_config.dig("healthcheck", "timeout"), "health-check-timeout": seconds_duration(proxy_config.dig("healthcheck", "timeout")),
"health-check-path": proxy_config.dig("healthcheck", "path"), "health-check-path": proxy_config.dig("healthcheck", "path"),
"target-timeout": proxy_config["response_timeout"], "target-timeout": seconds_duration(proxy_config["response_timeout"]),
"buffer-requests": proxy_config.fetch("buffering", { "requests": true }).fetch("requests", true), "buffer-requests": proxy_config.fetch("buffering", { "requests": true }).fetch("requests", true),
"buffer-responses": proxy_config.fetch("buffering", { "responses": true }).fetch("responses", true), "buffer-responses": proxy_config.fetch("buffering", { "responses": true }).fetch("responses", true),
"buffer-memory": proxy_config.dig("buffering", "memory"), "buffer-memory": proxy_config.dig("buffering", "memory"),
@@ -68,4 +68,8 @@ class Kamal::Configuration::Proxy
private private
attr_reader :config, :proxy_config attr_reader :config, :proxy_config
def seconds_duration(value)
value ? "#{value}s" : nil
end
end end

View File

@@ -65,6 +65,12 @@ class Kamal::Configuration::Role
@logging ||= config.logging.merge(specialized_logging) @logging ||= config.logging.merge(specialized_logging)
end end
def stop_args
# When deploying with the proxy, kamal-proxy will drain request before returning so we don't need to wait.
timeout = running_proxy? ? nil : config.drain_timeout
[ *argumentize("-t", timeout) ]
end
def env(host) def env(host)
@envs ||= {} @envs ||= {}

View File

@@ -128,9 +128,9 @@ class CliAppTest < CliTestCase
SSHKit::Backend::Abstract.any_instance.stubs(:execute).returns("") SSHKit::Backend::Abstract.any_instance.stubs(:execute).returns("")
SSHKit::Backend::Abstract.any_instance.stubs(:execute) SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false).twice .with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false)
SSHKit::Backend::Abstract.any_instance.stubs(:execute) SSHKit::Backend::Abstract.any_instance.expects(:execute)
.with(:docker, :exec, "kamal-proxy", "kamal-proxy", :deploy, "app-web", "--target", "\"123:80\"", "--buffer-requests", "--buffer-responses", "--log-request-header", "\"Cache-Control\"", "--log-request-header", "\"Last-Modified\"", "--log-request-header", "\"User-Agent\"").raises(SSHKit::Command::Failed.new("Failed to deploy")) .with(:docker, :exec, "kamal-proxy", "kamal-proxy", :deploy, "app-web", "--target", "\"123:80\"", "--deploy-timeout", "\"1s\"", "--drain-timeout", "\"30s\"", "--buffer-requests", "--buffer-responses", "--log-request-header", "\"Cache-Control\"", "--log-request-header", "\"Last-Modified\"", "--log-request-header", "\"User-Agent\"").raises(SSHKit::Command::Failed.new("Failed to deploy"))
stderred do stderred do
run_command("boot", config: :with_roles, host: nil, allow_execute_error: true).tap do |output| run_command("boot", config: :with_roles, host: nil, allow_execute_error: true).tap do |output|
@@ -190,7 +190,7 @@ class CliAppTest < CliTestCase
run_command("start").tap do |output| run_command("start").tap do |output|
assert_match "docker start app-web-999", output assert_match "docker start app-web-999", output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"999:80\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\"", output assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"999:80\" --deploy-timeout \"30s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\"", output
end end
end end

View File

@@ -58,13 +58,13 @@ class CliProxyTest < CliTestCase
assert_match "Running docker container stop traefik ; docker container prune --force --filter label=org.opencontainers.image.title=Traefik && docker image prune --all --force --filter label=org.opencontainers.image.title=Traefik on 1.1.1.1", output assert_match "Running docker container stop traefik ; docker container prune --force --filter label=org.opencontainers.image.title=Traefik && docker image prune --all --force --filter label=org.opencontainers.image.title=Traefik on 1.1.1.1", output
assert_match "docker container prune --force --filter label=org.opencontainers.image.title=kamal-proxy on 1.1.1.1", output assert_match "docker container prune --force --filter label=org.opencontainers.image.title=kamal-proxy on 1.1.1.1", output
assert_match "docker run --name kamal-proxy --network kamal --detach --restart unless-stopped --publish 80:80 --publish 443:443 --volume /var/run/docker.sock:/var/run/docker.sock --volume $(pwd)/.kamal/proxy/config:/home/kamal-proxy/.config/kamal-proxy --log-opt max-size=\"10m\" #{Kamal::Configuration::Proxy::DEFAULT_IMAGE} on 1.1.1.1", output assert_match "docker run --name kamal-proxy --network kamal --detach --restart unless-stopped --publish 80:80 --publish 443:443 --volume /var/run/docker.sock:/var/run/docker.sock --volume $(pwd)/.kamal/proxy/config:/home/kamal-proxy/.config/kamal-proxy --log-opt max-size=\"10m\" #{Kamal::Configuration::Proxy::DEFAULT_IMAGE} on 1.1.1.1", output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.1", output assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.1", output
assert_match "docker container stop kamal-proxy on 1.1.1.2", output assert_match "docker container stop kamal-proxy on 1.1.1.2", output
assert_match "Running docker container stop traefik ; docker container prune --force --filter label=org.opencontainers.image.title=Traefik && docker image prune --all --force --filter label=org.opencontainers.image.title=Traefik on 1.1.1.2", output assert_match "Running docker container stop traefik ; docker container prune --force --filter label=org.opencontainers.image.title=Traefik && docker image prune --all --force --filter label=org.opencontainers.image.title=Traefik on 1.1.1.2", output
assert_match "docker container prune --force --filter label=org.opencontainers.image.title=kamal-proxy on 1.1.1.2", output assert_match "docker container prune --force --filter label=org.opencontainers.image.title=kamal-proxy on 1.1.1.2", output
assert_match "docker run --name kamal-proxy --network kamal --detach --restart unless-stopped --publish 80:80 --publish 443:443 --volume /var/run/docker.sock:/var/run/docker.sock --volume $(pwd)/.kamal/proxy/config:/home/kamal-proxy/.config/kamal-proxy --log-opt max-size=\"10m\" #{Kamal::Configuration::Proxy::DEFAULT_IMAGE} on 1.1.1.2", output assert_match "docker run --name kamal-proxy --network kamal --detach --restart unless-stopped --publish 80:80 --publish 443:443 --volume /var/run/docker.sock:/var/run/docker.sock --volume $(pwd)/.kamal/proxy/config:/home/kamal-proxy/.config/kamal-proxy --log-opt max-size=\"10m\" #{Kamal::Configuration::Proxy::DEFAULT_IMAGE} on 1.1.1.2", output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.2", output assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.2", output
end end
end end
@@ -211,7 +211,7 @@ class CliProxyTest < CliTestCase
assert_match "/usr/bin/env mkdir -p .kamal/apps/app/env/roles", output assert_match "/usr/bin/env mkdir -p .kamal/apps/app/env/roles", output
assert_match %r{/usr/bin/env .* .kamal/apps/app/env/roles/web.env}, output assert_match %r{/usr/bin/env .* .kamal/apps/app/env/roles/web.env}, output
assert_match %r{docker run --detach --restart unless-stopped --name app-web-latest --network kamal --hostname 1.1.1.1-.* -e KAMAL_CONTAINER_NAME="app-web-latest" -e KAMAL_VERSION="latest" --env-file .kamal/apps/app/env/roles/web.env --log-opt max-size="10m" --label service="app" --label role="web" --label destination dhh/app:latest}, output assert_match %r{docker run --detach --restart unless-stopped --name app-web-latest --network kamal --hostname 1.1.1.1-.* -e KAMAL_CONTAINER_NAME="app-web-latest" -e KAMAL_VERSION="latest" --env-file .kamal/apps/app/env/roles/web.env --log-opt max-size="10m" --label service="app" --label role="web" --label destination dhh/app:latest}, output
assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"12345678:80\" --deploy-timeout \"6s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"", output assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"12345678:80\" --deploy-timeout \"6s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"", output
assert_match "docker container ls --all --filter name=^app-web-12345678$ --quiet | xargs docker stop", output assert_match "docker container ls --all --filter name=^app-web-12345678$ --quiet | xargs docker stop", output
assert_match "docker tag dhh/app:latest dhh/app:latest", output assert_match "docker tag dhh/app:latest dhh/app:latest", output
assert_match "/usr/bin/env mkdir -p .kamal", output assert_match "/usr/bin/env mkdir -p .kamal", output

View File

@@ -4,7 +4,7 @@ class CommandsAppTest < ActiveSupport::TestCase
setup do setup do
setup_test_secrets("secrets" => "RAILS_MASTER_KEY=456") setup_test_secrets("secrets" => "RAILS_MASTER_KEY=456")
@config = { service: "app", image: "dhh/app", registry: { "username" => "dhh", "password" => "secret" }, servers: [ "1.1.1.1" ], env: { "secret" => [ "RAILS_MASTER_KEY" ] }, builder: { "arch" => "amd64" } } @config = { service: "app", image: "dhh/app", registry: { "username" => "dhh", "password" => "secret" }, servers: { "web" => [ "1.1.1.1" ], "workers" => [ "1.1.1.2" ] }, env: { "secret" => [ "RAILS_MASTER_KEY" ] }, builder: { "arch" => "amd64" } }
end end
teardown do teardown do
@@ -83,11 +83,15 @@ class CommandsAppTest < ActiveSupport::TestCase
new_command.stop.join(" ") new_command.stop.join(" ")
end end
test "stop with custom stop wait time" do test "stop with custom drain timeout" do
@config[:stop_wait_time] = 30 @config[:drain_timeout] = 20
assert_equal \ assert_equal \
"sh -c 'docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting --filter ancestor=$(docker image ls --filter reference=dhh/app:latest --format '\\''{{.ID}}'\\'') ; docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting' | head -1 | xargs docker stop -t 30", "sh -c 'docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting --filter ancestor=$(docker image ls --filter reference=dhh/app:latest --format '\\''{{.ID}}'\\'') ; docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting' | head -1 | xargs docker stop",
new_command.stop.join(" ") new_command.stop.join(" ")
assert_equal \
"sh -c 'docker ps --latest --quiet --filter label=service=app --filter label=role=workers --filter status=running --filter status=restarting --filter ancestor=$(docker image ls --filter reference=dhh/app:latest --format '\\''{{.ID}}'\\'') ; docker ps --latest --quiet --filter label=service=app --filter label=role=workers --filter status=running --filter status=restarting' | head -1 | xargs docker stop -t 20",
new_command(role: "workers").stop.join(" ")
end end
test "stop with version" do test "stop with version" do

View File

@@ -109,7 +109,7 @@ class CommandsProxyTest < ActiveSupport::TestCase
test "deploy" do test "deploy" do
assert_equal \ assert_equal \
"docker exec kamal-proxy kamal-proxy deploy service --target \"172.1.0.2:80\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"", "docker exec kamal-proxy kamal-proxy deploy service --target \"172.1.0.2:80\" --deploy-timeout \"30s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"",
new_command.deploy("service", target: "172.1.0.2").join(" ") new_command.deploy("service", target: "172.1.0.2").join(" ")
end end

View File

@@ -242,6 +242,14 @@ class ConfigurationRoleTest < ActiveSupport::TestCase
ENV.delete("VERSION") ENV.delete("VERSION")
end end
test "stop args with proxy" do
assert_equal [], config_with_roles.role(:web).stop_args
end
test "stop args with no proxy" do
assert_equal [ "-t", 30 ], config_with_roles.role(:workers).stop_args
end
private private
def config def config
Kamal::Configuration.new(@deploy) Kamal::Configuration.new(@deploy)

View File

@@ -14,7 +14,7 @@ class ConfigurationValidationTest < ActiveSupport::TestCase
assert_error "#{key}: should be a boolean", **{ key => "foo" } assert_error "#{key}: should be a boolean", **{ key => "foo" }
end end
[ :stop_wait_time, :retain_containers, :readiness_delay ].each do |key| [ :deploy_timeout, :drain_timeout, :retain_containers, :readiness_delay ].each do |key|
assert_error "#{key}: should be an integer", **{ key => "foo" } assert_error "#{key}: should be an integer", **{ key => "foo" }
end end

View File

@@ -13,8 +13,6 @@ registry:
builder: builder:
arch: amd64 arch: amd64
proxy:
deploy_timeout: 6s
accessories: accessories:
mysql: mysql:
@@ -39,4 +37,4 @@ accessories:
- data:/data - data:/data
readiness_delay: 0 readiness_delay: 0
readiness_timeout: 1 deploy_timeout: 6

View File

@@ -16,4 +16,4 @@ registry:
password: pw password: pw
builder: builder:
arch: amd64 arch: amd64
readiness_timeout: 1 deploy_timeout: 1

View File

@@ -20,8 +20,9 @@ env:
secret: secret:
- SECRET_TAG - SECRET_TAG
asset_path: /usr/share/nginx/html/versions asset_path: /usr/share/nginx/html/versions
proxy: deploy_timeout: 2
deploy_timeout: 2s drain_timeout: 2
readiness_delay: 0
registry: registry:
server: registry:4443 server: registry:4443
@@ -39,5 +40,3 @@ accessories:
cmd: sh -c 'echo "Starting busybox..."; trap exit term; while true; do sleep 1; done' cmd: sh -c 'echo "Starting busybox..."; trap exit term; while true; do sleep 1; done'
roles: roles:
- web - web
stop_wait_time: 1
readiness_delay: 0

View File

@@ -9,8 +9,30 @@ servers:
hosts: hosts:
- vm3 - vm3
cmd: sleep infinity cmd: sleep infinity
deploy_timeout: 2
drain_timeout: 2
readiness_delay: 0
proxy: proxy:
deploy_timeout: 2s healthcheck:
interval: 1
timeout: 1
path: "/up"
response_timeout: 2
buffering:
requests: true
responses: true
memory: 400_000
max_request_body: 40_000_000
max_response_body: 40_000_000
forward_headers: true
logging:
request_headers:
- Cache-Control
- X-Forwarded-Proto
response_headers:
- X-Request-ID
- X-Request-Start
asset_path: /usr/share/nginx/html/versions asset_path: /usr/share/nginx/html/versions
@@ -30,8 +52,6 @@ accessories:
cmd: sh -c 'echo "Starting busybox..."; trap exit term; while true; do sleep 1; done' cmd: sh -c 'echo "Starting busybox..."; trap exit term; while true; do sleep 1; done'
roles: roles:
- web - web
stop_wait_time: 1
readiness_delay: 0
aliases: aliases:
whome: version whome: version
worker_hostname: app exec -r workers -q --reuse hostname worker_hostname: app exec -r workers -q --reuse hostname