Add healthcheck before deploy

This commit is contained in:
David Heinemeier Hansson
2023-02-18 16:22:08 +01:00
parent 2f80b300f0
commit 933ece35ab
11 changed files with 175 additions and 6 deletions

View File

@@ -22,7 +22,7 @@ env:
Then edit your `.env` file to add your registry password as `MRSK_REGISTRY_PASSWORD` (and your `RAILS_MASTER_KEY` for production with a Rails app). Then edit your `.env` file to add your registry password as `MRSK_REGISTRY_PASSWORD` (and your `RAILS_MASTER_KEY` for production with a Rails app).
Finally, you have to ensure your application can answer `200 OK` to a `GET /up` request. That's how the zero-downtime deploy process knows that your new version is ready to serve traffic. Finally, you have to ensure your application can answer `200 OK` to a `GET /up` request (or configure an alternative health path). That's how the zero-downtime deploy process knows that your new version is ready to serve traffic.
Now you're ready to deploy to the servers: Now you're ready to deploy to the servers:
@@ -370,6 +370,18 @@ That'll post a line like follows to a preconfigured chatbot in Basecamp:
[My App] [2023-02-18 11:29:52] [dhh] Rolled back to version d264c4e92470ad1bd18590f04466787262f605de [My App] [2023-02-18 11:29:52] [dhh] Rolled back to version d264c4e92470ad1bd18590f04466787262f605de
``` ```
### Using custom healthcheck path or port
MRSK defaults to checking the health of your application against `/up` on port 3000. You can tailor both with the `healthcheck` setting:
```yaml
healthcheck:
path: /healthz
port: 4000
```
This ensures your application is configured with a Traefik label for the healthcheck against `/healthz`, and that the pre-deploy healthcheck MRSK performs runs against the same path on port 4000.
## Commands ## Commands
### Running commands on servers ### Running commands on servers

View File

@@ -0,0 +1,29 @@
# CLI subcommand that checks the app image can boot and answer its health endpoint.
class Mrsk::Cli::Healthcheck < Mrsk::Cli::Base
  # Starts a throwaway healthcheck container on the primary host, curls the configured
  # health path, and always stops and removes that container afterwards.
  #
  # Raises SSHKit::Command::Failed when the container cannot be started, when curl itself
  # exits non-zero, or when the endpoint answers with any status other than 200.
  desc "perform", "Health check the current version of the app"
  def perform
    on(MRSK.primary_host) do
      # Built before anything is executed so every failure message below is complete.
      target = "Health check against #{MRSK.config.healthcheck["path"]}"

      begin
        # A failure to start the container propagates unchanged, with its original message.
        execute(*MRSK.healthcheck.run)

        # NOTE(review): the request is made once, right after the container starts, with no
        # wait or retry; a slow-booting app may fail here. Consider a bounded retry.
        status =
          begin
            capture_with_info(*MRSK.healthcheck.curl)
          rescue SSHKit::Command::Failed
            # curl exited non-zero (e.g. nothing is listening on the port yet).
            raise SSHKit::Command::Failed, "#{target} failed to return 200 OK!"
          end

        # The curl command is built without --fail, so every HTTP status (including 4xx
        # and 5xx) exits zero and is reported through the captured status code instead.
        raise SSHKit::Command::Failed, "#{target} failed to return 200 OK!" unless status == "200"

        info "#{target} succeeded with 200 OK!"
      ensure
        # Best-effort cleanup: a cleanup failure must not mask the healthcheck outcome.
        execute(*MRSK.healthcheck.stop, raise_on_non_zero_exit: false)
        execute(*MRSK.healthcheck.remove, raise_on_non_zero_exit: false)
      end
    end
  end
end

View File

@@ -23,6 +23,9 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
say "Ensure Traefik is running...", :magenta say "Ensure Traefik is running...", :magenta
invoke "mrsk:cli:traefik:boot" invoke "mrsk:cli:traefik:boot"
say "Ensure app can pass healthcheck...", :magenta
invoke "mrsk:cli:healthcheck:perform"
invoke "mrsk:cli:app:boot" invoke "mrsk:cli:app:boot"
say "Prune old containers and images...", :magenta say "Prune old containers and images...", :magenta
@@ -38,6 +41,9 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
say "Build and push app image...", :magenta say "Build and push app image...", :magenta
invoke "mrsk:cli:build:deliver" invoke "mrsk:cli:build:deliver"
say "Ensure app can pass healthcheck...", :magenta
invoke "mrsk:cli:healthcheck:perform"
invoke "mrsk:cli:app:boot" invoke "mrsk:cli:app:boot"
end end
@@ -147,6 +153,9 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
desc "build", "Build the application image" desc "build", "Build the application image"
subcommand "build", Mrsk::Cli::Build subcommand "build", Mrsk::Cli::Build
desc "healthcheck", "Healthcheck the application"
subcommand "healthcheck", Mrsk::Cli::Healthcheck
desc "prune", "Prune old application images and containers" desc "prune", "Prune old application images and containers"
subcommand "prune", Mrsk::Cli::Prune subcommand "prune", Mrsk::Cli::Prune

View File

@@ -73,6 +73,10 @@ class Mrsk::Commander
@auditor ||= Mrsk::Commands::Auditor.new(config) @auditor ||= Mrsk::Commands::Auditor.new(config)
end end
# Memoized builder of healthcheck commands, created from this commander's config on first use.
def healthcheck
@healthcheck ||= Mrsk::Commands::Healthcheck.new(config)
end
def with_verbosity(level) def with_verbosity(level)
old_level = self.verbosity old_level = self.verbosity

View File

@@ -75,10 +75,6 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
docker :ps, "-q", *service_filter docker :ps, "-q", *service_filter
end end
def container_id_for(container_name:)
docker :container, :ls, "-a", "-f", "name=#{container_name}", "-q"
end
def current_running_version def current_running_version
# FIXME: Find more graceful way to extract the version from "app-version" than using sed and tail! # FIXME: Find more graceful way to extract the version from "app-version" than using sed and tail!
pipe \ pipe \

View File

@@ -17,6 +17,10 @@ module Mrsk::Commands
end end
end end
# Builds the `docker container ls` command that prints only the ids (-q) of all containers,
# running or stopped (-a), selected by a `name=` filter on +container_name+.
def container_id_for(container_name:)
  name_filter = "name=#{container_name}"

  docker(:container, :ls, "-a", "-f", name_filter, "-q")
end
private private
def combine(*commands, by: "&&") def combine(*commands, by: "&&")
commands commands

View File

@@ -0,0 +1,46 @@
# Builds the docker and curl command lines used to health check a release in a
# throwaway container.
class Mrsk::Commands::Healthcheck < Mrsk::Commands::Base
  # Host-side port that the healthcheck container's health port is published on.
  EXPOSED_PORT = 3999

  # `docker run` command that starts the web role's image detached, named after the
  # versioned service and publishing the configured health port on EXPOSED_PORT.
  def run
    web_role = config.role(:web)
    port_mapping = "#{EXPOSED_PORT}:#{config.healthcheck["port"]}"
    service_label = "service=#{container_name}"

    docker(
      :run, "-d",
      "--name", container_name_with_version,
      "-p", port_mapping,
      "--label", service_label,
      *web_role.env_args,
      *config.volume_args,
      config.absolute_image,
      web_role.cmd
    )
  end

  # curl command that discards the response body and prints only the HTTP status code.
  def curl
    quiet_status_only = [ "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'" ]

    [ :curl, *quiet_status_only, health_url ]
  end

  # Pipeline that stops every container matched by the healthcheck name filter.
  def stop
    pipe_container_ids_into docker(:stop)
  end

  # Pipeline that removes every container matched by the healthcheck name filter.
  def remove
    pipe_container_ids_into docker(:container, :rm)
  end

  private
    # Feeds the ids of the matching healthcheck containers to +command+ through xargs.
    def pipe_container_ids_into(command)
      pipe container_id_for(container_name: container_name), xargs(command)
    end

    def container_name
      "healthcheck-#{config.service}"
    end

    def container_name_with_version
      "healthcheck-#{config.service_with_version}"
    end

    # URL curl requests: the published port on localhost plus the configured health path.
    def health_url
      health_path = config.healthcheck["path"]

      "http://localhost:#{EXPOSED_PORT}#{health_path}"
    end
end

View File

@@ -107,6 +107,7 @@ class Mrsk::Configuration
end end
end end
def ssh_user def ssh_user
if raw_config.ssh.present? if raw_config.ssh.present?
raw_config.ssh["user"] || "root" raw_config.ssh["user"] || "root"
@@ -126,10 +127,15 @@ class Mrsk::Configuration
{ user: ssh_user, proxy: ssh_proxy, auth_methods: [ "publickey" ] }.compact { user: ssh_user, proxy: ssh_proxy, auth_methods: [ "publickey" ] }.compact
end end
def audit_broadcast_cmd def audit_broadcast_cmd
raw_config.audit_broadcast_cmd raw_config.audit_broadcast_cmd
end end
# Healthcheck settings as a string-keyed hash: the built-in defaults ("/up" on port "3000")
# overlaid with whatever the raw config provides under `healthcheck`, if anything.
def healthcheck
  defaults = { "path" => "/up", "port" => "3000" }
  overrides = raw_config.healthcheck || {}

  defaults.merge(overrides)
end
def valid? def valid?
ensure_required_keys_present && ensure_env_available ensure_required_keys_present && ensure_env_available

View File

@@ -59,7 +59,7 @@ class Mrsk::Configuration::Role
if running_traefik? if running_traefik?
{ {
"traefik.http.routers.#{config.service}.rule" => "'PathPrefix(`/`)'", "traefik.http.routers.#{config.service}.rule" => "'PathPrefix(`/`)'",
"traefik.http.services.#{config.service}.loadbalancer.healthcheck.path" => "/up", "traefik.http.services.#{config.service}.loadbalancer.healthcheck.path" => config.healthcheck["path"],
"traefik.http.services.#{config.service}.loadbalancer.healthcheck.interval" => "1s", "traefik.http.services.#{config.service}.loadbalancer.healthcheck.interval" => "1s",
"traefik.http.middlewares.#{config.service}.retry.attempts" => "3", "traefik.http.middlewares.#{config.service}.retry.attempts" => "3",
"traefik.http.middlewares.#{config.service}.retry.initialinterval" => "500ms" "traefik.http.middlewares.#{config.service}.retry.initialinterval" => "500ms"

View File

@@ -26,6 +26,14 @@ class CommandsAppTest < ActiveSupport::TestCase
@app.run.join(" ") @app.run.join(" ")
end end
# A configured healthcheck path must show up in the Traefik healthcheck label of `docker run`.
test "run with custom healthcheck path" do
  @config[:healthcheck] = { "path" => "/healthz" }
  expected_command = "docker run -d --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=456 --label service=app --label role=web --label traefik.http.routers.app.rule='PathPrefix(`/`)' --label traefik.http.services.app.loadbalancer.healthcheck.path=/healthz --label traefik.http.services.app.loadbalancer.healthcheck.interval=1s --label traefik.http.middlewares.app.retry.attempts=3 --label traefik.http.middlewares.app.retry.initialinterval=500ms dhh/app:999"

  assert_equal expected_command, @app.run.join(" ")
end
test "start" do test "start" do
assert_equal \ assert_equal \
"docker start app-999", "docker start app-999",

View File

@@ -0,0 +1,55 @@
require "test_helper"
# Pins the exact command lines produced by Mrsk::Commands::Healthcheck for version "123".
class CommandsHealthcheckTest < ActiveSupport::TestCase
  setup do
    @config = {
      service: "app",
      image: "dhh/app",
      registry: { "username" => "dhh", "password" => "secret" },
      servers: [ "1.1.1.1" ],
      traefik: { "args" => { "accesslog.format" => "json", "metrics.prometheus.buckets" => "0.1,0.3,1.2,5.0" } }
    }
  end

  test "run" do
    expected = "docker run -d --name healthcheck-app-123 -p 3999:3000 --label service=healthcheck-app dhh/app:123"

    assert_equal expected, command_line(:run)
  end

  test "run with custom port" do
    @config[:healthcheck] = { "port" => 3001 }
    expected = "docker run -d --name healthcheck-app-123 -p 3999:3001 --label service=healthcheck-app dhh/app:123"

    assert_equal expected, command_line(:run)
  end

  test "curl" do
    expected = "curl --silent --output /dev/null --write-out '%{http_code}' http://localhost:3999/up"

    assert_equal expected, command_line(:curl)
  end

  test "curl with custom path" do
    @config[:healthcheck] = { "path" => "/healthz" }
    expected = "curl --silent --output /dev/null --write-out '%{http_code}' http://localhost:3999/healthz"

    assert_equal expected, command_line(:curl)
  end

  test "stop" do
    expected = "docker container ls -a -f name=healthcheck-app -q | xargs docker stop"

    assert_equal expected, command_line(:stop)
  end

  test "remove" do
    expected = "docker container ls -a -f name=healthcheck-app -q | xargs docker container rm"

    assert_equal expected, command_line(:remove)
  end

  private
    # Renders the named command of a freshly built Healthcheck as a single shell string.
    def command_line(name)
      healthcheck_commands.public_send(name).join(" ")
    end

    # Built per call so that edits a test makes to @config are picked up.
    def healthcheck_commands
      Mrsk::Commands::Healthcheck.new(Mrsk::Configuration.new(@config, version: "123"))
    end
end