Merge branch 'main' into pr/205

* main:
  Simplify domain language to just "boot" and unscoped config keys
  Retain a fixed number of containers when pruning
  Don't assume rolling back in message
  Check all hosts before rolling back
  Ensure Traefik service name is consistent
  Extend traefik delay by 1 second
  Include traefik access logs
  Check if we are still getting a 404
  Also dump load balancer logs
  Dump traefik logs when app not booted
  Fix missing for apt-get
  Report on container health after failure
  Fix the integration test healthcheck
  Allow percentage-based rolling deployments
  Move `group_limit` & `group_wait` under `boot`
  Limit rolling deployment to boot operation
  Allow performing boot & start operations in groups
This commit is contained in:
David Heinemeier Hansson
2023-05-02 14:29:06 +02:00
24 changed files with 204 additions and 44 deletions

View File

@@ -40,6 +40,16 @@ class CliAppTest < CliTestCase
Thread.report_on_exception = true
end
test "boot uses group strategy when specified" do
Mrsk::Cli::App.any_instance.stubs(:on).with("1.1.1.1").twice # acquire & release lock
Mrsk::Cli::App.any_instance.stubs(:on).with([ "1.1.1.1" ]) # tag container
# Strategy is used when booting the containers
Mrsk::Cli::App.any_instance.expects(:on).with([ "1.1.1.1" ], in: :groups, limit: 3, wait: 2).with_block_given
run_command("boot", config: :with_boot_strategy)
end
test "start" do
run_command("start").tap do |output|
assert_match "docker start app-web-999", output
@@ -158,7 +168,7 @@ class CliAppTest < CliTestCase
end
private
def run_command(*command)
stdouted { Mrsk::Cli::App.start([*command, "-c", "test/fixtures/deploy_with_accessories.yml", "--hosts", "1.1.1.1"]) }
def run_command(*command, config: :with_accessories)
stdouted { Mrsk::Cli::App.start([*command, "-c", "test/fixtures/deploy_#{config}.yml", "--hosts", "1.1.1.1"]) }
end
end

View File

@@ -140,7 +140,8 @@ class CliMainTest < CliTestCase
end
test "rollback bad version" do
# Mrsk::Cli::Main.any_instance.stubs(:container_available?).returns(false)
Thread.report_on_exception = false
run_command("details") # Preheat MRSK const
run_command("rollback", "nonsense").tap do |output|
@@ -150,9 +151,19 @@ class CliMainTest < CliTestCase
end
test "rollback good version" do
Mrsk::Cli::Main.any_instance.stubs(:container_available?).returns(true)
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info).with(:docker, :ps, "--filter", "label=service=app", "--filter", "label=role=web", "--filter", "status=running", "--latest", "--format", "\"{{.Names}}\"", "|", "grep -oE \"\\-[^-]+$\"", "|", "cut -c 2-").returns("version-to-rollback\n").at_least_once
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info).with(:docker, :ps, "--filter", "label=service=app", "--filter", "label=role=workers", "--filter", "status=running", "--latest", "--format", "\"{{.Names}}\"", "|", "grep -oE \"\\-[^-]+$\"", "|", "cut -c 2-").returns("version-to-rollback\n").at_least_once
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-123$", "--quiet")
.returns("version-to-rollback\n").at_least_once
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :container, :ls, "--all", "--filter", "name=^app-workers-123$", "--quiet")
.returns("version-to-rollback\n").at_least_once
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :ps, "--filter", "label=service=app", "--filter", "label=role=web", "--filter", "status=running", "--latest", "--format", "\"{{.Names}}\"", "|", "grep -oE \"\\-[^-]+$\"", "|", "cut -c 2-")
.returns("version-to-rollback\n").at_least_once
SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info)
.with(:docker, :ps, "--filter", "label=service=app", "--filter", "label=role=workers", "--filter", "status=running", "--latest", "--format", "\"{{.Names}}\"", "|", "grep -oE \"\\-[^-]+$\"", "|", "cut -c 2-")
.returns("version-to-rollback\n").at_least_once
run_command("rollback", "123", config_file: "deploy_with_accessories").tap do |output|
assert_match "Start version 123", output

View File

@@ -10,13 +10,13 @@ class CliPruneTest < CliTestCase
test "images" do
run_command("images").tap do |output|
assert_match /docker image prune --all --force --filter label=service=app --filter until=168h on 1.1.1.\d/, output
assert_match /docker image prune --all --force --filter label=service=app --filter dangling=true on 1.1.1.\d/, output
end
end
test "containers" do
run_command("containers").tap do |output|
assert_match /docker container prune --force --filter label=service=app --filter until=72h on 1.1.1.\d/, output
assert_match /docker ps -q -a --filter label=service=app --filter status=created --filter status=exited --filter status=dead | tail -n +6 | while read container_id; do docker rm $container_id; done on 1.1.1.\d/, output
end
end

View File

@@ -2,9 +2,7 @@ require "test_helper"
class CommanderTest < ActiveSupport::TestCase
setup do
@mrsk = Mrsk::Commander.new.tap do |mrsk|
mrsk.configure config_file: Pathname.new(File.expand_path("fixtures/deploy_with_roles.yml", __dir__))
end
configure_with(:deploy_with_roles)
end
test "lazy configuration" do
@@ -55,4 +53,27 @@ class CommanderTest < ActiveSupport::TestCase
assert_equal [ "web" ], @mrsk.roles_on("1.1.1.1")
assert_equal [ "workers" ], @mrsk.roles_on("1.1.1.3")
end
test "default group strategy" do
assert_empty @mrsk.boot_strategy
end
test "specific limit group strategy" do
configure_with(:deploy_with_boot_strategy)
assert_equal({ in: :groups, limit: 3, wait: 2 }, @mrsk.boot_strategy)
end
test "percentage-based group strategy" do
configure_with(:deploy_with_precentage_boot_strategy)
assert_equal({ in: :groups, limit: 1, wait: 2 }, @mrsk.boot_strategy)
end
private
def configure_with(variant)
@mrsk = Mrsk::Commander.new.tap do |mrsk|
mrsk.configure config_file: Pathname.new(File.expand_path("fixtures/#{variant}.yml", __dir__))
end
end
end

View File

@@ -13,7 +13,7 @@ class CommandsAppTest < ActiveSupport::TestCase
test "run" do
assert_equal \
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
new_command.run.join(" ")
end
@@ -21,7 +21,7 @@ class CommandsAppTest < ActiveSupport::TestCase
@config[:volumes] = ["/local/path:/container/path" ]
assert_equal \
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --volume /local/path:/container/path --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --volume /local/path:/container/path --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
new_command.run.join(" ")
end
@@ -29,7 +29,7 @@ class CommandsAppTest < ActiveSupport::TestCase
@config[:healthcheck] = { "path" => "/healthz" }
assert_equal \
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"curl -f http://localhost:3000/healthz || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"curl -f http://localhost:3000/healthz || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
new_command.run.join(" ")
end
@@ -37,7 +37,7 @@ class CommandsAppTest < ActiveSupport::TestCase
@config[:healthcheck] = { "cmd" => "/bin/up" }
assert_equal \
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"/bin/up\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"/bin/up\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
new_command.run.join(" ")
end
@@ -45,7 +45,7 @@ class CommandsAppTest < ActiveSupport::TestCase
@config[:servers] = { "web" => { "hosts" => [ "1.1.1.1" ], "healthcheck" => { "cmd" => "/bin/healthy" } } }
assert_equal \
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"/bin/healthy\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"/bin/healthy\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
new_command.run.join(" ")
end
@@ -60,7 +60,7 @@ class CommandsAppTest < ActiveSupport::TestCase
@config[:logging] = { "driver" => "local", "options" => { "max-size" => "100m", "max-file" => "3" } }
assert_equal \
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-driver \"local\" --log-opt max-size=\"100m\" --log-opt max-file=\"3\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
"docker run --detach --restart unless-stopped --name app-web-999 -e MRSK_CONTAINER_NAME=\"app-web-999\" -e RAILS_MASTER_KEY=\"456\" --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-driver \"local\" --log-opt max-size=\"100m\" --log-opt max-file=\"3\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999",
new_command.run.join(" ")
end

View File

@@ -10,13 +10,13 @@ class CommandsPruneTest < ActiveSupport::TestCase
test "images" do
assert_equal \
"docker image prune --all --force --filter label=service=app --filter until=168h",
"docker image prune --all --force --filter label=service=app --filter dangling=true",
new_command.images.join(" ")
end
test "containers" do
assert_equal \
"docker container prune --force --filter label=service=app --filter until=72h",
"docker ps -q -a --filter label=service=app --filter status=created --filter status=exited --filter status=dead | tail -n +6 | while read container_id; do docker rm $container_id; done",
new_command.containers.join(" ")
end

View File

@@ -42,7 +42,7 @@ class ConfigurationRoleTest < ActiveSupport::TestCase
end
test "special label args for web" do
assert_equal [ "--label", "service=\"app\"", "--label", "role=\"web\"", "--label", "traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.middlewares.app-web-retry.retry.attempts=\"5\"", "--label", "traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\"", "--label", "traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\"" ], @config.role(:web).label_args
assert_equal [ "--label", "service=\"app\"", "--label", "role=\"web\"", "--label", "traefik.http.services.app-web.loadbalancer.server.scheme=\"http\"", "--label", "traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.middlewares.app-web-retry.retry.attempts=\"5\"", "--label", "traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\"", "--label", "traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\"" ], @config.role(:web).label_args
end
test "custom labels" do
@@ -66,7 +66,7 @@ class ConfigurationRoleTest < ActiveSupport::TestCase
c[:servers]["beta"] = { "traefik" => "true", "hosts" => [ "1.1.1.5" ] }
})
assert_equal [ "--label", "service=\"app\"", "--label", "role=\"beta\"", "--label", "traefik.http.routers.app-beta.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.middlewares.app-beta-retry.retry.attempts=\"5\"", "--label", "traefik.http.middlewares.app-beta-retry.retry.initialinterval=\"500ms\"", "--label", "traefik.http.routers.app-beta.middlewares=\"app-beta-retry@docker\"" ], config.role(:beta).label_args
assert_equal [ "--label", "service=\"app\"", "--label", "role=\"beta\"", "--label", "traefik.http.services.app-beta.loadbalancer.server.scheme=\"http\"", "--label", "traefik.http.routers.app-beta.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.middlewares.app-beta-retry.retry.attempts=\"5\"", "--label", "traefik.http.middlewares.app-beta-retry.retry.initialinterval=\"500ms\"", "--label", "traefik.http.routers.app-beta.middlewares=\"app-beta-retry@docker\"" ], config.role(:beta).label_args
end
test "env overwritten by role" do

View File

@@ -0,0 +1,17 @@
service: app
image: dhh/app
servers:
web:
- "1.1.1.1"
- "1.1.1.2"
workers:
- "1.1.1.3"
- "1.1.1.4"
registry:
username: user
password: pw
boot:
limit: 3
wait: 2

View File

@@ -0,0 +1,17 @@
service: app
image: dhh/app
servers:
web:
- "1.1.1.1"
- "1.1.1.2"
workers:
- "1.1.1.3"
- "1.1.1.4"
registry:
username: user
password: pw
boot:
limit: 25%
wait: 2

View File

@@ -51,7 +51,15 @@ class DeployTest < ActiveSupport::TestCase
end
def assert_app_is_up
assert_equal "200", app_response.code
code = app_response.code
if code != "200"
puts "Got response code #{code}, here are the traefik logs:"
mrsk :traefik, :logs
puts "And here are the load balancer logs"
docker_compose :logs, :load_balancer
puts "Tried to get the response code again and got #{app_response.code}"
end
assert_equal "200", code
end
def app_response
@@ -61,7 +69,10 @@ class DeployTest < ActiveSupport::TestCase
def wait_for_healthy(timeout: 20)
timeout_at = Time.now + timeout
while docker_compose("ps -a | tail -n +2 | grep -v '(healthy)' | wc -l", capture: true) != "0"
raise "Container not healthy after #{timeout} seconds" if timeout_at < Time.now
if timeout_at < Time.now
docker_compose("ps -a | tail -n +2 | grep -v '(healthy)'")
raise "Container not healthy after #{timeout} seconds" if timeout_at < Time.now
end
sleep 0.1
end
end

View File

@@ -2,7 +2,7 @@ FROM ruby:3.2
WORKDIR /app
RUN apt-get update && apt-get install -y ca-certificates openssh-client curl gnupg docker.io
RUN apt-get update --fix-missing && apt-get install -y ca-certificates openssh-client curl gnupg docker.io
RUN install -m 0755 -d /etc/apt/keyrings
RUN curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
@@ -12,7 +12,7 @@ RUN echo \
"$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \
tee /etc/apt/sources.list.d/docker.list > /dev/null
RUN apt-get update && apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
RUN apt-get update --fix-missing && apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
COPY boot.sh .
COPY app/ .

View File

@@ -10,5 +10,8 @@ registry:
builder:
multiarch: false
healthcheck:
path: /
port: 80
cmd: wget -qO- http://localhost > /dev/null
traefik:
args:
accesslog: true
accesslog.format: json

View File

@@ -2,7 +2,7 @@ FROM ubuntu:22.10
WORKDIR /work
RUN apt-get update && apt-get -y install openssh-client openssl
RUN apt-get update --fix-missing && apt-get -y install openssh-client openssl
RUN mkdir ssh && \
ssh-keygen -t rsa -f ssh/id_rsa -N ""

View File

@@ -2,7 +2,7 @@ FROM ubuntu:22.10
WORKDIR /work
RUN apt-get update && apt-get -y install openssh-client openssh-server docker.io
RUN apt-get update --fix-missing && apt-get -y install openssh-client openssh-server docker.io
RUN mkdir /root/.ssh && ln -s /shared/ssh/id_rsa.pub /root/.ssh/authorized_keys
RUN mkdir -p /etc/docker/certs.d/registry:4443 && ln -s /shared/certs/domain.crt /etc/docker/certs.d/registry:4443/ca.crt