Compare commits

..

14 Commits

Author SHA1 Message Date
David Heinemeier Hansson
fb86123db4 Bump version for 0.8.2 2023-02-23 12:28:29 +01:00
David Heinemeier Hansson
bc6963e6bf Note that rebooting may cause air gap 2023-02-23 12:16:58 +01:00
David Heinemeier Hansson
f4f2b5cb17 Communicate the readiness delay 2023-02-23 12:04:57 +01:00
David Heinemeier Hansson
817336df49 No readiness delay in testing 2023-02-23 12:03:03 +01:00
David Heinemeier Hansson
4c399a74bb Update to match latest 2023-02-23 12:02:56 +01:00
David Heinemeier Hansson
e12436a1db Extract readiness_delay to config 2023-02-23 12:02:49 +01:00
David Heinemeier Hansson
b244e919bf Merge branch 'main' into more-resilient-zero-downtime-deploy
* main:
  Add option to skip audit broadcasts (useful when testing)
2023-02-23 11:52:45 +01:00
David Heinemeier Hansson
7ad416f029 Add option to skip audit broadcasts (useful when testing) 2023-02-23 10:04:35 +01:00
David Heinemeier Hansson
371f98d67f Start before stopping and longer timeouts 2023-02-22 19:04:23 +01:00
David Heinemeier Hansson
b879412a6f Upgrade to beta! 2023-02-21 15:31:28 +01:00
David Heinemeier Hansson
e678775a18 Merge pull request #54 from intrip/print-logs-for-healthcheck-status-mistmatch
Print container logs when HealthCheck response_code != 200
2023-02-21 14:34:46 +01:00
Jacopo
689b81014b Print container logs when HealthCheck response_code != 200
The Healthcheck container is shut down right after performing the check, this
makes it harder to troubleshoot configuration issues in the healthcheck
endpoint, e.g DNS rebinding error. Printing the container logs helps the troubleshooting.
2023-02-21 11:48:29 +01:00
David Heinemeier Hansson
01a4eecf98 Bump version for 0.8.1 2023-02-20 18:21:05 +01:00
David Heinemeier Hansson
6f7422af44 Merge pull request #53 from pagbrl/fix-env-concatenation
fix(escape-cli-args): Always use quotes to escape CLI arguments
2023-02-20 18:20:28 +01:00
16 changed files with 74 additions and 28 deletions

View File

@@ -1,7 +1,7 @@
PATH PATH
remote: . remote: .
specs: specs:
mrsk (0.8.1) mrsk (0.8.2)
activesupport (>= 7.0) activesupport (>= 7.0)
dotenv (~> 2.8) dotenv (~> 2.8)
sshkit (~> 1.21) sshkit (~> 1.21)

View File

@@ -498,7 +498,7 @@ If you wish to remove the entire application, including Traefik, containers, ima
## Stage of development ## Stage of development
This is alpha software. Lots of stuff is missing. Lots of stuff will keep moving around for a while. This is beta software. Commands may still move around. But we're live in production at [37signals](https://37signals.com).
## License ## License

View File

@@ -13,7 +13,7 @@ class Mrsk::Cli::Accessory < Mrsk::Cli::Base
execute *accessory.run execute *accessory.run
end end
audit_broadcast "Booted accessory #{name}" audit_broadcast "Booted accessory #{name}" unless options[:skip_broadcast]
end end
end end
end end

View File

@@ -5,18 +5,27 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
using_version(options[:version] || most_recent_version_available) do |version| using_version(options[:version] || most_recent_version_available) do |version|
say "Start container with version #{version} (or reboot if already running)...", :magenta say "Start container with version #{version} (or reboot if already running)...", :magenta
cli = self
MRSK.config.roles.each do |role| MRSK.config.roles.each do |role|
on(role.hosts) do |host| on(role.hosts) do |host|
execute *MRSK.auditor.record("Booted app version #{version}"), verbosity: :debug execute *MRSK.auditor.record("Booted app version #{version}"), verbosity: :debug
begin begin
execute *MRSK.app.stop, raise_on_non_zero_exit: false old_version = capture_with_info(*MRSK.app.current_running_version).strip
execute *MRSK.app.run(role: role.name) execute *MRSK.app.run(role: role.name)
cli.say "Waiting #{MRSK.config.readiness_delay}s for app to boot...", :magenta
sleep MRSK.config.readiness_delay
execute *MRSK.app.stop(version: old_version), raise_on_non_zero_exit: false if old_version.present?
rescue SSHKit::Command::Failed => e rescue SSHKit::Command::Failed => e
if e.message =~ /already in use/ if e.message =~ /already in use/
error "Rebooting container with same version already deployed on #{host}" error "Rebooting container with same version #{version} already deployed on #{host} (may cause gap in zero-downtime promise!)"
execute *MRSK.auditor.record("Rebooted app version #{version}"), verbosity: :debug execute *MRSK.auditor.record("Rebooted app version #{version}"), verbosity: :debug
execute *MRSK.app.stop(version: version)
execute *MRSK.app.remove_container(version: version) execute *MRSK.app.remove_container(version: version)
execute *MRSK.app.run(role: role.name) execute *MRSK.app.run(role: role.name)
else else

View File

@@ -20,6 +20,8 @@ module Mrsk::Cli
class_option :config_file, aliases: "-c", default: "config/deploy.yml", desc: "Path to config file" class_option :config_file, aliases: "-c", default: "config/deploy.yml", desc: "Path to config file"
class_option :destination, aliases: "-d", desc: "Specify destination to be used for config file (staging -> deploy.staging.yml)" class_option :destination, aliases: "-d", desc: "Specify destination to be used for config file (staging -> deploy.staging.yml)"
class_option :skip_broadcast, aliases: "-B", type: :boolean, default: false, desc: "Skip audit broadcasts"
def initialize(*) def initialize(*)
super super
load_envs load_envs

View File

@@ -1,5 +1,7 @@
class Mrsk::Cli::Healthcheck < Mrsk::Cli::Base class Mrsk::Cli::Healthcheck < Mrsk::Cli::Base
MAX_ATTEMPTS = 5 MAX_ATTEMPTS = 7
class HealthcheckError < StandardError; end
default_command :perform default_command :perform
@@ -18,7 +20,7 @@ class Mrsk::Cli::Healthcheck < Mrsk::Cli::Base
if status == "200" if status == "200"
info "#{target} succeeded with 200 OK!" info "#{target} succeeded with 200 OK!"
else else
raise "#{target} failed with status #{status}" raise HealthcheckError, "#{target} failed with status #{status}"
end end
rescue SSHKit::Command::Failed rescue SSHKit::Command::Failed
if attempt <= MAX_ATTEMPTS if attempt <= MAX_ATTEMPTS
@@ -31,7 +33,7 @@ class Mrsk::Cli::Healthcheck < Mrsk::Cli::Base
raise raise
end end
end end
rescue SSHKit::Command::Failed => e rescue SSHKit::Command::Failed, HealthcheckError => e
error capture_with_info(*MRSK.healthcheck.logs) error capture_with_info(*MRSK.healthcheck.logs)
if e.message =~ /curl/ if e.message =~ /curl/

View File

@@ -32,7 +32,7 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
invoke "mrsk:cli:prune:all" invoke "mrsk:cli:prune:all"
end end
audit_broadcast "Deployed app in #{runtime.to_i} seconds" audit_broadcast "Deployed app in #{runtime.to_i} seconds" unless options[:skip_broadcast]
end end
desc "redeploy", "Deploy app to servers without bootstrapping servers, starting Traefik, pruning, and registry login" desc "redeploy", "Deploy app to servers without bootstrapping servers, starting Traefik, pruning, and registry login"
@@ -47,7 +47,7 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
invoke "mrsk:cli:app:boot" invoke "mrsk:cli:app:boot"
end end
audit_broadcast "Redeployed app in #{runtime.to_i} seconds" audit_broadcast "Redeployed app in #{runtime.to_i} seconds" unless options[:skip_broadcast]
end end
desc "rollback [VERSION]", "Rollback app to VERSION" desc "rollback [VERSION]", "Rollback app to VERSION"
@@ -55,14 +55,22 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
MRSK.version = version MRSK.version = version
if container_name_available?(MRSK.config.service_with_version) if container_name_available?(MRSK.config.service_with_version)
say "Stop current version, then start version #{version}...", :magenta say "Start version #{version}, then stop the old version...", :magenta
cli = self
on(MRSK.hosts) do |host| on(MRSK.hosts) do |host|
execute *MRSK.app.stop, raise_on_non_zero_exit: false old_version = capture_with_info(*MRSK.app.current_running_version).strip.presence
execute *MRSK.app.start execute *MRSK.app.start
cli.say "Waiting #{MRSK.config.readiness_delay}s for app to start...", :magenta
sleep MRSK.config.readiness_delay
execute *MRSK.app.stop(version: old_version), raise_on_non_zero_exit: false
end end
audit_broadcast "Rolled back app to version #{version}" audit_broadcast "Rolled back app to version #{version}" unless options[:skip_broadcast]
else else
say "The app version '#{version}' is not available as a container (use 'mrsk app containers' for available versions)", :red say "The app version '#{version}' is not available as a container (use 'mrsk app containers' for available versions)", :red
end end

View File

@@ -18,8 +18,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
docker :start, service_with_version docker :start, service_with_version
end end
def stop def stop(version: nil)
pipe current_container_id, xargs(docker(:stop)) pipe \
version ? container_id_for_version(version) : current_container_id,
xargs(docker(:stop))
end end
def info def info
@@ -132,6 +134,10 @@ class Mrsk::Commands::App < Mrsk::Commands::Base
end end
end end
def container_id_for_version(version)
container_id_for(container_name: service_with_version(version))
end
def service_filter def service_filter
[ "--filter", "label=service=#{config.service}" ] [ "--filter", "label=service=#{config.service}" ]
end end

View File

@@ -136,6 +136,9 @@ class Mrsk::Configuration
{ "path" => "/up", "port" => 3000 }.merge(raw_config.healthcheck || {}) { "path" => "/up", "port" => 3000 }.merge(raw_config.healthcheck || {})
end end
def readiness_delay
raw_config.readiness_delay || 7
end
def valid? def valid?
ensure_required_keys_present && ensure_env_available ensure_required_keys_present && ensure_env_available

View File

@@ -61,7 +61,7 @@ class Mrsk::Configuration::Role
"traefik.http.routers.#{config.service}.rule" => "PathPrefix(`/`)", "traefik.http.routers.#{config.service}.rule" => "PathPrefix(`/`)",
"traefik.http.services.#{config.service}.loadbalancer.healthcheck.path" => config.healthcheck["path"], "traefik.http.services.#{config.service}.loadbalancer.healthcheck.path" => config.healthcheck["path"],
"traefik.http.services.#{config.service}.loadbalancer.healthcheck.interval" => "1s", "traefik.http.services.#{config.service}.loadbalancer.healthcheck.interval" => "1s",
"traefik.http.middlewares.#{config.service}.retry.attempts" => "3", "traefik.http.middlewares.#{config.service}.retry.attempts" => "5",
"traefik.http.middlewares.#{config.service}.retry.initialinterval" => "500ms" "traefik.http.middlewares.#{config.service}.retry.initialinterval" => "500ms"
} }
else else

View File

@@ -1,3 +1,3 @@
module Mrsk module Mrsk
VERSION = "0.8.1" VERSION = "0.8.2"
end end

View File

@@ -2,7 +2,16 @@ require_relative "cli_test_case"
class CliAppTest < CliTestCase class CliAppTest < CliTestCase
test "boot" do test "boot" do
assert_match /Running docker run --detach --restart unless-stopped/, run_command("boot") # Stub current version fetch
SSHKit::Backend::Abstract.any_instance.stubs(:capture)
.returns("999") # new version
.then
.returns("123") # old version
run_command("boot").tap do |output|
assert_match /docker run --detach --restart unless-stopped/, output
assert_match /docker container ls --all --filter name=app-123 --quiet | xargs docker stop/, output
end
end end
test "boot will reboot if same version is already running" do test "boot will reboot if same version is already running" do
@@ -11,12 +20,17 @@ class CliAppTest < CliTestCase
# Prevent expected failures from outputting to terminal # Prevent expected failures from outputting to terminal
Thread.report_on_exception = false Thread.report_on_exception = false
MRSK.app.stubs(:run).raises(SSHKit::Command::Failed.new("already in use")).then.returns([ :docker, :run ]) MRSK.app.stubs(:run)
.raises(SSHKit::Command::Failed.new("already in use"))
.then
.raises(SSHKit::Command::Failed.new("already in use"))
.then
.returns([ :docker, :run ])
run_command("boot").tap do |output| run_command("boot").tap do |output|
assert_match /Rebooting container with same version already deployed/, output # Can't start what's already running assert_match /Rebooting container with same version 999 already deployed/, output # Can't start what's already running
assert_match /docker ps --quiet --filter label=service=app \| xargs docker stop/, output # Stop what's running assert_match /docker container ls --all --filter name=app-999 --quiet | xargs docker container rm/, output # Stop old running
assert_match /docker container ls --all --filter name=app-999 --quiet \| xargs docker container rm/, output # Remove old container assert_match /docker container ls --all --filter name=app-999 --quiet | xargs docker container rm/, output # Remove old container
assert_match /docker run/, output # Start new container assert_match /docker run/, output # Start new container
end end
ensure ensure

View File

@@ -19,7 +19,7 @@ class CliMainTest < CliTestCase
Mrsk::Cli::Main.any_instance.stubs(:container_name_available?).returns(true) Mrsk::Cli::Main.any_instance.stubs(:container_name_available?).returns(true)
run_command("rollback", "123").tap do |output| run_command("rollback", "123").tap do |output|
assert_match /Stop current version, then start version 123/, output assert_match /Start version 123, then stop the old version/, output
assert_match /docker ps -q --filter label=service=app | xargs docker stop/, output assert_match /docker ps -q --filter label=service=app | xargs docker stop/, output
assert_match /docker start app-123/, output assert_match /docker start app-123/, output
end end

View File

@@ -14,7 +14,7 @@ class CommandsAppTest < ActiveSupport::TestCase
test "run" do test "run" do
assert_equal \ assert_equal \
"docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"3\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999", "docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"5\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999",
@app.run.join(" ") @app.run.join(" ")
end end
@@ -22,7 +22,7 @@ class CommandsAppTest < ActiveSupport::TestCase
@config[:volumes] = ["/local/path:/container/path" ] @config[:volumes] = ["/local/path:/container/path" ]
assert_equal \ assert_equal \
"docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --volume /local/path:/container/path --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"3\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999", "docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --volume /local/path:/container/path --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"5\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999",
@app.run.join(" ") @app.run.join(" ")
end end
@@ -30,7 +30,7 @@ class CommandsAppTest < ActiveSupport::TestCase
@config[:healthcheck] = { "path" => "/healthz" } @config[:healthcheck] = { "path" => "/healthz" }
assert_equal \ assert_equal \
"docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/healthz\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"3\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999", "docker run --detach --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=\"456\" --label service=\"app\" --label role=\"web\" --label traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.services.app.loadbalancer.healthcheck.path=\"/healthz\" --label traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\" --label traefik.http.middlewares.app.retry.attempts=\"5\" --label traefik.http.middlewares.app.retry.initialinterval=\"500ms\" dhh/app:999",
@app.run.join(" ") @app.run.join(" ")
end end

View File

@@ -42,7 +42,7 @@ class ConfigurationRoleTest < ActiveSupport::TestCase
end end
test "special label args for web" do test "special label args for web" do
assert_equal [ "--label", "service=\"app\"", "--label", "role=\"web\"", "--label", "traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\"", "--label", "traefik.http.middlewares.app.retry.attempts=\"3\"", "--label", "traefik.http.middlewares.app.retry.initialinterval=\"500ms\""], @config.role(:web).label_args assert_equal [ "--label", "service=\"app\"", "--label", "role=\"web\"", "--label", "traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\"", "--label", "traefik.http.middlewares.app.retry.attempts=\"5\"", "--label", "traefik.http.middlewares.app.retry.initialinterval=\"500ms\""], @config.role(:web).label_args
end end
test "custom labels" do test "custom labels" do
@@ -66,7 +66,7 @@ class ConfigurationRoleTest < ActiveSupport::TestCase
c[:servers]["beta"] = { "traefik" => "true", "hosts" => [ "1.1.1.5" ] } c[:servers]["beta"] = { "traefik" => "true", "hosts" => [ "1.1.1.5" ] }
}) })
assert_equal [ "--label", "service=\"app\"", "--label", "role=\"beta\"", "--label", "traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\"", "--label", "traefik.http.middlewares.app.retry.attempts=\"3\"", "--label", "traefik.http.middlewares.app.retry.initialinterval=\"500ms\"" ], config.role(:beta).label_args assert_equal [ "--label", "service=\"app\"", "--label", "role=\"beta\"", "--label", "traefik.http.routers.app.rule=\"PathPrefix(\\`/\\`)\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.path=\"/up\"", "--label", "traefik.http.services.app.loadbalancer.healthcheck.interval=\"1s\"", "--label", "traefik.http.middlewares.app.retry.attempts=\"5\"", "--label", "traefik.http.middlewares.app.retry.initialinterval=\"500ms\"" ], config.role(:beta).label_args
end end
test "env overwritten by role" do test "env overwritten by role" do

View File

@@ -27,3 +27,5 @@ accessories:
port: 6379 port: 6379
directories: directories:
- data:/data - data:/data
readiness_delay: 0