Merge branch 'main' into pr/205

* main:
  Simplify domain language to just "boot" and unscoped config keys
  Retain a fixed number of containers when pruning
  Don't assume rolling back in message
  Check all hosts before rolling back
  Ensure Traefik service name is consistent
  Extend traefik delay by 1 second
  Include traefik access logs
  Check if we are still getting a 404
  Also dump load balancer logs
  Dump traefik logs when app not booted
  Fix missing for apt-get
  Report on container health after failure
  Fix the integration test healthcheck
  Allow percentage-based rolling deployments
  Move `group_limit` & `group_wait` under `boot`
  Limit rolling deployment to boot operation
  Allow performing boot & start operations in groups
This commit is contained in:
David Heinemeier Hansson
2023-05-02 14:29:06 +02:00
24 changed files with 204 additions and 44 deletions

View File

@@ -11,7 +11,7 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
execute *MRSK.app.tag_current_as_latest
end
on(MRSK.hosts) do |host|
on(MRSK.hosts, **MRSK.boot_strategy) do |host|
roles = MRSK.roles_on(host)
roles.each do |role|

View File

@@ -233,15 +233,24 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
subcommand "lock", Mrsk::Cli::Lock
private
def container_available?(version, host: MRSK.primary_host)
available = nil
on(host) do
first_role = MRSK.roles_on(host).first
available = capture_with_info(*MRSK.app(role: first_role).container_id_for_version(version)).present?
def container_available?(version)
begin
on(MRSK.hosts) do
MRSK.roles_on(host).each do |role|
container_id = capture_with_info(*MRSK.app(role: role).container_id_for_version(version))
raise "Container not found" unless container_id.present?
end
end
rescue SSHKit::Runner::ExecuteError => e
if e.message =~ /Container not found/
say "Error looking for container version #{version}: #{e.message}"
return false
else
raise
end
end
available
true
end
def deploy_options

View File

@@ -7,7 +7,7 @@ class Mrsk::Cli::Prune < Mrsk::Cli::Base
end
end
desc "images", "Prune unused images older than 7 days"
desc "images", "Prune dangling images"
def images
with_lock do
on(MRSK.hosts) do
@@ -17,7 +17,7 @@ class Mrsk::Cli::Prune < Mrsk::Cli::Base
end
end
desc "containers", "Prune stopped containers older than 3 days"
desc "containers", "Prune all stopped containers, except the last 5"
def containers
with_lock do
on(MRSK.hosts) do

View File

@@ -51,6 +51,14 @@ class Mrsk::Commander
end
end
# Options splatted into `on(...)` when booting: run hosts in groups when a
# boot limit is configured, otherwise pass no extra options.
def boot_strategy
  boot = config.boot
  group_size = boot.limit
  return {} unless group_size.present?

  { in: :groups, limit: group_size, wait: boot.wait }
end
# Names of every role whose host list includes the given host
# (the host is compared by its string form).
def roles_on(host)
  hostname = host.to_s
  matching_roles = roles.select { |candidate| candidate.hosts.include?(hostname) }
  matching_roles.map(&:name)
end

View File

@@ -2,11 +2,19 @@ require "active_support/duration"
require "active_support/core_ext/numeric/time"
class Mrsk::Commands::Prune < Mrsk::Commands::Base
def images(until_hours: 7.days.in_hours.to_i)
docker :image, :prune, "--all", "--force", "--filter", "label=service=#{config.service}", "--filter", "until=#{until_hours}h"
# Builds the `docker image prune` command restricted to this service's label
# and to dangling images.
def images
  service_filter = "label=service=#{config.service}"
  docker :image, :prune, "--all", "--force", "--filter", service_filter, "--filter", "dangling=true"
end
def containers(until_hours: 3.days.in_hours.to_i)
docker :container, :prune, "--force", "--filter", "label=service=#{config.service}", "--filter", "until=#{until_hours}h"
# Builds a shell pipeline that lists this service's stopped containers,
# skips the first `keep_last` lines of that listing, and removes the rest.
#
# keep_last - number of leading entries of the listing to leave in place (default 5).
def containers(keep_last: 5)
  list_stopped = docker(:ps, "-q", "-a", "--filter", "label=service=#{config.service}", *stopped_containers_filters)
  skip_retained = "tail -n +#{keep_last + 1}"
  remove_each = "while read container_id; do docker rm $container_id; done"

  pipe list_stopped, skip_retained, remove_each
end
private
# Flat list of `--filter status=...` arguments, one pair for each of the
# created, exited and dead container statuses.
def stopped_containers_filters
  %w[ created exited dead ].flat_map do |state|
    [ "--filter", "status=#{state}" ]
  end
end
end

View File

@@ -87,6 +87,10 @@ class Mrsk::Configuration
roles.select(&:running_traefik?).flat_map(&:hosts).uniq
end
# Returns a Mrsk::Configuration::Boot built from this configuration.
# A new object is constructed on every call (no memoization here).
def boot
Mrsk::Configuration::Boot.new(config: self)
end
def repository
[ raw_config.registry["server"], image ].compact.join("/")

View File

@@ -0,0 +1,20 @@
# Settings from the `boot` section of the raw configuration that control
# how many hosts are handled at once and the wait value passed with them.
class Mrsk::Configuration::Boot
  # config - object responding to `raw_config` (whose `boot` entry may be
  #          absent, in which case no options are set) and `all_hosts`.
  def initialize(config:)
    @options = config.raw_config.boot || {}
    @host_count = config.all_hosts.count
  end

  # The configured group size.
  #
  # A value ending in "%" is taken as a percentage of the total host count,
  # rounded down but never below 1. Any other value (including nil when no
  # limit is configured) is returned unchanged.
  def limit
    limit = @options["limit"]

    if limit.to_s.end_with?("%")
      # Integer division floors, so a small fleet (e.g. 25% of 2 hosts) would
      # otherwise yield 0, which is still "present" and not a usable group size.
      [ @host_count * limit.to_i / 100, 1 ].max
    else
      limit
    end
  end

  # The configured `wait` value, or nil when it is not set.
  def wait
    @options["wait"]
  end
end

View File

@@ -89,6 +89,9 @@ class Mrsk::Configuration::Role
def traefik_labels
if running_traefik?
{
# Setting a service property ensures that the generated service name will be consistent between versions
"traefik.http.services.#{traefik_service}.loadbalancer.server.scheme" => "http",
"traefik.http.routers.#{traefik_service}.rule" => "PathPrefix(`/`)",
"traefik.http.middlewares.#{traefik_service}-retry.retry.attempts" => "5",
"traefik.http.middlewares.#{traefik_service}-retry.retry.initialinterval" => "500ms",

View File

@@ -1,5 +1,5 @@
class Mrsk::Utils::HealthcheckPoller
TRAEFIK_HEALTHY_DELAY = 1
TRAEFIK_HEALTHY_DELAY = 2
class HealthcheckError < StandardError; end