Deploy locks

Add a deploy lock for commands that are unsafe to run concurrently.

The lock is taken by creating a `mrsk_lock` directory on the primary
host. Details of who took the lock are added to a details file in that
directory.

Additional CLI commands have been added to manually release and acquire
the lock and to check its status.

```
Commands:
  mrsk lock acquire -m, --message=MESSAGE  # Acquire the deploy lock
  mrsk lock help [COMMAND]                 # Describe subcommands or one specific subcommand
  mrsk lock release                        # Release the deploy lock
  mrsk lock status                         # Report lock status

Options:
  -v, [--verbose], [--no-verbose]                # Detailed logging
  -q, [--quiet], [--no-quiet]                    # Minimal logging
      [--version=VERSION]                        # Run commands against a specific app version
  -p, [--primary], [--no-primary]                # Run commands only on primary host instead of all
  -h, [--hosts=HOSTS]                            # Run commands on these hosts instead of all (separate by comma)
  -r, [--roles=ROLES]                            # Run commands on these roles instead of all (separate by comma)
  -c, [--config-file=CONFIG_FILE]                # Path to config file
                                                 # Default: config/deploy.yml
  -d, [--destination=DESTINATION]                # Specify destination to be used for config file (staging -> deploy.staging.yml)
  -B, [--skip-broadcast], [--no-skip-broadcast]  # Skip audit broadcasts
```

If we add support for running multiple deployments on a single server
we'll need to extend the locking to lock per deployment.
This commit is contained in:
Donal McBreen
2023-03-23 16:58:49 +00:00
parent 17e74910e4
commit 8d8f9f6ada
18 changed files with 516 additions and 219 deletions

View File

@@ -692,6 +692,30 @@ Note that by default old containers are pruned after 3 days when you run `mrsk d
If you wish to remove the entire application, including Traefik, containers, images, and registry session, you can run `mrsk remove`. This will leave the servers clean.
## Locking
Commands that are unsafe to run concurrently will take a deploy lock while they run. The lock is the `mrsk_lock` directory on the primary server.
You can check the lock status with:
```
mrsk lock status
Locked by: AN Other at 2023-03-24 09:49:03 UTC
Version: 77f45c0686811c68989d6576748475a60bf53fc2
Message: Automatic deploy lock
```
You can also manually acquire and release the lock:
```
mrsk lock acquire -m "Doing maintenance"
```
```
mrsk lock release
```
## Stage of development
This is beta software. Commands may still move around. But we're live in production at [37signals](https://37signals.com).

View File

@@ -1,6 +1,7 @@
class Mrsk::Cli::Accessory < Mrsk::Cli::Base
desc "boot [NAME]", "Boot new accessory service on host (use NAME=all to boot all accessories)"
def boot(name)
with_lock do
if name == "all"
MRSK.accessory_names.each { |accessory_name| boot(accessory_name) }
else
@@ -18,9 +19,11 @@ class Mrsk::Cli::Accessory < Mrsk::Cli::Base
end
end
end
end
desc "upload [NAME]", "Upload accessory files to host", hide: true
def upload(name)
with_lock do
with_accessory(name) do |accessory|
on(accessory.host) do
accessory.files.each do |(local, remote)|
@@ -33,9 +36,11 @@ class Mrsk::Cli::Accessory < Mrsk::Cli::Base
end
end
end
end
desc "directories [NAME]", "Create accessory directories on host", hide: true
def directories(name)
with_lock do
with_accessory(name) do |accessory|
on(accessory.host) do
accessory.directories.keys.each do |host_path|
@@ -44,18 +49,22 @@ class Mrsk::Cli::Accessory < Mrsk::Cli::Base
end
end
end
end
# Reboot an accessory under the deploy lock: stop it, remove its container, then boot it again.
desc "reboot [NAME]", "Reboot existing accessory on host (stop container, remove container, start new container)"
def reboot(name)
  with_lock do
    # The yielded accessory is not needed here; each step looks it up again by name.
    with_accessory(name) do
      stop(name)
      remove_container(name)
      boot(name)
    end
  end
end
desc "start [NAME]", "Start existing accessory container on host"
def start(name)
with_lock do
with_accessory(name) do |accessory|
on(accessory.host) do
execute *MRSK.auditor.record("Started #{name} accessory"), verbosity: :debug
@@ -63,9 +72,11 @@ class Mrsk::Cli::Accessory < Mrsk::Cli::Base
end
end
end
end
desc "stop [NAME]", "Stop existing accessory container on host"
def stop(name)
with_lock do
with_accessory(name) do |accessory|
on(accessory.host) do
execute *MRSK.auditor.record("Stopped #{name} accessory"), verbosity: :debug
@@ -73,14 +84,17 @@ class Mrsk::Cli::Accessory < Mrsk::Cli::Base
end
end
end
end
# Restart an accessory under the deploy lock by stopping and then starting it by name.
desc "restart [NAME]", "Restart existing accessory container on host"
def restart(name)
  with_lock do
    with_accessory(name) do
      stop(name)
      start(name)
    end
  end
end
desc "details [NAME]", "Show details about accessory on host (use NAME=all to show all accessories)"
def details(name)

View File

@@ -1,6 +1,7 @@
class Mrsk::Cli::App < Mrsk::Cli::Base
desc "boot", "Boot app on servers (or reboot app if already running)"
def boot
with_lock do
say "Get most recent version available as an image...", :magenta unless options[:version]
using_version(version_or_latest) do |version|
say "Start container with version #{version} using a #{MRSK.config.readiness_delay}s readiness delay (or reboot if already running)...", :magenta
@@ -33,22 +34,27 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
end
end
end
end
# Start the existing app container on every host in MRSK.hosts while holding the deploy lock.
desc "start", "Start existing app container on servers"
def start
  with_lock do
    on(MRSK.hosts) do
      # The audit record is executed at debug verbosity.
      execute(*MRSK.auditor.record("Started app version #{MRSK.config.version}"), verbosity: :debug)
      # raise_on_non_zero_exit: false — a failing start command is tolerated.
      execute(*MRSK.app.start, raise_on_non_zero_exit: false)
    end
  end
end
# Stop the app container on every host in MRSK.hosts while holding the deploy lock.
desc "stop", "Stop app container on servers"
def stop
  with_lock do
    on(MRSK.hosts) do
      # The audit record is executed at debug verbosity.
      execute(*MRSK.auditor.record("Stopped app"), verbosity: :debug)
      # raise_on_non_zero_exit: false — a failing stop command is tolerated.
      execute(*MRSK.app.stop, raise_on_non_zero_exit: false)
    end
  end
end
# FIXME: Drop in favor of just containers?
desc "details", "Show details about app containers"
@@ -140,34 +146,42 @@ class Mrsk::Cli::App < Mrsk::Cli::Base
# Remove the app under a single deploy lock: stop it, then remove its containers and images.
desc "remove", "Remove app containers and images from servers"
def remove
  with_lock do
    stop
    remove_containers
    remove_images
  end
end
# Remove the app container for the given version from every host while holding the deploy lock.
desc "remove_container [VERSION]", "Remove app container with given version from servers", hide: true
def remove_container(version)
  with_lock do
    on(MRSK.hosts) do
      # The audit record is executed at debug verbosity.
      execute(*MRSK.auditor.record("Removed app container with version #{version}"), verbosity: :debug)
      execute(*MRSK.app.remove_container(version: version))
    end
  end
end
# Remove all app containers from every host while holding the deploy lock.
desc "remove_containers", "Remove all app containers from servers", hide: true
def remove_containers
  with_lock do
    on(MRSK.hosts) do
      # The audit record is executed at debug verbosity.
      execute(*MRSK.auditor.record("Removed all app containers"), verbosity: :debug)
      execute(*MRSK.app.remove_containers)
    end
  end
end
# Remove all app images from every host while holding the deploy lock.
desc "remove_images", "Remove all app images from servers", hide: true
def remove_images
  with_lock do
    on(MRSK.hosts) do
      # The audit record is executed at debug verbosity.
      execute(*MRSK.auditor.record("Removed all app images"), verbosity: :debug)
      execute(*MRSK.app.remove_images)
    end
  end
end
desc "version", "Show app version currently running on servers"
def version

View File

@@ -6,6 +6,8 @@ module Mrsk::Cli
class Base < Thor
include SSHKit::DSL
class LockError < StandardError; end
def self.exit_on_failure?() true end
class_option :verbose, type: :boolean, aliases: "-v", desc: "Detailed logging"
@@ -71,5 +73,35 @@ module Mrsk::Cli
def audit_broadcast(line)
run_locally { execute *MRSK.auditor.broadcast(line), verbosity: :debug }
end
# Run the given block while holding the deploy lock and return the block's value.
#
# The lock is acquired before the protected section starts, so release_lock only
# runs when acquire_lock succeeded. If acquisition raises (for example because
# another deploy holds the lock) the error propagates and the lock counter is
# left untouched.
def with_lock
  acquire_lock

  begin
    yield
  ensure
    # Always pair a successful acquire with a release, even when the block raises.
    release_lock
  end
end
# Take the deploy lock on the primary host, or join a lock this process already holds.
#
# MRSK.lock_count tracks nesting: the remote lock command only runs when the
# count is zero, and every successful call increments the count.
#
# Raises LockError when the remote command fails with "cannot create directory"
# (the lock directory already exists); the current lock status is printed first.
# Any other SSHKit::Runner::ExecuteError is re-raised unchanged so that unrelated
# failures are not reported as a held lock.
def acquire_lock
  if MRSK.lock_count == 0
    say "Acquiring the deploy lock"
    on(MRSK.primary_host) { execute(*MRSK.lock.acquire("Automatic deploy lock", MRSK.config.version)) }
  end

  MRSK.lock_count += 1
rescue SSHKit::Runner::ExecuteError => e
  raise unless e.message.match?(/cannot create directory/)

  invoke "mrsk:cli:lock:status", []
  raise LockError, "Deploy lock found"
end
# Drop one level of lock nesting; when the outermost holder is done, remove the
# lock on the primary host.
def release_lock
  MRSK.lock_count -= 1
  return unless MRSK.lock_count == 0

  say "Releasing the deploy lock"
  on(MRSK.primary_host) { execute(*MRSK.lock.release) }
end
end
end

View File

@@ -1,12 +1,15 @@
class Mrsk::Cli::Build < Mrsk::Cli::Base
# Push and then pull under a single deploy lock.
desc "deliver", "Build app and push app image to registry then pull image on servers"
def deliver
  with_lock do
    push
    pull
  end
end
desc "push", "Build and push app image to registry"
def push
with_lock do
cli = self
run_locally do
@@ -25,18 +28,22 @@ class Mrsk::Cli::Build < Mrsk::Cli::Base
end
end
end
end
# Pull the app image onto every host in MRSK.hosts while holding the deploy lock.
desc "pull", "Pull app image from registry onto servers"
def pull
  with_lock do
    on(MRSK.hosts) do
      # The audit record is executed at debug verbosity.
      execute(*MRSK.auditor.record("Pulled image with version #{MRSK.config.version}"), verbosity: :debug)
      # raise_on_non_zero_exit: false — a failing clean step does not abort the pull.
      execute(*MRSK.builder.clean, raise_on_non_zero_exit: false)
      execute(*MRSK.builder.pull)
    end
  end
end
desc "create", "Create a build setup"
def create
with_lock do
run_locally do
begin
debug "Using builder: #{MRSK.builder.name}"
@@ -51,14 +58,17 @@ class Mrsk::Cli::Build < Mrsk::Cli::Base
end
end
end
end
# Run the builder's remove command locally while holding the deploy lock.
desc "remove", "Remove build setup"
def remove
  with_lock do
    run_locally do
      debug "Using builder: #{MRSK.builder.name}"
      execute(*MRSK.builder.remove)
    end
  end
end
desc "details", "Show build setup"
def details

37
lib/mrsk/cli/lock.rb Normal file
View File

@@ -0,0 +1,37 @@
# CLI subcommands for inspecting and manually managing the deploy lock on the
# primary host.
class Mrsk::Cli::Lock < Mrsk::Cli::Base
  desc "status", "Report lock status"
  # Print the output of the lock status command run on the primary host.
  def status
    handle_missing_lock do
      on(MRSK.primary_host) { puts capture_with_info(*MRSK.lock.status) }
    end
  end

  desc "acquire", "Acquire the deploy lock"
  option :message, aliases: "-m", type: :string, desc: "A lock message", required: true
  # Take the deploy lock manually, recording the given message and the configured version.
  def acquire
    # Read the option into a local before entering the `on` block, which uses it.
    message = options[:message]

    handle_missing_lock do
      on(MRSK.primary_host) { execute(*MRSK.lock.acquire(message, MRSK.config.version)) }
      say "Set the deploy lock"
    end
  end

  desc "release", "Release the deploy lock"
  # Remove the deploy lock manually.
  def release
    handle_missing_lock do
      on(MRSK.primary_host) { execute(*MRSK.lock.release) }
      say "Removed the deploy lock"
    end
  end

  private
    # Run the block and report "There is no deploy lock" when the remote command
    # fails with "No such file or directory". Any other
    # SSHKit::Runner::ExecuteError is re-raised.
    def handle_missing_lock
      yield
    rescue SSHKit::Runner::ExecuteError => e
      raise unless e.message.match?(/No such file or directory/)

      say "There is no deploy lock"
    end
end

View File

@@ -1,16 +1,19 @@
class Mrsk::Cli::Main < Mrsk::Cli::Base
# Bootstrap servers, boot all accessories and deploy, all under one deploy lock
# and wrapped in print_runtime.
desc "setup", "Setup all accessories and deploy app to servers"
def setup
  with_lock do
    print_runtime do
      invoke("mrsk:cli:server:bootstrap")
      invoke("mrsk:cli:accessory:boot", [ "all" ])
      deploy
    end
  end
end
desc "deploy", "Deploy app to servers"
option :skip_push, aliases: "-P", type: :boolean, default: false, desc: "Skip image build and push"
def deploy
with_lock do
invoke_options = deploy_options
runtime = print_runtime do
@@ -42,10 +45,12 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
audit_broadcast "Deployed #{service_version} in #{runtime.round} seconds" unless options[:skip_broadcast]
end
end
desc "redeploy", "Deploy app to servers without bootstrapping servers, starting Traefik, pruning, and registry login"
option :skip_push, aliases: "-P", type: :boolean, default: false, desc: "Skip image build and push"
def redeploy
with_lock do
invoke_options = deploy_options
runtime = print_runtime do
@@ -65,9 +70,11 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
audit_broadcast "Redeployed #{service_version} in #{runtime.round} seconds" unless options[:skip_broadcast]
end
end
desc "rollback [VERSION]", "Rollback app to VERSION"
def rollback(version)
with_lock do
MRSK.config.version = version
if container_name_available?(MRSK.config.service_with_version)
@@ -93,6 +100,7 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
say "The app version '#{version}' is not available as a container (use 'mrsk app containers' for available versions)", :red
end
end
end
desc "details", "Show details about all containers"
def details
@@ -163,6 +171,7 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
desc "remove", "Remove Traefik, app, accessories, and registry session from servers"
option :confirmed, aliases: "-y", type: :boolean, default: false, desc: "Proceed without confirmation question"
def remove
with_lock do
if options[:confirmed] || ask("This will remove all containers and images. Are you sure?", limited_to: %w( y N ), default: "N") == "y"
invoke "mrsk:cli:traefik:remove", [], options.without(:confirmed)
invoke "mrsk:cli:app:remove", [], options.without(:confirmed)
@@ -170,6 +179,7 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
invoke "mrsk:cli:registry:logout", [], options.without(:confirmed)
end
end
end
desc "version", "Show MRSK version"
def version
@@ -200,6 +210,9 @@ class Mrsk::Cli::Main < Mrsk::Cli::Base
desc "traefik", "Manage Traefik load balancer"
subcommand "traefik", Mrsk::Cli::Traefik
desc "lock", "Manage the deploy lock"
subcommand "lock", Mrsk::Cli::Lock
private
def container_name_available?(container_name, host: MRSK.primary_host)
container_names = nil

View File

@@ -1,23 +1,29 @@
# Commands for pruning unused images and stopped containers. Each command runs
# under the deploy lock.
class Mrsk::Cli::Prune < Mrsk::Cli::Base
  desc "all", "Prune unused images and stopped containers"
  # Prune containers first, then images, under one lock.
  def all
    with_lock do
      containers
      images
    end
  end

  desc "images", "Prune unused images older than 7 days"
  # Record an audit entry and run the image prune command on every host.
  def images
    with_lock do
      on(MRSK.hosts) do
        execute(*MRSK.auditor.record("Pruned images"), verbosity: :debug)
        execute(*MRSK.prune.images)
      end
    end
  end

  desc "containers", "Prune stopped containers older than 3 days"
  # Record an audit entry and run the container prune command on every host.
  def containers
    with_lock do
      on(MRSK.hosts) do
        execute(*MRSK.auditor.record("Pruned containers"), verbosity: :debug)
        execute(*MRSK.prune.containers)
      end
    end
  end
end

View File

@@ -1,6 +1,7 @@
class Mrsk::Cli::Server < Mrsk::Cli::Base
desc "bootstrap", "Ensure curl and Docker are installed on servers"
def bootstrap
with_lock do
on(MRSK.hosts + MRSK.accessory_hosts) do
dependencies_to_install = Array.new.tap do |dependencies|
dependencies << "curl" unless execute "which curl", raise_on_non_zero_exit: false
@@ -13,3 +14,4 @@ class Mrsk::Cli::Server < Mrsk::Cli::Base
end
end
end
end

View File

@@ -1,37 +1,47 @@
class Mrsk::Cli::Traefik < Mrsk::Cli::Base
# Run Traefik on every Traefik host while holding the deploy lock.
desc "boot", "Boot Traefik on servers"
def boot
  with_lock do
    on(MRSK.traefik_hosts) do
      # raise_on_non_zero_exit: false — a failing run command is tolerated.
      execute(*MRSK.traefik.run, raise_on_non_zero_exit: false)
    end
  end
end
# Reboot Traefik under a single deploy lock: stop, remove the container, boot again.
desc "reboot", "Reboot Traefik on servers (stop container, remove container, start new container)"
def reboot
  with_lock do
    stop
    remove_container
    boot
  end
end
# Start the existing Traefik container on every Traefik host while holding the deploy lock.
desc "start", "Start existing Traefik container on servers"
def start
  with_lock do
    on(MRSK.traefik_hosts) do
      # The audit record is executed at debug verbosity.
      execute(*MRSK.auditor.record("Started traefik"), verbosity: :debug)
      # raise_on_non_zero_exit: false — a failing start command is tolerated.
      execute(*MRSK.traefik.start, raise_on_non_zero_exit: false)
    end
  end
end
# Stop the Traefik container on every Traefik host while holding the deploy lock.
desc "stop", "Stop existing Traefik container on servers"
def stop
  with_lock do
    on(MRSK.traefik_hosts) do
      # The audit record is executed at debug verbosity.
      execute(*MRSK.auditor.record("Stopped traefik"), verbosity: :debug)
      # raise_on_non_zero_exit: false — a failing stop command is tolerated.
      execute(*MRSK.traefik.stop, raise_on_non_zero_exit: false)
    end
  end
end
# Restart Traefik under a single deploy lock by stopping and then starting it.
desc "restart", "Restart existing Traefik container on servers"
def restart
  with_lock do
    stop
    start
  end
end
desc "details", "Show details about Traefik container from servers"
def details
@@ -64,24 +74,30 @@ class Mrsk::Cli::Traefik < Mrsk::Cli::Base
# Remove Traefik under a single deploy lock: stop it, then remove its container and image.
desc "remove", "Remove Traefik container and image from servers"
def remove
  with_lock do
    stop
    remove_container
    remove_image
  end
end
# Remove the Traefik container from every Traefik host while holding the deploy lock.
desc "remove_container", "Remove Traefik container from servers", hide: true
def remove_container
  with_lock do
    on(MRSK.traefik_hosts) do
      # The audit record is executed at debug verbosity.
      execute(*MRSK.auditor.record("Removed traefik container"), verbosity: :debug)
      execute(*MRSK.traefik.remove_container)
    end
  end
end
# Remove the Traefik image from every Traefik host while holding the deploy lock.
# The usage string names this command, `remove_image`, so Thor's help output
# matches the method it describes.
desc "remove_image", "Remove Traefik image from servers", hide: true
def remove_image
  with_lock do
    on(MRSK.traefik_hosts) do
      # The audit record is executed at debug verbosity.
      execute(*MRSK.auditor.record("Removed traefik image"), verbosity: :debug)
      execute(*MRSK.traefik.remove_image)
    end
  end
end
end

View File

@@ -2,10 +2,11 @@ require "active_support/core_ext/enumerable"
require "active_support/core_ext/module/delegation"
class Mrsk::Commander
attr_accessor :verbosity
attr_accessor :verbosity, :lock_count
def initialize
self.verbosity = :info
self.lock_count = 0
end
@@ -84,6 +85,9 @@ class Mrsk::Commander
@traefik ||= Mrsk::Commands::Traefik.new(config)
end
# Memoized lock command builder created from the current config.
def lock
  return @lock if @lock

  @lock = Mrsk::Commands::Lock.new(config)
end
def with_verbosity(level)
old_level = self.verbosity
@@ -97,14 +101,6 @@ class Mrsk::Commander
SSHKit.config.output_verbosity = old_level
end
# Test-induced damage!
# Clears the cached config and command builders and restores the default verbosity.
def reset
  @config = nil

  @app = nil
  @builder = nil
  @traefik = nil
  @registry = nil
  @prune = nil
  @auditor = nil

  @verbosity = :info
end
private
# Lazy setup of SSHKit
def configure_sshkit_with(config)

View File

@@ -41,6 +41,10 @@ module Mrsk::Commands
combine *commands, by: ">>"
end
# Combine the given commands with ">" as the separator.
def write(*commands)
  combine(*commands, by: ">")
end
def xargs(command)
[ :xargs, command ].flatten
end

63
lib/mrsk/commands/lock.rb Normal file
View File

@@ -0,0 +1,63 @@
require "active_support/duration"
require "active_support/core_ext/numeric/time"
# Builds the shell commands that take, release and inspect the deploy lock.
# The lock is the `mrsk_lock` directory; a `details` file inside it records who
# took the lock, for which version, and why.
class Mrsk::Commands::Lock < Mrsk::Commands::Base
  # Command that creates the lock directory and then writes the lock details.
  #
  # message - free-text reason stored in the details file.
  # version - app version stored in the details file.
  def acquire(message, version)
    combine \
      [:mkdir, lock_dir],
      write_lock_details(message, version)
  end

  # Command that removes the details file and then the lock directory.
  def release
    combine \
      [:rm, lock_details_file],
      [:rm, "-r", lock_dir]
  end

  # Command that checks the lock directory exists and then prints the decoded details.
  def status
    combine \
      stat_lock_dir,
      read_lock_details
  end

  private
    # The details are Base64-encoded before being echoed into the details file;
    # read_lock_details decodes them again.
    def write_lock_details(message, version)
      write \
        [:echo, "\"#{Base64.encode64(lock_details(message, version))}\""],
        lock_details_file
    end

    def read_lock_details
      pipe \
        [:cat, lock_details_file],
        [:base64, "-d"]
    end

    # `stat` output is redirected to /dev/null; only its exit status matters.
    def stat_lock_dir
      write \
        [:stat, lock_dir],
        "/dev/null"
    end

    def lock_dir
      :mrsk_lock
    end

    def lock_details_file
      [lock_dir, :details].join("/")
    end

    # Human-readable description of the lock holder, timestamp (UTC), version and message.
    def lock_details(message, version)
      <<~DETAILS.strip
        Locked by: #{locked_by} at #{Time.now.gmtime}
        Version: #{version}
        Message: #{message}
      DETAILS
    end

    # Name from the local git configuration, or "Unknown" when git is not
    # installed or no user.name is configured.
    def locked_by
      name = `git config user.name`.strip
      name.empty? ? "Unknown" : name
    rescue Errno::ENOENT
      "Unknown"
    end
end

View File

@@ -15,8 +15,9 @@ class CliBuildTest < CliTestCase
end
test "push without builder" do
Mrsk::Cli::Build.any_instance.stubs(:create).returns(true)
stub_locking
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with { |arg| arg == :docker }
.raises(SSHKit::Command::Failed.new("no builder"))
.then
.returns(true)
@@ -40,7 +41,9 @@ class CliBuildTest < CliTestCase
end
test "create with error" do
stub_locking
SSHKit::Backend::Abstract.any_instance.stubs(:execute)
.with { |arg| arg == :docker }
.raises(SSHKit::Command::Failed.new("stderr=error"))
run_command("create").tap do |output|
@@ -69,4 +72,11 @@ class CliBuildTest < CliTestCase
def run_command(*command)
stdouted { Mrsk::Cli::Build.start([*command, "-c", "test/fixtures/deploy_with_accessories.yml"]) }
end
# Stub the `execute` calls whose first two arguments are the lock acquire
# (`mkdir mrsk_lock`) and lock release (`rm mrsk_lock/details`) commands.
def stub_locking
  backend = SSHKit::Backend::Abstract.any_instance

  backend.stubs(:execute).with { |command, target| command == :mkdir && target == :mrsk_lock }
  backend.stubs(:execute).with { |command, target| command == :rm && target == "mrsk_lock/details" }
end
end

View File

@@ -8,13 +8,14 @@ class CliTestCase < ActiveSupport::TestCase
ENV["VERSION"] = "999"
ENV["RAILS_MASTER_KEY"] = "123"
ENV["MYSQL_ROOT_PASSWORD"] = "secret123"
Object.send(:remove_const, :MRSK)
Object.const_set(:MRSK, Mrsk::Commander.new)
end
teardown do
ENV.delete("RAILS_MASTER_KEY")
ENV.delete("MYSQL_ROOT_PASSWORD")
ENV.delete("VERSION")
MRSK.reset
end
private

20
test/cli/lock_test.rb Normal file
View File

@@ -0,0 +1,20 @@
require_relative "cli_test_case"
# CLI-level tests for the `mrsk lock` subcommands.
class CliLockTest < CliTestCase
  test "status" do
    # run_command returns the captured output and never yields, so the
    # assertions are attached with `tap` to make sure they actually run.
    run_command("status").tap do |output|
      assert_match "stat mrsk_lock", output
    end
  end

  test "release" do
    run_command("release").tap do |output|
      assert_match "rm -r mrsk_lock", output
      assert_match "Removed the deploy lock", output
    end
  end

  private
    # Run a lock subcommand against the accessories fixture and return what it printed.
    def run_command(*command)
      stdouted { Mrsk::Cli::Lock.start([*command, "-c", "test/fixtures/deploy_with_accessories.yml"]) }
    end
end

View File

@@ -42,12 +42,14 @@ class CliMainTest < CliTestCase
Mrsk::Cli::Main.any_instance.expects(:invoke).with("mrsk:cli:prune:all", [], invoke_options)
run_command("deploy", "--skip_push").tap do |output|
assert_match /Acquiring the deploy lock/, output
assert_match /Ensure curl and Docker are installed/, output
assert_match /Log into image registry/, output
assert_match /Pull app image/, output
assert_match /Ensure Traefik is running/, output
assert_match /Ensure app can pass healthcheck/, output
assert_match /Prune old containers and images/, output
assert_match /Releasing the deploy lock/, output
end
end

View File

@@ -0,0 +1,33 @@
require "test_helper"
# Unit tests for the shell commands built by Mrsk::Commands::Lock.
class CommandsLockTest < ActiveSupport::TestCase
  setup do
    @config = {
      service: "app",
      image: "dhh/app",
      registry: { "username" => "dhh", "password" => "secret" },
      servers: [ "1.1.1.1" ],
      traefik: { "args" => { "accesslog.format" => "json", "metrics.prometheus.buckets" => "0.1,0.3,1.2,5.0" } }
    }
  end

  test "status" do
    rendered = new_command.status.join(" ")

    assert_equal "stat mrsk_lock > /dev/null && cat mrsk_lock/details | base64 -d", rendered
  end

  test "acquire" do
    rendered = new_command.acquire("Hello", "123").join(" ")

    # The /m flag lets `.*` span any newlines inside the echoed payload.
    assert_match(/mkdir mrsk_lock && echo ".*" > mrsk_lock\/details/m, rendered)
  end

  test "release" do
    rendered = new_command.release.join(" ")

    assert_match "rm mrsk_lock/details && rm -r mrsk_lock", rendered
  end

  private
    # Lock command builder for the test configuration at version "123".
    def new_command
      configuration = Mrsk::Configuration.new(@config, version: "123")
      Mrsk::Commands::Lock.new(configuration)
    end
end