Prevent SSH connection restarts

Set a high idle timeout on the sshkit connection pool. This will
reduce the incidence of re-connection storms when a deployment has been
idle for a while (e.g. when waiting for a docker build).

The default timeout was 30 seconds, so we'll enable keepalives at a
30s interval to match. This is to help prevent connections from being
killed during long idle periods.
This commit is contained in:
Donal McBreen
2023-06-22 16:09:25 +01:00
parent b25cfa178b
commit f64b596907
4 changed files with 35 additions and 4 deletions

View File

@@ -143,6 +143,7 @@ class Mrsk::Commander
private private
# Lazy setup of SSHKit # Lazy setup of SSHKit
def configure_sshkit_with(config) def configure_sshkit_with(config)
SSHKit::Backend::Netssh.pool.idle_timeout = config.sshkit_pool_idle_timeout
SSHKit::Backend::Netssh.configure do |sshkit| SSHKit::Backend::Netssh.configure do |sshkit|
sshkit.max_concurrent_starts = config.sshkit_max_concurrent_starts if config.sshkit_max_concurrent_starts sshkit.max_concurrent_starts = config.sshkit_max_concurrent_starts if config.sshkit_max_concurrent_starts
sshkit.ssh_options = config.ssh_options sshkit.ssh_options = config.ssh_options

View File

@@ -153,7 +153,7 @@ class Mrsk::Configuration
end end
def ssh_options def ssh_options
{ user: ssh_user, proxy: ssh_proxy, auth_methods: [ "publickey" ] }.compact { user: ssh_user, proxy: ssh_proxy, auth_methods: [ "publickey" ], keepalive: true, keepalive_interval: 30 }.compact
end end
@@ -161,6 +161,15 @@ class Mrsk::Configuration
raw_config.sshkit["max_concurrent_starts"] if raw_config.sshkit.present? raw_config.sshkit["max_concurrent_starts"] if raw_config.sshkit.present?
end end
def sshkit_pool_idle_timeout
if raw_config.sshkit.present?
raw_config.sshkit["pool_idle_timeout"] || 900
else
900
end
end
def healthcheck def healthcheck
{ "path" => "/up", "port" => 3000, "max_attempts" => 7 }.merge(raw_config.healthcheck || {}) { "path" => "/up", "port" => 3000, "max_attempts" => 7 }.merge(raw_config.healthcheck || {})
end end

View File

@@ -226,7 +226,13 @@ class ConfigurationTest < ActiveSupport::TestCase
test "sshkit max concurrent starts" do test "sshkit max concurrent starts" do
config = Mrsk::Configuration.new(@deploy.tap { |c| c.merge!(sshkit: { "max_concurrent_starts" => 50 }) }) config = Mrsk::Configuration.new(@deploy.tap { |c| c.merge!(sshkit: { "max_concurrent_starts" => 50 }) })
assert_equal 50, @config.sshkit_max_concurrent_starts assert_equal 50, config.sshkit_max_concurrent_starts
end
test "sshkit pool idle timeout" do
assert_equal 900, @config.sshkit_pool_idle_timeout
config = Mrsk::Configuration.new(@deploy.tap { |c| c.merge!(sshkit: { "pool_idle_timeout" => 600 }) })
assert_equal 600, config.sshkit_pool_idle_timeout
end end
test "volume_args" do test "volume_args" do
@@ -271,7 +277,22 @@ class ConfigurationTest < ActiveSupport::TestCase
end end
test "to_h" do test "to_h" do
assert_equal({ :roles=>["web"], :hosts=>["1.1.1.1", "1.1.1.2"], :primary_host=>"1.1.1.1", :version=>"missing", :repository=>"dhh/app", :absolute_image=>"dhh/app:missing", :service_with_version=>"app-missing", :env_args=>["-e", "REDIS_URL=\"redis://x/y\""], :ssh_options=>{:user=>"root", :auth_methods=>["publickey"]}, :volume_args=>["--volume", "/local/path:/container/path"], :builder=>{}, :logging=>["--log-opt", "max-size=\"10m\""], :healthcheck=>{"path"=>"/up", "port"=>3000, "max_attempts" => 7 }}, @config.to_h) expected_config = \
{ :roles=>["web"],
:hosts=>["1.1.1.1", "1.1.1.2"],
:primary_host=>"1.1.1.1",
:version=>"missing",
:repository=>"dhh/app",
:absolute_image=>"dhh/app:missing",
:service_with_version=>"app-missing",
:env_args=>["-e", "REDIS_URL=\"redis://x/y\""],
:ssh_options=>{ :user=>"root", :auth_methods=>["publickey"], keepalive: true, keepalive_interval: 30 },
:volume_args=>["--volume", "/local/path:/container/path"],
:builder=>{},
:logging=>["--log-opt", "max-size=\"10m\""],
:healthcheck=>{ "path"=>"/up", "port"=>3000, "max_attempts" => 7 }}
assert_equal expected_config, @config.to_h
end end
test "min version is lower" do test "min version is lower" do

View File

@@ -51,7 +51,7 @@ class MainTest < IntegrationTest
assert_equal "app-#{version}", config[:service_with_version] assert_equal "app-#{version}", config[:service_with_version]
assert_equal [], config[:env_args] assert_equal [], config[:env_args]
assert_equal [], config[:volume_args] assert_equal [], config[:volume_args]
assert_equal({ user: "root", auth_methods: [ "publickey" ] }, config[:ssh_options]) assert_equal({ user: "root", auth_methods: [ "publickey" ], keepalive: true, keepalive_interval: 30 }, config[:ssh_options])
assert_equal({ "multiarch" => false, "args" => { "COMMIT_SHA" => version } }, config[:builder]) assert_equal({ "multiarch" => false, "args" => { "COMMIT_SHA" => version } }, config[:builder])
assert_equal [ "--log-opt", "max-size=\"10m\"" ], config[:logging] assert_equal [ "--log-opt", "max-size=\"10m\"" ], config[:logging]
assert_equal({ "path" => "/up", "port" => 3000, "max_attempts" => 7, "cmd" => "wget -qO- http://localhost > /dev/null" }, config[:healthcheck]) assert_equal({ "path" => "/up", "port" => 3000, "max_attempts" => 7, "cmd" => "wget -qO- http://localhost > /dev/null" }, config[:healthcheck])