Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:23
erlang
4501-inet_tls_dist-make-TLS-handshake-concurren...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 4501-inet_tls_dist-make-TLS-handshake-concurrent.patch of Package erlang
From 14bc45e39443eeb10adad352c318f6d3807b3745 Mon Sep 17 00:00:00 2001 From: Maxim Fedorov <dane@whatsapp.com> Date: Tue, 9 Jun 2020 19:21:26 -0700 Subject: [PATCH] inet_tls_dist: make TLS handshake concurrent TLS handshake may take considerable amount of time, so it should be done in a different process, not accepting process itself. Also, there is no need to kill the accepting process if TLS handshake fails. Spawned accepting process runs with normal priority. There is at most one accepting process at a time. When TCP connection is accepted, the process is added to map of processes pending TLS handshake. Amount of pending TLS attempts is limited to the same amount as listening socket backlog, option {backlog, B} of inet_dist_listen_options. If accepting process crashes or returns error during TCP accept phase, listener process exits as well, to close listening socket, which is most likely unusable - there is no other reason for accept to fail. --- lib/ssl/src/inet_tls_dist.erl | 98 ++++++++++++++++++++++++--------- lib/ssl/test/ssl_dist_SUITE.erl | 58 +++++++++++++++++++ 2 files changed, 129 insertions(+), 27 deletions(-) diff --git a/lib/ssl/src/inet_tls_dist.erl b/lib/ssl/src/inet_tls_dist.erl index eaa481f119..5cfbeff387 100644 --- a/lib/ssl/src/inet_tls_dist.erl +++ b/lib/ssl/src/inet_tls_dist.erl @@ -223,27 +223,62 @@ gen_accept(Driver, Listen) -> monitor_pid( spawn_opt( fun () -> - accept_loop(Driver, Listen, Kernel) + process_flag(trap_exit, true), + LOpts = application:get_env(kernel, inet_dist_listen_options, []), + MaxPending = + case lists:keyfind(backlog, 1, LOpts) of + {backlog, Backlog} -> Backlog; + false -> 128 + end, + DLK = {Driver, Listen, Kernel}, + accept_loop(DLK, spawn_accept(DLK), MaxPending, #{}) end, [link, {priority, max}])). -accept_loop(Driver, Listen, Kernel) -> - case Driver:accept(Listen) of - {ok, Socket} -> - case check_ip(Driver, Socket) of - true -> - accept_loop(Driver, Listen, Kernel, Socket); - {false,IP} -> - ?LOG_ERROR( - "** Connection attempt from " - "disallowed IP ~w ** ~n", [IP]), - ?shutdown2(no_node, trace({disallowed, IP})) - end; - Error -> - exit(trace(Error)) - end. +%% Concurrent accept loop will spawn a new HandshakePid when +%% there is no HandshakePid already running, and Pending map is +%% smaller than MaxPending +accept_loop(DLK, undefined, MaxPending, Pending) when map_size(Pending) < MaxPending -> + accept_loop(DLK, spawn_accept(DLK), MaxPending, Pending); +accept_loop(DLK, HandshakePid, MaxPending, Pending) -> + receive + {continue, HandshakePid} when is_pid(HandshakePid) -> + accept_loop(DLK, undefined, MaxPending, Pending#{HandshakePid => true}); + {'EXIT', Pid, Reason} when is_map_key(Pid, Pending) -> + Reason =/= normal andalso + ?LOG_ERROR("TLS distribution handshake failed: ~p~n", [Reason]), + accept_loop(DLK, HandshakePid, MaxPending, maps:remove(Pid, Pending)); + {'EXIT', HandshakePid, Reason} when is_pid(HandshakePid) -> + %% HandshakePid crashed before turning into Pending, which means + %% error happened in accept. Need to restart the listener. + exit(Reason); + Unexpected -> + ?LOG_WARNING("TLS distribution: unexpected message: ~p~n" ,[Unexpected]), + accept_loop(DLK, HandshakePid, MaxPending, Pending) + end. + +spawn_accept({Driver, Listen, Kernel}) -> + AcceptLoop = self(), + spawn_link( + fun () -> + case Driver:accept(Listen) of + {ok, Socket} -> + AcceptLoop ! {continue, self()}, + case check_ip(Driver, Socket) of + true -> + accept_one(Driver, Kernel, Socket); + {false,IP} -> + ?LOG_ERROR( + "** Connection attempt from " + "disallowed IP ~w ** ~n", [IP]), + trace({disallowed, IP}) + end; + Error -> + exit(Error) + end + end). -accept_loop(Driver, Listen, Kernel, Socket) -> +accept_one(Driver, Kernel, Socket) -> Opts = setup_verify_client(Socket, get_ssl_options(server)), wait_for_code_server(), case @@ -259,13 +294,18 @@ accept_loop(Driver, Listen, Kernel, Socket) -> Driver:family(), tls}), receive {Kernel, controller, Pid} -> - ok = ssl:controlling_process(SslSocket, Pid), - trace( - Pid ! {self(), controller}); + case ssl:controlling_process(SslSocket, Pid) of + ok -> + trace(Pid ! {self(), controller}); + Error -> + trace(Pid ! {self(), exit}), + ?LOG_ERROR( + "Cannot control TLS distribution connection: ~p~n", + [Error]) + end; {Kernel, unsupported_protocol} -> - exit(trace(unsupported_protocol)) - end, - accept_loop(Driver, Listen, Kernel); + trace(unsupported_protocol) + end; {error, {options, _}} = Error -> %% Bad options: that's probably our fault. %% Let's log that. @@ -273,10 +313,10 @@ accept_loop(Driver, Listen, Kernel, Socket) -> "Cannot accept TLS distribution connection: ~s~n", [ssl:format_error(Error)]), gen_tcp:close(Socket), - exit(trace(Error)); + trace(Error); Other -> gen_tcp:close(Socket), - exit(trace(Other)) + trace(Other) end. @@ -412,9 +452,9 @@ gen_accept_connection( do_accept( _Driver, AcceptPid, DistCtrl, MyNode, Allowed, SetupTime, Kernel) -> - {ok, SslSocket} = tls_sender:dist_tls_socket(DistCtrl), receive {AcceptPid, controller} -> + {ok, SslSocket} = tls_sender:dist_tls_socket(DistCtrl), Timer = dist_util:start_timer(SetupTime), NewAllowed = allowed_nodes(SslSocket, Allowed), HSData0 = hs_data_common(SslSocket), @@ -427,7 +467,11 @@ do_accept( this_flags = 0, allowed = NewAllowed}, link(DistCtrl), - dist_util:handshake_other_started(trace(HSData)) + dist_util:handshake_other_started(trace(HSData)); + {AcceptPid, exit} -> + %% this can happen when connection was initiated, but dropped + %% between TLS handshake completion and dist handshake start + ?shutdown2(MyNode, connection_setup_failed) end. allowed_nodes(_SslSocket, []) -> diff --git a/lib/ssl/test/ssl_dist_SUITE.erl b/lib/ssl/test/ssl_dist_SUITE.erl index 3b0c4d8c09..2123ac5def 100644 --- a/lib/ssl/test/ssl_dist_SUITE.erl +++ b/lib/ssl/test/ssl_dist_SUITE.erl @@ -39,6 +39,8 @@ basic/1, payload/0, payload/1, + dist_port_overload/0, + dist_port_overload/1, plain_options/0, plain_options/1, plain_verify_options/0, @@ -91,6 +93,7 @@ start_ssl_node_name(Name, Args) -> all() -> [basic, payload, + dist_port_overload, plain_options, plain_verify_options, nodelay_option, @@ -164,6 +167,61 @@ payload() -> payload(Config) when is_list(Config) -> gen_dist_test(payload_test, Config). +%%-------------------------------------------------------------------- +dist_port_overload() -> + [{doc, "Test that TLS distribution connections can be accepted concurrently"}]. +dist_port_overload(Config) when is_list(Config) -> + %% Start a node, and get the port number it's listening on. + #node_handle{nodename = NodeName} = NH1 = start_ssl_node(Config), + [Name, Host] = string:lexemes(atom_to_list(NodeName), "@"), + {ok, NodesPorts} = apply_on_ssl_node(NH1, fun net_adm:names/0), + {Name, Port} = lists:keyfind(Name, 1, NodesPorts), + %% Run 4 connections concurrently. When TLS handshake is not concurrent, + %% and with default net_setuptime of 7 seconds, only one connection per 7 + %% seconds is closed from server side. With concurrent accept, all 7 will + %% be dropped in 7 seconds + RequiredConcurrency = 4, + Started = [connect(self(), Host, Port) || _ <- lists:seq(1, RequiredConcurrency)], + %% give 10 seconds (more than 7, less than 2x7 seconds) + Responded = barrier(RequiredConcurrency, [], erlang:system_time(millisecond) + 10000), + %% clean up + stop_ssl_node(NH1), + [R ! exit || R <- Responded], + [exit(P, kill) || P <- Started -- Responded], + %% Ensure some amount of concurrency was reached. + (length(Responded) >= RequiredConcurrency) orelse + ct:fail({actual, length(Responded), expected, RequiredConcurrency}), + success(Config). + +barrier(0, Responded, _Until) -> + Responded; +barrier(RequiredConcurrency, Responded, Until) -> + Timeout = Until - erlang:system_time(millisecond), + receive + {waiting, Pid} -> + barrier(RequiredConcurrency - 1, [Pid | Responded], Until); + {error, Error} -> + ct:fail(Error) + after + Timeout -> Responded + end. + +connect(Control, Host, Port) -> + spawn( + fun () -> + case gen_tcp:connect(Host, Port, [{active, true}]) of + {ok, Sock} -> + receive + {tcp_closed, Sock} -> + Control ! {waiting, self()}; + exit -> + gen_tcp:close(Sock) + end; + Error -> + Control ! {error, Error} + end + end). + %%-------------------------------------------------------------------- plain_options() -> [{doc,"Test specifying tls options not related to certificate verification"}]. -- 2.26.2
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor