Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:23
erlang
4371-Keep-supervisor-children-linked-during-shu...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 4371-Keep-supervisor-children-linked-during-shutdown.patch of Package erlang
From ffd032a1364b8c976dc52a52230f7f10ba36f92b Mon Sep 17 00:00:00 2001 From: Maria Scott <maria-12648430@hnc-agency.org> Date: Fri, 17 Sep 2021 12:51:20 +0200 Subject: [PATCH] Keep supervisor children linked during shutdown Children were monitored and unlinked before sending the shutdown exit signal. If the supervisor dies between the unlinking and sending the exit signal, the child becomes an orphan. For similar reasons, erratic error messages would occur in logs when a child exited by itself while the supervisor tries to shut it down. The changes in this commit keep children linked until they have exited, and treat exit reasons as normal which may occur when a child exits by itself while being shut down by the supervisor. Co-authored-by: Jan Uhlig <juhlig@hnc-agency.org> --- lib/stdlib/src/supervisor.erl | 222 ++++++++++++++------------- lib/stdlib/test/Makefile | 1 + lib/stdlib/test/supervisor_4.erl | 49 ++++++ lib/stdlib/test/supervisor_SUITE.erl | 133 +++++++++++++++- 4 files changed, 295 insertions(+), 110 deletions(-) create mode 100644 lib/stdlib/test/supervisor_4.erl diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index fce18b18b7..eea5b6a513 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -898,11 +898,9 @@ terminate_children(Children, SupName) -> NChildren. do_terminate(Child, SupName) when is_pid(Child#child.pid) -> - case shutdown(Child#child.pid, Child#child.shutdown) of + case shutdown(Child) of ok -> ok; - {error, normal} when not (?is_permanent(Child)) -> - ok; {error, OtherReason} -> ?report_error(shutdown_error, OtherReason, Child, SupName) end, @@ -920,65 +918,61 @@ do_terminate(_Child, _SupName) -> %% supervisor. %% Returns: ok | {error, OtherReason} (this should be reported) %%----------------------------------------------------------------- -shutdown(Pid, brutal_kill) -> - case monitor_child(Pid) of - ok -> - exit(Pid, kill), - receive - {'DOWN', _MRef, process, Pid, killed} -> - ok; - {'DOWN', _MRef, process, Pid, OtherReason} -> - {error, OtherReason} - end; - {error, Reason} -> - {error, Reason} +shutdown(#child{pid=Pid, shutdown=brutal_kill} = Child) -> + Mon = monitor(process, Pid), + exit(Pid, kill), + receive + {'DOWN', Mon, process, Pid, Reason0} -> + case unlink_flush(Pid, Reason0) of + killed -> + ok; + {shutdown, _} when not (?is_permanent(Child)) -> + ok; + normal when not (?is_permanent(Child)) -> + ok; + Reason -> + {error, Reason} + end end; -shutdown(Pid, Time) -> - case monitor_child(Pid) of - ok -> - exit(Pid, shutdown), %% Try to shutdown gracefully - receive - {'DOWN', _MRef, process, Pid, shutdown} -> - ok; - {'DOWN', _MRef, process, Pid, OtherReason} -> - {error, OtherReason} - after Time -> - exit(Pid, kill), %% Force termination. - receive - {'DOWN', _MRef, process, Pid, OtherReason} -> - {error, OtherReason} - end - end; - {error, Reason} -> - {error, Reason} +shutdown(#child{pid=Pid, shutdown=Time} = Child) -> + Mon = monitor(process, Pid), + exit(Pid, shutdown), + receive + {'DOWN', Mon, process, Pid, Reason0} -> + case unlink_flush(Pid, Reason0) of + shutdown -> + ok; + {shutdown, _} when not (?is_permanent(Child)) -> + ok; + normal when not (?is_permanent(Child)) -> + ok; + Reason -> + {error, Reason} + end + after Time -> + exit(Pid, kill), + receive + {'DOWN', Mon, process, Pid, Reason0} -> + case unlink_flush(Pid, Reason0) of + shutdown -> + ok; + {shutdown, _} when not (?is_permanent(Child)) -> + ok; + normal when not (?is_permanent(Child)) -> + ok; + Reason -> + {error, Reason} + end + end end. -%% Help function to shutdown/2 switches from link to monitor approach -monitor_child(Pid) -> - - %% Do the monitor operation first so that if the child dies - %% before the monitoring is done causing a 'DOWN'-message with - %% reason noproc, we will get the real reason in the 'EXIT'-message - %% unless a naughty child has already done unlink... - erlang:monitor(process, Pid), +unlink_flush(Pid, DefaultReason) -> unlink(Pid), - receive - %% If the child dies before the unlik we must empty - %% the mail-box of the 'EXIT'-message and the 'DOWN'-message. - {'EXIT', Pid, Reason} -> - receive - {'DOWN', _, process, Pid, _} -> - {error, Reason} - end - after 0 -> - %% If a naughty child did unlink and the child dies before - %% monitor the result will be that shutdown/2 receives a - %% 'DOWN'-message with reason noproc. - %% If the child should die after the unlink there - %% will be a 'DOWN'-message with a correct reason - %% that will be handled in shutdown/2. - ok + {'EXIT', Pid, Reason} -> + Reason + after 0 -> + DefaultReason end. %%----------------------------------------------------------------- @@ -992,40 +986,37 @@ monitor_child(Pid) -> %%----------------------------------------------------------------- terminate_dynamic_children(State) -> Child = get_dynamic_child(State), - {Pids, EStack0} = monitor_dynamic_children(Child,State), + Pids = dyn_fold( + fun + (P, Acc) when is_pid(P) -> + Mon = monitor(process, P), + case Child#child.shutdown of + brutal_kill -> exit(P, kill); + _ -> exit(P, shutdown) + end, + Acc#{{P, Mon} => true}; + (?restarting(_), Acc) -> + Acc + end, + #{}, + State + ), + TRef = case Child#child.shutdown of + brutal_kill -> + undefined; + infinity -> + undefined; + Time -> + erlang:start_timer(Time, self(), kill) + end, Sz = maps:size(Pids), - EStack = case Child#child.shutdown of - brutal_kill -> - maps:foreach(fun(P, _) -> exit(P, kill) end, Pids), - wait_dynamic_children(Child, Pids, Sz, undefined, EStack0); - infinity -> - maps:foreach(fun(P, _) -> exit(P, shutdown) end, Pids), - wait_dynamic_children(Child, Pids, Sz, undefined, EStack0); - Time -> - maps:foreach(fun(P, _) -> exit(P, shutdown) end, Pids), - TRef = erlang:start_timer(Time, self(), kill), - wait_dynamic_children(Child, Pids, Sz, TRef, EStack0) - end, + EStack = wait_dynamic_children(Child, Pids, Sz, TRef, #{}), %% Unroll stacked errors and report them maps:foreach(fun(Reason, Ls) -> ?report_error(shutdown_error, Reason, Child#child{pid=Ls}, State#state.name) end, EStack). -monitor_dynamic_children(Child,State) -> - dyn_fold(fun(P,{Pids, EStack}) when is_pid(P) -> - case monitor_child(P) of - ok -> - {maps:put(P, P, Pids), EStack}; - {error, normal} when not (?is_permanent(Child)) -> - {Pids, EStack}; - {error, Reason} -> - {Pids, maps_prepend(Reason, P, EStack)} - end; - (?restarting(_), {Pids, EStack}) -> - {Pids, EStack} - end, {maps:new(), maps:new()}, State). - wait_dynamic_children(_Child, _Pids, 0, undefined, EStack) -> EStack; wait_dynamic_children(_Child, _Pids, 0, TRef, EStack) -> @@ -1041,34 +1032,51 @@ wait_dynamic_children(_Child, _Pids, 0, TRef, EStack) -> wait_dynamic_children(#child{shutdown=brutal_kill} = Child, Pids, Sz, TRef, EStack) -> receive - {'DOWN', _MRef, process, Pid, killed} -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, EStack); - - {'DOWN', _MRef, process, Pid, Reason} -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, maps_prepend(Reason, Pid, EStack)) + {'DOWN', Mon, process, Pid, Reason0} + when is_map_key({Pid, Mon}, Pids) -> + case unlink_flush(Pid, Reason0) of + killed -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + + {shutdown, _} when not (?is_permanent(Child)) -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + + normal when not (?is_permanent(Child)) -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + Reason -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, maps_prepend(Reason, Pid, + EStack)) + end end; wait_dynamic_children(Child, Pids, Sz, TRef, EStack) -> receive - {'DOWN', _MRef, process, Pid, shutdown} -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, EStack); - - {'DOWN', _MRef, process, Pid, {shutdown, _}} -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, EStack); - - {'DOWN', _MRef, process, Pid, normal} when not (?is_permanent(Child)) -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, EStack); - - {'DOWN', _MRef, process, Pid, Reason} -> - wait_dynamic_children(Child, maps:remove(Pid, Pids), Sz-1, - TRef, maps_prepend(Reason, Pid, EStack)); + {'DOWN', Mon, process, Pid, Reason0} + when is_map_key({Pid, Mon}, Pids) -> + case unlink_flush(Pid, Reason0) of + shutdown -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + + {shutdown, _} when not (?is_permanent(Child)) -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + + normal when not (?is_permanent(Child)) -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, EStack); + + Reason -> + wait_dynamic_children(Child, maps:remove({Pid, Mon}, Pids), + Sz-1, TRef, maps_prepend(Reason, Pid, + EStack)) + end; {timeout, TRef, kill} -> - maps:foreach(fun(P, _) -> exit(P, kill) end, Pids), + maps:foreach(fun({P, _}, _) -> exit(P, kill) end, Pids), wait_dynamic_children(Child, Pids, Sz, undefined, EStack) end. diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile index 5f9510cf16..bda16c263d 100644 --- a/lib/stdlib/test/Makefile +++ b/lib/stdlib/test/Makefile @@ -72,6 +72,7 @@ MODULES= \ supervisor_1 \ supervisor_2 \ supervisor_3 \ + supervisor_4 \ supervisor_deadlock \ naughty_child \ shell_SUITE \ diff --git a/lib/stdlib/test/supervisor_4.erl b/lib/stdlib/test/supervisor_4.erl new file mode 100644 index 0000000000..038c904c27 --- /dev/null +++ b/lib/stdlib/test/supervisor_4.erl @@ -0,0 +1,49 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% Description: Simulates the behaviour that a child process may have. +%% Is used by the supervisor_SUITE test suite. +-module(supervisor_4). + +-export([start_child/0, start_child/1, init/1]). + +-export([handle_call/3, handle_info/2, terminate/2]). + +start_child() -> + gen_server:start_link(?MODULE, undefined, []). + +start_child(Reason) -> + gen_server:start_link(?MODULE, {Reason}, []). + +init(Reason) -> + process_flag(trap_exit, true), + {ok, Reason}. + +handle_call(Req, _From, State) -> + {reply, Req, State}. + +handle_info(stop, State) -> + {stop, normal, State}; +handle_info(_, State) -> + {noreply, State}. + +terminate(_, {Reason}) -> + exit(Reason); +terminate(_, _) -> + ok. diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index 3d05ce5402..5bae4f6e05 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -44,7 +44,9 @@ sup_start_child_returns_error_simple/1, sup_start_map/1, sup_start_map_simple/1, sup_start_map_faulty_specs/1, - sup_stop_infinity/1, sup_stop_timeout/1, sup_stop_brutal_kill/1, + sup_stop_infinity/1, sup_stop_timeout/1, sup_stop_timeout_dynamic/1, + sup_stop_brutal_kill/1, sup_stop_brutal_kill_dynamic/1, + sup_stop_race/1, sup_stop_non_shutdown_exit_dynamic/1, child_adm/1, child_adm_simple/1, child_specs/1, extra_return/1, sup_flags/1]). @@ -133,8 +135,9 @@ groups() -> {sup_start_map, [], [sup_start_map, sup_start_map_simple, sup_start_map_faulty_specs]}, {sup_stop, [], - [sup_stop_infinity, sup_stop_timeout, - sup_stop_brutal_kill]}, + [sup_stop_infinity, sup_stop_timeout, sup_stop_timeout_dynamic, + sup_stop_brutal_kill, sup_stop_brutal_kill_dynamic, + sup_stop_race, sup_stop_non_shutdown_exit_dynamic]}, {normal_termination, [], [permanent_normal, transient_normal, temporary_normal]}, {shutdown_termination, [], @@ -498,6 +501,20 @@ sup_stop_timeout(Config) when is_list(Config) -> check_exit_reason(CPid1, shutdown), check_exit_reason(CPid2, killed). +sup_stop_timeout_dynamic(Config) when is_list(Config) -> + process_flag(trap_exit, true), + Child = {child, {supervisor_1, start_child, []}, temporary, 1000, worker, []}, + {ok, Pid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), + {ok, CPid1} = supervisor:start_child(Pid, []), + link(CPid1), + {ok, CPid2} = supervisor:start_child(Pid, []), + link(CPid2), + CPid2 ! {sleep, 200000}, + terminate(Pid, shutdown), + check_exit_reason(Pid, shutdown), + check_exit_reason(CPid1, shutdown), + check_exit_reason(CPid2, killed). + %%------------------------------------------------------------------------- %% See sup_stop/1 when Shutdown = brutal_kill @@ -518,6 +535,116 @@ sup_stop_brutal_kill(Config) when is_list(Config) -> check_exit_reason(CPid1, shutdown), check_exit_reason(CPid2, killed). +sup_stop_brutal_kill_dynamic(Config) when is_list(Config) -> + process_flag(trap_exit, true), + Child = {child, {supervisor_4, start_child, []}, temporary, brutal_kill, worker, []}, + {ok, Pid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), + CPids = lists:map( + fun (_) -> + {ok, CPid} = supervisor:start_child(Pid, []), + link(CPid), + CPid + end, + lists:seq(1, 10)), + terminate(Pid, shutdown), + check_exit_reason(Pid, shutdown), + lists:foreach( + fun (CPid) -> + check_exit_reason(CPid, killed) + end, + CPids + ). + +%%------------------------------------------------------------------------- +%% Tests that a supervisor completes its shutdown properly while children +%% exit by themselves for reasons other than shutdown +sup_stop_race(Config) when is_list(Config) -> + process_flag(trap_exit, true), + {ok, Pid} = start_link({ok, {{one_for_one, 2, 3600}, []}}), + {ok, CPidA} = supervisor:start_child(Pid, + #{id => child_a, + start => {supervisor_1, start_child, []}, + restart => temporary, + shutdown => 1000}), + link(CPidA), + CPids = lists:foldl( + fun ({Restart, Reason}, Acc) -> + {ok, CPid} = supervisor:start_child(Pid, + #{id => {child, make_ref()}, + start => {supervisor_4, start_child, [Reason]}, + restart => Restart, + shutdown => 1000 + }), + link(CPid), + Acc#{CPid => Reason} + end, + #{}, + [ + {Restart, Reason} + || Restart <- [temporary, transient, permanent], + Reason <- [normal, shutdown, {shutdown, test}, other] + ] + ), + {ok, CPidZ} = supervisor:start_child(Pid, + #{ + id => child_z, + start => {supervisor_2, start_child, [1000]}, + restart => temporary, + shutdown => 2000 + }), + link(CPidZ), + Self = self(), + spawn_link( + fun () -> + timer:sleep(200), + maps:foreach( + fun (CPid, _) -> + CPid ! stop + end, + CPids + ), + unlink(Self) + end + ), + ok = terminate(Pid, shutdown), + check_exit_reason(Pid, shutdown), + check_exit_reason(CPidZ, shutdown), + maps:foreach( + fun (CPid, Reason) -> + check_exit_reason(CPid, Reason) + end, + CPids), + check_exit_reason(CPidA, shutdown). + +%%------------------------------------------------------------------------- +%% Tests that a simple_one_for_one supervisor shuts down correctly when +%% some children exit with a reason other than shutdown +sup_stop_non_shutdown_exit_dynamic(Config) when is_list(Config) -> + process_flag(trap_exit, true), + lists:foreach( + fun (Restart) -> + Child = {child, {supervisor_4, start_child, []}, Restart, 5000, worker, []}, + {ok, Pid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), + CPids = lists:foldl( + fun (Reason, Acc) -> + {ok, CPid} = supervisor:start_child(Pid, [Reason]), + link(CPid), + Acc#{CPid => Reason} + end, + #{}, + [normal, shutdown, {shutdown, test}, other] + ), + ok = terminate(Pid, shutdown), + check_exit_reason(Pid, shutdown), + maps:foreach( + fun (CPid, Reason) -> + check_exit_reason(CPid, Reason) + end, + CPids) + end, + [temporary, transient, permanent] + ). + %%------------------------------------------------------------------------- %% The start function provided to start a child may return {ok, Pid} %% or {ok, Pid, Info}, if it returns the latter check that the -- 2.31.1
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor