Erlang:Supervisor

supervisor可以监控进程以防止僵尸进程并防止内存泄漏。另外,在监督下的应用程序的终止处理可以按正常顺序执行。关闭顶级监控程序会将终止处理传播到其子进程。因此,进程监视树需要以下元素:

  • 监控其他下级监控树和进程
  • 进程总是在监督下使用
  • 重启子进程的策略
  • 防止无限期重启
  • 指定依赖项

监控功能

init/1: init/1 返回值为

1
{ ok , {{ RestartStrategy , MaxRestart , MaxTime },[ ChildSpecs ]}}
  • RestartStrategy:重启策略
  • MaxRestart和MaxTime:如果在MaxTime(秒)内重启超过MaxRestart次数,Supervisor将关闭
  • ChildSpec:确定子进程规范的元组列表
{ok, {{one_for_all, 5, 60},   %% {RestartStrategy, MaxRestart, MaxTime}
      %% [ChildSpecs]
      [{fake_id, {fake_mod, start_link, [SomeArg]}, permanent, 5000, worker, [fake_mod]},
       {other_id, {event_manager_mod, start_link, []}, transient, infinity, worker, dynamic}]}}.

重启策略

one_for_one(一对一)

  • 某个子进程终止时,只重新启动该进程
  • 每个子进程相互独立,一个进程的重启不会影响其他进程

one_for_one

rest_for_one

  • 进程崩溃,重新启动它,包括在该进程之后启动的进程
  • 流程像链一样依赖

rest_for_one

simple_one_for_one

当只有一种受监视进程并且想要动态添加它时使用

子进程规范(ChildSpec)

1
2
3
4
5
6
7
8
9
10
11
[{fake_id,  % ChildID
{fake_mod, start_link, [SomeArg]}, % StartFunc
permanent, % Restart
5000, % Shutdown
worker, % Type
[fake_mod]}, % Modules
{other_id, {event_manager_mod, start_link, []},
transient,
infinity,
worker,
dynamic}]
  • ChildID:子进程名称, 用于调试等;
  • StartFunc:指定如何启动子进程的元组, 格式为{Mod, Func, Args}(Args 是参数列表,而不是参数个数);
  • Restart: 告诉 supervisor 在子进程终止时如何处理它(permanent:总是重启;temporary:从不重启;transient:介于两者之间,仅在异常终止时重启,以 normal、shutdown 或 {shutdown, Term} 终止后不重启);
  • Shutdown: 终止子进程的方式。整数表示超时时间(毫秒):supervisor 先发送 exit(Pid, shutdown),若子进程在该时间内仍未退出,再发送 exit(Pid, kill) 强制终止;也可以指定 brutal_kill 或 infinity;
  • Type: 子进程类型,只能是 worker 或 supervisor(supervisor_bridge 是一种 behaviour,不是 Type 的取值);
  • Modules: 子进程行为使用的回调模块;

supervisor职能

supervisor:start_link/2,3

1
2
3
start_link (module , argument )

start_link (name , module , argument )
  • module:实现 init/1 的回调模块;argument:传递给 init/1 回调函数的参数;name:可以用 {local, Name} 的形式指定 supervisor 的注册名称;
  • 返回值:{ok, pid()} | ignore | {error, startlink_err()}

supervisor检查

gen_server代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
-module(echo_server).
-behavior(gen_server).

%% API

-export([start_link/1, stop/2, echo/2]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).

%% gen_server callbacks
%% @doc gen_server callback: initialise the server.
%% Enables exit trapping for this process, prints the server name and
%% uses the name itself as the server state.
init(Name) ->
    _PreviousFlag = process_flag(trap_exit, true),
    ok = io:format("echo_server name:~p~n", [Name]),
    {ok, Name}.

%% @doc gen_server callback for synchronous requests.
%%
%% Supported requests:
%%   {echo, Message} - prints the message together with the server name
%%                     and replies `ok'.
%%   {stop, Reason}  - replies `ok' to the caller and then stops the
%%                     server with the given Reason.
%%
%% Any other request is answered with `{error, unknown_request}'. The
%% caller must always receive a reply: returning `noreply' here without
%% keeping `From' would leave it blocked until gen_server:call/2 times out.
handle_call({echo, Message}, _From, Name) ->
    io:format("echo_server ~p: echo ~p~n", [Name, Message]),
    {reply, ok, Name};
handle_call({stop, Reason}, _From, Name) ->
    {stop, Reason, ok, Name};
handle_call(_Message, _From, State) ->
    {reply, {error, unknown_request}, State}.

%% @doc gen_server callback for asynchronous requests.
%% No casts are defined for this server; every cast is ignored and the
%% state is returned unchanged.
handle_cast(_Request, ServerState) -> {noreply, ServerState}.

%% @doc gen_server callback for messages that are neither calls nor casts.
%% Prints the received message and keeps the state unchanged.
handle_info(Info, ServerState) ->
    ok = io:format("undefined message: ~p~n", [Info]),
    {noreply, ServerState}.

%% @doc gen_server callback invoked on a code upgrade or downgrade.
%% The state needs no conversion, so it is handed back as is.
code_change(_FromVersion, ServerState, _ExtraData) -> {ok, ServerState}.

%% @doc gen_server callback invoked when the server is about to stop.
%%
%% A `normal' stop prints only the server name; any other reason prints
%% both the name and the reason. The second format string separates the
%% labels from the values with spaces so the output reads
%% "echo_server name process1 reason kill." rather than running the
%% words together.
terminate(normal, Name) ->
    io:format("echo_server name:~p~n", [Name]);
terminate(Reason, Name) ->
    io:format("echo_server name ~p reason ~p.~n", [Name, Reason]).

%% api functions
%% @doc Start an echo server linked to the caller.
%% The server is registered locally under Name, and Name is also passed
%% to init/1 as its argument.
start_link(Name) ->
    RegisteredName = {local, Name},
    StartOptions = [],
    gen_server:start_link(RegisteredName, ?MODULE, Name, StartOptions).

%% @doc Synchronously ask the server Pid to stop with the given Reason.
%% Returns whatever the server replies to the `{stop, Reason}' request.
stop(Pid, Reason) ->
    StopRequest = {stop, Reason},
    gen_server:call(Pid, StopRequest).

%% @doc Synchronously send Message to the server Pid to be echoed.
%% Returns whatever the server replies to the `{echo, Message}' request.
echo(Pid, Message) ->
    EchoRequest = {echo, Message},
    gen_server:call(Pid, EchoRequest).

supervisor代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
-module(echo_server_sup).
-behaviour(supervisor).

-export([start_link/3]).
-export([init/1]).

% supervisor api
%% @doc Start the supervisor, registered locally under the module name.
%% The restart strategy and restart limits are bundled into one tuple
%% and passed to init/1.
start_link(RestartStrategy, MaxRestart, MaxTime) ->
    SupName = {local, ?MODULE},
    InitArg = {RestartStrategy, MaxRestart, MaxTime},
    supervisor:start_link(SupName, ?MODULE, InitArg).



% callback function
%% @doc supervisor callback: build the supervisor flags and child specs.
%%
%% The argument is the {RestartStrategy, MaxRestart, MaxTime} tuple given
%% to start_link/3. MaxRestart and MaxTime are always taken from the
%% caller, including for simple_one_for_one (previously that clause
%% ignored them and always used 3 restarts in 60 seconds).
%%
%% simple_one_for_one: a single temporary child template with an empty
%% argument list; the arguments for each child are supplied when the
%% child is started.
%% Any other strategy: four statically defined echo_server workers
%% covering the permanent, temporary and transient restart types.
init({simple_one_for_one, MaxRestart, MaxTime}) ->
    {ok, {{simple_one_for_one, MaxRestart, MaxTime},
          [echo_worker(dynamic_echo_server_sup, [], temporary)]}};
init({RestartStrategy, MaxRestart, MaxTime}) ->
    {ok, {{RestartStrategy, MaxRestart, MaxTime},
          [echo_worker(id_process1, [process1], permanent),
           echo_worker(id_process2, [process2], temporary),
           echo_worker(id_process3, [process3], transient),
           echo_worker(id_process4, [process4], transient)]}}.

%% Build the child spec tuple for one echo_server worker: started through
%% echo_server:start_link with Args, 1000 ms shutdown timeout.
echo_worker(Id, Args, Restart) ->
    {Id, {echo_server, start_link, Args}, Restart, 1000, worker, [echo_server]}.

one_for_one的操作检查

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
1> c(echo_server).
{ok,echo_server}

2> c(echo_server_sup).
{ok,echo_server_sup}

%% one_for_one
3> {ok, Pid} = echo_server_sup:start_link(one_for_one, 5, 60).
echo_server name:process1
echo_server name:process2
echo_server name:process3
echo_server name:process4
{ok,<0.72.0>}

4> echo_server:stop(whereis(process1), normal).
echo_server name:process1
echo_server name:process1
ok

5> echo_server:stop(whereis(process1), kill).
echo_server name process1 reason kill.
echo_server name:process1
ok

6>
=ERROR REPORT==== 23-Mar-2018::01:11:26 ===
** Generic server process1 terminating
** Last message in was {stop,kill}
** When Server state == process1
** Reason for termination ==
** kill
** Client <0.60.0> stacktrace
** [{gen,do_call,4,[{file,"gen.erl"},{line,169}]},
{gen_server,call,2,[{file,"gen_server.erl"},{line,202}]},
{erl_eval,do_apply,6,[{file,"erl_eval.erl"},{line,674}]},
{shell,exprs,7,[{file,"shell.erl"},{line,687}]},
{shell,eval_exprs,7,[{file,"shell.erl"},{line,642}]},
{shell,eval_loop,3,[{file,"shell.erl"},{line,627}]}]

6> echo_server:stop(whereis(process1), exit).
echo_server name process1 reason exit.

=ERROR REPORT==== 23-Mar-2018::01:11:35 ===
** Generic server process1 terminating
** Last message in was {stop,exit}
** When Server state == process1
** Reason for termination ==
** exit
** Client <0.60.0> stacktrace
** [{gen,do_call,4,[{file,"gen.erl"},{line,169}]},
{gen_server,call,2,[{file,"gen_server.erl"},{line,202}]},
{erl_eval,do_apply,6,[{file,"erl_eval.erl"},{line,674}]},
{shell,exprs,7,[{file,"shell.erl"},{line,687}]},
{shell,eval_exprs,7,[{file,"shell.erl"},{line,642}]},
{shell,eval_loop,3,[{file,"shell.erl"},{line,627}]}]
echo_server name:process1
ok

7> echo_server:stop(whereis(process1), shutdown).
echo_server name process1 reason shutdown.
echo_server name:process1
ok

8> echo_server:stop(whereis(process2), exit).
echo_server name process2 reason exit.

=ERROR REPORT==== 23-Mar-2018::01:12:09 ===
** Generic server process2 terminating
** Last message in was {stop,exit}
** When Server state == process2
** Reason for termination ==
** exit
** Client <0.60.0> stacktrace
** [{gen,do_call,4,[{file,"gen.erl"},{line,169}]},
{gen_server,call,2,[{file,"gen_server.erl"},{line,202}]},
{erl_eval,do_apply,6,[{file,"erl_eval.erl"},{line,674}]},
{shell,exprs,7,[{file,"shell.erl"},{line,687}]},
{shell,eval_exprs,7,[{file,"shell.erl"},{line,642}]},
{shell,eval_loop,3,[{file,"shell.erl"},{line,627}]}]
ok

9> echo_server:stop(whereis(process3), normal).
echo_server name:process3
ok

10> echo_server:stop(whereis(process4), shutdown).
echo_server name process4 reason shutdown.
ok

%% 子进程确认

11> supervisor:which_children(Pid).
[{id_process4,undefined,worker,[echo_server]},
{id_process3,undefined,worker,[echo_server]},
{id_process1,<0.84.0>,worker,[echo_server]}]

12> supervisor:count_children(Pid).
[{specs,3},{active,1},{supervisors,0},{workers,3}]

rest_for_one检查

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
1> c(echo_server).
{ok,echo_server}

2> c(echo_server_sup).
{ok,echo_server_sup}

%% rest_for_one策略
3> {ok, Pid} = echo_server_sup:start_link(rest_for_one, 2, 60).
echo_server name:process1
echo_server name:process2
echo_server name:process3
echo_server name:process4
{ok,<0.72.0>}

%% permanent
4> echo_server:stop(whereis(process1), normal).
echo_server name:process1
echo_server name process4 reason shutdown.
ok
echo_server name process3 reason shutdown.
echo_server name process2 reason shutdown.
echo_server name:process1
echo_server name:process3
echo_server name:process4

5> exit(whereis(process1), kill).
echo_server name process4 reason shutdown.
true
echo_server name process3 reason shutdown.
echo_server name:process1
echo_server name:process3
echo_server name:process4

%% transient
6> echo_server:stop(whereis(process3), normal).
echo_server name:process3
ok

one_for_all

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
1> c(echo_server).
{ok,echo_server}

2> c(echo_server_sup).
{ok,echo_server_sup}

%% one_for_all
3> {ok, Pid} = echo_server_sup:start_link(one_for_all, 5, 60).
echo_server name:process1
echo_server name:process2
echo_server name:process3
echo_server name:process4
{ok,<0.72.0>}

%% permanent
4> echo_server:stop(whereis(process1), normal).
echo_server name:process1
echo_server name process4 reason shutdown.
ok
echo_server name process3 reason shutdown.
echo_server name process2 reason shutdown.
echo_server name:process1
echo_server name:process3
echo_server name:process4

%% transient
5> echo_server:stop(whereis(process3), normal).
echo_server name:process3
ok

%% transient
6> echo_server:stop(whereis(process4), kill).
echo_server name process4 reason kill.
echo_server name process1 reason shutdown.
ok
echo_server name:process1
echo_server name:process3
echo_server name:process4
=ERROR REPORT==== 23-Mar-2018::01:20:00 ===
** Generic server process4 terminating
** Last message in was {stop,kill}
** When Server state == process4
** Reason for termination ==
** kill
** Client <0.60.0> stacktrace
** [{gen,do_call,4,[{file,"gen.erl"},{line,169}]},
{gen_server,call,2,[{file,"gen_server.erl"},{line,202}]},
{erl_eval,do_apply,6,[{file,"erl_eval.erl"},{line,674}]},
{shell,exprs,7,[{file,"shell.erl"},{line,687}]},
{shell,eval_exprs,7,[{file,"shell.erl"},{line,642}]},
{shell,eval_loop,3,[{file,"shell.erl"},{line,627}]}]
  • transient 子进程以 shutdown 或 normal 终止时不会被重新启动;但在 rest_for_one / one_for_all 策略下,因其他进程重启而被 shutdown 时,会被一并重新启动
  • temporary 子进程在 rest_for_one / one_for_all 策略下被连带 shutdown 后,不会被重新启动
  • temporary 子进程异常终止时,不会触发 rest_for_one / one_for_all 的重启

simple_one_for_one

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
1> c(echo_server).
{ok,echo_server}

2> c(echo_server_sup).
{ok,echo_server_sup}

3> {ok, Pid} = echo_server_sup:start_link(simple_one_for_one, 5 ,60).
{ok,<0.72.0>}

4> supervisor:start_child(echo_server_sup, [process1]).
echo_server name:process1
{ok,<0.74.0>}

5> supervisor:start_child(echo_server_sup, [process2]).
echo_server name:process2
{ok,<0.76.0>}

6> supervisor:start_child(echo_server_sup, [process3]).
echo_server name:process3
{ok,<0.78.0>}

7> supervisor:terminate_child(echo_server_sup, whereis(process1)).
echo_server name process1 reason shutdown.
ok