I’m subscribed to confd notifications (CONFD_NOTIF_HEARTBEAT | CONFD_NOTIF_HEALTH_CHECK, with intervals of 10 and 13 seconds) using `confd_notifications_connect2()`.
When confd is under heavy load from many simultaneous requests (HTTPS, SNMP…), it sometimes does not send a heartbeat for at least 3 times the timeout. Because of that, I automatically restart it.
This is confd 8.0.17.
There’s nothing in syslog or devel.log.
Using `/bin/confd --printlog /var/confd/log/error/confderr.log.1`, I can see the latest errors from the relevant time:
24-Aug-2025::13:47:41.668 <0.47.0> <0.294.0> ** Generic server aaa_server terminating
** Last message in was {cdb_event,cdb,
{17634791,997212},
commit,
[{{progress,3},
em,#Fun<cst_progress.1.31411816>}],
[{0,[3,7]}]}
** When Server state == {state,
{cache,
[{user,<<"admin">>,
<<"$1$Lraa5OeJ6L$AssdgOeawAh4xLt.G.">>,md5,<<>>,
<<>>,9000,1000,
[1000]},
...
true,cdb,
[{3,[['http://tail-f.com/ns/aaa/1.1'|aaa]]},
{4,[['http://tail-f.com/ns/aaa/1.1'|alias]]},
{5,[['http://tail-f.com/ns/aaa/1.1'|user]]},
{6,[['http://tail-f.com/ns/aaa/1.1'|session]]},
{7,
[['urn:ietf:params:xml:ns:yang:ietf-netconf-acm'|
nacm]]}],
undefined,false,[],<<"read-only">>,
{aaa_env,"/etc/confd/ssh",false,"system-auth",10000,
true,true,"/bin/auth_client.bin",false,false,true,
false,[],false,false,false,false,[],false,false,
false,<<"read-only">>,
[localAuthentication,externalAuthentication,pam],
[externalValidation],
[externalChallenge],
system,ignore,known,1024,true,false},
'http://tail-f.com/ns/aaa/1.1',[],undefined,[],0,[]}
** Reason for termination ==
** {function_clause,
[{cdb_agent,with_agent_call,
[notab,
{rreq,default_db,lookup,
[[779594945,1138155655,[1100000446|1554580266]],false]}],
[{file,"cdb_agent.erl"},{line,149}]},
{cdb,get_elem,7,[{file,"cdb.erl"},{line,472}]},
{cs_dbm,do_op,8,[{file,"cs_dbm.erl"},{line,510}]},
{cs_dbm,rs_db_op,6,[{file,"cs_dbm.erl"},{line,194}]},
{cst_get_delem,do_sh_get_delem,3,
[{file,"cst_get_delem.erl"},{line,312}]},
{cst_get_delem,sh_get_delem3,3,[{file,"cst_get_delem.erl"},{line,62}]},
{cst_lib,sh_get_default_ref_value,2,[{file,"cst_lib.erl"},{line,868}]},
{cs_trans_diff_iterate,get_default_set_value,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1446}]},
{cs_trans_diff_iterate,get_op_val,9,
[{file,"cs_trans_diff_iterate.erl"},{line,907}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,232}]},
{cs_trans_diff_iterate,loop_next_call_single_tab,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1094}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,523}]},
{cs_trans_diff_iterate,loop_next_call_single_tab,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1094}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,523}]},
{cs_trans_diff_iterate,loop_next_call_single_tab,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1094}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,523}]},
{cs_trans_diff_iterate,sh_diff_iterate,7,
[{file,"cs_trans_diff_iterate.erl"},{line,154}]},
{aaa_server,interesting_event,1,[{file,"aaa_server.erl"},{line,4285}]},
{aaa_server,'-interesting_events/2-lc$^0/1-0-',1,
[{file,"aaa_server.erl"},{line,4264}]},
{aaa_server,interesting_events,2,
[{file,"aaa_server.erl"},{line,4264}]},
{aaa_server,handle_info,2,[{file,"aaa_server.erl"},{line,4215}]},
{gen_server,try_dispatch,4,[{file,"gen_server.erl"},{line,653}]},
{gen_server,handle_msg,6,[{file,"gen_server.erl"},{line,727}]},
{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,249}]}]}
{"24-Aug-2025::13:47:41.678",
{error_report,<0.47.0>,
{<0.294.0>,crash_report,
[[{initial_call,{aaa_server,init,['Argument__1']}},
{pid,<0.294.0>},
{registered_name,aaa_server},
{error_info,
{error,function_clause,
[{cdb_agent,with_agent_call,
[notab,
{rreq,default_db,lookup,
[[779594945,1138155655,[1100000446|1554580266]],
false]}],
[{file,"cdb_agent.erl"},{line,149}]},
{cdb,get_elem,7,[{file,"cdb.erl"},{line,472}]},
{cs_dbm,do_op,8,[{file,"cs_dbm.erl"},{line,510}]},
{cs_dbm,rs_db_op,6,[{file,"cs_dbm.erl"},{line,194}]},
{cst_get_delem,do_sh_get_delem,3,
[{file,"cst_get_delem.erl"},{line,312}]},
{cst_get_delem,sh_get_delem3,3,
[{file,"cst_get_delem.erl"},{line,62}]},
{cst_lib,sh_get_default_ref_value,2,
[{file,"cst_lib.erl"},{line,868}]},
{cs_trans_diff_iterate,get_default_set_value,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1446}]},
{cs_trans_diff_iterate,get_op_val,9,
[{file,"cs_trans_diff_iterate.erl"},{line,907}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,232}]},
{cs_trans_diff_iterate,loop_next_call_single_tab,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1094}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,523}]},
{cs_trans_diff_iterate,loop_next_call_single_tab,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1094}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,523}]},
{cs_trans_diff_iterate,loop_next_call_single_tab,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1094}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,523}]},
{cs_trans_diff_iterate,sh_diff_iterate,7,
[{file,"cs_trans_diff_iterate.erl"},{line,154}]},
{aaa_server,interesting_event,1,
[{file,"aaa_server.erl"},{line,4285}]},
{aaa_server,'-interesting_events/2-lc$^0/1-0-',1,
[{file,"aaa_server.erl"},{line,4264}]},
{aaa_server,interesting_events,2,
[{file,"aaa_server.erl"},{line,4264}]},
{aaa_server,handle_info,2,
[{file,"aaa_server.erl"},{line,4215}]},
{gen_server,try_dispatch,4,
[{file,"gen_server.erl"},{line,653}]},
{gen_server,handle_msg,6,
[{file,"gen_server.erl"},{line,727}]},
{proc_lib,init_p_do_apply,3,
[{file,"proc_lib.erl"},{line,249}]}]}},
{ancestors,[confd_second_sup,confd_sup,<0.48.0>]},
{message_queue_len,0},
{messages,[]},
{links,[<0.96.0>]},
{dictionary,[{'$confd_cfg_validateUtf8',true}]},
{trap_exit,true},
{status,running},
{heap_size,196650},
{stack_size,27},
{reductions,1400615528}],
[]]}}}
{"24-Aug-2025::13:47:41.686",
{error_report,<0.47.0>,
{<0.96.0>,supervisor_report,
[{supervisor,{local,confd_second_sup}},
{errorContext,child_terminated},
{reason,
{function_clause,
[{cdb_agent,with_agent_call,
[notab,
{rreq,default_db,lookup,
[[779594945,1138155655,[1100000446|1554580266]],
false]}],
[{file,"cdb_agent.erl"},{line,149}]},
{cdb,get_elem,7,[{file,"cdb.erl"},{line,472}]},
{cs_dbm,do_op,8,[{file,"cs_dbm.erl"},{line,510}]},
{cs_dbm,rs_db_op,6,[{file,"cs_dbm.erl"},{line,194}]},
{cst_get_delem,do_sh_get_delem,3,
[{file,"cst_get_delem.erl"},{line,312}]},
{cst_get_delem,sh_get_delem3,3,
[{file,"cst_get_delem.erl"},{line,62}]},
{cst_lib,sh_get_default_ref_value,2,
[{file,"cst_lib.erl"},{line,868}]},
{cs_trans_diff_iterate,get_default_set_value,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1446}]},
{cs_trans_diff_iterate,get_op_val,9,
[{file,"cs_trans_diff_iterate.erl"},{line,907}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,232}]},
{cs_trans_diff_iterate,loop_next_call_single_tab,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1094}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,523}]},
{cs_trans_diff_iterate,loop_next_call_single_tab,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1094}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,523}]},
{cs_trans_diff_iterate,loop_next_call_single_tab,4,
[{file,"cs_trans_diff_iterate.erl"},{line,1094}]},
{cs_trans_diff_iterate,sh_diff_iterate_x,11,
[{file,"cs_trans_diff_iterate.erl"},{line,523}]},
{cs_trans_diff_iterate,sh_diff_iterate,7,
[{file,"cs_trans_diff_iterate.erl"},{line,154}]},
{aaa_server,interesting_event,1,
[{file,"aaa_server.erl"},{line,4285}]},
{aaa_server,'-interesting_events/2-lc$^0/1-0-',1,
[{file,"aaa_server.erl"},{line,4264}]},
{aaa_server,interesting_events,2,
[{file,"aaa_server.erl"},{line,4264}]},
{aaa_server,handle_info,2,
[{file,"aaa_server.erl"},{line,4215}]},
{gen_server,try_dispatch,4,
[{file,"gen_server.erl"},{line,653}]},
{gen_server,handle_msg,6,[{file,"gen_server.erl"},{line,727}]},
{proc_lib,init_p_do_apply,3,
[{file,"proc_lib.erl"},{line,249}]}]}},
{offender,
[{pid,<0.294.0>},
{id,aaa_server},
{mfargs,{aaa_server,start_link,[]}},
{restart_type,permanent},
{shutdown,2000},
{child_type,worker}]}]}}}
Is there anything I can do?
Thank you!