diff --git a/erts/emulator/beam/beam_bif_load.c b/erts/emulator/beam/beam_bif_load.c
index 8fecb109fa30..27d607abf511 100644
--- a/erts/emulator/beam/beam_bif_load.c
+++ b/erts/emulator/beam/beam_bif_load.c
@@ -630,7 +630,7 @@ BIF_RETTYPE erts_internal_check_dirty_process_code_2(BIF_ALIST_2)
 {
     erts_aint32_t state;
     Process *rp;
-    int dirty, busy, reds = 0;
+    int busy, reds = 0;
     Eterm res;
 
     if (BIF_P != erts_dirty_process_signal_handler
@@ -652,13 +652,7 @@ BIF_RETTYPE erts_internal_check_dirty_process_code_2(BIF_ALIST_2)
         BIF_RET(am_false);
 
     state = erts_atomic32_read_nob(&rp->state);
-    dirty = (state & ERTS_PSFLG_DIRTY_RUNNING);
-    /*
-     * Ignore ERTS_PSFLG_DIRTY_RUNNING_SYS (see
-     * comment in erts_execute_dirty_system_task()
-     * in erl_process.c).
-     */
-    if (!dirty)
+    if (!ERTS_PROC_IN_DIRTY_STATE(state))
         BIF_RET(am_normal);
 
     busy = erts_proc_trylock(rp, ERTS_PROC_LOCK_MAIN) == EBUSY;
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index 8cc97d6ff813..5ceaa91a62e4 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -1706,9 +1706,7 @@ static Eterm process_flag_aux(Process *c_p, int *redsp, Eterm flag, Eterm val)
          */
         state = erts_atomic32_read_nob(&c_p->state);
 
-        if (state & (ERTS_PSFLG_RUNNING_SYS
-                     | ERTS_PSFLG_DIRTY_RUNNING_SYS
-                     | ERTS_PSFLG_DIRTY_RUNNING)) {
+        if (!(state & ERTS_PSFLG_RUNNING)) {
             /*
              * We are either processing signals before
              * being executed or executing dirty. That
@@ -1717,7 +1715,6 @@ static Eterm process_flag_aux(Process *c_p, int *redsp, Eterm flag, Eterm val)
             *redsp = 1;
         }
         else {
-            ASSERT(state & ERTS_PSFLG_RUNNING);
 
             /*
              * F_DELAY_GC is currently only set when
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 7dd79bacfbb3..f77206355a98 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -307,6 +307,7 @@ type SL_MPATHS SHORT_LIVED SYSTEM sl_migration_paths
 type T2B_DETERMINISTIC SHORT_LIVED PROCESSES term_to_binary_deterministic
 type DSIG_HNDL_NTFY SHORT_LIVED PROCESSES dirty_signal_handler_notification
+type SCHD_SIG_NTFY SHORT_LIVED PROCESSES scheduled_signal_notify
 type CODE_COVERAGE STANDARD SYSTEM code_coverage
 
 #
diff --git a/erts/emulator/beam/erl_proc_sig_queue.c b/erts/emulator/beam/erl_proc_sig_queue.c
index c8ae6aa8ae39..dd984d3a78f3 100644
--- a/erts/emulator/beam/erl_proc_sig_queue.c
+++ b/erts/emulator/beam/erl_proc_sig_queue.c
@@ -918,7 +918,7 @@ notify_dirty_signal_handler(Eterm pid,
     ErtsMessage *mp;
     Process *sig_handler;
 
-    ASSERT(state & ERTS_PSFLG_DIRTY_RUNNING);
+    ASSERT(state & (ERTS_PSFLGS_DIRTY_WORK|ERTS_PSFLG_DIRTY_RUNNING));
 
     if (prio < 0)
         prio = (int) ERTS_PSFLGS_GET_USR_PRIO(state);
@@ -958,13 +958,10 @@ delayed_notify_dirty_signal_handler(void *vdshnp)
     if (proc) {
         erts_aint32_t state = erts_atomic32_read_acqb(&proc->state);
         /*
-         * Notify the dirty signal handler if it is still running
-         * dirty and still have signals to handle...
+         * Notify the dirty signal handler if it is still scheduled
+         * or running dirty and still has signals to handle...
          */
-        if (!!(state & ERTS_PSFLG_DIRTY_RUNNING)
-            & !!(state & (ERTS_PSFLG_SIG_Q
-                          | ERTS_PSFLG_NMSG_SIG_IN_Q
-                          | ERTS_PSFLG_MSG_SIG_IN_Q))) {
+        if (ERTS_PROC_NEED_DIRTY_SIG_HANDLING(state)) {
             notify_dirty_signal_handler(dshnp->pid, state, dshnp->prio);
         }
     }
@@ -1244,12 +1241,7 @@ maybe_elevate_sig_handling_prio(Process *c_p, int prio, Eterm other)
         if (res) {
             /* ensure handled if dirty executing... */
             state = erts_atomic32_read_nob(&rp->state);
-            /*
-             * We ignore ERTS_PSFLG_DIRTY_RUNNING_SYS. For
-             * more info see erts_execute_dirty_system_task()
-             * in erl_process.c.
-             */
-            if (state & ERTS_PSFLG_DIRTY_RUNNING)
+            if (ERTS_PROC_NEED_DIRTY_SIG_HANDLING(state))
                 erts_ensure_dirty_proc_signals_handled(rp, state,
                                                        min_prio, 0);
         }
@@ -1258,6 +1250,15 @@ maybe_elevate_sig_handling_prio(Process *c_p, int prio, Eterm other)
     return res;
 }
 
+typedef struct {
+    Eterm pid;
+    int nmsig;
+    int msig;
+} ErtsSchedSignalNotify;
+
+static void
+sched_sig_notify(void *vssnp);
+
 void
 erts_proc_sig_fetch__(Process *proc,
                       ErtsSignalInQueueBufferArray *buffers,
@@ -1391,11 +1392,53 @@ erts_proc_sig_fetch__(Process *proc,
          * future call to erts_proc_sig_fetch().
          */
         if (erts_atomic32_read_nob(&buffers->nonmsgs_in_slots))
-            set_flags |= ERTS_PSFLG_ACTIVE_SYS|ERTS_PSFLG_NMSG_SIG_IN_Q;
+            set_flags |= ERTS_PSFLG_NMSG_SIG_IN_Q;
         if (erts_atomic32_read_nob(&buffers->msgs_in_slots))
-            set_flags |= ERTS_PSFLG_ACTIVE|ERTS_PSFLG_MSG_SIG_IN_Q;
-        if (set_flags)
-            (void) erts_atomic32_read_bor_relb(&proc->state, set_flags);
+            set_flags |= ERTS_PSFLG_MSG_SIG_IN_Q;
+        if (set_flags) {
+            erts_aint32_t oflgs;
+            oflgs = erts_atomic32_read_bor_relb(&proc->state, set_flags);
+            if ((oflgs & (ERTS_PSFLG_NMSG_SIG_IN_Q
+                          | ERTS_PSFLG_MSG_SIG_IN_Q)) != set_flags) {
+                int msig = 0, nmsig = 0;
+                /*
+                 * We did set at least one of the flags; check if we may
+                 * need to set corresponding active flag(s)...
+                 */
+                if ((!!(set_flags & ERTS_PSFLG_NMSG_SIG_IN_Q))
+                    & (!(oflgs & (ERTS_PSFLG_NMSG_SIG_IN_Q
+                                  | ERTS_PSFLG_ACTIVE_SYS)))) {
+                    /* We set nmsig-in-q flag and active-sys missing... */
+                    nmsig = !0;
+                }
+                if ((!!(set_flags & ERTS_PSFLG_MSG_SIG_IN_Q))
+                    & (!(oflgs & (ERTS_PSFLG_MSG_SIG_IN_Q
+                                  | ERTS_PSFLG_ACTIVE)))) {
+                    /* We set msig-in-q flag and active missing... */
+                    msig = !0;
+                }
+                if (msig | nmsig) {
+                    /*
+                     * We don't know exactly what locks we got, so
+                     * we need to schedule the notification...
+                     */
+                    ErtsSchedulerData *esdp = erts_get_scheduler_data();
+                    int tid = (esdp && esdp->type == ERTS_SCHED_NORMAL
+                               ? (int) esdp->no
+                               : 1);
+                    ErtsSchedSignalNotify *ssnp =
+                        (ErtsSchedSignalNotify *)
+                        erts_alloc(ERTS_ALC_T_SCHD_SIG_NTFY,
+                                   sizeof(ErtsSchedSignalNotify));
+                    ssnp->nmsig = nmsig;
+                    ssnp->msig = msig;
+                    ssnp->pid = proc->common.id;
+                    erts_schedule_misc_aux_work(tid,
+                                                sched_sig_notify,
+                                                (void *) ssnp);
+                }
+            }
+        }
         /* else:
          * Another thread is currently operating on a buffer and
          * will soon set appropriate.
@@ -1413,6 +1456,45 @@ erts_proc_sig_fetch__(Process *proc,
 }
 
 
+static void
+sched_sig_notify(void *vssnp)
+{
+    ErtsSchedSignalNotify *ssnp = (ErtsSchedSignalNotify *) vssnp;
+    Process *proc = erts_proc_lookup(ssnp->pid);
+    if (proc) {
+        erts_aint32_t state = erts_atomic32_read_acqb(&proc->state);
+        int nmsig = ssnp->nmsig;
+        int msig = ssnp->msig;
+        ASSERT(nmsig || msig);
+        if ((!!nmsig) & ((!(state & (ERTS_PSFLG_SIG_Q
+                                     | ERTS_PSFLG_NMSG_SIG_IN_Q)))
+                         | (!!(state & ERTS_PSFLG_ACTIVE_SYS)))) {
+            /*
+             * Either already handled or someone else set the active-sys
+             * flag...
+             */
+            nmsig = 0;
+        }
+        if ((!!msig) & (!!(state & ERTS_PSFLG_ACTIVE))) {
+            /*
+             * Someone else set the active flag (we cannot determine if it
+             * has been handled or not by looking at the state flag)...
+             */
+            msig = 0;
+        }
+        if (msig|nmsig) {
+            if (!nmsig) {
+                erts_proc_notify_new_message(proc, 0);
+            }
+            else {
+                erts_aint32_t extra = msig ? ERTS_PSFLG_ACTIVE : 0;
+                erts_proc_notify_new_sig(proc, state, extra);
+            }
+        }
+    }
+    erts_free(ERTS_ALC_T_SCHD_SIG_NTFY, vssnp);
+}
+
 void
 erts_proc_sig_destroy_unlink_op(ErtsSigUnlinkOp *sulnk)
 {
@@ -8165,12 +8247,7 @@ erts_internal_dirty_process_handle_signals_1(BIF_ALIST_1)
         BIF_RET(am_noproc);
 
     state = erts_atomic32_read_acqb(&rp->state);
-    dirty = (state & ERTS_PSFLG_DIRTY_RUNNING);
-    /*
-     * Ignore ERTS_PSFLG_DIRTY_RUNNING_SYS (see
-     * comment in erts_execute_dirty_system_task()
-     * in erl_process.c).
-     */
+    dirty = ERTS_PROC_IN_DIRTY_STATE(state);
     if (!dirty)
         BIF_RET(am_normal);
 
@@ -8184,7 +8261,7 @@ erts_internal_dirty_process_handle_signals_1(BIF_ALIST_1)
 
     state = erts_atomic32_read_mb(&rp->state);
     noproc = (state & ERTS_PSFLG_FREE);
-    dirty = (state & ERTS_PSFLG_DIRTY_RUNNING);
+    dirty = ERTS_PROC_NEED_DIRTY_SIG_HANDLING(state);
 
     if (busy) {
         if (noproc)
diff --git a/erts/emulator/beam/erl_proc_sig_queue.h b/erts/emulator/beam/erl_proc_sig_queue.h
index 7177ee538568..1caab5bb017d 100644
--- a/erts/emulator/beam/erl_proc_sig_queue.h
+++ b/erts/emulator/beam/erl_proc_sig_queue.h
@@ -2004,16 +2004,13 @@ erts_proc_notify_new_sig(Process* rp, erts_aint32_t state,
         state = erts_proc_sys_schedule(rp, state, enable_flag);
     }
 
-    if (state & ERTS_PSFLG_DIRTY_RUNNING) {
-        /*
-         * We ignore ERTS_PSFLG_DIRTY_RUNNING_SYS. For
-         * more info see erts_execute_dirty_system_task()
-         * in erl_process.c.
-         */
+    if (ERTS_PROC_IN_DIRTY_STATE(state)) {
         erts_ensure_dirty_proc_signals_handled(rp, state, -1, 0);
     }
 }
 
+
+
+
 ERTS_GLB_INLINE void
 erts_proc_notify_new_message(Process *p, ErtsProcLocks locks)
 {
@@ -2021,12 +2018,7 @@ erts_proc_notify_new_message(Process *p, ErtsProcLocks locks)
     erts_aint32_t state = erts_atomic32_read_nob(&p->state);
     if (!(state & ERTS_PSFLG_ACTIVE))
         erts_schedule_process(p, state, locks);
-    if (state & ERTS_PSFLG_DIRTY_RUNNING) {
-        /*
-         * We ignore ERTS_PSFLG_DIRTY_RUNNING_SYS. For
-         * more info see erts_execute_dirty_system_task()
-         * in erl_process.c.
-         */
+    if (ERTS_PROC_NEED_DIRTY_SIG_HANDLING(state)) {
         erts_ensure_dirty_proc_signals_handled(p, state, -1, locks);
     }
 }
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index f9a6ff0916f2..aa1b56dc3e81 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -6388,11 +6388,9 @@ static int
 check_dirty_enqueue_in_prio_queue(Process *c_p,
                                   erts_aint32_t *newp,
                                   erts_aint32_t actual,
-                                  erts_aint32_t aprio,
-                                  erts_aint32_t qbit)
+                                  erts_aint32_t aprio)
 {
     int queue;
-    erts_aint32_t dact, max_qbit;
 
     /* Termination should be done on an ordinary scheduler */
     if ((*newp) & ERTS_PSFLG_EXITING) {
@@ -6400,32 +6398,15 @@ check_dirty_enqueue_in_prio_queue(Process *c_p,
         return ERTS_ENQUEUE_NORMAL_QUEUE;
     }
 
-    /*
-     * If we have system tasks, we enqueue on ordinary run-queue
-     * and take care of those system tasks first.
-     */
-    if ((*newp) & ERTS_PSFLG_SYS_TASKS)
-        return ERTS_ENQUEUE_NORMAL_QUEUE;
-
-    dact = erts_atomic32_read_mb(&c_p->xstate);
     if (actual & (ERTS_PSFLG_DIRTY_ACTIVE_SYS
                   | ERTS_PSFLG_DIRTY_CPU_PROC)) {
-        max_qbit = ((dact >> ERTS_PXSFLGS_IN_CPU_PRQ_MASK_OFFSET)
-                    & ERTS_PXSFLGS_QMASK);
         queue = ERTS_ENQUEUE_DIRTY_CPU_QUEUE;
     }
     else {
         ASSERT(actual & ERTS_PSFLG_DIRTY_IO_PROC);
-        max_qbit = ((dact >> ERTS_PXSFLGS_IN_IO_PRQ_MASK_OFFSET)
-                    & ERTS_PXSFLGS_QMASK);
         queue = ERTS_ENQUEUE_DIRTY_IO_QUEUE;
     }
-    max_qbit |= 1 << ERTS_PSFLGS_QMASK_BITS;
-    max_qbit &= -max_qbit;
-
-    if (qbit >= max_qbit)
-        return ERTS_ENQUEUE_NOT; /* Already queued in higher or equal prio */
 
     if ((actual & (ERTS_PSFLG_IN_RUNQ|ERTS_PSFLGS_USR_PRIO_MASK))
         != (aprio << ERTS_PSFLGS_USR_PRIO_OFFSET)) {
         /*
@@ -6487,19 +6468,26 @@ check_enqueue_in_prio_queue(Process *c_p,
                             erts_aint32_t *newp,
                             erts_aint32_t actual)
 {
-    erts_aint32_t aprio, qbit, max_qbit;
+    erts_aint32_t aprio, qbit, max_qbit, new = *newp;
 
-    aprio = ((*newp) >> ERTS_PSFLGS_ACT_PRIO_OFFSET) & ERTS_PSFLGS_PRIO_MASK;
+    aprio = (new >> ERTS_PSFLGS_ACT_PRIO_OFFSET) & ERTS_PSFLGS_PRIO_MASK;
     qbit = 1 << aprio;
 
     *prq_prio_p = aprio;
 
-    if (((actual & (ERTS_PSFLG_SUSPENDED
-                    | ERTS_PSFLG_ACTIVE_SYS)) != (ERTS_PSFLG_SUSPENDED
-                                                  | ERTS_PSFLG_ACTIVE_SYS))
-        & (!!(actual & ERTS_PSFLGS_DIRTY_WORK))) {
-        int res = check_dirty_enqueue_in_prio_queue(c_p, newp, actual,
-                                                    aprio, qbit);
+    if ((new & (ERTS_PSFLG_SUSPENDED
+                | ERTS_PSFLG_ACTIVE_SYS
+                | ERTS_PSFLG_DIRTY_ACTIVE_SYS)) == ERTS_PSFLG_SUSPENDED) {
+        /*
+         * Do not schedule this process since we are suspended and we have
+         * no system work to do for the process...
+         */
+        return ERTS_ENQUEUE_NOT;
+    }
+
+    if ((!(new & ERTS_PSFLG_SYS_TASKS))
+        & (!!(new & ERTS_PSFLGS_DIRTY_WORK))) {
+        int res = check_dirty_enqueue_in_prio_queue(c_p, newp, actual, aprio);
         if (res != ERTS_ENQUEUE_NORMAL_QUEUE)
             return res;
     }
@@ -6600,19 +6588,19 @@ select_enqueue_run_queue(int enqueue, int enq_prio, Process *p, erts_aint32_t st
  * reference count on the process when done with it...
  */
 static ERTS_INLINE int
-schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p,
+schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t *statep, Process *p,
                      Process *proxy, int is_normal_sched)
 {
     erts_aint32_t a, e, n, enq_prio = -1, running_flgs;
     int enqueue; /* < 0 -> use proxy */
     ErtsRunQueue* runq;
 
-    ASSERT(!(state & (ERTS_PSFLG_DIRTY_IO_PROC|ERTS_PSFLG_DIRTY_CPU_PROC))
+    a = *statep;
+
+    ASSERT(!(a & (ERTS_PSFLG_DIRTY_IO_PROC|ERTS_PSFLG_DIRTY_CPU_PROC))
            || (BeamIsOpCode(*(const BeamInstr*)p->i, op_call_nif_WWW)
                || BeamIsOpCode(*(const BeamInstr*)p->i, op_call_bif_W)));
 
-    a = state;
-
     /* Clear activ-sys if needed... */
     while (1) {
         n = e = a;
@@ -6669,8 +6657,9 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p,
                           == ERTS_PSFLG_ACTIVE));
             n &= ~running_flgs;
 
-            if ((!!(a & (ERTS_PSFLG_ACTIVE_SYS|ERTS_PSFLG_DIRTY_ACTIVE_SYS))
-                 | ((a & (ERTS_PSFLG_ACTIVE|ERTS_PSFLG_SUSPENDED)) == ERTS_PSFLG_ACTIVE))) {
+            if (a & (ERTS_PSFLG_ACTIVE_SYS
+                     | ERTS_PSFLG_DIRTY_ACTIVE_SYS
+                     | ERTS_PSFLG_ACTIVE)) {
                 enqueue = check_enqueue_in_prio_queue(p, &enq_prio, &n, a);
             }
             a = erts_atomic32_cmpxchg_mb(&p->state, n, e);
@@ -6678,6 +6667,8 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p,
             break;
     }
 
+    *statep = n;
+
     runq = select_enqueue_run_queue(enqueue, enq_prio, p, n);
 
     if (!runq) {
@@ -7128,10 +7119,13 @@ suspend_process(Process *c_p, Process *p)
     if (c_p == p) {
         state = erts_atomic32_read_bor_relb(&p->state,
                                             ERTS_PSFLG_SUSPENDED);
-        ASSERT(state & (ERTS_PSFLG_RUNNING
-                        | ERTS_PSFLG_RUNNING_SYS
-                        | ERTS_PSFLG_DIRTY_RUNNING
-                        | ERTS_PSFLG_DIRTY_RUNNING_SYS));
+        ASSERT((state & (ERTS_PSFLG_RUNNING
+                         | ERTS_PSFLG_RUNNING_SYS
+                         | ERTS_PSFLG_DIRTY_RUNNING
+                         | ERTS_PSFLG_DIRTY_RUNNING_SYS))
+               || ((ERTS_PROC_LOCK_MAIN
+                    & erts_proc_lc_my_proc_locks(p))
+                   && (c_p->sig_qs.flags & FS_HANDLING_SIGS)));
         suspended = (state & ERTS_PSFLG_SUSPENDED) ? -1: 1;
     }
     else {
@@ -9234,8 +9228,13 @@ void
 erts_suspend(Process* c_p, ErtsProcLocks c_p_locks, Port *busy_port)
 {
     int suspend;
-
-    ASSERT(c_p == erts_get_current_process());
+#ifdef DEBUG
+    Process *curr_proc = erts_get_current_process();
+    ASSERT(curr_proc == c_p
+           || curr_proc == erts_dirty_process_signal_handler
+           || curr_proc == erts_dirty_process_signal_handler_high
+           || curr_proc == erts_dirty_process_signal_handler_max);
+#endif
     ERTS_LC_ASSERT(c_p_locks == erts_proc_lc_my_proc_locks(c_p));
     if (!(c_p_locks & ERTS_PROC_LOCK_STATUS))
         erts_proc_lock(c_p, ERTS_PROC_LOCK_STATUS);
@@ -9602,7 +9601,7 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls)
             int dec_refc;
 
             /* schedule_out_process() returns with rq locked! */
-            dec_refc = schedule_out_process(rq, state, p,
+            dec_refc = schedule_out_process(rq, &state, p,
                                             proxy_p, is_normal_sched);
             proxy_p = NULL;
 
@@ -9619,6 +9618,17 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls)
                                      | ERTS_PROC_LOCK_STATUS
                                      | ERTS_PROC_LOCK_TRACE));
 
+            if (ERTS_PROC_NEED_DIRTY_SIG_HANDLING(state)) {
+                /*
+                 * Ensure signals are handled while scheduled
+                 * or running dirty...
+                 */
+                int prio = ERTS_PSFLGS_GET_ACT_PRIO(state);
+                erts_runq_unlock(rq);
+                erts_ensure_dirty_proc_signals_handled(p, state, prio, 0);
+                erts_runq_lock(rq);
+            }
+
             ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER);
 
             if (state & ERTS_PSFLG_FREE) {
@@ -10149,17 +10159,6 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls)
 
     if (!is_normal_sched) {
         /* On dirty scheduler */
-        if (!!(state & ERTS_PSFLG_DIRTY_RUNNING)
-            & !!(state & (ERTS_PSFLG_SIG_Q
-                          | ERTS_PSFLG_NMSG_SIG_IN_Q
-                          | ERTS_PSFLG_MSG_SIG_IN_Q))) {
-            /* Ensure signals are handled while executing dirty... */
-            int prio = ERTS_PSFLGS_GET_ACT_PRIO(state);
-            erts_ensure_dirty_proc_signals_handled(p,
-                                                   state,
-                                                   prio,
-                                                   ERTS_PROC_LOCK_MAIN);
-        }
     }
     else {
         /* On normal scheduler */
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index fc150ac7e04d..44bc0b306935 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -1318,6 +1318,34 @@ void erts_check_for_holes(Process* p);
                                   | ERTS_PSFLG_DIRTY_RUNNING              \
                                   | ERTS_PSFLG_DIRTY_RUNNING_SYS)
 
+/*
+ * A process is in a dirty state if it has dirty work scheduled or
+ * is running dirty. We do not include the dirty-running-sys state,
+ * since it is executing while holding the main process lock, which
+ * makes it hard or impossible to manipulate from the outside. The time
+ * spent in the dirty-running-sys state is also limited compared to the
+ * other dirty states.
+ *
+ * For more info on why we ignore dirty-running-sys, see
+ * erts_execute_dirty_system_task() in erl_process.c.
+ */
+#define ERTS_PROC_IN_DIRTY_STATE(S)                                     \
+    ((!!((S) & (ERTS_PSFLGS_DIRTY_WORK                                  \
+                | ERTS_PSFLG_DIRTY_RUNNING)))                           \
+     & (!((S) & (ERTS_PSFLG_DIRTY_RUNNING_SYS                           \
+                 | ERTS_PSFLG_RUNNING_SYS                               \
+                 | ERTS_PSFLG_RUNNING))))
+
+/*
+ * A process needs dirty signal handling if it has unhandled signals
+ * and is in a dirty state...
+ */
+#define ERTS_PROC_NEED_DIRTY_SIG_HANDLING(S)                            \
+    ((!!((S) & (ERTS_PSFLG_SIG_Q                                        \
+                | ERTS_PSFLG_NMSG_SIG_IN_Q                              \
+                | ERTS_PSFLG_MSG_SIG_IN_Q)))                            \
+     & ERTS_PROC_IN_DIRTY_STATE((S)))
+
 #define ERTS_PSFLGS_GET_ACT_PRIO(PSFLGS) \
     (((PSFLGS) >> ERTS_PSFLGS_ACT_PRIO_OFFSET) & ERTS_PSFLGS_PRIO_MASK)
 #define ERTS_PSFLGS_GET_USR_PRIO(PSFLGS) \
diff --git a/erts/emulator/test/signal_SUITE.erl b/erts/emulator/test/signal_SUITE.erl
index 0c5b27a0b16c..0ca6623261c8 100644
--- a/erts/emulator/test/signal_SUITE.erl
+++ b/erts/emulator/test/signal_SUITE.erl
@@ -39,6 +39,7 @@
          kill2killed/1,
          contended_signal_handling/1,
          dirty_signal_handling_race/1,
+         dirty_signal_handling/1,
          busy_dist_exit_signal/1,
         busy_dist_demonitor_signal/1,
         busy_dist_down_signal/1,
@@ -60,7 +61,10 @@
          simultaneous_signals_basic/1,
          simultaneous_signals_recv/1,
          simultaneous_signals_exit/1,
-         simultaneous_signals_recv_exit/1]).
+         simultaneous_signals_recv_exit/1,
+         parallel_signal_enqueue_race_1/1,
+         parallel_signal_enqueue_race_2/1,
+         dirty_schedule/1]).
 
 -export([spawn_spammers/3]).
 
@@ -74,6 +78,9 @@ init_per_suite(Config) ->
     Config.
 
 end_per_suite(_Config) ->
+    try erts_debug:set_internal_state(available_internal_state, false)
+    catch _:_ -> ok
+    end,
     ok.
 
 suite() ->
@@ -85,6 +92,7 @@ all() ->
      kill2killed,
      contended_signal_handling,
      dirty_signal_handling_race,
+     dirty_signal_handling,
      busy_dist_exit_signal,
      busy_dist_demonitor_signal,
      busy_dist_down_signal,
@@ -95,6 +103,9 @@ all() ->
      monitor_named_order_local,
      monitor_named_order_remote,
      monitor_nodes_order,
+     parallel_signal_enqueue_race_1,
+     parallel_signal_enqueue_race_2,
+     dirty_schedule,
      {group, adjust_message_queue}].
 
 groups() ->
@@ -379,6 +390,33 @@ move_dirty_signal_handlers_to_first_scheduler() ->
     end,
     ok.
 
+dirty_signal_handling(Config) when is_list(Config) ->
+    %%
+    %% PR-7822 (third commit)
+    %%
+    %% Make sure signals are handled regardless of whether a process is
+    %% executing dirty or is scheduled for dirty execution...
+
+    %% Make sure all dirty I/O schedulers are occupied with work...
+    Ps = lists:map(fun (_) ->
+                           spawn(fun () ->
+                                         erts_debug:dirty_io(wait, 1000)
+                                 end)
+                   end, lists:seq(1, erlang:system_info(dirty_io_schedulers))),
+    %% P ends up in the run queue waiting for a free dirty I/O scheduler...
+    P = spawn(fun () ->
+                      erts_debug:dirty_io(wait, 1000)
+              end),
+    receive after 300 -> ok end,
+    %% current_function is included to prevent the status request from being
+    %% optimized into a direct read of the process state...
+    [{status,runnable},{current_function, _}] = process_info(P, [status,current_function]),
+    receive after 1000 -> ok end,
+    [{status,running},{current_function, _}] = process_info(P, [status,current_function]),
+    lists:foreach(fun (X) -> exit(X, kill) end, [P|Ps]),
+    lists:foreach(fun (X) -> false = is_process_alive(X) end, [P|Ps]),
+    ok.
+
 busy_dist_exit_signal(Config) when is_list(Config) ->
     ct:timetrap({seconds, 10}),
 
@@ -1132,6 +1170,251 @@ receive_integer_pairs(Tmo) ->
             ok
     end.
 
+parallel_signal_enqueue_race_1(Config) when is_list(Config) ->
+    erts_debug:set_internal_state(available_internal_state, true),
+    try
+        lists:foreach(fun (_) -> parallel_signal_enqueue_race_1_test() end,
+                      lists:seq(1, 5))
+    after
+        erts_debug:set_internal_state(available_internal_state, false)
+    end.
+
+parallel_signal_enqueue_race_1_test() ->
+    %%
+    %% PR-7822 (first commit)
+    %%
+    %% This bug could be triggered when
+    %% * the receiver had the parallel signal enqueue optimization enabled
+    %% * the receiver fetched signals while it wasn't in a running state
+    %%   (this only happens when the receiver is receive traced)
+    %% * signals were enqueued simultaneously with the fetch of signals
+    %%
+    %% When the bug was triggered, the receiver could end up in an inconsistent
+    %% state where it potentially would be stuck forever.
+    %%
+    %% The above scenario is very hard to trigger, so the test typically does
+    %% not fail even with the bug present, but we at least try to massage
+    %% the scenario...
+    R = spawn_opt(fun () ->
+                          true = erts_debug:set_internal_state(proc_sig_buffers,
+                                                               true),
+                          receive after infinity -> ok end
+                  end,
+                  [{message_queue_data, off_heap}, {priority, high}, link]),
+    T = spawn_link(fun FlushTrace () ->
+                           receive {trace,R,'receive',_} -> ok end,
+                           FlushTrace()
+                   end),
+    1 = erlang:trace(R, true, ['receive', {tracer, T}]),
+    CountLoop = fun CountLoop (0) ->
+                        ok;
+                    CountLoop (N) ->
+                        CountLoop(N-1)
+                end,
+    SigLoop = fun SigLoop (0) ->
+                      ok;
+                  SigLoop (N) ->
+                      CountLoop(rand:uniform(4000)),
+                      erlang:demonitor(erlang:monitor(process, R), [flush]),
+                      receive after 1 -> ok end,
+                      SigLoop(N-1)
+              end,
+    SMs = lists:map(fun (X) ->
+                            spawn_opt(fun () -> SigLoop(1000) end,
+                                      [{scheduler, X}, link, monitor])
+                    end, lists:seq(1,erlang:system_info(schedulers_online))),
+    R ! hello,
+    lists:foreach(fun ({P, M}) ->
+                          receive {'DOWN', M, process, P, _} -> ok end
+                  end, SMs),
+
+    %% These signals would typically not be delivered if the bug was
+    %% triggered, and the test case would then time out.
+    true = is_process_alive(R),
+    unlink(R),
+    exit(R, kill),
+    false = is_process_alive(R),
+
+    true = is_process_alive(T),
+    unlink(T),
+    exit(T, kill),
+    false = is_process_alive(T),
+
+    ok.
+
+parallel_signal_enqueue_race_2(Config) when is_list(Config) ->
+    erts_debug:set_internal_state(available_internal_state, true),
+    try
+        parallel_signal_enqueue_race_2_test()
+    after
+        erts_debug:set_internal_state(available_internal_state, false)
+    end.
+
+parallel_signal_enqueue_race_2_test() ->
+    %%
+    %% PR-7822 (first commit)
+    %%
+    %% This bug could be triggered when
+    %% * A signal receiver process had the parallel signal enqueue
+    %%   optimization enabled
+    %% * Another process called process_info(Receiver, message_queue_len)
+    %%   while the receiver was not executing, and the process_info() call
+    %%   internally called erts_proc_sig_fetch() on the receiver, trying to
+    %%   optimize the process_info() call
+    %% * Yet another process simultaneously sent the receiver another
+    %%   signal.
+    %%
+    %% When the bug was triggered, the receiver could end up in an inconsistent
+    %% state where it potentially would be stuck forever.
+    %%
+    %% The above scenario is very hard to trigger, so the test typically does
+    %% not fail even with the bug present, but we at least try to massage
+    %% the scenario...
+    process_flag(scheduler, 1),
+    {RSched, PISched, LUSched} = case erlang:system_info(schedulers_online) of
+                                     1 ->
+                                         {1, 1, 1};
+                                     2 ->
+                                         {2, 1, 2};
+                                     3 ->
+                                         {1, 2, 3};
+                                     _ ->
+                                         {2, 3, 4}
+                                 end,
+    Tester = self(),
+    R = spawn_opt(fun () ->
+                          true = erts_debug:set_internal_state(proc_sig_buffers,
+                                                               true),
+                          Tester ! recv_ready,
+                          receive after infinity -> ok end
+                  end,
+                  [{message_queue_data, off_heap}, link, {scheduler, RSched}]),
+
+    PI = spawn_opt(fun PILoop () ->
+                           true = is_process_alive(R),
+                           Tester ! pi_ready,
+                           receive go -> ok end,
+                           _ = process_info(R, message_queue_len),
+                           PILoop()
+                   end,
+                   [link, {scheduler, PISched}]),
+    LU = spawn_opt(fun LULoop () ->
+                           true = is_process_alive(R),
+                           Tester ! lu_ready,
+                           receive go -> ok end,
+                           link(R),
+                           unlink(R),
+                           LULoop()
+                   end,
+                   [link, {scheduler, LUSched}]),
+
+    receive recv_ready -> ok end,
+    TriggerLoop = fun TriggerLoop (0) ->
+                          ok;
+                      TriggerLoop (N) ->
+                          receive lu_ready -> ok end,
+                          receive pi_ready -> ok end,
+                          %% Give them some time to schedule out...
+                          erlang:yield(),
+                          case N rem 2 of
+                              0 ->
+                                  PI ! go,
+                                  LU ! go;
+                              1 ->
+                                  LU ! go,
+                                  PI ! go
+                          end,
+                          TriggerLoop(N-1)
+                  end,
+    TriggerLoop(400000),
+
+    unlink(PI),
+    exit(PI, kill),
+    false = is_process_alive(PI),
+    unlink(LU),
+    exit(LU, kill),
+    false = is_process_alive(LU),
+    unlink(R),
+    exit(R, kill),
+    false = is_process_alive(R),
+    ok.
+
+dirty_schedule(Config) when is_list(Config) ->
+    lists:foreach(fun (_) ->
+                          dirty_schedule_test()
+                  end,
+                  lists:seq(1, 5)),
+    ok.
+
+dirty_schedule_test() ->
+    %%
+    %% PR-7822 (second commit)
+    %%
+    %% This bug could occur when a process was about to be scheduled, due to
+    %% an incoming signal, just as the receiving process was selected for
+    %% execution on a dirty scheduler. The process could then be inserted
+    %% into a run-queue at the same time as it began executing dirty. If
+    %% the scheduled instance was selected for execution on one dirty
+    %% scheduler at the same time as it was scheduled out on another
+    %% scheduler, a race could cause the thread scheduling out the process
+    %% to think it already was in the run-queue (and therefore not insert
+    %% it), while the thread selecting it for execution dropped the process,
+    %% since it was already running on another scheduler. As a result, the
+    %% process ended up stuck in a runnable state, but not in the
+    %% run-queue.
+    %%
+    %% When the bug was triggered, the receiver could end up in an inconsistent
+    %% state where it potentially would be stuck forever.
+    %%
+    %% The above scenario is very hard to trigger, so the test typically does
+    %% not fail even with the bug present, but we at least try to massage
+    %% the scenario...
+ %% + Proc = spawn_link(fun DirtyLoop () -> + erts_debug:dirty_io(scheduler,type), + DirtyLoop() + end), + NoPs = lists:seq(1, erlang:system_info(schedulers_online)), + SpawnSender = + fun (Prio) -> + spawn_opt( + fun () -> + Loop = fun Loop (0) -> + ok; + Loop (N) -> + _ = process_info(Proc, + current_function), + Loop(N-1) + end, + receive go -> ok end, + Loop(100000) + end, [monitor,{priority,Prio}]) + end, + Go = fun ({P, _M}) -> P ! go end, + WaitProcs = fun ({P, M}) -> + receive {'DOWN', M, process, P, R} -> + normal = R + end + end, + PM1s = lists:map(fun (_) -> SpawnSender(normal) end, NoPs), + lists:foreach(Go, PM1s), + lists:foreach(WaitProcs, PM1s), + PM2s = lists:map(fun (_) -> SpawnSender(high) end, NoPs), + lists:foreach(Go, PM2s), + lists:foreach(WaitProcs, PM2s), + PM3s = lists:map(fun (N) -> + Prio = case N rem 2 of + 0 -> normal; + 1 -> high + end, + SpawnSender(Prio) end, NoPs), + lists:foreach(Go, PM3s), + lists:foreach(WaitProcs, PM3s), + unlink(Proc), + exit(Proc, kill), + false = is_process_alive(Proc), + ok. + %% %% -- Internal utils -------------------------------------------------------- %% diff --git a/erts/etc/unix/etp-commands.in b/erts/etc/unix/etp-commands.in index bdddfb1d3c47..a62f908b2a2a 100644 --- a/erts/etc/unix/etp-commands.in +++ b/erts/etc/unix/etp-commands.in @@ -2499,7 +2499,7 @@ define etp-proc-state-int printf "active-sys | " end if ($arg0 & 0x80000) - printf "sig-in-q | " + printf "nmsig-in-q | " end if ($arg0 & 0x40000) printf "sys-tasks | " @@ -2520,7 +2520,7 @@ define etp-proc-state-int printf "active | " end if ($arg0 & 0x1000) - printf "maybe-self-sigs | " + printf "msig-in-q | " end if ($arg0 & 0x800) printf "exiting | "