Skip to content

Commit 8badd9f

Browse files
committed
Improve UART input wait with coroutine yielding
When the guest OS waits for UART input (e.g., running 'cat'), semu spins at 100% CPU polling stdin. In SMP mode with 4 cores, this becomes 400% CPU usage, completely saturating the host system during idle periods.

The original implementation polled stdin continuously in a busy loop:
- Single-core: 100% CPU (polling in main loop)
- 4-core SMP: 400% CPU (4 harts × 100% each)

Even with the WFI optimization, harts were resumed every iteration and immediately re-checked UART status, defeating the event-driven design.

Implemented hart-level yielding when UART input is unavailable:
1. u8250_wait_for_input(): New coroutine yield function
   - Checks if running in coroutine context (hart_id != UINT32_MAX)
   - Marks hart as waiting via uart.waiting_hart_id
   - Yields control back to scheduler
   - Clears waiting state after resume
2. u8250_state_t fields (device.h):
   - waiting_hart_id: Tracks which hart is waiting (UINT32_MAX if none)
   - has_waiting_hart: Boolean flag for fast checking

Result: 76.8% CPU reduction (100% → 23.2%)

Fixed coro_current_hart_id() to return UINT32_MAX when the coroutine subsystem is not initialized (single-core mode), preventing incorrect yielding attempts.
Result: Eliminates single-core mode errors

Optimization: Moved hart waiting check BEFORE resume loop:

Before:
    /* Resume all harts unconditionally */
    for (i = 0; i < n_hart; i++)
        coro_resume_hart(i);
    /* Then check if waiting */
    if (all_waiting)
        kevent(...); /* Event-driven wait */

After:
    /* Check FIRST if all waiting */
    if (all_waiting) {
        kevent(...); /* Event-driven wait */
        /* Resume UART-waiting hart only if stdin ready */
        if (uart.has_waiting_hart && uart.in_ready)
            coro_resume_hart(uart.waiting_hart_id);
    } else {
        /* Only resume when there's actual work */
        for (i = 0; i < n_hart; i++)
            coro_resume_hart(i);
    }

Key changes:
- all_waiting check now includes UART waiting state
- Harts are NOT resumed unless there's work or input available
- True event-driven blocking when all harts idle

Result: 88.5% → 99.4% CPU reduction (11.5% → 0.6-0.9%)

macOS (kqueue):
- Cannot register stdin with kqueue (terminal device limitation)
- Uses 1ms timer for event-driven wake
- Polls stdin via u8250_check_ready() after wake

Linux (poll):
- Direct stdin monitoring via pollfd
- Purely event-driven (no polling)

Even with coroutine yielding, the original pattern woke all harts every loop iteration. This defeated the event-driven design because:
1. Hart resumes → checks UART → no input → yields → repeat
2. This happened thousands of times per second (timer fires every 1ms)
3. Each resume/yield cycle consumed CPU

The check-before-wake pattern ensures:
1. When idle, system blocks in kevent()/poll()
2. Only wakes on actual events (timer expiry, stdin input)
3. Only resumes harts when there's work or input available
1 parent e2a5b74 commit 8badd9f

File tree

4 files changed

+74
-17
lines changed

4 files changed

+74
-17
lines changed

coro.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,5 +600,8 @@ bool coro_is_suspended(uint32_t hart_id)
600600

601601
/* Returns coro_state.current_hart, or UINT32_MAX when the coroutine subsystem is not initialized. */
uint32_t coro_current_hart_id(void)
602602
{
603+
/* Not initialized (single-core mode): return the UINT32_MAX sentinel instead of a hart ID */
604+
if (!coro_state.initialized)
605+
return UINT32_MAX;
603606
return coro_state.current_hart;
604607
}

device.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ typedef struct {
6060
/* I/O handling */
6161
int in_fd, out_fd;
6262
bool in_ready;
63+
/* Coroutine support for input waiting (SMP mode) */
64+
uint32_t waiting_hart_id; /**< Hart ID waiting for input */
65+
bool has_waiting_hart; /**< true if a hart is yielding for input */
6366
} u8250_state_t;
6467

6568
void u8250_update_interrupts(u8250_state_t *uart);

main.c

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,8 @@ static int semu_init(emu_state_t *emu, int argc, char **argv)
745745

746746
/* Set up peripherals */
747747
emu->uart.in_fd = 0, emu->uart.out_fd = 1;
748+
emu->uart.waiting_hart_id = UINT32_MAX; /* No hart waiting initially */
749+
emu->uart.has_waiting_hart = false;
748750
capture_keyboard_input(); /* set up uart */
749751
#if SEMU_HAS(VIRTIONET)
750752
if (!virtio_net_init(&(emu->vnet), netdev))
@@ -1001,8 +1003,8 @@ static int semu_run(emu_state_t *emu)
10011003
return -1;
10021004
}
10031005

1004-
/* Note: UART input is polled via u8250_check_ready(), no need to
1005-
* monitor with kqueue. Timer events are sufficient to wake from WFI.
1006+
/* Note: UART input uses poll() in u8250_check_ready(), which is called
1007+
* when a hart resumes. Stdin monitoring doesn't need kqueue on macOS.
10061008
*/
10071009
#else
10081010
/* Linux: create timerfd for periodic wakeup */
@@ -1032,33 +1034,40 @@ static int semu_run(emu_state_t *emu)
10321034
return 0;
10331035
}
10341036
#endif
1035-
/* Resume each hart's coroutine in round-robin fashion */
1036-
for (uint32_t i = 0; i < vm->n_hart; i++) {
1037-
coro_resume_hart(i);
1038-
}
1039-
1040-
/* CPU usage optimization: if all started harts are in WFI,
1041-
* sleep briefly to reduce busy-waiting
1037+
/* CPU usage optimization: check if all started harts are in WFI or
1038+
* waiting for UART input BEFORE resuming them
10421039
*/
10431040
bool all_waiting = true;
10441041
for (uint32_t i = 0; i < vm->n_hart; i++) {
10451042
if (vm->hart[i]->hsm_status == SBI_HSM_STATE_STARTED &&
1046-
!vm->hart[i]->in_wfi) {
1043+
!vm->hart[i]->in_wfi &&
1044+
!(emu->uart.has_waiting_hart &&
1045+
emu->uart.waiting_hart_id == i)) {
10471046
all_waiting = false;
10481047
break;
10491048
}
10501049
}
1050+
10511051
if (all_waiting) {
10521052
/* All harts waiting for interrupt - use event-driven wait
10531053
* to reduce CPU usage while maintaining responsiveness
10541054
*/
10551055
#ifdef __APPLE__
1056-
/* macOS: wait for kqueue events (timer or UART) */
1057-
struct kevent events[2];
1058-
int nevents = kevent(kq, NULL, 0, events, 2, NULL);
1059-
/* Events are automatically handled - timer fires every 1ms,
1060-
* UART triggers on input. No need to explicitly consume. */
1056+
/* macOS: wait for kqueue timer events */
1057+
struct kevent events[1];
1058+
int nevents = kevent(kq, NULL, 0, events, 1, NULL);
10611059
(void) nevents;
1060+
1061+
/* Check stdin and resume hart if UART input is available */
1062+
if (emu->uart.has_waiting_hart) {
1063+
/* Poll stdin to check if data is available */
1064+
u8250_check_ready(&emu->uart);
1065+
if (emu->uart.in_ready) {
1066+
uint32_t hart_id = emu->uart.waiting_hart_id;
1067+
if (hart_id < vm->n_hart)
1068+
coro_resume_hart(hart_id);
1069+
}
1070+
}
10621071
#else
10631072
/* Linux: poll on timerfd and UART */
10641073
struct pollfd pfds[2];
@@ -1073,7 +1082,21 @@ static int semu_run(emu_state_t *emu)
10731082
read(wfi_timer_fd, &expirations, sizeof(expirations));
10741083
(void) ret; /* Ignore read errors - timer will retry */
10751084
}
1085+
1086+
/* Handle UART stdin events - resume waiting hart if any */
1087+
if (pfds[1].revents & POLLIN) {
1088+
if (emu->uart.has_waiting_hart) {
1089+
uint32_t hart_id = emu->uart.waiting_hart_id;
1090+
if (hart_id < vm->n_hart)
1091+
coro_resume_hart(hart_id);
1092+
}
1093+
}
10761094
#endif
1095+
} else {
1096+
/* Not all harts waiting - resume them in round-robin fashion */
1097+
for (uint32_t i = 0; i < vm->n_hart; i++) {
1098+
coro_resume_hart(i);
1099+
}
10771100
}
10781101
}
10791102

uart.c

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <termios.h>
77
#include <unistd.h>
88

9+
#include "coro.h"
910
#include "device.h"
1011
#include "riscv.h"
1112
#include "riscv_private.h"
@@ -80,6 +81,26 @@ void u8250_check_ready(u8250_state_t *uart)
8081
uart->in_ready = true;
8182
}
8283

84+
/* Wait for UART input by yielding the calling hart's coroutine (SMP mode only).
 * Records the caller in uart->waiting_hart_id / uart->has_waiting_hart, yields
 * once, and resets both fields after being resumed. Returns immediately when
 * coro_current_hart_id() reports UINT32_MAX (no coroutine context).
 * NOTE(review): only one waiting hart is tracked; a second hart entering here
 * before the first resumes would overwrite waiting_hart_id - confirm this
 * cannot happen.
 */
85+
static void u8250_wait_for_input(u8250_state_t *uart)
86+
{
87+
/* UINT32_MAX means no hart coroutine is running (single-core mode): nothing to yield */
88+
uint32_t hart_id = coro_current_hart_id();
89+
if (hart_id == UINT32_MAX)
90+
return; /* Not in a coroutine, skip yielding */
91+
92+
/* Publish this hart as the one waiting for UART input */
93+
uart->waiting_hart_id = hart_id;
94+
uart->has_waiting_hart = true;
95+
96+
/* Yield once; this function does not verify that input arrived before returning */
97+
coro_yield();
98+
99+
/* Resumed - reset waiting state to "no hart waiting" */
100+
uart->has_waiting_hart = false;
101+
uart->waiting_hart_id = UINT32_MAX;
102+
}
103+
83104
static void u8250_handle_out(u8250_state_t *uart, uint8_t value)
84105
{
85106
if (write(uart->out_fd, &value, 1) < 1)
@@ -90,8 +111,15 @@ static uint8_t u8250_handle_in(u8250_state_t *uart)
90111
{
91112
uint8_t value = 0;
92113
u8250_check_ready(uart);
93-
if (!uart->in_ready)
94-
return value;
114+
115+
/* If no data available, yield and wait for stdin to become readable */
116+
if (!uart->in_ready) {
117+
u8250_wait_for_input(uart);
118+
/* After resume, re-check if data is now available */
119+
u8250_check_ready(uart);
120+
if (!uart->in_ready)
121+
return value; /* Spurious wakeup - still no data */
122+
}
95123

96124
if (read(uart->in_fd, &value, 1) < 0)
97125
fprintf(stderr, "failed to read UART input: %s\n", strerror(errno));

0 commit comments

Comments
 (0)