From 5b235129417ac8ab91cd8948c6a67b2e9979e269 Mon Sep 17 00:00:00 2001 From: Gengchen Tuo Date: Wed, 18 Sep 2024 14:44:49 -0400 Subject: [PATCH] Add JFR CPULoad and ThreadCPU event support Depends on: https://github.com/eclipse/omr/pull/7491 Signed-off-by: Gengchen Tuo --- runtime/oti/j9consts.h | 2 + runtime/oti/j9nonbuilder.h | 26 +++++++ runtime/vm/BufferWriter.hpp | 10 +++ runtime/vm/JFRChunkWriter.hpp | 69 +++++++++++++++++ runtime/vm/JFRConstantPoolTypes.cpp | 45 +++++++++++ runtime/vm/JFRConstantPoolTypes.hpp | 66 +++++++++++++++++ runtime/vm/jfr.cpp | 111 ++++++++++++++++++++++++++++ runtime/vm/vmthread.cpp | 4 + 8 files changed, 333 insertions(+) diff --git a/runtime/oti/j9consts.h b/runtime/oti/j9consts.h index 71c977b99f0..bfe0aae2c50 100644 --- a/runtime/oti/j9consts.h +++ b/runtime/oti/j9consts.h @@ -931,6 +931,8 @@ extern "C" { #define J9JFR_EVENT_TYPE_THREAD_START 1 #define J9JFR_EVENT_TYPE_THREAD_END 2 #define J9JFR_EVENT_TYPE_THREAD_SLEEP 3 +#define J9JFR_EVENT_TYPE_CPU_LOAD 4 +#define J9JFR_EVENT_TYPE_THREAD_CPU_LOAD 5 /* JFR thread states */ diff --git a/runtime/oti/j9nonbuilder.h b/runtime/oti/j9nonbuilder.h index 1667d1a2c98..fec44722462 100644 --- a/runtime/oti/j9nonbuilder.h +++ b/runtime/oti/j9nonbuilder.h @@ -345,6 +345,12 @@ struct J9VMContinuation; #if defined(J9VM_OPT_JFR) +typedef struct J9VMThreadJFRState { + U_64 prevUserTime; + U_64 prevSysTime; + I_64 prevTimestamp; +} ThreadJFRState; + typedef struct J9JFRBufferWalkState { U_8 *current; U_8 *end; @@ -402,6 +408,19 @@ typedef struct J9JFRThreadSlept { #define J9JFRTHREADSLEPT_STACKTRACE(jfrEvent) ((UDATA*)(((J9JFRThreadSlept*)(jfrEvent)) + 1)) +typedef struct J9JFRCPULoad { + J9JFR_EVENT_COMMON_FIELDS + float jvmUser; + float jvmSystem; + float machineTotal; +} J9JFRCPULoad; + +typedef struct J9JFRThreadCPULoad { + J9JFR_EVENT_COMMON_FIELDS + float user; + float system; +} J9JFRThreadCPULoad; + #endif /* defined(J9VM_OPT_JFR) */ /* @ddr_namespace: map_to_type=J9CfrError */ @@ -5563,6 +5582,9 @@ typedef struct J9VMThread { j9object_t closeScopeObj; #endif /* JAVA_SPEC_VERSION >= 22 */ UDATA unsafeIndexableHeaderSize; +#if defined(J9VM_OPT_JFR) + ThreadJFRState threadJfrState; +#endif /* defined(J9VM_OPT_JFR) */ } J9VMThread; #define J9VMTHREAD_ALIGNMENT 0x100 @@ -5640,6 +5662,9 @@ typedef struct JFRState { void *constantEvents; BOOLEAN isConstantEventsInitialized; omrthread_monitor_t isConstantEventsInitializedMutex; + J9SysinfoCPUTime prevSysCPUTime; + omrthread_process_time_t prevProcCPUTime; + I_64 prevProcTimestamp; } JFRState; typedef struct J9ReflectFunctionTable { @@ -6179,6 +6204,7 @@ typedef struct J9JavaVM { omrthread_t jfrSamplerThread; UDATA jfrSamplerState; IDATA jfrAsyncKey; + IDATA jfrThreadCPULoadAsyncKey; #endif /* defined(J9VM_OPT_JFR) */ #if JAVA_SPEC_VERSION >= 22 omrthread_monitor_t closeScopeMutex; diff --git a/runtime/vm/BufferWriter.hpp b/runtime/vm/BufferWriter.hpp index 990774985c5..f9fb25ef2c7 100644 --- a/runtime/vm/BufferWriter.hpp +++ b/runtime/vm/BufferWriter.hpp @@ -293,6 +293,16 @@ class VM_BufferWriter { writeU8(((newVal >> 21) & 0x7F)); } + void + writeFloat(float val) + { + U_32 newVal = *(U_32 *)&val; + if (_isLE) { + newVal = byteSwap(newVal); + } + writeData((U_8 *)&newVal, 4); + } + static U_32 convertFromLEB128ToU32(U_8 *start) { diff --git a/runtime/vm/JFRChunkWriter.hpp b/runtime/vm/JFRChunkWriter.hpp index 76c678f32bd..3ae35b52c87 100644 --- a/runtime/vm/JFRChunkWriter.hpp +++ b/runtime/vm/JFRChunkWriter.hpp @@ -70,6 +70,8 @@ enum MetadataTypeID { VirtualizationInformationID = 88, InitialSystemPropertyID = 89, CPUInformationID = 92, + CPULoadID = 94, + ThreadCPULoadID = 95, PhysicalMemoryID = 107, ExecutionSampleID = 108, ThreadID = 163, @@ -155,6 +157,8 @@ class VM_JFRChunkWriter { static constexpr int CPU_INFORMATION_EVENT_SIZE = 600; static constexpr int OS_INFORMATION_EVENT_SIZE = 100; static constexpr int INITIAL_SYSTEM_PROPERTY_EVENT_SIZE = 6000; + static constexpr int CPU_LOAD_EVENT_SIZE = (3 * sizeof(float)) + (3 * sizeof(I_64)); + static constexpr int THREAD_CPU_LOAD_EVENT_SIZE = (2 * sizeof(float)) + (4 * sizeof(I_64)); static constexpr int METADATA_ID = 1; @@ -323,6 +327,10 @@ class VM_JFRChunkWriter { pool_do(_constantPoolTypes.getThreadSleepTable(), &writeThreadSleepEvent, _bufferWriter); + pool_do(_constantPoolTypes.getCPULoadTable(), &writeCPULoadEvent, _bufferWriter); + + pool_do(_constantPoolTypes.getThreadCPULoadTable(), &writeThreadCPULoadEvent, _bufferWriter); + /* Only write constant events in first chunk */ if (0 == _vm->jfrState.jfrChunkCount) { writeJVMInformationEvent(); @@ -477,6 +485,62 @@ class VM_JFRChunkWriter { _bufferWriter->writeLEB128PaddedU32(dataStart, _bufferWriter->getCursor() - dataStart); } + static void + writeCPULoadEvent(void *anElement, void *userData) + { + CPULoadEntry *entry = (CPULoadEntry *)anElement; + VM_BufferWriter *_bufferWriter = (VM_BufferWriter *)userData; + + /* reserve size field */ + U_8 *dataStart = _bufferWriter->getAndIncCursor(sizeof(U_32)); + + /* write event type */ + _bufferWriter->writeLEB128(CPULoadID); + + /* write start time */ + _bufferWriter->writeLEB128(entry->ticks); + + /* write user CPU load */ + _bufferWriter->writeFloat(entry->jvmUser); + + /* write system CPU load */ + _bufferWriter->writeFloat(entry->jvmSystem); + + /* write machine total CPU load */ + _bufferWriter->writeFloat(entry->machineTotal); + + /* write size */ + _bufferWriter->writeLEB128PaddedU32(dataStart, _bufferWriter->getCursor() - dataStart); + } + + static void + writeThreadCPULoadEvent(void *anElement, void *userData) + { + ThreadCPULoadEntry *entry = (ThreadCPULoadEntry *)anElement; + VM_BufferWriter *_bufferWriter = (VM_BufferWriter *)userData; + + /* reserve size field */ + U_8 *dataStart = _bufferWriter->getAndIncCursor(sizeof(U_32)); + + /* write event type */ + _bufferWriter->writeLEB128(ThreadCPULoadID); + + /* write start time */ + _bufferWriter->writeLEB128(entry->ticks); + + /* write thread index */ + _bufferWriter->writeLEB128(entry->threadIndex); + + /* write user thread CPU load */ + _bufferWriter->writeFloat(entry->user); + + /* write system thread CPU load */ + _bufferWriter->writeFloat(entry->system); + + /* write size */ + _bufferWriter->writeLEB128PaddedU32(dataStart, _bufferWriter->getCursor() - dataStart); + } + void writeJFRChunkToFile() { @@ -590,6 +654,11 @@ class VM_JFRChunkWriter { requireBufferSize += CPU_INFORMATION_EVENT_SIZE; requireBufferSize += INITIAL_SYSTEM_PROPERTY_EVENT_SIZE; + + requireBufferSize += _constantPoolTypes.getCPULoadCount() * CPU_LOAD_EVENT_SIZE; + + requireBufferSize += _constantPoolTypes.getThreadCPULoadCount() * THREAD_CPU_LOAD_EVENT_SIZE; + return requireBufferSize; } diff --git a/runtime/vm/JFRConstantPoolTypes.cpp b/runtime/vm/JFRConstantPoolTypes.cpp index f21230b3143..679444a278d 100644 --- a/runtime/vm/JFRConstantPoolTypes.cpp +++ b/runtime/vm/JFRConstantPoolTypes.cpp @@ -1031,6 +1031,51 @@ VM_JFRConstantPoolTypes::addThreadSleepEntry(J9JFRThreadSlept *threadSleepData) return index; } +U_32 +VM_JFRConstantPoolTypes::addCPULoadEntry(J9JFRCPULoad *cpuLoadData) +{ + CPULoadEntry *entry = (CPULoadEntry *)pool_newElement(_cpuLoadTable); + U_32 index = U_32_MAX; + + if (NULL == entry) { + _buildResult = OutOfMemory; + goto done; + } + + entry->ticks = cpuLoadData->startTicks; + entry->jvmUser = cpuLoadData->jvmUser; + entry->jvmSystem = cpuLoadData->jvmSystem; + entry->machineTotal = cpuLoadData->machineTotal; + + index = _cpuLoadCount++; + +done: + return index; +} + +U_32 +VM_JFRConstantPoolTypes::addThreadCPULoadEntry(J9JFRThreadCPULoad *threadCPULoadData) +{ + ThreadCPULoadEntry *entry = (ThreadCPULoadEntry *)pool_newElement(_threadCPULoadTable); + U_32 index = U_32_MAX; + + if (NULL == entry) { + _buildResult = OutOfMemory; + goto done; + } + + entry->ticks = threadCPULoadData->startTicks; + entry->user = threadCPULoadData->user; + entry->system = threadCPULoadData->system; + + entry->threadIndex = addThreadEntry(threadCPULoadData->vmThread); + if (isResultNotOKay()) goto done; + + index = _threadCPULoadCount++; + +done: + return index; +} void VM_JFRConstantPoolTypes::printTables() diff --git a/runtime/vm/JFRConstantPoolTypes.hpp b/runtime/vm/JFRConstantPoolTypes.hpp index b4dc2747e2b..b5778f9062b 100644 --- a/runtime/vm/JFRConstantPoolTypes.hpp +++ b/runtime/vm/JFRConstantPoolTypes.hpp @@ -199,6 +199,20 @@ struct StackTraceEntry { StackTraceEntry *next; }; +struct CPULoadEntry { + I_64 ticks; + float jvmUser; + float jvmSystem; + float machineTotal; +}; + +struct ThreadCPULoadEntry { + I_64 ticks; + U_32 threadIndex; + float user; + float system; +}; + struct JVMInformationEntry { const char *jvmName; const char *jvmVersion; @@ -274,6 +288,10 @@ class VM_JFRConstantPoolTypes { UDATA _threadEndCount; J9Pool *_threadSleepTable; UDATA _threadSleepCount; + J9Pool *_cpuLoadTable; + UDATA _cpuLoadCount; + J9Pool *_threadCPULoadTable; + UDATA _threadCPULoadCount; /* Processing buffers */ StackFrame *_currentStackFrameBuffer; @@ -515,6 +533,10 @@ class VM_JFRConstantPoolTypes { U_32 addThreadSleepEntry(J9JFRThreadSlept *threadSleepData); + U_32 addCPULoadEntry(J9JFRCPULoad *cpuLoadData); + + U_32 addThreadCPULoadEntry(J9JFRThreadCPULoad *threadCPULoadData); + J9Pool *getExecutionSampleTable() { return _executionSampleTable; @@ -535,6 +557,16 @@ class VM_JFRConstantPoolTypes { return _threadSleepTable; } + J9Pool *getCPULoadTable() + { + return _cpuLoadTable; + } + + J9Pool *getThreadCPULoadTable() + { + return _threadCPULoadTable; + } + UDATA getExecutionSampleCount() { return _executionSampleCount; @@ -555,6 +587,16 @@ class VM_JFRConstantPoolTypes { return _threadSleepCount; } + UDATA getCPULoadCount() + { + return _cpuLoadCount; + } + + UDATA getThreadCPULoadCount() + { + return _threadCPULoadCount; + } + ClassloaderEntry *getClassloaderEntry() { return _firstClassloaderEntry; @@ -694,6 +736,12 @@ class VM_JFRConstantPoolTypes { case J9JFR_EVENT_TYPE_THREAD_SLEEP: addThreadSleepEntry((J9JFRThreadSlept*) event); break; + case J9JFR_EVENT_TYPE_CPU_LOAD: + addCPULoadEntry((J9JFRCPULoad *)event); + break; + case J9JFR_EVENT_TYPE_THREAD_CPU_LOAD: + addThreadCPULoadEntry((J9JFRThreadCPULoad *)event); + break; default: Assert_VM_unreachable(); break; @@ -986,6 +1034,10 @@ class VM_JFRConstantPoolTypes { , _threadEndCount(0) , _threadSleepTable(NULL) , _threadSleepCount(0) + , _cpuLoadTable(NULL) + , _cpuLoadCount(0) + , _threadCPULoadTable(NULL) + , _threadCPULoadCount(0) , _previousStackTraceEntry(NULL) , _firstStackTraceEntry(NULL) , _previousThreadEntry(NULL) @@ -1082,6 +1134,18 @@ class VM_JFRConstantPoolTypes { goto done; } + _cpuLoadTable = pool_new(sizeof(CPULoadEntry), 0, sizeof(U_64), 0, J9_GET_CALLSITE(), OMRMEM_CATEGORY_VM, POOL_FOR_PORT(privatePortLibrary)); + if (NULL == _cpuLoadTable) { + _buildResult = OutOfMemory; + goto done; + } + + _threadCPULoadTable = pool_new(sizeof(ThreadCPULoadEntry), 0, sizeof(U_64), 0, J9_GET_CALLSITE(), OMRMEM_CATEGORY_VM, POOL_FOR_PORT(privatePortLibrary)); + if (NULL == _threadCPULoadTable) { + _buildResult = OutOfMemory; + goto done; + } + /* Add reserved index for default entries. For strings zero is the empty or NUll string. * For package zero is the deafult package, for Module zero is the unnamed module. ThreadGroup * zero is NULL threadGroup. @@ -1161,6 +1225,8 @@ class VM_JFRConstantPoolTypes { pool_kill(_threadStartTable); pool_kill(_threadEndTable); pool_kill(_threadSleepTable); + pool_kill(_cpuLoadTable); + pool_kill(_threadCPULoadTable); j9mem_free_memory(_globalStringTable); } diff --git a/runtime/vm/jfr.cpp b/runtime/vm/jfr.cpp index 3ed18edaf2c..3d89b9f1fd2 100644 --- a/runtime/vm/jfr.cpp +++ b/runtime/vm/jfr.cpp @@ -22,6 +22,7 @@ #include "JFRConstantPoolTypes.hpp" #include "j9protos.h" #include "omrlinkedlist.h" +#include "thread_api.h" #include "ut_j9vm.h" #include "vm_internal.h" @@ -55,6 +56,7 @@ static void jfrVMInitialized(J9HookInterface **hook, UDATA eventNum, void *event static void initializeEventFields(J9VMThread *currentThread, J9JFREvent *jfrEvent, UDATA eventType); static int J9THREAD_PROC jfrSamplingThreadProc(void *entryArg); static void jfrExecutionSampleCallback(J9VMThread *currentThread, IDATA handlerKey, void *userData); +static void jfrThreadCPULoadCallback(J9VMThread *currentThread, IDATA handlerKey, void *userData); /** * Calculate the size in bytes of a JFR event. @@ -80,6 +82,12 @@ jfrEventSize(J9JFREvent *jfrEvent) case J9JFR_EVENT_TYPE_THREAD_SLEEP: size = sizeof(J9JFRThreadSlept) + (((J9JFRThreadSlept*)jfrEvent)->stackTraceSize * sizeof(UDATA)); break; + case J9JFR_EVENT_TYPE_CPU_LOAD: + size = sizeof(J9JFRCPULoad); + break; + case J9JFR_EVENT_TYPE_THREAD_CPU_LOAD: + size = sizeof(J9JFRThreadCPULoad); + break; default: Assert_VM_unreachable(); break; @@ -559,6 +567,11 @@ initializeJFR(J9JavaVM *vm) goto fail; } + vm->jfrThreadCPULoadAsyncKey = J9RegisterAsyncEvent(vm, jfrThreadCPULoadCallback, NULL); + if (vm->jfrThreadCPULoadAsyncKey < 0) { + goto fail; + } + if ((*vmHooks)->J9HookRegisterWithCallSite(vmHooks, J9HOOK_VM_THREAD_CREATED, jfrThreadCreated, OMR_GET_CALLSITE(), NULL)) { goto fail; } @@ -609,6 +622,8 @@ initializeJFR(J9JavaVM *vm) goto fail; } + vm->jfrState.prevSysCPUTime.timestamp = -1; + vm->jfrState.prevProcTimestamp = -1; if (omrthread_monitor_init_with_name(&vm->jfrBufferMutex, 0, "JFR global buffer mutex")) { goto fail; } @@ -689,6 +704,10 @@ tearDownJFR(J9JavaVM *vm) J9UnregisterAsyncEvent(vm, vm->jfrAsyncKey); vm->jfrAsyncKey = -1; } + if (vm->jfrThreadCPULoadAsyncKey >= 0) { + J9UnregisterAsyncEvent(vm, vm->jfrThreadCPULoadAsyncKey); + vm->jfrThreadCPULoadAsyncKey = -1; + } } @@ -729,6 +748,90 @@ jfrExecutionSampleCallback(J9VMThread *currentThread, IDATA handlerKey, void *us jfrExecutionSample(currentThread, currentThread); } +void +jfrCPULoad(J9VMThread *currentThread) +{ + PORT_ACCESS_FROM_VMC(currentThread); + OMRPORT_ACCESS_FROM_J9PORT(PORTLIB); + + omrthread_process_time_t currentProcCPUTime = {0}; + intptr_t processTimeRC = omrthread_get_process_times(¤tProcCPUTime); + + J9SysinfoCPUTime currentSysCPUTime = {0}; + intptr_t sysTimeRC = omrsysinfo_get_CPU_utilization(¤tSysCPUTime); + + if (0 == processTimeRC && 0 == sysTimeRC) { + J9JFRCPULoad *jfrEvent = (J9JFRCPULoad *)reserveBuffer(currentThread, sizeof(J9JFRCPULoad)); + if (NULL != jfrEvent) { + initializeEventFields(currentThread, (J9JFREvent *)jfrEvent, J9JFR_EVENT_TYPE_CPU_LOAD); + UDATA numberOfCpus = j9sysinfo_get_number_CPUs_by_type(J9PORT_CPU_ONLINE); + U_64 currentTime = (U_64)j9time_nano_time(); + + omrthread_process_time_t *prevProcCPUTime = ¤tThread->javaVM->jfrState.prevProcCPUTime; + I_64 *prevProcTimestamp = ¤tThread->javaVM->jfrState.prevProcTimestamp; + if (-1 == *prevProcTimestamp) { + jfrEvent->jvmUser = 0; + jfrEvent->jvmSystem = 0; + } else { + U_64 timeDelta = currentTime - *prevProcTimestamp; + jfrEvent->jvmUser = OMR_MIN((currentProcCPUTime._userTime - prevProcCPUTime->_userTime) / ((double)numberOfCpus * timeDelta), 1.0); + jfrEvent->jvmSystem = OMR_MIN((currentProcCPUTime._systemTime - prevProcCPUTime->_systemTime) / ((double)numberOfCpus * timeDelta), 1.0); + } + *prevProcCPUTime = currentProcCPUTime; + *prevProcTimestamp = currentTime; + + J9SysinfoCPUTime *prevSysCPUTime = ¤tThread->javaVM->jfrState.prevSysCPUTime; + if (-1 == prevSysCPUTime->timestamp) { + jfrEvent->machineTotal = 0; + } else { + jfrEvent->machineTotal = OMR_MIN((currentSysCPUTime.cpuTime - prevSysCPUTime->cpuTime) / ((double)numberOfCpus * (currentSysCPUTime.timestamp - prevSysCPUTime->timestamp)), 1.0); + } + *prevSysCPUTime = currentSysCPUTime; + } + } + +} + +void +jfrThreadCPULoad(J9VMThread *currentThread, J9VMThread *sampleThread) +{ + PORT_ACCESS_FROM_VMC(currentThread); + + omrthread_thread_time_t threadTime; + + int32_t rc = omrthread_get_self_thread_time(&threadTime); + + if (-1 != rc) { + int64_t sysTime = threadTime.sysTime; + int64_t userTime = threadTime.userTime; + U_64 currentTime = (U_64)j9time_nano_time(); + + J9JFRThreadCPULoad *jfrEvent = (J9JFRThreadCPULoad*)reserveBuffer(currentThread, sizeof(*jfrEvent)); + if (NULL != jfrEvent) { + initializeEventFields(currentThread, (J9JFREvent *)jfrEvent, J9JFR_EVENT_TYPE_THREAD_CPU_LOAD); + ThreadJFRState* jfrState = &sampleThread->threadJfrState; + I_64 *prevTimestamp = &jfrState->prevTimestamp; + if (-1 == *prevTimestamp) { + jfrEvent->user = 0; + jfrEvent->system = 0; + } else { + U_64 timeDelta = currentTime - *prevTimestamp; + jfrEvent->user = OMR_MIN((userTime - jfrState->prevUserTime) / ((double)timeDelta), 1.0); + jfrEvent->system = OMR_MIN((sysTime - jfrState->prevSysTime) / ((double)timeDelta), 1.0); + } + *prevTimestamp = currentTime; + jfrState->prevUserTime = userTime; + jfrState->prevSysTime = sysTime; + } + } +} + +static void +jfrThreadCPULoadCallback(J9VMThread *currentThread, IDATA handlerKey, void *userData) +{ + jfrThreadCPULoad(currentThread, currentThread); +} + static int J9THREAD_PROC jfrSamplingThreadProc(void *entryArg) { @@ -739,8 +842,16 @@ jfrSamplingThreadProc(void *entryArg) omrthread_monitor_enter(vm->jfrSamplerMutex); vm->jfrSamplerState = J9JFR_SAMPLER_STATE_RUNNING; omrthread_monitor_notify_all(vm->jfrSamplerMutex); + UDATA count = 0; while (J9JFR_SAMPLER_STATE_STOP != vm->jfrSamplerState) { J9SignalAsyncEvent(vm, NULL, vm->jfrAsyncKey); + if (0 == (count % 100)) { // 1000ms + jfrCPULoad(currentThread); + } + if (0 == (count % 1000)) { // 10s + J9SignalAsyncEvent(vm, NULL, vm->jfrThreadCPULoadAsyncKey); + } + count++; omrthread_monitor_wait_timed(vm->jfrSamplerMutex, J9JFR_SAMPLING_RATE, 0); } omrthread_monitor_exit(vm->jfrSamplerMutex); diff --git a/runtime/vm/vmthread.cpp b/runtime/vm/vmthread.cpp index 3a019c0ac39..cd5966d47fd 100644 --- a/runtime/vm/vmthread.cpp +++ b/runtime/vm/vmthread.cpp @@ -297,6 +297,10 @@ allocateVMThread(J9JavaVM *vm, omrthread_t osThread, UDATA privateFlags, void *m newThread->scopedValueCache = NULL; #endif /* JAVA_SPEC_VERSION >= 19 */ +#if defined(J9VM_OPT_JFR) + newThread->threadJfrState.prevTimestamp = -1; +#endif /* defined(J9VM_OPT_JFR) */ + /* If an exclusive access request is in progress, mark this thread */ omrthread_monitor_enter(vm->exclusiveAccessMutex);