Skip to content

Commit 0fa27ff

Browse files
committed
Autodetect number of threads (remove setting). Fix some bugs.
1 parent 03c79ae commit 0fa27ff

File tree

14 files changed

+112
-40
lines changed

14 files changed

+112
-40
lines changed

Common/CPUDetect.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@ struct CPUInfo {
3939
bool Mode64bit;
4040

4141
bool HTT;
42+
43+
// Number of real CPU cores.
4244
int num_cores;
45+
// Number of logical CPUs per core.
4346
int logical_cpu_count;
4447

4548
bool bAtom;

Common/Thread/Channel.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,18 @@
22

33
#include <mutex>
44
#include <condition_variable>
5+
#include <cassert>
6+
7+
// Named Channel.h because I originally intended to support a multi item channel as
8+
// well as a simple blocking mailbox. Let's see if we get there.
59

610
// Single item mailbox.
711
template<class T>
812
struct Mailbox {
913
Mailbox() : refcount_(1) {}
14+
~Mailbox() {
15+
assert(refcount_ == 0);
16+
}
1017

1118
std::mutex mutex_;
1219
std::condition_variable condvar_;

Common/Thread/ParallelLoop.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <cstring>
22

3-
#include "ParallelLoop.h"
3+
#include "Common/Thread/ParallelLoop.h"
4+
#include "Common/CPUDetect.h"
45

56
class LoopRangeTask : public Task {
67
public:
@@ -24,7 +25,6 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
2425
minSize = 1;
2526
}
2627

27-
// TODO: Optimize using minSize.
2828
int numTasks = threadMan->GetNumLooperThreads();
2929

3030
int range = upper - lower;
@@ -37,7 +37,7 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
3737
// Just assign one task per thread, as many as we have.
3838
WaitableCounter *counter = new WaitableCounter(range);
3939
for (int i = 0; i < range; i++) {
40-
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(counter, loop, i, i + 1));
40+
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(counter, loop, i, i + 1), TaskType::CPU_COMPUTE);
4141
}
4242
return counter;
4343
} else {
@@ -50,14 +50,21 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
5050
int start = lastEnd;
5151
d += dx;
5252
int end = i == numTasks - 1 ? range : (int)d;
53-
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(counter, loop, start, end));
53+
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(counter, loop, start, end), TaskType::CPU_COMPUTE);
5454
lastEnd = end;
5555
}
5656
return counter;
5757
}
5858
}
5959

6060
void ParallelRangeLoop(ThreadManager *threadMan, const std::function<void(int, int)> &loop, int lower, int upper, int minSize) {
61+
if (cpu_info.num_cores == 1) {
62+
// "Optimization" for single-core devices.
63+
// No point in adding threading overhead, let's just do it inline.
64+
loop(lower, upper);
65+
return;
66+
}
67+
6168
if (minSize == -1) {
6269
minSize = 4;
6370
}

Common/Thread/Promise.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,11 @@ class PromiseTask : public Task {
1010
public:
1111
PromiseTask() {}
1212
~PromiseTask() {
13-
tx_->Release();
1413
}
1514

1615
void Run() override {
1716
T *value = fun_();
1817
tx_->Send(value);
19-
tx_->Release();
2018
}
2119

2220
std::function<T *()> fun_;
@@ -30,18 +28,17 @@ class PromiseTask : public Task {
3028
template<class T>
3129
class Promise {
3230
public:
33-
static Promise<T> *Spawn(ThreadManager *threadman, std::function<T *()> fun) {
31+
static Promise<T> *Spawn(ThreadManager *threadman, std::function<T *()> fun, TaskType taskType) {
3432
// std::pair<Rx<T>, Tx<T>> channel = CreateChannel<T>();
3533
Mailbox<T> *mailbox = new Mailbox<T>();
3634

3735
PromiseTask<T> *task = new PromiseTask<T>();
3836
task->fun_ = fun;
3937
task->tx_ = mailbox;
40-
threadman->EnqueueTask(task);
38+
threadman->EnqueueTask(task, taskType);
4139

4240
Promise<T> *promise = new Promise<T>();
4341
promise->rx_ = mailbox;
44-
mailbox->AddRef();
4542
return promise;
4643
}
4744

Common/Thread/ThreadManager.cpp

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,24 @@
1111
#include "Common/Thread/ThreadUtil.h"
1212
#include "Common/Thread/ThreadManager.h"
1313

14+
// Threads and task scheduling
15+
//
16+
// * The threadpool should contain a number of threads that's the number of cores,
17+
// plus a fixed number more for I/O-limited background tasks.
18+
// * Parallel compute-limited loops should use as many threads as there are cores.
19+
// They should always be scheduled to the first N threads.
20+
// * For some tasks, splitting the input values up linearly between the threads
21+
// is not fair. However, we ignore that for now.
22+
23+
const int MAX_CORES_TO_USE = 16;
24+
const int EXTRA_THREADS = 4; // For I/O limited tasks
25+
1426
struct GlobalThreadContext {
1527
std::mutex mutex; // associated with each respective condition variable
1628
std::deque<Task *> queue;
1729
std::vector<ThreadContext *> threads_;
30+
31+
int roundRobin;
1832
};
1933

2034
struct ThreadContext {
@@ -80,7 +94,11 @@ static void WorkerThreadFunc(GlobalThreadContext *global, ThreadContext *thread)
8094
}
8195
}
8296

83-
void ThreadManager::Init(int numThreads) {
97+
void ThreadManager::Init(int numRealCores, int numLogicalCores) {
98+
numComputeThreads_ = std::min(numRealCores, MAX_CORES_TO_USE);
99+
int numThreads = numComputeThreads_ + EXTRA_THREADS;
100+
numThreads_ = numThreads;
101+
84102
for (int i = 0; i < numThreads; i++) {
85103
ThreadContext *thread = new ThreadContext();
86104
thread->cancelled.store(false);
@@ -91,9 +109,25 @@ void ThreadManager::Init(int numThreads) {
91109
}
92110

93111
void ThreadManager::EnqueueTask(Task *task, TaskType taskType) {
112+
int maxThread;
113+
int threadOffset = 0;
114+
if (taskType == TaskType::CPU_COMPUTE) {
115+
// only the threads reserved for heavy compute.
116+
maxThread = numComputeThreads_;
117+
threadOffset = 0;
118+
} else {
119+
// any free thread
120+
maxThread = numThreads_;
121+
threadOffset = numComputeThreads_;
122+
}
123+
94124
// Find a thread with no outstanding work.
95-
for (int i = 0; i < global_->threads_.size(); i++) {
96-
ThreadContext *thread = global_->threads_[i];
125+
int threadNum = threadOffset;
126+
for (int i = 0; i < maxThread; i++, threadNum++) {
127+
if (threadNum >= global_->threads_.size()) {
128+
threadNum = 0;
129+
}
130+
ThreadContext *thread = global_->threads_[threadNum];
97131
if (thread->queueSize.load() == 0) {
98132
std::unique_lock<std::mutex> lock(thread->mutex);
99133
thread->private_queue.push_back(task);
@@ -104,11 +138,13 @@ void ThreadManager::EnqueueTask(Task *task, TaskType taskType) {
104138
}
105139
}
106140

107-
// Still not scheduled? Put it on the global queue and notify a random thread.
141+
// Still not scheduled? Put it on the global queue and notify a thread chosen by round-robin.
142+
// Not particularly scientific, but hopefully we should not run into this too much.
108143
{
109144
std::unique_lock<std::mutex> lock(global_->mutex);
110145
global_->queue.push_back(task);
111-
global_->threads_[0]->cond.notify_one();
146+
global_->threads_[global_->roundRobin % maxThread]->cond.notify_one();
147+
global_->roundRobin++;
112148
}
113149
}
114150

@@ -123,10 +159,7 @@ void ThreadManager::EnqueueTaskOnThread(int threadNum, Task *task, TaskType task
123159
}
124160

125161
int ThreadManager::GetNumLooperThreads() const {
126-
// If possible, let's use all threads but one for parallel loops.
127-
// Not sure what's the best policy here. Maybe we should just have more threads than CPUs.
128-
int numLooperThreads = (int)(global_->threads_.size()) - 1;
129-
return std::max(numLooperThreads, 1);
162+
return numComputeThreads_;
130163
}
131164

132165
void ThreadManager::TryCancelTask(uint64_t taskID) {

Common/Thread/ThreadManager.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,12 @@ class ThreadManager {
4141
ThreadManager();
4242
~ThreadManager();
4343

44-
void Init(int numWorkerThreads);
45-
void EnqueueTask(Task *task, TaskType taskType = TaskType::CPU_COMPUTE);
46-
void EnqueueTaskOnThread(int threadNum, Task *task, TaskType taskType = TaskType::CPU_COMPUTE);
44+
// The distinction here is to be able to take hyper-threading into account.
45+
// It gets even trickier when you think about mobile chips with BIG/LITTLE, but we'll
46+
// just ignore it and let the OS handle it.
47+
void Init(int numRealCores, int numLogicalCores);
48+
void EnqueueTask(Task *task, TaskType taskType);
49+
void EnqueueTaskOnThread(int threadNum, Task *task, TaskType taskType);
4750

4851
// Currently does nothing. It will always be best-effort - maybe it cancels,
4952
// maybe it doesn't. Note that the id is the id() returned by the task. You need to make that
@@ -57,6 +60,9 @@ class ThreadManager {
5760
private:
5861
GlobalThreadContext *global_;
5962

63+
int numThreads_ = 0;
64+
int numComputeThreads_ = 0;
65+
6066
friend struct ThreadContext;
6167
};
6268

Common/Thread/ThreadUtil.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#include "ppsspp_config.h"
2+
13
#ifdef _WIN32
24
#include <windows.h>
35
#ifdef __MINGW32__
@@ -16,6 +18,9 @@
1618

1719
#if defined(__ANDROID__) || defined(__APPLE__) || (defined(__GLIBC__) && defined(_GNU_SOURCE))
1820
#include <pthread.h>
21+
#include <sys/types.h>
22+
#include <unistd.h>
23+
#include <sys/syscall.h>
1924
#endif
2025

2126
#ifdef TLS_SUPPORTED
@@ -41,12 +46,14 @@ static EXCEPTION_DISPOSITION NTAPI ignore_handler(EXCEPTION_RECORD *rec,
4146
#endif
4247

4348
void SetCurrentThreadName(const char* threadName) {
44-
#ifdef _WIN32
49+
#if PPSSPP_PLATFORM(WINDOWS)
4550
// Set the debugger-visible threadname through an unholy magic hack
4651
static const DWORD MS_VC_EXCEPTION = 0x406D1388;
4752
#endif
4853

49-
#if defined(_WIN32) && defined(__MINGW32__)
54+
// TODO: Use the new function SetThreadDescription available since Windows 10, version 1607.
55+
56+
#if PPSSPP_PLATFORM(WINDOWS) && defined(__MINGW32__)
5057
// Thread information for VS compatible debugger. -1 sets current thread.
5158
THREADNAME_INFO ti;
5259
ti.dwType = 0x1000;
@@ -67,7 +74,7 @@ void SetCurrentThreadName(const char* threadName) {
6774

6875
// Pop exception handler
6976
tib->ExceptionList = tib->ExceptionList->Next;
70-
#elif defined(_WIN32)
77+
#elif PPSSPP_PLATFORM(WINDOWS)
7178
#pragma pack(push,8)
7279
struct THREADNAME_INFO {
7380
DWORD dwType; // must be 0x1000
@@ -121,3 +128,11 @@ void AssertCurrentThreadName(const char *threadName) {
121128
}
122129
#endif
123130
}
131+
132+
int GetCurrentThreadIdForDebug() {
133+
#if PPSSPP_PLATFORM(WINDOWS)
134+
return (int)GetCurrentThreadId();
135+
#else
136+
return gettid();
137+
#endif
138+
}

Common/Thread/ThreadUtil.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,7 @@
66
// for AssertCurrentThreadName to work.
77
void SetCurrentThreadName(const char *threadName);
88
void AssertCurrentThreadName(const char *threadName);
9+
10+
// Just gets a cheap thread identifier so that you can see different threads in debug output,
11+
// but exactly what it represents is badly specified, so it is not useful for anything else.
12+
int GetCurrentThreadIdForDebug();

Core/Config.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -423,12 +423,6 @@ std::string CreateRandMAC() {
423423
return randStream.str();
424424
}
425425

426-
static int DefaultNumWorkers() {
427-
// Let's cap the global thread pool at 16 threads. Nothing we do really should have much
428-
// use for more...
429-
return std::min(16, cpu_info.num_cores);
430-
}
431-
432426
static int DefaultCpuCore() {
433427
#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64) || PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
434428
return (int)CPUCore::JIT;
@@ -473,7 +467,6 @@ static ConfigSetting generalSettings[] = {
473467
ConfigSetting("DiscordPresence", &g_Config.bDiscordPresence, true, true, false), // Or maybe it makes sense to have it per-game? Race conditions abound...
474468
ConfigSetting("UISound", &g_Config.bUISound, false, true, false),
475469

476-
ReportedConfigSetting("NumWorkerThreads", &g_Config.iNumWorkerThreads, &DefaultNumWorkers, true, true),
477470
ConfigSetting("AutoLoadSaveState", &g_Config.iAutoLoadSaveState, 0, true, true),
478471
ReportedConfigSetting("EnableCheats", &g_Config.bEnableCheats, false, true, true),
479472
ConfigSetting("CwCheatRefreshRate", &g_Config.iCwCheatRefreshRate, 77, true, true),

Core/Config.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ struct Config {
7272
bool bBrowse; // when opening the emulator, immediately show a file browser
7373

7474
// General
75-
int iNumWorkerThreads;
7675
bool bScreenshotsAsPNG;
7776
bool bUseFFV1;
7877
bool bDumpFrames;

0 commit comments

Comments
 (0)