initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
This commit is contained in:
15
thirdparty/embree/common/tasking/taskscheduler.h
vendored
Normal file
15
thirdparty/embree/common/tasking/taskscheduler.h
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
/* Select exactly one tasking backend header.
 * TASKING_TBB wins over TASKING_INTERNAL when both are defined;
 * TASKING_PPL is only considered when neither of the other two applies. */
#if defined(TASKING_TBB)
#  include "taskschedulertbb.h"
#elif defined(TASKING_INTERNAL)
#  include "taskschedulerinternal.h"
#elif defined(TASKING_PPL)
#  include "taskschedulerppl.h"
#else
#  error "no tasking system enabled"
#endif
|
||||
|
404
thirdparty/embree/common/tasking/taskschedulerinternal.cpp
vendored
Normal file
404
thirdparty/embree/common/tasking/taskschedulerinternal.cpp
vendored
Normal file
@@ -0,0 +1,404 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "taskschedulerinternal.h"
|
||||
#include "../math/emath.h"
|
||||
#include "../sys/sysinfo.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
RTC_NAMESPACE_BEGIN
|
||||
|
||||
static MutexSys g_mutex;
|
||||
size_t TaskScheduler::g_numThreads = 0;
|
||||
__thread TaskScheduler* TaskScheduler::g_instance = nullptr;
|
||||
std::vector<Ref<TaskScheduler>> g_instance_vector;
|
||||
__thread TaskScheduler::Thread* TaskScheduler::thread_local_thread = nullptr;
|
||||
TaskScheduler::ThreadPool* TaskScheduler::threadPool = nullptr;
|
||||
|
||||
template<typename Predicate, typename Body>
|
||||
__forceinline void TaskScheduler::steal_loop(Thread& thread, const Predicate& pred, const Body& body)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
/*! some rounds that yield */
|
||||
for (size_t i=0; i<32; i++)
|
||||
{
|
||||
/*! some spinning rounds */
|
||||
const size_t threadCount = thread.threadCount();
|
||||
for (size_t j=0; j<1024; j+=threadCount)
|
||||
{
|
||||
if (!pred()) return;
|
||||
if (thread.scheduler->steal_from_other_threads(thread)) {
|
||||
i=j=0;
|
||||
body();
|
||||
}
|
||||
}
|
||||
yield();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! run this task */
|
||||
void TaskScheduler::Task::run_internal (Thread& thread) // FIXME: avoid as many dll_exports as possible
|
||||
{
|
||||
/* try to run if not already stolen */
|
||||
if (try_switch_state(INITIALIZED,DONE))
|
||||
{
|
||||
Task* prevTask = thread.task;
|
||||
thread.task = this;
|
||||
//try {
|
||||
// if (context->cancellingException == nullptr)
|
||||
closure->execute();
|
||||
//} catch (...) {
|
||||
// if (context->cancellingException == nullptr)
|
||||
// context->cancellingException = std::current_exception();
|
||||
//}
|
||||
thread.task = prevTask;
|
||||
add_dependencies(-1);
|
||||
}
|
||||
|
||||
/* steal until all dependencies have completed */
|
||||
steal_loop(thread,
|
||||
[&] () { return dependencies>0; },
|
||||
[&] () { while (thread.tasks.execute_local_internal(thread,this)); });
|
||||
|
||||
/* now signal our parent task that we are finished */
|
||||
if (parent)
|
||||
parent->add_dependencies(-1);
|
||||
}
|
||||
|
||||
/*! run this task */
|
||||
dll_export void TaskScheduler::Task::run (Thread& thread) {
|
||||
run_internal(thread);
|
||||
}
|
||||
|
||||
/*! Executes the task on top of the local task stack and pops it.
 *  Returns false without executing anything when the stack is empty or when
 *  its top entry is `parent`; otherwise returns whether tasks remain. */
bool TaskScheduler::TaskQueue::execute_local_internal(Thread& thread, Task* parent)
{
  /* stop if we run out of local tasks ... */
  if (right == 0)
    return false;

  /* ... or reach the waiting task */
  if (&tasks[right-1] == parent)
    return false;

  /* execute the top task; it must leave the stack height unchanged */
  const size_t rightBefore = right;
  tasks[right-1].run_internal(thread);
  if (right != rightBefore) {
    THROW_RUNTIME_ERROR("you have to wait for spawned subtasks");
  }

  /* pop the task and restore the closure stack position it recorded
     (size_t(-1) means there is nothing to restore) */
  right--;
  const size_t recordedStackPtr = tasks[right].stackPtr;
  if (recordedStackPtr != size_t(-1))
    stackPtr = recordedStackPtr;

  /* also move left pointer so that it never exceeds right */
  if (left >= right)
    left.store(right.load());

  return right != 0;
}
|
||||
|
||||
/*! exported wrapper around execute_local_internal */
dll_export bool TaskScheduler::TaskQueue::execute_local(Thread& thread, Task* parent)
{
  const bool tasksRemain = execute_local_internal(thread,parent);
  return tasksRemain;
}
|
||||
|
||||
/*! Tries to steal the leftmost task of this queue on behalf of `thread`.
 *  On success a stolen-task entry is constructed at the right end of the
 *  thief's own queue and true is returned. */
bool TaskScheduler::TaskQueue::steal(Thread& thread)
{
  size_t l = left;
  const size_t r = right;

  /* nothing to steal */
  if (l >= r)
    return false;

  /* claim a slot by advancing left; give up if the claimed slot is not below r */
  l = left++;
  if (l >= r)
    return false;

  /* the task itself decides whether it may be stolen */
  TaskQueue& thiefQueue = thread.tasks;
  if (!tasks[l].try_steal(thiefQueue.tasks[thiefQueue.right]))
    return false;

  thiefQueue.right++;
  return true;
}
|
||||
|
||||
/* we steal from the left */
|
||||
size_t TaskScheduler::TaskQueue::getTaskSizeAtLeft()
|
||||
{
|
||||
if (left >= right) return 0;
|
||||
return tasks[left].N;
|
||||
}
|
||||
|
||||
/*! Thread entry function used by ThreadPool::setNumThreads. Takes ownership of
 *  the heap-allocated (pool, thread index) pair, frees it, and then runs the
 *  pool's thread loop with that index. */
void threadPoolFunction(std::pair<TaskScheduler::ThreadPool*,size_t>* pair)
{
  /* copy the payload out before releasing the argument */
  const std::pair<TaskScheduler::ThreadPool*,size_t> args = *pair;
  delete pair;

  args.first->thread_loop(args.second);
}
|
||||
|
||||
/*! creates a pool with no threads configured and none running */
TaskScheduler::ThreadPool::ThreadPool(bool set_affinity)
  : numThreads(0),
    numThreadsRunning(0),
    set_affinity(set_affinity),
    running(false)
{
}
|
||||
|
||||
/*! starts the configured number of threads unless the pool is already running */
dll_export void TaskScheduler::ThreadPool::startThreads()
{
  if (!running)
    setNumThreads(numThreads,true);
}
|
||||
|
||||
void TaskScheduler::ThreadPool::setNumThreads(size_t newNumThreads, bool startThreads)
|
||||
{
|
||||
Lock<MutexSys> lock(g_mutex);
|
||||
assert(newNumThreads);
|
||||
if (newNumThreads == std::numeric_limits<size_t>::max())
|
||||
newNumThreads = (size_t) getNumberOfLogicalThreads();
|
||||
|
||||
numThreads = newNumThreads;
|
||||
if (!startThreads && !running) return;
|
||||
running = true;
|
||||
size_t numThreadsActive = numThreadsRunning;
|
||||
|
||||
mutex.lock();
|
||||
numThreadsRunning = newNumThreads;
|
||||
mutex.unlock();
|
||||
condition.notify_all();
|
||||
|
||||
/* start new threads */
|
||||
for (size_t t=numThreadsActive; t<numThreads; t++)
|
||||
{
|
||||
if (t == 0) continue;
|
||||
auto pair = new std::pair<TaskScheduler::ThreadPool*,size_t>(this,t);
|
||||
threads.push_back(createThread((thread_func)threadPoolFunction,pair,4*1024*1024,set_affinity ? t : -1));
|
||||
}
|
||||
|
||||
/* stop some threads if we reduce the number of threads */
|
||||
for (ssize_t t=numThreadsActive-1; t>=ssize_t(numThreadsRunning); t--) {
|
||||
if (t == 0) continue;
|
||||
embree::join(threads.back());
|
||||
threads.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
/*! asks all pool threads to leave their loop and joins them */
TaskScheduler::ThreadPool::~ThreadPool()
{
  /* leave all taskschedulers: a running count of 0 ends every thread_loop */
  {
    Lock<MutexSys> poolLock(mutex);
    numThreadsRunning = 0;
  }
  condition.notify_all();

  /* wait for threads to terminate */
  for (thread_t handle : threads)
    embree::join(handle);
}
|
||||
|
||||
/*! Registers a task scheduler with the pool and wakes all pool threads
 *  waiting on the condition.
 *
 *  The list is modified under a scoped lock (the same pattern remove() uses)
 *  so that the mutex is released even if push_back throws. The notification
 *  is sent after the lock has been released, as before. */
dll_export void TaskScheduler::ThreadPool::add(const Ref<TaskScheduler>& scheduler)
{
  {
    Lock<MutexSys> lock(mutex);
    schedulers.push_back(scheduler);
  }
  condition.notify_all();
}
|
||||
|
||||
/*! removes the first list entry equal to `scheduler`; does nothing if there is none */
dll_export void TaskScheduler::ThreadPool::remove(const Ref<TaskScheduler>& scheduler)
{
  Lock<MutexSys> lock(mutex);

  /* advance to the first matching entry (or the end of the list) */
  auto entry = schedulers.begin();
  while (entry != schedulers.end() && !(scheduler == *entry))
    ++entry;

  if (entry != schedulers.end())
    schedulers.erase(entry);
}
|
||||
|
||||
void TaskScheduler::ThreadPool::thread_loop(size_t globalThreadIndex)
|
||||
{
|
||||
while (globalThreadIndex < numThreadsRunning)
|
||||
{
|
||||
Ref<TaskScheduler> scheduler = NULL;
|
||||
ssize_t threadIndex = -1;
|
||||
{
|
||||
Lock<MutexSys> lock(mutex);
|
||||
condition.wait(mutex, [&] () { return globalThreadIndex >= numThreadsRunning || !schedulers.empty(); });
|
||||
if (globalThreadIndex >= numThreadsRunning) break;
|
||||
scheduler = schedulers.front();
|
||||
threadIndex = scheduler->allocThreadIndex();
|
||||
}
|
||||
scheduler->thread_loop(threadIndex);
|
||||
}
|
||||
}
|
||||
|
||||
/*! creates a scheduler with no threads joined and all per-thread slots cleared */
TaskScheduler::TaskScheduler()
  : threadCounter(0),
    anyTasksRunning(0),
    hasRootTask(false)
{
  assert(threadPool);

  // FIXME: this has to be 2x as in the compatibility join mode with rtcCommitScene the worker threads also join. When disallowing rtcCommitScene to join a build we can remove the 2x.
  threadLocal.resize(2 * TaskScheduler::threadCount());

  for (auto& slot : threadLocal)
    slot.store(nullptr);
}
|
||||
|
||||
/*! destruction expects that no thread index is allocated any more */
TaskScheduler::~TaskScheduler()
{
  assert(threadCounter == 0);
}
|
||||
|
||||
/*! returns the thread index of the calling thread, or 0 when no Thread
 *  structure is installed for it */
dll_export size_t TaskScheduler::threadID()
{
  const Thread* const current = TaskScheduler::thread();
  return current ? current->threadIndex : 0;
}
|
||||
|
||||
/*! returns the thread index of the calling thread, or 0 when no Thread
 *  structure is installed for it */
dll_export size_t TaskScheduler::threadIndex()
{
  const Thread* const current = TaskScheduler::thread();
  return current ? current->threadIndex : 0;
}
|
||||
|
||||
/*! returns the number of threads configured in the thread pool */
dll_export size_t TaskScheduler::threadCount()
{
  const size_t poolSize = threadPool->size();
  return poolSize;
}
|
||||
|
||||
/*! Returns the scheduler of the calling thread, creating it on first use.
 *  A newly created scheduler is also appended to g_instance_vector. */
dll_export TaskScheduler* TaskScheduler::instance()
{
  if (g_instance != nullptr)
    return g_instance;

  /* first call on this thread: create and register under the global mutex */
  Lock<MutexSys> lock(g_mutex);
  g_instance = new TaskScheduler;
  g_instance_vector.push_back(g_instance);
  return g_instance;
}
|
||||
|
||||
void TaskScheduler::create(size_t numThreads, bool set_affinity, bool start_threads)
|
||||
{
|
||||
if (!threadPool) threadPool = new TaskScheduler::ThreadPool(set_affinity);
|
||||
threadPool->setNumThreads(numThreads,start_threads);
|
||||
}
|
||||
|
||||
/*! deletes the thread pool and clears the pointer to it */
void TaskScheduler::destroy()
{
  delete threadPool;
  threadPool = nullptr;
}
|
||||
|
||||
/*! hands out the next thread index by incrementing the thread counter;
 *  the index is expected to be inside threadLocal (asserted) */
dll_export ssize_t TaskScheduler::allocThreadIndex()
{
  const size_t slot = threadCounter.fetch_add(1);
  assert(slot < threadLocal.size());
  return slot;
}
|
||||
|
||||
void TaskScheduler::join()
|
||||
{
|
||||
mutex.lock();
|
||||
size_t threadIndex = allocThreadIndex();
|
||||
condition.wait(mutex, [&] () { return hasRootTask.load(); });
|
||||
mutex.unlock();
|
||||
thread_loop(threadIndex);
|
||||
}
|
||||
|
||||
/*! clears the flag that join() waits on */
void TaskScheduler::reset()
{
  hasRootTask.store(false);
}
|
||||
|
||||
void TaskScheduler::wait_for_threads(size_t threadCount)
|
||||
{
|
||||
while (threadCounter < threadCount-1)
|
||||
pause_cpu();
|
||||
}
|
||||
|
||||
/*! returns the Thread structure installed for the calling thread (may be nullptr) */
dll_export TaskScheduler::Thread* TaskScheduler::thread()
{
  Thread* const current = thread_local_thread;
  return current;
}
|
||||
|
||||
/*! installs `thread` as the calling thread's Thread structure and returns
 *  the one that was installed before */
dll_export TaskScheduler::Thread* TaskScheduler::swapThread(Thread* thread)
{
  Thread* const previous = thread_local_thread;
  thread_local_thread = thread;
  return previous;
}
|
||||
|
||||
/*! Executes local tasks of the calling thread until execute_local_internal
 *  reports that it stopped (queue empty or the active task reached).
 *  Does nothing when no Thread structure is installed for the caller. */
dll_export void TaskScheduler::wait()
{
  Thread* const self = TaskScheduler::thread();
  if (self != nullptr)
  {
    while (self->tasks.execute_local_internal(*self,self->task)) {}
  }
}
|
||||
|
||||
void TaskScheduler::thread_loop(size_t threadIndex)
|
||||
{
|
||||
/* allocate thread structure */
|
||||
std::unique_ptr<Thread> mthread(new Thread(threadIndex,this)); // too large for stack allocation
|
||||
Thread& thread = *mthread;
|
||||
threadLocal[threadIndex].store(&thread);
|
||||
Thread* oldThread = swapThread(&thread);
|
||||
|
||||
/* main thread loop */
|
||||
while (anyTasksRunning)
|
||||
{
|
||||
steal_loop(thread,
|
||||
[&] () { return anyTasksRunning > 0; },
|
||||
[&] () {
|
||||
anyTasksRunning++;
|
||||
while (thread.tasks.execute_local_internal(thread,nullptr));
|
||||
anyTasksRunning--;
|
||||
});
|
||||
}
|
||||
threadLocal[threadIndex].store(nullptr);
|
||||
swapThread(oldThread);
|
||||
|
||||
/* wait for all threads to terminate */
|
||||
threadCounter--;
|
||||
#if defined(__WIN32__)
|
||||
size_t loopIndex = 1;
|
||||
#endif
|
||||
#define LOOP_YIELD_THRESHOLD (4096)
|
||||
while (threadCounter > 0) {
|
||||
#if defined(__WIN32__)
|
||||
if ((loopIndex % LOOP_YIELD_THRESHOLD) == 0)
|
||||
yield();
|
||||
else
|
||||
_mm_pause();
|
||||
loopIndex++;
|
||||
#else
|
||||
yield();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/*! Visits the other thread slots in round-robin order, starting right after
 *  the slot of `thread`, and tries to steal one task. Returns true as soon as
 *  a steal succeeded, false if no slot yielded a task. */
bool TaskScheduler::steal_from_other_threads(Thread& thread)
{
  const size_t ownIndex = thread.threadIndex;
  const size_t numActive = this->threadCounter;

  for (size_t offset = 1; offset < numActive; offset++)
  {
    pause_cpu(32);

    /* wrap around at the number of active threads */
    size_t victimIndex = ownIndex + offset;
    if (victimIndex >= numActive)
      victimIndex -= numActive;

    /* skip slots without a thread structure */
    Thread* const victim = threadLocal[victimIndex].load();
    if (victim && victim->tasks.steal(thread))
      return true;
  }

  return false;
}
|
||||
|
||||
/*! starts the threads of the thread pool */
dll_export void TaskScheduler::startThreads()
{
  threadPool->startThreads();
}
|
||||
|
||||
/*! registers `scheduler` with the thread pool */
dll_export void TaskScheduler::addScheduler(const Ref<TaskScheduler>& scheduler)
{
  threadPool->add(scheduler);
}
|
||||
|
||||
/*! removes `scheduler` from the thread pool again */
dll_export void TaskScheduler::removeScheduler(const Ref<TaskScheduler>& scheduler)
{
  threadPool->remove(scheduler);
}
|
||||
|
||||
RTC_NAMESPACE_END
|
||||
}
|
384
thirdparty/embree/common/tasking/taskschedulerinternal.h
vendored
Normal file
384
thirdparty/embree/common/tasking/taskschedulerinternal.h
vendored
Normal file
@@ -0,0 +1,384 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../../include/embree4/rtcore.h"
|
||||
#include "../sys/platform.h"
|
||||
#include "../sys/alloc.h"
|
||||
#include "../sys/barrier.h"
|
||||
#include "../sys/thread.h"
|
||||
#include "../sys/mutex.h"
|
||||
#include "../sys/condition.h"
|
||||
#include "../sys/ref.h"
|
||||
#include "../sys/atomic.h"
|
||||
#include "../math/range.h"
|
||||
|
||||
#include <list>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
||||
/* The tasking system exports some symbols to be used by the tutorials. Thus we
|
||||
hide it also in the API namespace when requested. */
|
||||
RTC_NAMESPACE_BEGIN
|
||||
|
||||
struct TaskScheduler : public RefCount
|
||||
{
|
||||
ALIGNED_STRUCT_(64);
|
||||
friend class Device;
|
||||
|
||||
static const size_t TASK_STACK_SIZE = 4*1024; //!< task structure stack
|
||||
static const size_t CLOSURE_STACK_SIZE = 512*1024; //!< stack for task closures
|
||||
|
||||
struct Thread;
|
||||
|
||||
/*! abstract interface every task body implements; execute() runs the task */
struct TaskFunction
{
  virtual void execute() = 0;
};
|
||||
|
||||
|
||||
/*! per task group state: holds the exception that is re-thrown by
 *  spawn_root; starts out empty */
struct TaskGroupContext
{
  TaskGroupContext()
    : cancellingException(nullptr)
  {
  }

  std::exception_ptr cancellingException;
};
|
||||
|
||||
/*! builds a task interface from a closure */
|
||||
template<typename Closure>
|
||||
struct ClosureTaskFunction : public TaskFunction
|
||||
{
|
||||
Closure closure;
|
||||
__forceinline ClosureTaskFunction (const Closure& closure) : closure(closure) {}
|
||||
void execute() { closure(); };
|
||||
};
|
||||
|
||||
/*! entry of a thread's task stack: a closure plus the bookkeeping needed to
 *  run it, steal it and wait for its dependencies */
struct __aligned(64) Task
{
  /*! states a task can be in */
  enum { DONE, INITIALIZED };

  /*! switch from one state to another; the switch is expected to succeed (asserted) */
  __forceinline void switch_state(int from, int to)
  {
    __memory_barrier();
    MAYBE_UNUSED bool switched = state.compare_exchange_strong(from,to);
    assert(switched);
  }

  /*! try to switch from one state to another; returns whether the switch happened */
  __forceinline bool try_switch_state(int from, int to)
  {
    __memory_barrier();
    const bool switched = state.compare_exchange_strong(from,to);
    return switched;
  }

  /*! increment/decrement dependency counter by n */
  void add_dependencies(int n)
  {
    dependencies.fetch_add(n);
  }

  /*! initialize all tasks to DONE state by default; the other members are not set here */
  __forceinline Task()
    : state(DONE)
  {
  }

  /*! construction of new task; registers one more dependency with the parent (if any) */
  __forceinline Task (TaskFunction* closure, Task* parent, TaskGroupContext* context, size_t stackPtr, size_t N)
    : dependencies(1), stealable(true), closure(closure), parent(parent), context(context), stackPtr(stackPtr), N(N)
  {
    if (parent)
      parent->add_dependencies(+1);
    switch_state(DONE,INITIALIZED);
  }

  /*! construction of stolen task, stealing thread will decrement initial dependency */
  __forceinline Task (TaskFunction* closure, Task* parent, TaskGroupContext* context)
    : dependencies(1), stealable(false), closure(closure), parent(parent), context(context), stackPtr(-1), N(1)
  {
    switch_state(DONE,INITIALIZED);
  }

  /*! try to steal this task: on success `child` is constructed in place as a
   *  non-stealable task that runs the same closure with this task as parent */
  bool try_steal(Task& child)
  {
    if (!stealable || !try_switch_state(INITIALIZED,DONE))
      return false;

    new (&child) Task(closure, this, context);
    return true;
  }

  /*! run this task */
  dll_export void run(Thread& thread);

  void run_internal(Thread& thread);

public:
  std::atomic<int> state;            //!< state this task is in
  std::atomic<int> dependencies;     //!< dependencies to wait for
  std::atomic<bool> stealable;       //!< true if task can be stolen
  TaskFunction* closure;             //!< the closure to execute
  Task* parent;                      //!< parent task to signal when we are finished
  TaskGroupContext* context;         //!< context passed in when the task was created
  size_t stackPtr;                   //!< stack location where closure is stored
  size_t N;                          //!< approximate size of task
};
|
||||
|
||||
struct TaskQueue
|
||||
{
|
||||
TaskQueue ()
|
||||
: left(0), right(0), stackPtr(0) {}
|
||||
|
||||
__forceinline void* alloc(size_t bytes, size_t align = 64)
|
||||
{
|
||||
size_t ofs = bytes + ((align - stackPtr) & (align-1));
|
||||
if (stackPtr + ofs > CLOSURE_STACK_SIZE)
|
||||
abort(); //throw std::runtime_error("closure stack overflow");
|
||||
stackPtr += ofs;
|
||||
return &stack[stackPtr-bytes];
|
||||
}
|
||||
|
||||
template<typename Closure>
|
||||
__forceinline void push_right(Thread& thread, const size_t size, const Closure& closure, TaskGroupContext* context)
|
||||
{
|
||||
if (right >= TASK_STACK_SIZE)
|
||||
abort(); //throw std::runtime_error("task stack overflow");
|
||||
|
||||
/* allocate new task on right side of stack */
|
||||
size_t oldStackPtr = stackPtr;
|
||||
TaskFunction* func = new (alloc(sizeof(ClosureTaskFunction<Closure>))) ClosureTaskFunction<Closure>(closure);
|
||||
new (&tasks[right.load()]) Task(func,thread.task,context,oldStackPtr,size);
|
||||
right++;
|
||||
|
||||
/* also move left pointer */
|
||||
if (left >= right-1) left = right-1;
|
||||
}
|
||||
|
||||
dll_export bool execute_local(Thread& thread, Task* parent);
|
||||
bool execute_local_internal(Thread& thread, Task* parent);
|
||||
bool steal(Thread& thread);
|
||||
size_t getTaskSizeAtLeft();
|
||||
|
||||
bool empty() { return right == 0; }
|
||||
|
||||
public:
|
||||
|
||||
/* task stack */
|
||||
Task tasks[TASK_STACK_SIZE];
|
||||
__aligned(64) std::atomic<size_t> left; //!< threads steal from left
|
||||
__aligned(64) std::atomic<size_t> right; //!< new tasks are added to the right
|
||||
|
||||
/* closure stack */
|
||||
__aligned(64) char stack[CLOSURE_STACK_SIZE];
|
||||
size_t stackPtr;
|
||||
};
|
||||
|
||||
/*! thread local structure for each thread */
|
||||
struct Thread
|
||||
{
|
||||
ALIGNED_STRUCT_(64);
|
||||
|
||||
Thread (size_t threadIndex, const Ref<TaskScheduler>& scheduler)
|
||||
: threadIndex(threadIndex), task(nullptr), scheduler(scheduler) {}
|
||||
|
||||
__forceinline size_t threadCount() {
|
||||
return scheduler->threadCounter;
|
||||
}
|
||||
|
||||
size_t threadIndex; //!< ID of this thread
|
||||
TaskQueue tasks; //!< local task queue
|
||||
Task* task; //!< current active task
|
||||
Ref<TaskScheduler> scheduler; //!< pointer to task scheduler
|
||||
};
|
||||
|
||||
/*! pool of worker threads */
|
||||
struct ThreadPool
|
||||
{
|
||||
ThreadPool (bool set_affinity);
|
||||
~ThreadPool ();
|
||||
|
||||
/*! starts the threads */
|
||||
dll_export void startThreads();
|
||||
|
||||
/*! sets number of threads to use */
|
||||
void setNumThreads(size_t numThreads, bool startThreads = false);
|
||||
|
||||
/*! adds a task scheduler object for scheduling */
|
||||
dll_export void add(const Ref<TaskScheduler>& scheduler);
|
||||
|
||||
/*! remove the task scheduler object again */
|
||||
dll_export void remove(const Ref<TaskScheduler>& scheduler);
|
||||
|
||||
/*! returns number of threads of the thread pool */
|
||||
size_t size() const { return numThreads; }
|
||||
|
||||
/*! main loop for all threads */
|
||||
void thread_loop(size_t threadIndex);
|
||||
|
||||
private:
|
||||
std::atomic<size_t> numThreads;
|
||||
std::atomic<size_t> numThreadsRunning;
|
||||
bool set_affinity;
|
||||
std::atomic<bool> running;
|
||||
std::vector<thread_t> threads;
|
||||
|
||||
private:
|
||||
MutexSys mutex;
|
||||
ConditionSys condition;
|
||||
std::list<Ref<TaskScheduler> > schedulers;
|
||||
};
|
||||
|
||||
TaskScheduler ();
|
||||
~TaskScheduler ();
|
||||
|
||||
/*! initializes the task scheduler */
|
||||
static void create(size_t numThreads, bool set_affinity, bool start_threads);
|
||||
|
||||
/*! destroys the task scheduler again */
|
||||
static void destroy();
|
||||
|
||||
/*! lets new worker threads join the tasking system */
|
||||
void join();
|
||||
void reset();
|
||||
|
||||
/*! let a worker thread allocate a thread index */
|
||||
dll_export ssize_t allocThreadIndex();
|
||||
|
||||
/*! wait for some number of threads available (threadCount includes main thread) */
|
||||
void wait_for_threads(size_t threadCount);
|
||||
|
||||
/*! thread loop for all worker threads */
|
||||
void thread_loop(size_t threadIndex);
|
||||
|
||||
/*! steals a task from a different thread */
|
||||
bool steal_from_other_threads(Thread& thread);
|
||||
|
||||
template<typename Predicate, typename Body>
|
||||
static void steal_loop(Thread& thread, const Predicate& pred, const Body& body);
|
||||
|
||||
/* spawn a new task at the top of the threads task stack */
|
||||
template<typename Closure>
|
||||
void spawn_root(const Closure& closure, TaskGroupContext* context, size_t size = 1, bool useThreadPool = true)
|
||||
{
|
||||
if (useThreadPool) startThreads();
|
||||
|
||||
size_t threadIndex = allocThreadIndex();
|
||||
std::unique_ptr<Thread> mthread(new Thread(threadIndex,this)); // too large for stack allocation
|
||||
Thread& thread = *mthread;
|
||||
assert(threadLocal[threadIndex].load() == nullptr);
|
||||
threadLocal[threadIndex] = &thread;
|
||||
Thread* oldThread = swapThread(&thread);
|
||||
thread.tasks.push_right(thread,size,closure,context);
|
||||
{
|
||||
Lock<MutexSys> lock(mutex);
|
||||
anyTasksRunning++;
|
||||
hasRootTask = true;
|
||||
condition.notify_all();
|
||||
}
|
||||
|
||||
if (useThreadPool) addScheduler(this);
|
||||
|
||||
while (thread.tasks.execute_local(thread,nullptr));
|
||||
anyTasksRunning--;
|
||||
if (useThreadPool) removeScheduler(this);
|
||||
|
||||
threadLocal[threadIndex] = nullptr;
|
||||
swapThread(oldThread);
|
||||
|
||||
/* remember exception to throw */
|
||||
std::exception_ptr except = nullptr;
|
||||
if (context->cancellingException != nullptr) except = context->cancellingException;
|
||||
|
||||
/* wait for all threads to terminate */
|
||||
threadCounter--;
|
||||
while (threadCounter > 0) yield();
|
||||
context->cancellingException = nullptr;
|
||||
|
||||
/* re-throw proper exception */
|
||||
if (except != nullptr) {
|
||||
std::rethrow_exception(except);
|
||||
}
|
||||
}
|
||||
|
||||
/* spawn a new task at the top of the threads task stack */
|
||||
template<typename Closure>
|
||||
static __forceinline void spawn(size_t size, const Closure& closure, TaskGroupContext* context)
|
||||
{
|
||||
Thread* thread = TaskScheduler::thread();
|
||||
if (likely(thread != nullptr)) thread->tasks.push_right(*thread,size,closure,context);
|
||||
else instance()->spawn_root(closure,context,size);
|
||||
}
|
||||
|
||||
/* spawn a new task at the top of the threads task stack */
|
||||
template<typename Closure>
|
||||
static __forceinline void spawn(const Closure& closure, TaskGroupContext* taskGroupContext) {
|
||||
spawn(1,closure,taskGroupContext);
|
||||
}
|
||||
|
||||
/* spawn a new task set */
|
||||
template<typename Index, typename Closure>
|
||||
static void spawn(const Index begin, const Index end, const Index blockSize, const Closure& closure, TaskGroupContext* context)
|
||||
{
|
||||
spawn(end-begin, [=]()
|
||||
{
|
||||
if (end-begin <= blockSize) {
|
||||
return closure(range<Index>(begin,end));
|
||||
}
|
||||
const Index center = (begin+end)/2;
|
||||
spawn(begin,center,blockSize,closure,context);
|
||||
spawn(center,end ,blockSize,closure,context);
|
||||
wait();
|
||||
},context);
|
||||
}
|
||||
|
||||
/* work on spawned subtasks and wait until all have finished */
|
||||
dll_export static void wait();
|
||||
|
||||
/* returns the ID of the current thread */
|
||||
dll_export static size_t threadID();
|
||||
|
||||
/* returns the index (0..threadCount-1) of the current thread */
|
||||
dll_export static size_t threadIndex();
|
||||
|
||||
/* returns the total number of threads */
|
||||
dll_export static size_t threadCount();
|
||||
|
||||
private:
|
||||
|
||||
/* returns the thread local task list of this worker thread */
|
||||
dll_export static Thread* thread();
|
||||
|
||||
/* sets the thread local task list of this worker thread */
|
||||
dll_export static Thread* swapThread(Thread* thread);
|
||||
|
||||
/*! returns the taskscheduler object to be used by the master thread */
|
||||
dll_export static TaskScheduler* instance();
|
||||
|
||||
/*! starts the threads */
|
||||
dll_export static void startThreads();
|
||||
|
||||
/*! adds a task scheduler object for scheduling */
|
||||
dll_export static void addScheduler(const Ref<TaskScheduler>& scheduler);
|
||||
|
||||
/*! remove the task scheduler object again */
|
||||
dll_export static void removeScheduler(const Ref<TaskScheduler>& scheduler);
|
||||
|
||||
private:
|
||||
std::vector<atomic<Thread*>> threadLocal;
|
||||
std::atomic<size_t> threadCounter;
|
||||
std::atomic<size_t> anyTasksRunning;
|
||||
std::atomic<bool> hasRootTask;
|
||||
MutexSys mutex;
|
||||
ConditionSys condition;
|
||||
|
||||
private:
|
||||
static size_t g_numThreads;
|
||||
static __thread TaskScheduler* g_instance;
|
||||
static __thread Thread* thread_local_thread;
|
||||
static ThreadPool* threadPool;
|
||||
};
|
||||
|
||||
RTC_NAMESPACE_END
|
||||
|
||||
#if defined(RTC_NAMESPACE)
|
||||
using RTC_NAMESPACE::TaskScheduler;
|
||||
#endif
|
||||
}
|
46
thirdparty/embree/common/tasking/taskschedulerppl.h
vendored
Normal file
46
thirdparty/embree/common/tasking/taskschedulerppl.h
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../sys/platform.h"
|
||||
#include "../sys/alloc.h"
|
||||
#include "../sys/barrier.h"
|
||||
#include "../sys/thread.h"
|
||||
#include "../sys/mutex.h"
|
||||
#include "../sys/condition.h"
|
||||
#include "../sys/ref.h"
|
||||
|
||||
#if !defined(__WIN32__)
|
||||
#error PPL tasking system only available under windows
|
||||
#endif
|
||||
|
||||
#include <ppl.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct TaskScheduler
|
||||
{
|
||||
/*! initializes the task scheduler */
|
||||
static void create(size_t numThreads, bool set_affinity, bool start_threads);
|
||||
|
||||
/*! destroys the task scheduler again */
|
||||
static void destroy();
|
||||
|
||||
/* returns the ID of the current thread */
|
||||
static __forceinline size_t threadID() {
|
||||
return GetCurrentThreadId();
|
||||
}
|
||||
|
||||
/* returns the index (0..threadCount-1) of the current thread */
|
||||
/* FIXME: threadIndex is NOT supported by PPL! */
|
||||
static __forceinline size_t threadIndex() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* returns the total number of threads */
|
||||
static __forceinline size_t threadCount() {
|
||||
return GetMaximumProcessorCount(ALL_PROCESSOR_GROUPS) + 1;
|
||||
}
|
||||
};
|
||||
};
|
89
thirdparty/embree/common/tasking/taskschedulertbb.h
vendored
Normal file
89
thirdparty/embree/common/tasking/taskschedulertbb.h
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../sys/platform.h"
|
||||
#include "../sys/alloc.h"
|
||||
#include "../sys/barrier.h"
|
||||
#include "../sys/thread.h"
|
||||
#include "../sys/mutex.h"
|
||||
#include "../sys/condition.h"
|
||||
#include "../sys/ref.h"
|
||||
|
||||
#if defined(__WIN32__) && !defined(NOMINMAX)
|
||||
# define NOMINMAX
|
||||
#endif
|
||||
|
||||
#if defined(__INTEL_LLVM_COMPILER)
|
||||
// prevents "'__thiscall' calling convention is not supported for this target" warning from TBB
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wignored-attributes"
|
||||
#endif
|
||||
|
||||
// We need to define these to avoid implicit linkage against
|
||||
// tbb_debug.lib under Windows. When removing these lines debug build
|
||||
// under Windows fails.
|
||||
#define __TBB_NO_IMPLICIT_LINKAGE 1
|
||||
#define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1
|
||||
#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1
|
||||
#define TBB_PREVIEW_ISOLATED_TASK_GROUP 1
|
||||
#include "tbb/tbb.h"
|
||||
#include "tbb/parallel_sort.h"
|
||||
|
||||
#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR >= 8)
|
||||
# define USE_TASK_ARENA 1
|
||||
#else
|
||||
# define USE_TASK_ARENA 0
|
||||
#endif
|
||||
|
||||
#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION >= 11009) // TBB 2019 Update 9
|
||||
# define TASKING_TBB_USE_TASK_ISOLATION 1
|
||||
#else
|
||||
# define TASKING_TBB_USE_TASK_ISOLATION 0
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct TaskScheduler
|
||||
{
|
||||
/*! initializes the task scheduler */
|
||||
static void create(size_t numThreads, bool set_affinity, bool start_threads);
|
||||
|
||||
/*! destroys the task scheduler again */
|
||||
static void destroy();
|
||||
|
||||
/* returns the ID of the current thread */
|
||||
static __forceinline size_t threadID()
|
||||
{
|
||||
return threadIndex();
|
||||
}
|
||||
|
||||
/* returns the index (0..threadCount-1) of the current thread */
|
||||
static __forceinline size_t threadIndex()
|
||||
{
|
||||
#if TBB_INTERFACE_VERSION >= 9100
|
||||
return tbb::this_task_arena::current_thread_index();
|
||||
#elif TBB_INTERFACE_VERSION >= 9000
|
||||
return tbb::task_arena::current_thread_index();
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* returns the total number of threads */
|
||||
static __forceinline size_t threadCount() {
|
||||
#if TBB_INTERFACE_VERSION >= 9100
|
||||
return tbb::this_task_arena::max_concurrency();
|
||||
#else
|
||||
return tbb::task_scheduler_init::default_num_threads();
|
||||
#endif
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#if defined(__INTEL_LLVM_COMPILER)
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
Reference in New Issue
Block a user