initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
This commit is contained in:
85
thirdparty/embree/common/algorithms/parallel_prefix_sum.h
vendored
Normal file
85
thirdparty/embree/common/algorithms/parallel_prefix_sum.h
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "parallel_for.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Scratch storage shared between the passes of parallel_prefix_sum.
 *
 *  Holds one slot per task. parallel_prefix_sum never uses more than
 *  MAX_TASKS tasks, so fixed-size arrays are sufficient and no heap
 *  allocation is needed. The arrays are left uninitialized on
 *  construction (for trivially constructible Value); they are filled
 *  by parallel_prefix_sum. */
template<typename Value>
struct ParallelPrefixSumState
{
  /*! upper bound on the number of tasks a prefix sum is split into */
  enum { MAX_TASKS = 64 };

  /*! counts[i] receives the value the user functor returned for the i-th sub-range */
  Value counts[MAX_TASKS];

  /*! sums[i] receives the reduction of all counts[j] with j < i, i.e. the
   *  start value that is handed to the functor of the i-th sub-range */
  Value sums  [MAX_TASKS];
};
|
||||
|
||||
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_prefix_sum( ParallelPrefixSumState<Value>& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction)
|
||||
{
|
||||
/* calculate number of tasks to use */
|
||||
const size_t numThreads = TaskScheduler::threadCount();
|
||||
const size_t numBlocks = (last-first+minStepSize-1)/minStepSize;
|
||||
const size_t taskCount = min(numThreads,numBlocks,size_t(ParallelPrefixSumState<Value>::MAX_TASKS));
|
||||
|
||||
/* perform parallel prefix sum */
|
||||
parallel_for(taskCount, [&](const size_t taskIndex)
|
||||
{
|
||||
const size_t i0 = first+(taskIndex+0)*(last-first)/taskCount;
|
||||
const size_t i1 = first+(taskIndex+1)*(last-first)/taskCount;
|
||||
state.counts[taskIndex] = func(range<size_t>(i0,i1),state.sums[taskIndex]);
|
||||
});
|
||||
|
||||
/* calculate prefix sum */
|
||||
Value sum=identity;
|
||||
for (size_t i=0; i<taskCount; i++)
|
||||
{
|
||||
const Value c = state.counts[i];
|
||||
state.sums[i] = sum;
|
||||
sum=reduction(sum,c);
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/*! parallel calculation of prefix sums */
|
||||
template<typename SrcArray, typename DstArray, typename Value, typename Add>
|
||||
__forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096)
|
||||
{
|
||||
/* perform single threaded prefix operation for small N */
|
||||
if (N < SINGLE_THREAD_THRESHOLD)
|
||||
{
|
||||
Value sum=identity;
|
||||
for (size_t i=0; i<N; sum=add(sum,src[i++])) dst[i] = sum;
|
||||
return sum;
|
||||
}
|
||||
|
||||
/* perform parallel prefix operation for large N */
|
||||
else
|
||||
{
|
||||
ParallelPrefixSumState<Value> state;
|
||||
|
||||
/* initial run just sets up start values for subtasks */
|
||||
parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
|
||||
|
||||
Value s = identity;
|
||||
for (size_t i=r.begin(); i<r.end(); i++) s = add(s,src[i]);
|
||||
return s;
|
||||
|
||||
}, add);
|
||||
|
||||
/* final run calculates prefix sum */
|
||||
return parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
|
||||
|
||||
Value s = identity;
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
dst[i] = add(sum,s);
|
||||
s = add(s,src[i]);
|
||||
}
|
||||
return s;
|
||||
|
||||
}, add);
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user